From b20ac22ab7e8eaa3493688f99f61c15fb2b2acab Mon Sep 17 00:00:00 2001 From: Ian Chamberlain Date: Wed, 14 Sep 2022 13:26:23 -0400 Subject: [PATCH] Use __tdata_align to align thread local storage (#504) --- libctru/source/internal.h | 10 ++++++++++ libctru/source/system/syscalls.c | 11 ++++------- libctru/source/thread.c | 33 +++++++++++++++++++------------- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/libctru/source/internal.h b/libctru/source/internal.h index 4328e35..0d2d477 100644 --- a/libctru/source/internal.h +++ b/libctru/source/internal.h @@ -8,6 +8,12 @@ #define THREADVARS_MAGIC 0x21545624 // !TV$ #define FS_OVERRIDE_MAGIC 0x21465324 // !FS$ +extern const size_t __tdata_align; +extern const u8 __tdata_lma[]; +extern const u8 __tdata_lma_end[]; +extern u8 __tls_start[]; +extern u8 __tls_end[]; + // Keep this structure under 0x80 bytes typedef struct { @@ -48,3 +54,7 @@ static inline ThreadVars* getThreadVars(void) } void initThreadVars(struct Thread_tag *thread); + +static inline size_t alignTo(const size_t base, const size_t align) { + return (base + (align - 1)) & ~(align - 1); +} diff --git a/libctru/source/system/syscalls.c b/libctru/source/system/syscalls.c index f990d62..8921bb3 100644 --- a/libctru/source/system/syscalls.c +++ b/libctru/source/system/syscalls.c @@ -16,10 +16,6 @@ void __ctru_exit(int rc); -extern const u8 __tdata_lma[]; -extern const u8 __tdata_lma_end[]; -extern u8 __tls_start[]; - struct _reent* __SYSCALL(getreent)() { ThreadVars* tv = getThreadVars(); @@ -43,7 +39,7 @@ int __SYSCALL(clock_gettime)(clockid_t clock_id, struct timespec *tp) { tp->tv_nsec = (ms_since_epoch % 1000) * 1000000; } } - else if (clock_id == CLOCK_MONOTONIC) + else if (clock_id == CLOCK_MONOTONIC) { if (tp != NULL) { @@ -66,7 +62,7 @@ int __SYSCALL(clock_gettime)(clockid_t clock_id, struct timespec *tp) { int __SYSCALL(clock_getres)(clockid_t clock_id, struct timespec *res) { if (clock_id == CLOCK_REALTIME) { - if (res != NULL) + if (res != NULL) { res->tv_sec = 0; res->tv_nsec = 1000000; @@ -185,6 +181,7 @@ void __system_initSyscalls(void) // Initialize thread vars for the main thread initThreadVars(NULL); u32 tls_size = __tdata_lma_end - __tdata_lma; + size_t tdata_start = alignTo((size_t)__tls_start, __tdata_align); if (tls_size) - memcpy(__tls_start, __tdata_lma, tls_size); + memcpy((void*)tdata_start, __tdata_lma, tls_size); } diff --git a/libctru/source/thread.c b/libctru/source/thread.c index 688900f..e052fd5 100644 --- a/libctru/source/thread.c +++ b/libctru/source/thread.c @@ -3,11 +3,6 @@ #include #include -extern const u8 __tdata_lma[]; -extern const u8 __tdata_lma_end[]; -extern u8 __tls_start[]; -extern u8 __tls_end[]; - static void __panic(void) { svcBreak(USERBREAK_PANIC); @@ -24,18 +19,26 @@ static void _thread_begin(void* arg) Thread threadCreate(ThreadFunc entrypoint, void* arg, size_t stack_size, int prio, int core_id, bool detached) { - size_t stackoffset = (sizeof(struct Thread_tag)+7)&~7; - size_t allocsize = stackoffset + ((stack_size+7)&~7); + // The stack must be 8-aligned at minimum. + size_t align = __tdata_align > 8 ? __tdata_align : 8; + + size_t stackoffset = alignTo(sizeof(struct Thread_tag), align); + size_t allocsize = alignTo(stackoffset + stack_size, align); + size_t tlssize = __tls_end-__tls_start; size_t tlsloadsize = __tdata_lma_end-__tdata_lma; - size_t tbsssize = tlssize-tlsloadsize; + size_t tbsssize = tlssize - tlsloadsize; + + // memalign seems to have an implicit requirement that (size % align) == 0. + // Without this, it seems to return NULL whenever (align > 8). + size_t size = alignTo(allocsize + tlssize, align); // Guard against overflow if (allocsize < stackoffset) return NULL; - if ((allocsize-stackoffset) < stack_size) return NULL; - if ((allocsize+tlssize) < allocsize) return NULL; + if ((allocsize - stackoffset) < stack_size) return NULL; + if (size < allocsize) return NULL; - Thread t = (Thread)memalign(8,allocsize+tlssize); + Thread t = (Thread)memalign(align, size); if (!t) return NULL; t->ep = entrypoint; @@ -44,10 +47,14 @@ Thread threadCreate(ThreadFunc entrypoint, void* arg, size_t stack_size, int pri t->finished = false; t->stacktop = (u8*)t + allocsize; + // ThreadVars.tls_tp must be aligned correctly, so we bump tdata_start to + // ensure that after subtracting 8 bytes for the TLS header, it will be aligned. + size_t tdata_start = 8 + alignTo((size_t)t->stacktop - 8, align); + if (tlsloadsize) - memcpy(t->stacktop, __tdata_lma, tlsloadsize); + memcpy((void*)tdata_start, __tdata_lma, tlsloadsize); if (tbsssize) - memset((u8*)t->stacktop+tlsloadsize, 0, tbsssize); + memset((void*)tdata_start + tlsloadsize, 0, tbsssize); // Set up child thread's reent struct, inheriting standard file handles _REENT_INIT_PTR(&t->reent);