diff --git a/libctru/source/internal.h b/libctru/source/internal.h index 4328e35..a3ab481 100644 --- a/libctru/source/internal.h +++ b/libctru/source/internal.h @@ -8,6 +8,12 @@ #define THREADVARS_MAGIC 0x21545624 // !TV$ #define FS_OVERRIDE_MAGIC 0x21465324 // !FS$ +extern const size_t __tdata_align; +extern const u8 __tdata_lma[]; +extern const u8 __tdata_lma_end[]; +extern u8 __tls_start[]; +extern u8 __tls_end[]; + // Keep this structure under 0x80 bytes typedef struct { @@ -48,3 +54,11 @@ static inline ThreadVars* getThreadVars(void) } void initThreadVars(struct Thread_tag *thread); + +static inline size_t getThreadLocalStartOffset(size_t tls_tp) { + size_t align = 8; + if (__tdata_align > align) align = __tdata_align; + // ARM ELF TLS ABI mandates an 8-byte header, so we include an extra 8 bytes + // then add padding to align the .tdata properly + return (8 + (size_t)tls_tp + (__tdata_align - 1)) & ~(__tdata_align - 1); +} diff --git a/libctru/source/system/syscalls.c b/libctru/source/system/syscalls.c index f990d62..393de1b 100644 --- a/libctru/source/system/syscalls.c +++ b/libctru/source/system/syscalls.c @@ -16,10 +16,6 @@ void __ctru_exit(int rc); -extern const u8 __tdata_lma[]; -extern const u8 __tdata_lma_end[]; -extern u8 __tls_start[]; - struct _reent* __SYSCALL(getreent)() { ThreadVars* tv = getThreadVars(); @@ -43,7 +39,7 @@ int __SYSCALL(clock_gettime)(clockid_t clock_id, struct timespec *tp) { tp->tv_nsec = (ms_since_epoch % 1000) * 1000000; } } - else if (clock_id == CLOCK_MONOTONIC) + else if (clock_id == CLOCK_MONOTONIC) { if (tp != NULL) { @@ -66,7 +62,7 @@ int __SYSCALL(clock_gettime)(clockid_t clock_id, struct timespec *tp) { int __SYSCALL(clock_getres)(clockid_t clock_id, struct timespec *res) { if (clock_id == CLOCK_REALTIME) { - if (res != NULL) + if (res != NULL) { res->tv_sec = 0; res->tv_nsec = 1000000; @@ -168,7 +164,7 @@ void initThreadVars(struct Thread_tag *thread) tv->thread_ptr = thread; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Warray-bounds" - tv->tls_tp = (thread != NULL ? (u8*)thread->stacktop : __tls_start) - 8; // Arm ELF TLS ABI mandates an 8-byte header + tv->tls_tp = (thread != NULL ? (u8*)thread->stacktop : __tls_start); #pragma GCC diagnostic pop tv->srv_blocking_policy = false; @@ -185,6 +181,7 @@ void __system_initSyscalls(void) // Initialize thread vars for the main thread initThreadVars(NULL); u32 tls_size = __tdata_lma_end - __tdata_lma; + size_t tdata_start = getThreadLocalStartOffset((size_t)__tls_start); if (tls_size) - memcpy(__tls_start, __tdata_lma, tls_size); + memcpy((void*)tdata_start, __tdata_lma, tls_size); } diff --git a/libctru/source/thread.c b/libctru/source/thread.c index 688900f..b71b549 100644 --- a/libctru/source/thread.c +++ b/libctru/source/thread.c @@ -3,11 +3,6 @@ #include #include -extern const u8 __tdata_lma[]; -extern const u8 __tdata_lma_end[]; -extern u8 __tls_start[]; -extern u8 __tls_end[]; - static void __panic(void) { svcBreak(USERBREAK_PANIC); @@ -24,8 +19,8 @@ static void _thread_begin(void* arg) Thread threadCreate(ThreadFunc entrypoint, void* arg, size_t stack_size, int prio, int core_id, bool detached) { - size_t stackoffset = (sizeof(struct Thread_tag)+7)&~7; - size_t allocsize = stackoffset + ((stack_size+7)&~7); + size_t stackoffset = (sizeof(struct Thread_tag) + 7) & ~7; + size_t allocsize = getThreadLocalStartOffset(stackoffset + stack_size); size_t tlssize = __tls_end-__tls_start; size_t tlsloadsize = __tdata_lma_end-__tdata_lma; size_t tbsssize = tlssize-tlsloadsize; @@ -35,19 +30,20 @@ Thread threadCreate(ThreadFunc entrypoint, void* arg, size_t stack_size, int pri if ((allocsize-stackoffset) < stack_size) return NULL; if ((allocsize+tlssize) < allocsize) return NULL; - Thread t = (Thread)memalign(8,allocsize+tlssize); + Thread t = (Thread)memalign(__tdata_align, allocsize + tlssize); if (!t) return NULL; t->ep = entrypoint; t->arg = arg; t->detached = detached; t->finished = false; - t->stacktop = (u8*)t + allocsize; + t->stacktop = (u8*)t + stackoffset + stack_size; + void* tdata_start = (void*)getThreadLocalStartOffset((size_t)t->stacktop); if (tlsloadsize) - memcpy(t->stacktop, __tdata_lma, tlsloadsize); + memcpy(tdata_start, __tdata_lma, tlsloadsize); if (tbsssize) - memset((u8*)t->stacktop+tlsloadsize, 0, tbsssize); + memset(tdata_start + tlsloadsize, 0, tbsssize); // Set up child thread's reent struct, inheriting standard file handles _REENT_INIT_PTR(&t->reent);