Don't allocate for 8-byte header, just fake it

Since only ARM_TLS_LE32 is used in practice with this library, the
8-byte TLS header goes unused, so we can just fake it by subtracting 8
from the data offset and using that as tls_tp instead.
This commit is contained in:
Ian Chamberlain 2022-08-20 13:12:34 -04:00
parent 6b2d2fb1ab
commit cafdcd39a9
No known key found for this signature in database
GPG Key ID: AE5484D09405AA60
3 changed files with 25 additions and 18 deletions

View File

@ -55,10 +55,6 @@ static inline ThreadVars* getThreadVars(void)
void initThreadVars(struct Thread_tag *thread);
static inline size_t getThreadLocalStartOffset(size_t tls_tp) {
size_t align = 8;
if (__tdata_align > align) align = __tdata_align;
// ARM ELF TLS ABI mandates an 8-byte header, so we include an extra 8 bytes
// then add padding to align the .tdata properly
return (8 + (size_t)tls_tp + (__tdata_align - 1)) & ~(__tdata_align - 1);
// Rounds `base` up to the nearest multiple of `align` and returns the result;
// a `base` that is already a multiple is returned unchanged.
// The bit-mask arithmetic is only correct when `align` is a nonzero power of
// two, and the addition can wrap if `base` is within `align - 1` of SIZE_MAX.
static inline size_t alignTo(const size_t base, const size_t align) {
return (base + (align - 1)) & ~(align - 1);
}

View File

@ -164,7 +164,7 @@ void initThreadVars(struct Thread_tag *thread)
tv->thread_ptr = thread;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
tv->tls_tp = (thread != NULL ? (u8*)thread->stacktop : __tls_start);
tv->tls_tp = (thread != NULL ? (u8*)thread->stacktop : __tls_start) - 8; // Arm ELF TLS ABI mandates an 8-byte header
#pragma GCC diagnostic pop
tv->srv_blocking_policy = false;
@ -181,7 +181,7 @@ void __system_initSyscalls(void)
// Initialize thread vars for the main thread
initThreadVars(NULL);
u32 tls_size = __tdata_lma_end - __tdata_lma;
size_t tdata_start = getThreadLocalStartOffset((size_t)__tls_start);
size_t tdata_start = alignTo((size_t)__tls_start, __tdata_align);
if (tls_size)
memcpy((void*)tdata_start, __tdata_lma, tls_size);
}

View File

@ -19,31 +19,42 @@ static void _thread_begin(void* arg)
Thread threadCreate(ThreadFunc entrypoint, void* arg, size_t stack_size, int prio, int core_id, bool detached)
{
size_t stackoffset = (sizeof(struct Thread_tag) + 7) & ~7;
size_t allocsize = getThreadLocalStartOffset(stackoffset + stack_size);
// The stack must be 8-aligned at minimum.
size_t align = __tdata_align > 8 ? __tdata_align : 8;
size_t stackoffset = alignTo(sizeof(struct Thread_tag), align);
size_t allocsize = alignTo(stackoffset + stack_size, align);
size_t tlssize = __tls_end-__tls_start;
size_t tlsloadsize = __tdata_lma_end-__tdata_lma;
size_t tbsssize = tlssize-tlsloadsize;
size_t tbsssize = tlssize - tlsloadsize;
// memalign seems to have an implicit requirement that (size % align) == 0.
// Without this, it seems to return NULL whenever (align > 8).
size_t size = alignTo(allocsize + tlssize, align);
// Guard against overflow
if (allocsize < stackoffset) return NULL;
if ((allocsize-stackoffset) < stack_size) return NULL;
if ((allocsize+tlssize) < allocsize) return NULL;
if ((allocsize - stackoffset) < stack_size) return NULL;
if (size < allocsize) return NULL;
Thread t = (Thread)memalign(__tdata_align, allocsize + tlssize);
Thread t = (Thread)memalign(align, size);
if (!t) return NULL;
t->ep = entrypoint;
t->arg = arg;
t->detached = detached;
t->finished = false;
t->stacktop = (u8*)t + stackoffset + stack_size;
t->stacktop = (u8*)t + allocsize;
// ThreadVars.tls_tp must be aligned correctly, so we bump tdata_start to
// ensure that after subtracting 8 bytes for the TLS header, it will be aligned.
size_t tdata_start = 8 + alignTo((size_t)t->stacktop - 8, align);
void* tdata_start = (void*)getThreadLocalStartOffset((size_t)t->stacktop);
if (tlsloadsize)
memcpy(tdata_start, __tdata_lma, tlsloadsize);
memcpy((void*)tdata_start, __tdata_lma, tlsloadsize);
if (tbsssize)
memset(tdata_start + tlsloadsize, 0, tbsssize);
memset((void*)tdata_start + tlsloadsize, 0, tbsssize);
// Set up child thread's reent struct, inheriting standard file handles
_REENT_INIT_PTR(&t->reent);