diff --git a/libgomp/config/accel/target-indirect.c b/libgomp/config/accel/target-indirect.c index 30e391c52a0..a35fffbb1b7 100644 --- a/libgomp/config/accel/target-indirect.c +++ b/libgomp/config/accel/target-indirect.c @@ -25,73 +25,52 @@ <http://www.gnu.org/licenses/>. */ #include <assert.h> -#include <string.h> #include "libgomp.h" -struct indirect_map_t -{ - void *host_addr; - void *target_addr; -}; +void *GOMP_INDIRECT_ADDR_MAP = NULL; -typedef struct indirect_map_t *hash_entry_type; +#define USE_HASHTAB_LOOKUP + +#ifdef USE_HASHTAB_LOOKUP + +#include <string.h> /* For memset. */ + +/* Use a hashtab to lookup the target address instead of using a linear + search. + + With newer libgomp on the host the hash is already initialized on the host + (cf. plugin/plugin-gcn.c). Thus, build_indirect_map is only used as + fallback with older libgomp. */ + +void *GOMP_INDIRECT_ADDR_HMAP = NULL; + +typedef unsigned __int128 hash_entry_type; +#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64)) +#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p)) +#define SET_INDIRECT_HOST_ADDR(p, host) ((p) = (unsigned __int128) (uintptr_t) (host)) +#define SET_INDIRECT_ADDRS(p, h, d) \ + ((p) = (((unsigned __int128) (h)) + (((unsigned __int128) (d)) << 64))) + +/* Besides the sizes, also the endianness either needs to agree or + host-device memcpy needs to take care of this. 
*/ +_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void *), + "hash_entry_type size mismatch"); static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } static inline void htab_free (void *ptr) { free (ptr); } #include "hashtab.h" static inline hashval_t htab_hash (hash_entry_type element) { - return hash_pointer (element->host_addr); + return hash_pointer (INDIRECT_HOST_ADDR (element)); } static inline bool htab_eq (hash_entry_type x, hash_entry_type y) { - return x->host_addr == y->host_addr; -} - -void **GOMP_INDIRECT_ADDR_MAP = NULL; - -/* Use a hashtab to lookup the target address instead of using a linear - search. */ -#define USE_HASHTAB_LOOKUP - -#ifdef USE_HASHTAB_LOOKUP - -static htab_t indirect_htab = NULL; - -/* Build the hashtab used for host->target address lookups. */ - -void -build_indirect_map (void) -{ - size_t num_ind_funcs = 0; - void **map_entry; - - if (!GOMP_INDIRECT_ADDR_MAP) - return; - - if (!indirect_htab) - { - /* Count the number of entries in the NULL-terminated address map. */ - for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry; - map_entry += 2, num_ind_funcs++); - - /* Build hashtab for address lookup. */ - indirect_htab = htab_create (num_ind_funcs); - map_entry = GOMP_INDIRECT_ADDR_MAP; - - for (int i = 0; i < num_ind_funcs; i++, map_entry += 2) - { - struct indirect_map_t element = { *map_entry, NULL }; - hash_entry_type *slot = htab_find_slot (&indirect_htab, &element, - INSERT); - *slot = (hash_entry_type) map_entry; - } - } + return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y); } void * @@ -101,11 +80,42 @@ GOMP_target_map_indirect_ptr (void *ptr) if (!ptr) return ptr; - assert (indirect_htab); + assert (GOMP_INDIRECT_ADDR_HMAP); - struct indirect_map_t element = { ptr, NULL }; - hash_entry_type entry = htab_find (indirect_htab, &element); - return entry ? 
entry->target_addr : ptr; + hash_entry_type element; + SET_INDIRECT_HOST_ADDR (element, ptr); + hash_entry_type entry = htab_find ((htab_t) GOMP_INDIRECT_ADDR_HMAP, element); + return entry ? INDIRECT_DEV_ADDR (entry) : ptr; +} + +/* Build the hashtab used for host->target address lookups. */ + +void +build_indirect_map (void) +{ + size_t num_ind_funcs = 0; + uint64_t *map_entry; + + if (!GOMP_INDIRECT_ADDR_MAP || GOMP_INDIRECT_ADDR_HMAP) + return; + + /* Count the number of entries in the NULL-terminated address map. */ + for (map_entry = (uint64_t *) GOMP_INDIRECT_ADDR_MAP; *map_entry; + map_entry += 2, num_ind_funcs++); + + /* Build hashtab for address lookup. */ + htab_t indirect_htab = htab_create (num_ind_funcs); + map_entry = (uint64_t *) GOMP_INDIRECT_ADDR_MAP; + for (size_t i = 0; i < num_ind_funcs; i++, map_entry += 2) + { + hash_entry_type element; + SET_INDIRECT_ADDRS (element, *map_entry, *(map_entry + 1)); + hash_entry_type *slot = htab_find_slot (&indirect_htab, element, + INSERT); + *slot = element; + } + /* Publish last; htab_find_slot may replace indirect_htab when growing. */ + GOMP_INDIRECT_ADDR_HMAP = (void *) indirect_htab; } #else diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c index 40827ce8560..df5e065be16 100644 --- a/libgomp/config/gcn/team.c +++ b/libgomp/config/gcn/team.c @@ -32,6 +32,10 @@ #define LITTLEENDIAN_CPU #include "hsa.h" +#define UNLIKELY(x) (__builtin_expect ((x), 0)) + +extern void *GOMP_INDIRECT_ADDR_MAP; + /* Defined in basic-allocator.c via config/amdgcn/allocator.c. */ void __gcn_lowlat_init (void *heap, size_t size); @@ -57,8 +61,8 @@ gomp_gcn_enter_kernel (void) int numthreads = __builtin_gcn_dim_size (1); int teamid = __builtin_gcn_dim_pos(0); - /* Initialize indirect function support. */ - if (teamid == 0) + /* Initialize indirect function support for older libgomp. */ + if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && teamid == 0)) build_indirect_map (); /* Set up the global state. 
diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index 422734459de..6a34144b1dd 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -31,6 +31,10 @@ #include #include +#define UNLIKELY(x) (__builtin_expect ((x), 0)) + +extern void *GOMP_INDIRECT_ADDR_MAP; + struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon)); int __gomp_team_num __attribute__((shared,nocommon)); @@ -71,10 +75,10 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data) nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs)); memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs)); - /* Initialize indirect function support. */ + /* Initialize indirect function support for older libgomp. */ unsigned int block_id; asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id)); - if (block_id == 0) + if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && block_id == 0)) build_indirect_map (); /* Find the low-latency heap details .... */ diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 191106b5275..f3823c0b461 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -134,7 +134,11 @@ enum gomp_interop_flag must be stringified). */ #define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs +/* GOMP_INDIRECT_ADDR_HMAP points to a hash table and is to be used by + newer libgomp, while GOMP_INDIRECT_ADDR_MAP points to a linear table + and exists for backward compatibility. */ #define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map +#define GOMP_INDIRECT_ADDR_HMAP __gomp_indirect_addr_hmap /* Miscellaneous functions. */ extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc)); diff --git a/libgomp/plugin/build-target-indirect-htab.h b/libgomp/plugin/build-target-indirect-htab.h new file mode 100644 index 00000000000..d732aca3e03 --- /dev/null +++ b/libgomp/plugin/build-target-indirect-htab.h @@ -0,0 +1,83 @@ +/* Copyright (C) 2023-2025 Free Software Foundation, Inc. + + Contributed by Siemens. 
+ + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + +/* This file is used to create a hash table on the host that is supposed + to be used on the device - that's for the 'indirect' clause feature. + + In order to have host initialization work, the pointer sizes must be + the same - and either the endianness must agree or the host-device + memcpy has to take care of it. 
*/ + +typedef unsigned __int128 hash_entry_type; +#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p)) +#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64)) +#define SET_INDIRECT_ADDRS(p, h, d) \ + ((p) = (((unsigned __int128) (h)) + (((unsigned __int128) (d)) << 64))) + +_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void *), + "hash_entry_type size mismatch"); + +static inline void *htab_alloc (size_t size) { + return GOMP_PLUGIN_malloc (size); +} + +static inline void htab_free (void *ptr) { free (ptr); } + +#include "hashtab.h" + +static inline hashval_t +htab_hash (hash_entry_type element) +{ + return hash_pointer (INDIRECT_HOST_ADDR (element)); +} + +static inline bool +htab_eq (hash_entry_type x, hash_entry_type y) +{ + return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y); +} + +static void * +create_target_indirect_map (size_t *h_size, size_t count, + uint64_t *host_addrs, uint64_t *device_addrs) +{ + (void) htab_find; /* Silence -Werror=unused-function. */ + + htab_t indirect_htab = htab_create (count); + + hash_entry_type element; + for (size_t i = 0; i < count; i++) + { + SET_INDIRECT_ADDRS (element, host_addrs[i], device_addrs[i]); + hash_entry_type *slot = htab_find_slot (&indirect_htab, element, + INSERT); + *slot = element; + } + *h_size = (sizeof (struct htab) + + htab_size (indirect_htab) * sizeof (hash_entry_type)); + return (void *) indirect_htab; +} diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 498b549886f..18f01e09002 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -51,6 +51,14 @@ #include "oacc-int.h" #include +/* Create hash-table for declare target's indirect clause on the host; + see build-target-indirect-htab.h for details. */ +#define USE_HASHTAB_LOOKUP_FOR_INDIRECT +#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT +static void* create_target_indirect_map (size_t *, size_t, + uint64_t *, uint64_t *); +#endif + /* These probably won't be in elf.h for a while. 
*/ #ifndef R_AMDGPU_NONE #define R_AMDGPU_NONE 0 @@ -3688,37 +3696,28 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, (void*) ind_funcs_table_addr, sizeof (ind_funcs_table)); - /* Build host->target address map for indirect functions. */ - uint64_t ind_fn_map[ind_func_count * 2 + 1]; - for (unsigned i = 0; i < ind_func_count; i++) - { - ind_fn_map[i * 2] = host_ind_fn_table[i]; - ind_fn_map[i * 2 + 1] = ind_funcs_table[i]; - GCN_DEBUG ("Indirect function %d: %lx->%lx\n", - i, host_ind_fn_table[i], ind_funcs_table[i]); - } - ind_fn_map[ind_func_count * 2] = 0; + /* For newer binaries, the hash table for 'indirect' is created on the + host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the + device side - and have to create the table themselves using + GOMP_INDIRECT_ADDR_MAP. */ - /* Write the map onto the target. */ - void *map_target_addr - = GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map)); - GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr); - - GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr, - (void*) ind_fn_map, - sizeof (ind_fn_map)); - - /* Write address of the map onto the target. 
*/ hsa_executable_symbol_t symbol; - + bool host_init_htab = true; + #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, - XSTRING (GOMP_INDIRECT_ADDR_MAP), + XSTRING (GOMP_INDIRECT_ADDR_HMAP), agent->id, 0, &symbol); + if (status != HSA_STATUS_SUCCESS) + #endif + { + host_init_htab = false; + status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + XSTRING (GOMP_INDIRECT_ADDR_MAP), agent->id, 0, &symbol); + } if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object", status); - uint64_t varptr; uint32_t varsize; @@ -3734,9 +3733,51 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, hsa_fatal ("Could not extract a variable size from its symbol", status); - GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n", - varptr, varsize); + GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_%sMAP at %lx with size %d\n", + host_init_htab ? "H" : "", varptr, varsize); + void *map_target_addr; + if (!host_init_htab) + { + /* Build host->target address map for indirect functions. */ + uint64_t ind_fn_map[ind_func_count * 2 + 1]; + for (unsigned i = 0; i < ind_func_count; i++) + { + ind_fn_map[i * 2] = host_ind_fn_table[i]; + ind_fn_map[i * 2 + 1] = ind_funcs_table[i]; + GCN_DEBUG ("Indirect function %d: %lx->%lx\n", + i, host_ind_fn_table[i], ind_funcs_table[i]); + } + ind_fn_map[ind_func_count * 2] = 0; + /* Write the map onto the target. */ + map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id, + sizeof (ind_fn_map)); + GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr, + (void*) ind_fn_map, sizeof (ind_fn_map)); + } + #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT + else + { + /* FIXME: Handle multi-kernel load and unload, cf. PR 114690. 
*/ + size_t host_map_size; + void *host_map + = create_target_indirect_map (&host_map_size, ind_func_count, + host_ind_fn_table, ind_funcs_table); + for (unsigned i = 0; i < ind_func_count; i++) + GCN_DEBUG ("Indirect function %d: %lx->%lx\n", + i, host_ind_fn_table[i], ind_funcs_table[i]); + /* Write the map onto the target. */ + map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id, + host_map_size); + GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr, + host_map, host_map_size); + free (host_map); /* The host-side copy is no longer needed. */ + } + #endif + + GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr); + + /* Write address of the map onto the target. */ GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr, &map_target_addr, sizeof (map_target_addr)); @@ -5247,4 +5288,8 @@ GOMP_OFFLOAD_openacc_destroy_thread_data (void *data) free (data); } +#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT + #include "build-target-indirect-htab.h" +#endif + /* }}} */ diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 0ba445eab9b..eb7b5e59d8f 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -60,6 +60,14 @@ #include #include +/* Create hash-table for declare target's indirect clause on the host; + see build-target-indirect-htab.h for details. */ +#define USE_HASHTAB_LOOKUP_FOR_INDIRECT +#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT +static void* create_target_indirect_map (size_t *, size_t, + uint64_t *, uint64_t *); +#endif + /* An arbitrary fixed limit (128MB) for the size of the OpenMP soft stacks block to cache between kernel invocations. For soft-stacks blocks bigger than this, we will free the block before attempting another GPU memory @@ -1626,39 +1634,71 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r)); - /* Build host->target address map for indirect functions. 
*/ - uint64_t ind_fn_map[ind_fn_entries * 2 + 1]; - for (unsigned k = 0; k < ind_fn_entries; k++) - { - ind_fn_map[k * 2] = host_ind_fn_table[k]; - ind_fn_map[k * 2 + 1] = ind_fn_table[k]; - GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n", - k, host_ind_fn_table[k], ind_fn_table[k]); - } - ind_fn_map[ind_fn_entries * 2] = 0; + /* For newer binaries, the hash table for 'indirect' is created on the + host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the + device side - and have to create the table themselves using + GOMP_INDIRECT_ADDR_MAP. */ - /* Write the map onto the target. */ - void *map_target_addr - = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map)); - GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr); - - GOMP_OFFLOAD_host2dev (ord, map_target_addr, - (void*) ind_fn_map, - sizeof (ind_fn_map)); - - /* Write address of the map onto the target. */ CUdeviceptr varptr; size_t varsize; + bool host_init_htab = true; + #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize, - module, XSTRING (GOMP_INDIRECT_ADDR_MAP)); + module, XSTRING (GOMP_INDIRECT_ADDR_HMAP)); + if (r != CUDA_SUCCESS) + #endif + { + host_init_htab = false; + r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize, + module, XSTRING (GOMP_INDIRECT_ADDR_MAP)); + } if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s", cuda_error (r)); - GOMP_PLUGIN_debug (0, - "Indirect map variable found at %llx with size %ld\n", + "%s-style indirect map variable found at %llx with " + "size %ld\n", host_init_htab ? "New" : "Old", varptr, varsize); + void *map_target_addr; + if (!host_init_htab) + { + /* Build host->target address map for indirect functions. 
*/ + uint64_t ind_fn_map[ind_fn_entries * 2 + 1]; + for (unsigned k = 0; k < ind_fn_entries; k++) + { + ind_fn_map[k * 2] = host_ind_fn_table[k]; + ind_fn_map[k * 2 + 1] = ind_fn_table[k]; + GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n", + k, host_ind_fn_table[k], ind_fn_table[k]); + } + ind_fn_map[ind_fn_entries * 2] = 0; + /* Write the map onto the target. */ + map_target_addr = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map)); + GOMP_OFFLOAD_host2dev (ord, map_target_addr, + (void *) ind_fn_map, sizeof (ind_fn_map)); + } + #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT + else + { + /* FIXME: Handle multi-kernel load and unload, cf. PR 114690. */ + size_t host_map_size; + void *host_map + = create_target_indirect_map (&host_map_size, ind_fn_entries, + host_ind_fn_table, ind_fn_table); + for (unsigned k = 0; k < ind_fn_entries; k++) + GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n", + k, host_ind_fn_table[k], ind_fn_table[k]); + /* Write the map onto the target. */ + map_target_addr = GOMP_OFFLOAD_alloc (ord, host_map_size); + GOMP_OFFLOAD_host2dev (ord, map_target_addr, host_map, host_map_size); + free (host_map); /* The host-side copy is no longer needed. */ + } + #endif + + GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr); + + /* Write address of the map onto the target. */ GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr, sizeof (map_target_addr)); } @@ -2898,3 +2938,7 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args) } /* TODO: Implement GOMP_OFFLOAD_async_run. */ + +#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT + #include "build-target-indirect-htab.h" +#endif