diff --git a/libgomp/config/accel/target-indirect.c b/libgomp/config/accel/target-indirect.c
index 30e391c52a0..a35fffbb1b7 100644
--- a/libgomp/config/accel/target-indirect.c
+++ b/libgomp/config/accel/target-indirect.c
@@ -25,73 +25,52 @@
. */
#include
-#include
#include "libgomp.h"
-struct indirect_map_t
-{
- void *host_addr;
- void *target_addr;
-};
+void *GOMP_INDIRECT_ADDR_MAP = NULL;
-typedef struct indirect_map_t *hash_entry_type;
+#define USE_HASHTAB_LOOKUP
+
+#ifdef USE_HASHTAB_LOOKUP
+
+#include /* For memset. */
+
+/* Use a hashtab to look up the target address instead of using a linear
+   search.
+
+   With newer libgomp on the host, the hash table is already initialized on
+   the host (e.g. plugin/plugin-gcn.c).  Thus, build_indirect_map is only
+   used as a fallback with an older libgomp on the host.  */
+
+void *GOMP_INDIRECT_ADDR_HMAP = NULL;
+
+typedef unsigned __int128 hash_entry_type; /* Low: host, high: device addr.  */
+#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64))
+#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p))
+#define SET_INDIRECT_HOST_ADDR(p, host) ((p) = ((unsigned __int128) (uintptr_t) (host)))
+#define SET_INDIRECT_ADDRS(p, h, d) \
+  ((p) = (((unsigned __int128) (h)) + (((unsigned __int128) (d)) << 64)))
+
+/* Besides the sizes, also the endianness either needs to agree or
+   host-device memcpy needs to take care of this.  */
+_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void *),
+                "hash_entry_type size mismatch");
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
-static inline void htab_free (void *ptr) { free (ptr); }
+static inline void htab_free (void *ptr) { __builtin_unreachable (); }
#include "hashtab.h"
static inline hashval_t
htab_hash (hash_entry_type element)
{
- return hash_pointer (element->host_addr);
+ return hash_pointer (INDIRECT_HOST_ADDR (element));
}
static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
- return x->host_addr == y->host_addr;
-}
-
-void **GOMP_INDIRECT_ADDR_MAP = NULL;
-
-/* Use a hashtab to lookup the target address instead of using a linear
- search. */
-#define USE_HASHTAB_LOOKUP
-
-#ifdef USE_HASHTAB_LOOKUP
-
-static htab_t indirect_htab = NULL;
-
-/* Build the hashtab used for host->target address lookups. */
-
-void
-build_indirect_map (void)
-{
- size_t num_ind_funcs = 0;
- void **map_entry;
-
- if (!GOMP_INDIRECT_ADDR_MAP)
- return;
-
- if (!indirect_htab)
- {
- /* Count the number of entries in the NULL-terminated address map. */
- for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
- map_entry += 2, num_ind_funcs++);
-
- /* Build hashtab for address lookup. */
- indirect_htab = htab_create (num_ind_funcs);
- map_entry = GOMP_INDIRECT_ADDR_MAP;
-
- for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
- {
- struct indirect_map_t element = { *map_entry, NULL };
- hash_entry_type *slot = htab_find_slot (&indirect_htab, &element,
- INSERT);
- *slot = (hash_entry_type) map_entry;
- }
- }
+ return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y);
}
void *
@@ -101,11 +80,42 @@ GOMP_target_map_indirect_ptr (void *ptr)
if (!ptr)
return ptr;
- assert (indirect_htab);
+ assert (GOMP_INDIRECT_ADDR_HMAP);
- struct indirect_map_t element = { ptr, NULL };
- hash_entry_type entry = htab_find (indirect_htab, &element);
- return entry ? entry->target_addr : ptr;
+ hash_entry_type element;
+ SET_INDIRECT_HOST_ADDR (element, ptr);
+ hash_entry_type entry = htab_find ((htab_t) GOMP_INDIRECT_ADDR_HMAP, element);
+ return entry ? INDIRECT_DEV_ADDR (entry) : ptr;
+}
+
+/* Fallback for hosts that did not set up GOMP_INDIRECT_ADDR_HMAP: build the
+   host->target lookup hashtab from the linear GOMP_INDIRECT_ADDR_MAP.  */
+
+void
+build_indirect_map (void)
+{
+  size_t num_ind_funcs = 0;
+  uint64_t *map_entry;
+  /* Nothing to convert, or the hashtab already exists.  */
+  if (!GOMP_INDIRECT_ADDR_MAP || GOMP_INDIRECT_ADDR_HMAP)
+    return;
+
+  /* Count the {host, target} pairs; a zero host address ends the map.  */
+  for (map_entry = (uint64_t *) GOMP_INDIRECT_ADDR_MAP; *map_entry;
+       map_entry += 2, num_ind_funcs++);
+
+  htab_t indirect_htab = htab_create (num_ind_funcs);
+  map_entry = (uint64_t *) GOMP_INDIRECT_ADDR_MAP;
+  for (size_t i = 0; i < num_ind_funcs; i++, map_entry += 2)
+    {
+      hash_entry_type element;
+      SET_INDIRECT_ADDRS (element, *map_entry, *(map_entry + 1));
+      hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
+                                              INSERT);
+      *slot = element;
+    }
+  /* Publish last: htab_find_slot gets &indirect_htab and may update it.  */
+  GOMP_INDIRECT_ADDR_HMAP = (void *) indirect_htab;
+}
#else
diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c
index 40827ce8560..df5e065be16 100644
--- a/libgomp/config/gcn/team.c
+++ b/libgomp/config/gcn/team.c
@@ -32,6 +32,10 @@
#define LITTLEENDIAN_CPU
#include "hsa.h"
+#define UNLIKELY(x) (__builtin_expect ((x), 0))
+
+extern void *GOMP_INDIRECT_ADDR_MAP;
+
/* Defined in basic-allocator.c via config/amdgcn/allocator.c. */
void __gcn_lowlat_init (void *heap, size_t size);
@@ -57,8 +61,8 @@ gomp_gcn_enter_kernel (void)
int numthreads = __builtin_gcn_dim_size (1);
int teamid = __builtin_gcn_dim_pos(0);
- /* Initialize indirect function support. */
- if (teamid == 0)
+ /* Initialize indirect function support for older libgomp. */
+ if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && teamid == 0))
build_indirect_map ();
/* Set up the global state.
diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c
index 422734459de..6a34144b1dd 100644
--- a/libgomp/config/nvptx/team.c
+++ b/libgomp/config/nvptx/team.c
@@ -31,6 +31,10 @@
#include
#include
+#define UNLIKELY(x) (__builtin_expect ((x), 0))
+
+extern void *GOMP_INDIRECT_ADDR_MAP;
+
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
int __gomp_team_num __attribute__((shared,nocommon));
@@ -71,10 +75,10 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
- /* Initialize indirect function support. */
+ /* Initialize indirect function support for older libgomp. */
unsigned int block_id;
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
- if (block_id == 0)
+ if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && block_id == 0))
build_indirect_map ();
/* Find the low-latency heap details .... */
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 191106b5275..f3823c0b461 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -134,7 +134,11 @@ enum gomp_interop_flag
must be stringified). */
#define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs
+/* GOMP_INDIRECT_ADDR_HMAP points to a hash table and is to be used by
+ newer libgomp, while GOMP_INDIRECT_ADDR_MAP points to a linear table
+ and exists for backward compatibility. */
#define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map
+#define GOMP_INDIRECT_ADDR_HMAP __gomp_indirect_addr_hmap
/* Miscellaneous functions. */
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
diff --git a/libgomp/plugin/build-target-indirect-htab.h b/libgomp/plugin/build-target-indirect-htab.h
new file mode 100644
index 00000000000..d732aca3e03
--- /dev/null
+++ b/libgomp/plugin/build-target-indirect-htab.h
@@ -0,0 +1,83 @@
+/* Copyright (C) 2023-2025 Free Software Foundation, Inc.
+
+ Contributed by Siemens.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ . */
+
+
+/* This file is used to create a hash table on the host that is supposed
+   to get used on the device - that's for the 'indirect' clause feature.
+
+   In order to have host initialization work, the pointer sizes must be
+   the same - and either the endianness must agree or the host-device
+   memcopy has to take care of it.  */
+
+typedef unsigned __int128 hash_entry_type; /* Low: host, high: device addr.  */
+#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p))
+#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64))
+#define SET_INDIRECT_ADDRS(p, h, d) \
+  ((p) = (((unsigned __int128) (h)) + (((unsigned __int128) (d)) << 64)))
+
+_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void *),
+                "hash_entry_type size mismatch");
+
+static inline void *htab_alloc (size_t size) { /* Allocation hook defined ahead of hashtab.h.  */
+ return malloc (size); /* NOTE(review): result is returned unchecked.  */
+}
+
+static inline void htab_free (void *ptr) { free (ptr); } /* Counterpart of htab_alloc.  */
+
+#include "hashtab.h"
+
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{ /* Hash only the host-address part of ELEMENT.  */
+ return hash_pointer (INDIRECT_HOST_ADDR (element));
+}
+
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{ /* Entries are keyed by host address: X and Y match iff those agree.  */
+  return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y);
+}
+
+/* Build the host->device hash table; *H_SIZE receives its size in bytes.  */
+static void *
+create_target_indirect_map (size_t *h_size, size_t count,
+                            uint64_t *host_addrs, uint64_t *device_addrs)
+{
+  (void) htab_find; /* Silence -Werror=unused-function.  */
+  htab_t indirect_htab = htab_create (count);
+  for (size_t i = 0; i < count; i++)
+    {
+      hash_entry_type element;
+      SET_INDIRECT_ADDRS (element, host_addrs[i], device_addrs[i]);
+      hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
+                                              INSERT);
+      *slot = element;
+    }
+  /* Table header plus the entry array, i.e. what the caller has to copy.  */
+  *h_size = (sizeof (struct htab)
+             + htab_size (indirect_htab) * sizeof (hash_entry_type));
+  return (void *) indirect_htab;
+}
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 498b549886f..18f01e09002 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -51,6 +51,14 @@
#include "oacc-int.h"
#include
+/* Create hash-table for declare target's indirect clause on the host;
+ see build-target-indirect-htab.h for details. */
+#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+static void* create_target_indirect_map (size_t *, size_t,
+ uint64_t *, uint64_t *);
+#endif
+
/* These probably won't be in elf.h for a while. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
@@ -3688,37 +3696,28 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
(void*) ind_funcs_table_addr,
sizeof (ind_funcs_table));
- /* Build host->target address map for indirect functions. */
- uint64_t ind_fn_map[ind_func_count * 2 + 1];
- for (unsigned i = 0; i < ind_func_count; i++)
- {
- ind_fn_map[i * 2] = host_ind_fn_table[i];
- ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
- GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
- i, host_ind_fn_table[i], ind_funcs_table[i]);
- }
- ind_fn_map[ind_func_count * 2] = 0;
+ /* For newer binaries, the hash table for 'indirect' is created on the
+ host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
+ device side - and have to create the table themselves using
+ GOMP_INDIRECT_ADDR_MAP. */
- /* Write the map onto the target. */
- void *map_target_addr
- = GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map));
- GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
-
- GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
- (void*) ind_fn_map,
- sizeof (ind_fn_map));
-
- /* Write address of the map onto the target. */
hsa_executable_symbol_t symbol;
-
+ bool host_init_htab = true;
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
status
= hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
- XSTRING (GOMP_INDIRECT_ADDR_MAP),
+ XSTRING (GOMP_INDIRECT_ADDR_HMAP),
agent->id, 0, &symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ #endif
+ {
+ host_init_htab = false;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ XSTRING (GOMP_INDIRECT_ADDR_MAP), agent->id, 0, &symbol);
+ }
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object",
status);
-
uint64_t varptr;
uint32_t varsize;
@@ -3734,9 +3733,51 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
hsa_fatal ("Could not extract a variable size from its symbol",
status);
- GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n",
- varptr, varsize);
+ GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_%sMAP at %lx with size %d\n",
+ host_init_htab ? "H" : "", varptr, varsize);
+ void *map_target_addr;
+ if (!host_init_htab)
+ {
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_func_count * 2 + 1];
+ for (unsigned i = 0; i < ind_func_count; i++)
+ {
+ ind_fn_map[i * 2] = host_ind_fn_table[i];
+ ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
+ GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+ i, host_ind_fn_table[i], ind_funcs_table[i]);
+ }
+ ind_fn_map[ind_func_count * 2] = 0;
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
+ sizeof (ind_fn_map));
+ GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+ (void*) ind_fn_map, sizeof (ind_fn_map));
+ }
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ else
+ {
+ /* FIXME: Handle multi-kernel load and unload, cf. PR 114690. */
+ size_t host_map_size;
+ void *host_map;
+ host_map = create_target_indirect_map (&host_map_size, ind_func_count,
+ host_ind_fn_table,
+ ind_funcs_table);
+ for (unsigned i = 0; i < ind_func_count; i++)
+ GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+ i, host_ind_fn_table[i], ind_funcs_table[i]);
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
+ host_map_size);
+ GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+ host_map, host_map_size);
+ }
+ #endif
+
+ GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
+
+ /* Write address of the map onto the target. */
GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr,
&map_target_addr,
sizeof (map_target_addr));
@@ -5247,4 +5288,8 @@ GOMP_OFFLOAD_openacc_destroy_thread_data (void *data)
free (data);
}
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ #include "build-target-indirect-htab.h"
+#endif
+
/* }}} */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 0ba445eab9b..eb7b5e59d8f 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -60,6 +60,14 @@
#include
#include
+/* Create hash-table for declare target's indirect clause on the host;
+ see build-target-indirect-htab.h for details. */
+#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+static void* create_target_indirect_map (size_t *, size_t,
+ uint64_t *, uint64_t *);
+#endif
+
/* An arbitrary fixed limit (128MB) for the size of the OpenMP soft stacks
block to cache between kernel invocations. For soft-stacks blocks bigger
than this, we will free the block before attempting another GPU memory
@@ -1626,39 +1634,71 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r));
- /* Build host->target address map for indirect functions. */
- uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
- for (unsigned k = 0; k < ind_fn_entries; k++)
- {
- ind_fn_map[k * 2] = host_ind_fn_table[k];
- ind_fn_map[k * 2 + 1] = ind_fn_table[k];
- GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
- k, host_ind_fn_table[k], ind_fn_table[k]);
- }
- ind_fn_map[ind_fn_entries * 2] = 0;
+ /* For newer binaries, the hash table for 'indirect' is created on the
+ host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
+ device side - and have to create the table themselves using
+ GOMP_INDIRECT_ADDR_MAP. */
- /* Write the map onto the target. */
- void *map_target_addr
- = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
- GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
-
- GOMP_OFFLOAD_host2dev (ord, map_target_addr,
- (void*) ind_fn_map,
- sizeof (ind_fn_map));
-
- /* Write address of the map onto the target. */
CUdeviceptr varptr;
size_t varsize;
+ bool host_init_htab = true;
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
- module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+ module, XSTRING (GOMP_INDIRECT_ADDR_HMAP));
+ if (r != CUDA_SUCCESS)
+ #endif
+ {
+ host_init_htab = false;
+ r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
+ module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+ }
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s",
cuda_error (r));
-
GOMP_PLUGIN_debug (0,
- "Indirect map variable found at %llx with size %ld\n",
+ "%s-style indirect map variable found at %llx with "
+ "size %ld\n", host_init_htab ? "New" : "Old",
varptr, varsize);
+ void *map_target_addr;
+ if (!host_init_htab)
+ {
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
+ for (unsigned k = 0; k < ind_fn_entries; k++)
+ {
+ ind_fn_map[k * 2] = host_ind_fn_table[k];
+ ind_fn_map[k * 2 + 1] = ind_fn_table[k];
+ GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+ k, host_ind_fn_table[k], ind_fn_table[k]);
+ }
+ ind_fn_map[ind_fn_entries * 2] = 0;
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
+ GOMP_OFFLOAD_host2dev (ord, map_target_addr,
+ (void *) ind_fn_map, sizeof (ind_fn_map));
+ }
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ else
+ {
+ /* FIXME: Handle multi-kernel load and unload, cf. PR 114690. */
+ size_t host_map_size;
+ void *host_map;
+ host_map = create_target_indirect_map (&host_map_size, ind_fn_entries,
+ host_ind_fn_table,
+ ind_fn_table);
+ for (unsigned k = 0; k < ind_fn_entries; k++)
+ GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+ k, host_ind_fn_table[k], ind_fn_table[k]);
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (ord, host_map_size);
+ GOMP_OFFLOAD_host2dev (ord, map_target_addr, host_map, host_map_size);
+ }
+ #endif
+
+ GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
+
+ /* Write address of the map onto the target. */
GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr,
sizeof (map_target_addr));
}
@@ -2898,3 +2938,7 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
}
/* TODO: Implement GOMP_OFFLOAD_async_run. */
+
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ #include "build-target-indirect-htab.h"
+#endif