From 5737298f4f5e5471667b05e207b22c9c91b94ca0 Mon Sep 17 00:00:00 2001 From: Kwok Cheung Yeung Date: Mon, 29 Jan 2024 17:40:04 +0000 Subject: [PATCH 1/2] openmp: Change to using a hashtab to lookup offload target addresses for indirect function calls A splay-tree was previously used to lookup equivalent target addresses for a given host address on offload targets. However, as splay-trees can modify their structure on lookup, they are not suitable for concurrent access from separate teams/threads without some form of locking. This patch changes the lookup data structure to a hashtab instead, which does not have these issues. The call to build_indirect_map to initialize the data structure is now called from just the first thread of the first team to avoid redundant calls to this function. 2024-01-29 Kwok Cheung Yeung libgomp/ * config/accel/target-indirect.c: Include string.h and hashtab.h. Remove include of splay-tree.h. Update comments. (splay_tree_prefix, splay_tree_c): Delete. (struct indirect_map_t): New. (hash_entry_type, htab_alloc, htab_free, htab_hash, htab_eq): New. (GOMP_INDIRECT_ADD_MAP): Remove volatile qualifier. (USE_SPLAY_TREE_LOOKUP): Rename to... (USE_HASHTAB_LOOKUP): ..this. (indirect_map, indirect_array): Delete. (indirect_htab): New. (build_indirect_map): Remove locking. Build indirect map using hashtab. (GOMP_target_map_indirect_ptr): Use indirect_htab to lookup target address. (GOMP_target_map_indirect_ptr): Remove volatile qualifier. * config/gcn/team.c (gomp_gcn_enter_kernel): Call build_indirect_map from first thread of first team only. * config/nvptx/team.c (gomp_nvptx_main): Likewise. * testsuite/libgomp.c-c++-common/declare-target-indirect-2.c (main): Add missing break statements. --- libgomp/config/accel/target-indirect.c | 83 ++++++++++--------- libgomp/config/gcn/team.c | 7 +- libgomp/config/nvptx/team.c | 9 +- .../declare-target-indirect-2.c | 14 ++-- 4 files changed, 63 insertions(+), 50 deletions(-) diff --git a/libgomp/config/accel/target-indirect.c b/libgomp/config/accel/target-indirect.c index c60fd547cb6..cfef1ddbc49 100644 --- a/libgomp/config/accel/target-indirect.c +++ b/libgomp/config/accel/target-indirect.c @@ -25,60 +25,73 @@ . */ #include +#include #include "libgomp.h" -#define splay_tree_prefix indirect -#define splay_tree_c -#include "splay-tree.h" +struct indirect_map_t +{ + void *host_addr; + void *target_addr; +}; + +typedef struct indirect_map_t *hash_entry_type; + +static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } +static inline void htab_free (void *ptr) { free (ptr); } + +#include "hashtab.h" + +static inline hashval_t +htab_hash (hash_entry_type element) +{ + return hash_pointer (element->host_addr); +} -volatile void **GOMP_INDIRECT_ADDR_MAP = NULL; +static inline bool +htab_eq (hash_entry_type x, hash_entry_type y) +{ + return x->host_addr == y->host_addr; +} -/* Use a splay tree to lookup the target address instead of using a - linear search. */ -#define USE_SPLAY_TREE_LOOKUP +void **GOMP_INDIRECT_ADDR_MAP = NULL; -#ifdef USE_SPLAY_TREE_LOOKUP +/* Use a hashtab to lookup the target address instead of using a linear + search. */ +#define USE_HASHTAB_LOOKUP -static struct indirect_splay_tree_s indirect_map; -static indirect_splay_tree_node indirect_array = NULL; +#ifdef USE_HASHTAB_LOOKUP -/* Build the splay tree used for host->target address lookups. */ +static htab_t indirect_htab = NULL; + +/* Build the hashtab used for host->target address lookups. */ void build_indirect_map (void) { size_t num_ind_funcs = 0; - volatile void **map_entry; - static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */ + void **map_entry; if (!GOMP_INDIRECT_ADDR_MAP) return; - gomp_mutex_lock (&lock); - - if (!indirect_array) + if (!indirect_htab) { /* Count the number of entries in the NULL-terminated address map. */ for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry; map_entry += 2, num_ind_funcs++); - /* Build splay tree for address lookup. */ - indirect_array = gomp_malloc (num_ind_funcs * sizeof (*indirect_array)); - indirect_splay_tree_node array = indirect_array; + /* Build hashtab for address lookup. */ + indirect_htab = htab_create (num_ind_funcs); map_entry = GOMP_INDIRECT_ADDR_MAP; - for (int i = 0; i < num_ind_funcs; i++, array++) + for (int i = 0; i < num_ind_funcs; i++, map_entry += 2) { - indirect_splay_tree_key k = &array->key; - k->host_addr = (uint64_t) *map_entry++; - k->target_addr = (uint64_t) *map_entry++; - array->left = NULL; - array->right = NULL; - indirect_splay_tree_insert (&indirect_map, array); + struct indirect_map_t element = { *map_entry, NULL }; + hash_entry_type *slot = htab_find_slot (&indirect_htab, &element, + INSERT); + *slot = (hash_entry_type) map_entry; } } - - gomp_mutex_unlock (&lock); } void * @@ -88,15 +101,11 @@ GOMP_target_map_indirect_ptr (void *ptr) if (!ptr) return ptr; - assert (indirect_array); - - struct indirect_splay_tree_key_s k; - indirect_splay_tree_key node = NULL; - - k.host_addr = (uint64_t) ptr; - node = indirect_splay_tree_lookup (&indirect_map, &k); + assert (indirect_htab); - return node ? (void *) node->target_addr : ptr; + struct indirect_map_t element = { ptr, NULL }; + hash_entry_type entry = htab_find (indirect_htab, &element); + return entry ? entry->target_addr : ptr; } #else @@ -115,7 +124,7 @@ GOMP_target_map_indirect_ptr (void *ptr) assert (GOMP_INDIRECT_ADDR_MAP); - for (volatile void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry; + for (void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry; map_entry += 2) if (*map_entry == ptr) return (void *) *(map_entry + 1); diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c index 61e9c616b67..bd3df448b52 100644 --- a/libgomp/config/gcn/team.c +++ b/libgomp/config/gcn/team.c @@ -52,14 +52,15 @@ gomp_gcn_enter_kernel (void) { int threadid = __builtin_gcn_dim_pos (1); - /* Initialize indirect function support. */ - build_indirect_map (); - if (threadid == 0) { int numthreads = __builtin_gcn_dim_size (1); int teamid = __builtin_gcn_dim_pos(0); + /* Initialize indirect function support. */ + if (teamid == 0) + build_indirect_map (); + /* Set up the global state. Every team will do this, but that should be harmless. */ gomp_global_icv.nthreads_var = 16; diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index 0cf5dad39ca..d5361917a24 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -60,9 +60,6 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data) asm ("mov.u32 %0, %%tid.y;" : "=r" (tid)); asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids)); - /* Initialize indirect function support. */ - build_indirect_map (); - if (tid == 0) { gomp_global_icv.nthreads_var = ntids; @@ -74,6 +71,12 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data) nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs)); memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs)); + /* Initialize indirect function support. */ + unsigned int block_id; + asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id)); + if (block_id == 0) + build_indirect_map (); + /* Find the low-latency heap details .... */ uint32_t *shared_pool; uint32_t shared_pool_size = 0; diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c index 9fe190efce8..545f1a9fcbf 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-target-indirect-2.c @@ -17,17 +17,17 @@ int main (void) { switch (i % 3) { - case 0: fn_ptr[i] = &foo; - case 1: fn_ptr[i] = &bar; - case 2: fn_ptr[i] = &baz; + case 0: fn_ptr[i] = &foo; break; + case 1: fn_ptr[i] = &bar; break; + case 2: fn_ptr[i] = &baz; break; } expected += (*fn_ptr[i]) (); } -#pragma omp target teams distribute parallel for reduction(+: x) \ - map (to: fn_ptr) map (tofrom: x) - for (int i = 0; i < N; i++) - x += (*fn_ptr[i]) (); + #pragma omp target teams distribute parallel for \ + reduction (+: x) map (to: fn_ptr) map (tofrom: x) + for (int i = 0; i < N; i++) + x += (*fn_ptr[i]) (); return x - expected; } -- 2.34.1