libgomp: Add reverse-offload splay tree Adds a splay tree for reverse offloading; effectively, it is not yet filled as reverse offload is not enabled for any device type. It also does not yet track variables later mapped (and then unmapped). Those will be added in follow-up commits. libgomp/ChangeLog: * libgomp.h: Include splay-tree.h with splay_tree_prefix set to 'reverse'. (struct target_mem_desc): Declare at old line; move definition down and add 'reverse_splay_tree_node rev_array' member. (struct reverse_splay_tree_key_s, reverse_splay_compare): New. (reverse_splay_tree_node, reverse_splay_tree, reverse_splay_tree_key): New typedef. (struct gomp_device_descr): Add 'reverse_splay_tree_s mem_map_rev'. * oacc-host.c (host_dispatch): NULL init mem_map_rev. * target.c: Include splay-tree.h with splay_tree_c set and splay_tree_prefix set to 'reverse'. (gomp_map_lookup_rev): New function (for now inside '#if 0'). (gomp_unmap_tgt): Free array_rev. (gomp_load_image_to_device): Fill reverse-offload splay tree with funct and var address. (gomp_unload_image_from_device): Empty reverse-offload splay tree. (gomp_target_init): Init mem_map_rev. libgomp/libgomp.h | 83 ++++++++++++++++++++++++++++++++++++++--------------- libgomp/oacc-host.c | 1 + libgomp/target.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 137 insertions(+), 25 deletions(-) diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index c243c4d6cf4..10518a8348b 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1038,29 +1038,7 @@ struct target_var_desc { uintptr_t length; }; -struct target_mem_desc { - /* Reference count. */ - uintptr_t refcount; - /* All the splay nodes allocated together. */ - splay_tree_node array; - /* Start of the target region. */ - uintptr_t tgt_start; - /* End of the targer region. */ - uintptr_t tgt_end; - /* Handle to free. */ - void *to_free; - /* Previous target_mem_desc. */ - struct target_mem_desc *prev; - /* Number of items in following list. */ - size_t list_count; - - /* Corresponding target device descriptor. */ - struct gomp_device_descr *device_descr; - - /* List of target items to remove (or decrease refcount) - at the end of region. */ - struct target_var_desc list[]; -}; +struct target_mem_desc; /* Special value for refcount - mask to indicate existence of special values. Right now we allocate 3 bits. */ @@ -1154,6 +1132,64 @@ splay_compare (splay_tree_key x, splay_tree_key y) #include "splay-tree.h" +/* Reverse offload splay-tree handling. */ + +struct reverse_splay_tree_key_s { + /* Address of the device object. */ + uintptr_t dev_start; + /* Address immediately after the device object. */ + uintptr_t dev_end; + + splay_tree_key k; +}; + +typedef struct reverse_splay_tree_node_s *reverse_splay_tree_node; +typedef struct reverse_splay_tree_s *reverse_splay_tree; +typedef struct reverse_splay_tree_key_s *reverse_splay_tree_key; + +static inline int +reverse_splay_compare (reverse_splay_tree_key x, reverse_splay_tree_key y) +{ + if (x->dev_start == x->dev_end + && y->dev_start == y->dev_end) + return 0; + if (x->dev_end <= y->dev_start) + return -1; + if (x->dev_start >= y->dev_end) + return 1; + return 0; +} + +#define splay_tree_prefix reverse +#include "splay-tree.h" + +struct target_mem_desc { + /* Reference count. */ + uintptr_t refcount; + /* All the splay nodes allocated together. */ + splay_tree_node array; + /* Likeverse for the reverse lookup device->host for reverse offload. */ + reverse_splay_tree_node rev_array; + /* Start of the target region. */ + uintptr_t tgt_start; + /* End of the targer region. */ + uintptr_t tgt_end; + /* Handle to free. */ + void *to_free; + /* Previous target_mem_desc. */ + struct target_mem_desc *prev; + /* Number of items in following list. */ + size_t list_count; + + /* Corresponding target device descriptor. */ + struct gomp_device_descr *device_descr; + + /* List of target items to remove (or decrease refcount) + at the end of region. */ + struct target_var_desc list[]; +}; + + typedef struct acc_dispatch_t { /* Execute. */ @@ -1248,6 +1284,7 @@ struct gomp_device_descr /* Splay tree containing information about mapped memory regions. */ struct splay_tree_s mem_map; + struct reverse_splay_tree_s mem_map_rev; /* Mutex for the mutable data. */ gomp_mutex_t lock; diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index eb11b9cf16a..89a15a4d80f 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -283,6 +283,7 @@ static struct gomp_device_descr host_dispatch = .run_func = host_run, .mem_map = { NULL }, + .mem_map_rev = { NULL }, /* .lock initialized in goacc_host_init. */ .state = GOMP_DEVICE_UNINITIALIZED, diff --git a/libgomp/target.c b/libgomp/target.c index 135db1d88ab..b22678f687f 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -45,6 +45,11 @@ #include "plugin-suffix.h" #endif +/* Define another splay tree instantiation - for reverse offload. */ +#define splay_tree_prefix reverse +#define splay_tree_c +#include "splay-tree.h" + typedef uintptr_t *hash_entry_type; static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } static inline void htab_free (void *ptr) { free (ptr); } @@ -200,6 +205,27 @@ gomp_map_lookup (splay_tree mem_map, splay_tree_key key) return splay_tree_lookup (mem_map, key); } +#if 0 +static inline reverse_splay_tree_key +gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key) +{ + if (key->dev_start != key->dev_end) + return reverse_splay_tree_lookup (mem_map_rev, key); + + key->dev_end++; + reverse_splay_tree_key n = reverse_splay_tree_lookup (mem_map_rev, key); + key->dev_end--; + if (n) + return n; + key->dev_start--; + n = reverse_splay_tree_lookup (mem_map_rev, key); + key->dev_start++; + if (n) + return n; + return reverse_splay_tree_lookup (mem_map_rev, key); +} +#endif + static inline splay_tree_key gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) { @@ -1811,6 +1837,7 @@ gomp_unmap_tgt (struct target_mem_desc *tgt) if (tgt->tgt_end) gomp_free_device_memory (tgt->device_descr, tgt->to_free); + free (tgt->rev_array); free (tgt->array); free (tgt); } @@ -2133,11 +2160,16 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* Load image to device and get target addresses for the image. */ struct addr_pair *target_table = NULL; + uint64_t *rev_target_fn_table = NULL; int i, num_target_entries; + /* With reverse offload, insert also target-host addresses. */ + bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD; + num_target_entries = devicep->load_image_func (devicep->target_id, version, - target_data, &target_table); + target_data, &target_table /* , + rev_lookup ? &rev_target_fn_table : NULL */); if (num_target_entries != num_funcs + num_vars /* Others (device_num) are included as trailing entries in pair list. */ @@ -2154,6 +2186,15 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, /* Insert host-target address mapping into splay tree. */ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); + if (rev_lookup) + { + size_t n = num_vars; + if (rev_target_fn_table) + n += num_funcs; + tgt->rev_array = gomp_malloc (n * sizeof (*tgt->rev_array)); + } + else + tgt->rev_array = NULL; tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; tgt->tgt_end = 0; @@ -2162,6 +2203,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, tgt->list_count = 0; tgt->device_descr = devicep; splay_tree_node array = tgt->array; + reverse_splay_tree_node rev_array = tgt->rev_array; for (i = 0; i < num_funcs; i++) { @@ -2176,6 +2218,18 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); + if (rev_target_fn_table) + { + reverse_splay_tree_key k2 = &rev_array->key; + k2->dev_start = rev_target_fn_table[i]; + k2->dev_end = rev_target_fn_table[i] + 1; + k2->k = k; + rev_array->left = NULL; + rev_array->right = NULL; + if (k2->dev_start != 0) + reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array); + rev_array++; + } array++; } @@ -2210,6 +2264,17 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); + if (rev_lookup) + { + reverse_splay_tree_key k2 = &rev_array->key; + k2->dev_start = target_table[num_funcs + i].start; + k2->dev_end = k2->dev_start + k->host_end - k->host_start; + k2->k = k; + rev_array->left = NULL; + rev_array->right = NULL; + reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array); + rev_array++; + } array++; } @@ -2242,11 +2307,16 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, } } + if (rev_target_fn_table) + free (rev_target_fn_table); free (target_table); } /* Unload the mappings described by target_data from device DEVICE_P. - The device must be locked. */ + The device must be locked. + Note: For reverse offload, the code assumes that the splay tree is + no longer needed and all images for this device are deleted. + (This is the case for all current callers.) */ static void gomp_unload_image_from_device (struct gomp_device_descr *devicep, @@ -2266,6 +2336,9 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep, struct splay_tree_key_s k; splay_tree_key node = NULL; + /* Remove splay tree access; the memory is freed in gomp_unmap_tgt. */ + devicep->mem_map_rev.root = NULL; + /* Find mapping at start of node array */ if (num_funcs || num_vars) { @@ -4233,6 +4306,7 @@ gomp_target_init (void) /* current_device.capabilities has already been set. */ current_device.type = current_device.get_type_func (); current_device.mem_map.root = NULL; + current_device.mem_map_rev.root = NULL; current_device.state = GOMP_DEVICE_UNINITIALIZED; for (i = 0; i < new_num_devs; i++) {