diff --git a/libgomp/hashtab.h b/libgomp/hashtab.h index 93223e3bc5e..41b4fbb3b92 100644 --- a/libgomp/hashtab.h +++ b/libgomp/hashtab.h @@ -220,33 +220,39 @@ htab_mod (hashval_t hash, htab_t htab) static inline hashval_t htab_mod_m2 (hashval_t hash, htab_t htab) { const struct prime_ent *p = &prime_tab[htab->size_prime_index]; return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift); } +static inline htab_t +htab_clear (htab_t htab) +{ + htab->n_elements = 0; + htab->n_deleted = 0; + memset (htab->entries, 0, htab->size * sizeof (hash_entry_type)); + return htab; +} + /* Create hash table of size SIZE. */ static htab_t htab_create (size_t size) { htab_t result; unsigned int size_prime_index; size_prime_index = higher_prime_index (size); size = prime_tab[size_prime_index].prime; result = (htab_t) htab_alloc (sizeof (struct htab) + size * sizeof (hash_entry_type)); result->size = size; - result->n_elements = 0; - result->n_deleted = 0; result->size_prime_index = size_prime_index; - memset (result->entries, 0, size * sizeof (hash_entry_type)); - return result; + return htab_clear (result); } /* Similar to htab_find_slot, but without several unwanted side effects: - Does not call htab_eq when it finds an existing entry. - Does not change the count of elements in the hash table. This function also assumes there are no deleted entries in the table. HASH is the hash value for the element to be inserted. */ diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 070d29c969e..5ec96827027 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -992,19 +992,43 @@ struct target_mem_desc { struct gomp_device_descr *device_descr; /* List of target items to remove (or decrease refcount) at the end of region. */ struct target_var_desc list[]; }; +/* Special value for refcount - mask to indicate existence of special + values. Right now we allocate 3 bits. */ +#define REFCOUNT_SPECIAL (~(uintptr_t) 0x7) + /* Special value for refcount - infinity. */ -#define REFCOUNT_INFINITY (~(uintptr_t) 0) +#define REFCOUNT_INFINITY (REFCOUNT_SPECIAL | 0) /* Special value for refcount - tgt_offset contains target address of the artificial pointer to "omp declare target link" object. */ -#define REFCOUNT_LINK (~(uintptr_t) 1) +#define REFCOUNT_LINK (REFCOUNT_SPECIAL | 1) + +/* Special value for refcount - structure element sibling list items. + All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST + and _FLAG_LAST indicating first and last in the created sibling sequence. */ +#define REFCOUNT_STRUCTELEM (REFCOUNT_SPECIAL | 4) +#define REFCOUNT_STRUCTELEM_P(V) \ + (((V) & REFCOUNT_STRUCTELEM) == REFCOUNT_STRUCTELEM) +/* The first leading key with _FLAG_FIRST set houses the actual reference count + in the structelem_refcount field. Other siblings point to this counter value + through its structelem_refcount_ptr field. */ +#define REFCOUNT_STRUCTELEM_FLAG_FIRST (1) +/* The last key in the sibling sequence has this set. This is required to + indicate the sequence boundary, when we remove the structure sibling list + from the map. */ +#define REFCOUNT_STRUCTELEM_FLAG_LAST (2) + +#define REFCOUNT_STRUCTELEM_FIRST_P(V) \ + (REFCOUNT_STRUCTELEM_P (V) && ((V) & REFCOUNT_STRUCTELEM_FLAG_FIRST)) +#define REFCOUNT_STRUCTELEM_LAST_P(V) \ + (REFCOUNT_STRUCTELEM_P (V) && ((V) & REFCOUNT_STRUCTELEM_FLAG_LAST)) /* Special offset values. */ #define OFFSET_INLINED (~(uintptr_t) 0) #define OFFSET_POINTER (~(uintptr_t) 1) #define OFFSET_STRUCT (~(uintptr_t) 2) /* Auxiliary structure for infrequently-used or API-specific data. */ @@ -1024,16 +1048,30 @@ struct splay_tree_key_s { uintptr_t host_end; /* Descriptor of the target memory. */ struct target_mem_desc *tgt; /* Offset from tgt->tgt_start to the start of the target object. */ uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; - /* Dynamic reference count. */ - uintptr_t dynamic_refcount; + union { + /* Dynamic reference count. */ + uintptr_t dynamic_refcount; + + /* Unified reference count for structure element siblings, this is used + when REFCOUNT_STRUCTELEM_FIRST_P(k->refcount) == true, the first sibling + in a structure element sibling list item sequence. */ + uintptr_t structelem_refcount; + + /* When REFCOUNT_STRUCTELEM_P (k->refcount) == true, this field points + into the (above) structelem_refcount field of the _FIRST splay_tree_key, + the first key in the created sequence. All structure element siblings + share a single refcount in this manner. Since these two fields won't be + used at the same time, they are stashed in a union. */ + uintptr_t *structelem_refcount_ptr; + }; struct splay_tree_aux *aux; }; /* The comparison function. */ static inline int splay_compare (splay_tree_key x, splay_tree_key y) @@ -1180,27 +1218,21 @@ extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); extern void gomp_attach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree, splay_tree_key, uintptr_t, size_t, struct gomp_coalesce_buf *); extern void gomp_detach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree_key, uintptr_t, bool, struct gomp_coalesce_buf *); - -extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, - size_t, void **, void **, - size_t *, void *, bool, - enum gomp_map_vars_kind); -extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *, - struct goacc_asyncqueue *, - size_t, void **, void **, - size_t *, void *, bool, - enum gomp_map_vars_kind); -extern void gomp_unmap_vars (struct target_mem_desc *, bool); -extern void gomp_unmap_vars_async (struct target_mem_desc *, bool, - struct goacc_asyncqueue *); +extern struct target_mem_desc *goacc_map_vars (struct gomp_device_descr *, + struct goacc_asyncqueue *, + size_t, void **, void **, + size_t *, void *, bool, + enum gomp_map_vars_kind); +extern void goacc_unmap_vars (struct target_mem_desc *, bool, + struct goacc_asyncqueue *); extern void gomp_init_device (struct gomp_device_descr *); extern bool gomp_fini_device (struct gomp_device_descr *); extern void gomp_unload_device (struct gomp_device_descr *); extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key); extern void gomp_remove_var_async (struct gomp_device_descr *, splay_tree_key, struct goacc_asyncqueue *); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 4c8f0e0828e..d289213a176 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -398,17 +398,16 @@ acc_map_data (void *h, void *d, size_t s) gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, (int)s); } gomp_mutex_unlock (&acc_dev->lock); struct target_mem_desc *tgt - = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, - GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_ENTER_DATA); + = goacc_map_vars (acc_dev, NULL, mapnum, &hostaddrs, &devaddrs, &sizes, + &kinds, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); splay_tree_key n = tgt->list[0].key; assert (n); assert (n->refcount == 1); assert (n->dynamic_refcount == 0); /* Special reference counting behavior. */ @@ -568,17 +567,16 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) const size_t mapnum = 1; gomp_mutex_unlock (&acc_dev->lock); goacc_aq aq = get_goacc_asyncqueue (async); struct target_mem_desc *tgt - = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, - kinds, true, (GOMP_MAP_VARS_OPENACC - | GOMP_MAP_VARS_ENTER_DATA)); + = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, + kinds, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); n = tgt->list[0].key; assert (n); assert (n->refcount == 1); assert (n->dynamic_refcount == 0); n->dynamic_refcount++; @@ -1066,15 +1064,15 @@ find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds) pos++; } return pos; } /* Map variables for OpenACC "enter data". We can't just call - gomp_map_vars_async once, because individual mapped variables might have + goacc_map_vars once, because individual mapped variables might have "exit data" called for them at different times. */ static void goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, goacc_aq aq) { @@ -1198,18 +1196,17 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, { /* The data is not mapped already. Map it now, unless the first member in the group has a NULL pointer (e.g. a non-present optional parameter). */ gomp_mutex_unlock (&acc_dev->lock); struct target_mem_desc *tgt - = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, - &sizes[i], &kinds[i], true, - (GOMP_MAP_VARS_OPENACC - | GOMP_MAP_VARS_ENTER_DATA)); + = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); assert (tgt); gomp_mutex_lock (&acc_dev->lock); for (size_t j = 0; j < tgt->list_count; j++) { n = tgt->list[j].key; diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index c7e46e35bd6..d2259bb31ba 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -286,25 +286,25 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), enter_exit_data_event_info.other_event.tool_info = NULL; goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, &api_info); } goacc_aq aq = get_goacc_asyncqueue (async); - tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, - true, GOMP_MAP_VARS_OPENACC); + tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, + true, 0); if (profiling_p) { prof_info.event_type = acc_ev_enter_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, &api_info); } - + devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); if (aq == NULL) acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, tgt); @@ -317,19 +317,16 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_exit_data_start; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; enter_exit_data_event_info.other_event.tool_info = NULL; goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, &api_info); } - /* If running synchronously, unmap immediately. */ - if (aq == NULL) - gomp_unmap_vars (tgt, true); - else - gomp_unmap_vars_async (tgt, true, aq); + /* If running synchronously (aq == NULL), this will unmap immediately. */ + goacc_unmap_vars (tgt, true, aq); if (profiling_p) { prof_info.event_type = acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, &api_info); @@ -452,25 +449,24 @@ GOACC_data_start (int flags_m, size_t mapnum, /* Host fallback or 'do nothing'. */ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) { prof_info.device_type = acc_device_host; api_info.device_type = prof_info.device_type; - tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, - GOMP_MAP_VARS_OPENACC); + tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; goto out_prof; } gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); - tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_OPENACC); + tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds, + true, 0); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; out_prof: if (profiling_p) { @@ -538,15 +534,15 @@ GOACC_data_end (void) } if (profiling_p) goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); thr->mapped_data = tgt->prev; - gomp_unmap_vars (tgt, true); + goacc_unmap_vars (tgt, true, NULL); gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); if (profiling_p) { prof_info.event_type = acc_ev_exit_data_end; exit_data_event_info.other_event.event_type = prof_info.event_type; goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); diff --git a/libgomp/target.c b/libgomp/target.c index 6152f58e13d..f725529f35f 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -40,14 +40,31 @@ #include #ifdef PLUGIN_SUPPORT #include #include "plugin-suffix.h" #endif +typedef uintptr_t *hash_entry_type; +static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } +static inline void htab_free (void *ptr) { free (ptr); } +#include "hashtab.h" + +static inline hashval_t +htab_hash (hash_entry_type element) +{ + return hash_pointer ((void *) element); +} + +static inline bool +htab_eq (hash_entry_type x, hash_entry_type y) +{ + return x == y; +} + #define FIELD_TGT_EMPTY (~(size_t) 0) static void gomp_target_init (void); /* The whole initialization code for offloading plugins is only run one. */ static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; @@ -356,24 +373,132 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) if (!devicep->free_func (devicep->target_id, devptr)) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("error in freeing device memory block at %p", devptr); } } +/* Increment reference count of a splay_tree_key region K by 1. + If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only + increment the value if refcount is not yet contained in the set (used for + OpenMP 5.0, which specifies that a region's refcount is adjusted at most + once for each construct). */ + +static inline void +gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set) +{ + if (k == NULL || k->refcount == REFCOUNT_INFINITY) + return; + + uintptr_t *refcount_ptr = &k->refcount; + + if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)) + refcount_ptr = &k->structelem_refcount; + else if (REFCOUNT_STRUCTELEM_P (k->refcount)) + refcount_ptr = k->structelem_refcount_ptr; + + if (refcount_set) + { + if (htab_find (*refcount_set, refcount_ptr)) + return; + uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT); + *slot = refcount_ptr; + } + + *refcount_ptr += 1; + return; +} + +/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P + is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to + track already seen refcounts, and only adjust the value if refcount is not + yet contained in the set (like gomp_increment_refcount). + + Return out-values: set *DO_COPY to true if we set the refcount to zero, or + it is already zero and we know we decremented it earlier. This signals that + associated maps should be copied back to host. + + *DO_REMOVE is set to true when we this is the first handling of this refcount + and we are setting it to zero. This signals a removal of this key from the + splay-tree map. + + Copy and removal are separated due to cases like handling of structure + elements, e.g. each map of a structure element representing a possible copy + out of a structure field has to be handled individually, but we only signal + removal for one (the first encountered) sibing map. */ + +static inline void +gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p, + bool *do_copy, bool *do_remove) +{ + if (k == NULL || k->refcount == REFCOUNT_INFINITY) + { + *do_copy = *do_remove = false; + return; + } + + uintptr_t *refcount_ptr = &k->refcount; + + if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)) + refcount_ptr = &k->structelem_refcount; + else if (REFCOUNT_STRUCTELEM_P (k->refcount)) + refcount_ptr = k->structelem_refcount_ptr; + + bool new_encountered_refcount; + bool set_to_zero = false; + bool is_zero = false; + + uintptr_t orig_refcount = *refcount_ptr; + + if (refcount_set) + { + if (htab_find (*refcount_set, refcount_ptr)) + { + new_encountered_refcount = false; + goto end; + } + + uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT); + *slot = refcount_ptr; + new_encountered_refcount = true; + } + else + /* If no refcount_set being used, assume all keys are being decremented + for the first time. */ + new_encountered_refcount = true; + + if (delete_p) + *refcount_ptr = 0; + else if (*refcount_ptr > 0) + *refcount_ptr -= 1; + + end: + if (*refcount_ptr == 0) + { + if (orig_refcount > 0) + set_to_zero = true; + + is_zero = true; + } + + *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero)); + *do_remove = (new_encountered_refcount && set_to_zero); +} + /* Handle the case where gomp_map_lookup, splay_tree_lookup or gomp_map_0len_lookup found oldn for newn. Helper function of gomp_map_vars. */ static inline void gomp_map_vars_existing (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, splay_tree_key oldn, splay_tree_key newn, struct target_var_desc *tgt_var, unsigned char kind, bool always_to_flag, - struct gomp_coalesce_buf *cbuf) + struct gomp_coalesce_buf *cbuf, + htab_t *refcount_set) { assert (kind != GOMP_MAP_ATTACH); tgt_var->key = oldn; tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); tgt_var->is_attach = false; @@ -394,16 +519,15 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag) gomp_copy_host2dev (devicep, aq, (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + newn->host_start - oldn->host_start), (void *) newn->host_start, newn->host_end - newn->host_start, cbuf); - if (oldn->refcount != REFCOUNT_INFINITY) - oldn->refcount++; + gomp_increment_refcount (oldn, refcount_set); } static int get_kind (bool short_mapkind, void *kinds, int idx) { return short_mapkind ? ((unsigned short *) kinds)[idx] : ((unsigned char *) kinds)[idx]; @@ -449,15 +573,15 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, } static void gomp_map_fields_existing (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, splay_tree_key n, size_t first, size_t i, void **hostaddrs, size_t *sizes, void *kinds, - struct gomp_coalesce_buf *cbuf) + struct gomp_coalesce_buf *cbuf, htab_t *refcount_set) { struct gomp_device_descr *devicep = tgt->device_descr; struct splay_tree_s *mem_map = &devicep->mem_map; struct splay_tree_key_s cur_node; int kind; const bool short_mapkind = true; const int typemask = short_mapkind ? 0xff : 0x7; @@ -467,15 +591,15 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); kind = get_kind (short_mapkind, kinds, i); if (n2 && n2->tgt == n->tgt && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], - kind & typemask, false, cbuf); + kind & typemask, false, cbuf, refcount_set); return; } if (sizes[i] == 0) { if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) { cur_node.host_start--; @@ -483,27 +607,27 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, cur_node.host_start++; if (n2 && n2->tgt == n->tgt && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], - kind & typemask, false, cbuf); + kind & typemask, false, cbuf, refcount_set); return; } } cur_node.host_end++; n2 = splay_tree_lookup (mem_map, &cur_node); cur_node.host_end--; if (n2 && n2->tgt == n->tgt && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) { gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], - kind & typemask, false, cbuf); + kind & typemask, false, cbuf, refcount_set); return; } } gomp_mutex_unlock (&devicep->lock); gomp_fatal ("Trying to map into device [%p..%p) structure element when " "other mapped elements from the same structure weren't mapped " "together with it", (void *) cur_node.host_start, @@ -667,19 +791,21 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) } static inline __attribute__((always_inline)) struct target_mem_desc * gomp_map_vars_internal (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, bool short_mapkind, + htab_t *refcount_set, enum gomp_map_vars_kind pragma_kind) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; bool has_firstprivate = false; bool has_always_ptrset = false; + bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0; const int rshift = short_mapkind ? 8 : 3; const int typemask = short_mapkind ? 0xff : 0x7; struct splay_tree_s *mem_map = &devicep->mem_map; struct splay_tree_key_s cur_node; struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; @@ -809,15 +935,15 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, sizes[i]); } i--; continue; } for (i = first; i <= last; i++) gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, - sizes, kinds, NULL); + sizes, kinds, NULL, refcount_set); i--; continue; } else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) { tgt->list[i].key = NULL; tgt->list[i].offset = OFFSET_POINTER; @@ -905,15 +1031,16 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, { has_always_ptrset = true; ++always_to_cnt; } } } gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], - kind & typemask, always_to_cnt > 0, NULL); + kind & typemask, always_to_cnt > 0, NULL, + refcount_set); i += always_to_cnt; } else { tgt->list[i].key = NULL; if ((kind & typemask) == GOMP_MAP_IF_PRESENT) @@ -1018,14 +1145,15 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (not_found_cnt || has_firstprivate || has_always_ptrset) { if (not_found_cnt) tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); splay_tree_node array = tgt->array; size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY; uintptr_t field_tgt_base = 0; + splay_tree_key field_tgt_structelem_first = NULL; for (i = 0; i < mapnum; i++) if (has_always_ptrset && tgt->list[i].key && (get_kind (short_mapkind, kinds, i) & typemask) == GOMP_MAP_TO_PSET) { @@ -1060,16 +1188,15 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, { if (*(void **) hostaddrs[j] == NULL) tgt->list[i].has_null_ptr_assoc = true; tgt->list[j].key = k; tgt->list[j].copy_from = false; tgt->list[j].always_copy_from = false; tgt->list[j].is_attach = false; - if (k->refcount != REFCOUNT_INFINITY) - k->refcount++; + gomp_increment_refcount (k, refcount_set); gomp_map_pointer (k->tgt, aq, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset + ((uintptr_t) hostaddrs[j] - k->host_start), sizes[j], cbufp); } } @@ -1149,21 +1276,22 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, - (uintptr_t) hostaddrs[i]; tgt_size = (tgt_size + align - 1) & ~(align - 1); tgt_size += (uintptr_t) hostaddrs[first] - (uintptr_t) hostaddrs[i]; field_tgt_base = (uintptr_t) hostaddrs[first]; field_tgt_offset = tgt_size; field_tgt_clear = last; + field_tgt_structelem_first = NULL; tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[first]; continue; } for (i = first; i <= last; i++) gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, - sizes, kinds, cbufp); + sizes, kinds, cbufp, refcount_set); i--; continue; case GOMP_MAP_ALWAYS_POINTER: cur_node.host_start = (uintptr_t) hostaddrs[i]; cur_node.host_end = cur_node.host_start + sizeof (void *); n = splay_tree_lookup (mem_map, &cur_node); if (n == NULL @@ -1232,15 +1360,16 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (!GOMP_MAP_POINTER_P (kind & typemask)) k->host_end = k->host_start + sizes[i]; else k->host_end = k->host_start + sizeof (void *); splay_tree_key n = splay_tree_lookup (mem_map, k); if (n && n->refcount != REFCOUNT_LINK) gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], - kind & typemask, false, cbufp); + kind & typemask, false, cbufp, + refcount_set); else { k->aux = NULL; if (n && n->refcount == REFCOUNT_LINK) { /* Replace target address of the pointer with target address of mapped object in the splay tree. */ @@ -1248,18 +1377,42 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, k->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); k->aux->link_key = n; } size_t align = (size_t) 1 << (kind >> rshift); tgt->list[i].key = k; k->tgt = tgt; + k->refcount = 1; + k->dynamic_refcount = 0; if (field_tgt_clear != FIELD_TGT_EMPTY) { k->tgt_offset = k->host_start - field_tgt_base + field_tgt_offset; + if (openmp_p) + { + k->refcount = REFCOUNT_STRUCTELEM; + if (field_tgt_structelem_first == NULL) + { + /* Set to first structure element of sequence. */ + k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST; + k->structelem_refcount = 1; + field_tgt_structelem_first = k; + } + else + /* Point to refcount of leading element, but do not + increment again. */ + k->structelem_refcount_ptr + = &field_tgt_structelem_first->structelem_refcount; + + if (i == field_tgt_clear) + { + k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST; + field_tgt_structelem_first = NULL; + } + } if (i == field_tgt_clear) field_tgt_clear = FIELD_TGT_EMPTY; } else { tgt_size = (tgt_size + align - 1) & ~(align - 1); k->tgt_offset = tgt_size; @@ -1267,16 +1420,14 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, } tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); tgt->list[i].always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); tgt->list[i].is_attach = false; tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; - k->refcount = 1; - k->dynamic_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; splay_tree_insert (mem_map, array); switch (kind & typemask) { case GOMP_MAP_ALLOC: @@ -1324,16 +1475,22 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, else { tgt->list[j].key = k; tgt->list[j].copy_from = false; tgt->list[j].always_copy_from = false; tgt->list[j].is_attach = false; tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]); - if (k->refcount != REFCOUNT_INFINITY) - k->refcount++; + /* For OpenMP, the use of refcount_sets causes + errors if we set k->refcount = 1 above but also + increment it again here, for decrementing will + not properly match, since we decrement only once + for each key's refcount. Therefore avoid this + increment for OpenMP constructs. */ + if (!openmp_p) + gomp_increment_refcount (k, refcount_set); gomp_map_pointer (tgt, aq, (uintptr_t) *(void **) hostaddrs[j], k->tgt_offset + ((uintptr_t) hostaddrs[j] - k->host_start), sizes[j], cbufp); } @@ -1422,32 +1579,49 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt = NULL; } gomp_mutex_unlock (&devicep->lock); return tgt; } -attribute_hidden struct target_mem_desc * +static struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, - bool short_mapkind, enum gomp_map_vars_kind pragma_kind) + bool short_mapkind, htab_t *refcount_set, + enum gomp_map_vars_kind pragma_kind) { - return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, - sizes, kinds, short_mapkind, pragma_kind); + /* This management of a local refcount_set is for convenience of callers + who do not share a refcount_set over multiple map/unmap uses. */ + htab_t local_refcount_set = NULL; + if (refcount_set == NULL) + { + local_refcount_set = htab_create (mapnum); + refcount_set = &local_refcount_set; + } + + struct target_mem_desc *tgt; + tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, + sizes, kinds, short_mapkind, refcount_set, + pragma_kind); + if (local_refcount_set) + htab_free (local_refcount_set); + + return tgt; } attribute_hidden struct target_mem_desc * -gomp_map_vars_async (struct gomp_device_descr *devicep, - struct goacc_asyncqueue *aq, size_t mapnum, - void **hostaddrs, void **devaddrs, size_t *sizes, - void *kinds, bool short_mapkind, - enum gomp_map_vars_kind pragma_kind) +goacc_map_vars (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, size_t mapnum, + void **hostaddrs, void **devaddrs, size_t *sizes, + void *kinds, bool short_mapkind, + enum gomp_map_vars_kind pragma_kind) { return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs, - sizes, kinds, short_mapkind, pragma_kind); + sizes, kinds, short_mapkind, NULL, + GOMP_MAP_VARS_OPENACC | pragma_kind); } static void gomp_unmap_tgt (struct target_mem_desc *tgt) { /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ if (tgt->tgt_end) @@ -1477,30 +1651,64 @@ gomp_unref_tgt (void *ptr) static void gomp_unref_tgt_void (void *ptr) { (void) gomp_unref_tgt (ptr); } -static inline __attribute__((always_inline)) bool -gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, - struct goacc_asyncqueue *aq) +static void +gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k) { - bool is_tgt_unmapped = false; - splay_tree_remove (&devicep->mem_map, k); + splay_tree_remove (sp, k); if (k->aux) { if (k->aux->link_key) - splay_tree_insert (&devicep->mem_map, - (splay_tree_node) k->aux->link_key); + splay_tree_insert (sp, (splay_tree_node) k->aux->link_key); if (k->aux->attach_count) free (k->aux->attach_count); free (k->aux); k->aux = NULL; } +} + +static inline __attribute__((always_inline)) bool +gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, + struct goacc_asyncqueue *aq) +{ + bool is_tgt_unmapped = false; + + if (REFCOUNT_STRUCTELEM_P (k->refcount)) + { + if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false) + /* Infer the splay_tree_key of the first structelem key using the + pointer to the first structleme_refcount. */ + k = (splay_tree_key) ((char *) k->structelem_refcount_ptr + - offsetof (struct splay_tree_key_s, + structelem_refcount)); + assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)); + + /* The array created by gomp_map_vars is an array of splay_tree_nodes, + with the splay_tree_keys embedded inside. */ + splay_tree_node node = + (splay_tree_node) ((char *) k + - offsetof (struct splay_tree_node_s, key)); + while (true) + { + /* Starting from the _FIRST key, and continue for all following + sibling keys. */ + gomp_remove_splay_tree_key (&devicep->mem_map, k); + if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount)) + break; + else + k = &(++node)->key; + } + } + else + gomp_remove_splay_tree_key (&devicep->mem_map, k); + if (aq) devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, (void *) k->tgt); else is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt); return is_tgt_unmapped; } @@ -1526,15 +1734,15 @@ gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k, /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant variables back from device to host: if it is false, it is assumed that this has been done already. */ static inline __attribute__((always_inline)) void gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, - struct goacc_asyncqueue *aq) + htab_t *refcount_set, struct goacc_asyncqueue *aq) { struct gomp_device_descr *devicep = tgt->device_descr; if (tgt->list_count == 0) { free (tgt); return; @@ -1569,31 +1777,25 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, continue; /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference counts ('n->refcount', 'n->dynamic_refcount'). */ if (tgt->list[i].is_attach) continue; - bool do_unmap = false; - if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) - k->refcount--; - else if (k->refcount == 1) - { - k->refcount--; - do_unmap = true; - } + bool do_copy, do_remove; + gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove); - if ((do_unmap && do_copyfrom && tgt->list[i].copy_from) + if ((do_copy && do_copyfrom && tgt->list[i].copy_from) || tgt->list[i].always_copy_from) gomp_copy_dev2host (devicep, aq, (void *) (k->host_start + tgt->list[i].offset), (void *) (k->tgt->tgt_start + k->tgt_offset + tgt->list[i].offset), tgt->list[i].length); - if (do_unmap) + if (do_remove) { struct target_mem_desc *k_tgt = k->tgt; bool is_tgt_unmapped = gomp_remove_var (devicep, k); /* It would be bad if TGT got unmapped while we're still iterating over its LIST_COUNT, and also expect to use it in the following code. */ assert (!is_tgt_unmapped @@ -1606,25 +1808,38 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, (void *) tgt); else gomp_unref_tgt ((void *) tgt); gomp_mutex_unlock (&devicep->lock); } -attribute_hidden void -gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) +static void +gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, + htab_t *refcount_set) { - gomp_unmap_vars_internal (tgt, do_copyfrom, NULL); + /* This management of a local refcount_set is for convenience of callers + who do not share a refcount_set over multiple map/unmap uses. */ + htab_t local_refcount_set = NULL; + if (refcount_set == NULL) + { + local_refcount_set = htab_create (tgt->list_count); + refcount_set = &local_refcount_set; + } + + gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL); + + if (local_refcount_set) + htab_free (local_refcount_set); } attribute_hidden void -gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, - struct goacc_asyncqueue *aq) +goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, + struct goacc_asyncqueue *aq) { - gomp_unmap_vars_internal (tgt, do_copyfrom, aq); + gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq); } static void gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, bool short_mapkind) { size_t i; @@ -2126,20 +2341,23 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) /* All shared memory devices should use the GOMP_target_ext function. */ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) return gomp_target_fallback (fn, hostaddrs, devicep); + htab_t refcount_set = htab_create (mapnum); struct target_mem_desc *tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - GOMP_MAP_VARS_TARGET); + &refcount_set, GOMP_MAP_VARS_TARGET); devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, NULL); - gomp_unmap_vars (tgt_vars, true); + htab_clear (refcount_set); + gomp_unmap_vars (tgt_vars, true, &refcount_set); + htab_free (refcount_set); } static inline unsigned int clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) { /* If we cannot run asynchronously, simply ignore nowait. */ if (devicep != NULL && devicep->async_run_func == NULL) @@ -2265,14 +2483,16 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, } } gomp_target_fallback (fn, hostaddrs, devicep); return; } struct target_mem_desc *tgt_vars; + htab_t refcount_set = NULL; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { if (!fpc_done) { calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align, &tgt_size); if (tgt_align) @@ -2281,21 +2501,29 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align, tgt_size); } } tgt_vars = NULL; } else - tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, - true, GOMP_MAP_VARS_TARGET); + { + refcount_set = htab_create (mapnum); + tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, + true, &refcount_set, GOMP_MAP_VARS_TARGET); + } devicep->run_func (devicep->target_id, fn_addr, tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, args); if (tgt_vars) - gomp_unmap_vars (tgt_vars, true); + { + htab_clear (refcount_set); + gomp_unmap_vars (tgt_vars, true, &refcount_set); + } + if (refcount_set) + htab_free (refcount_set); } /* Host fallback for GOMP_target_data{,_ext} routines. */ static void gomp_target_data_fallback (struct gomp_device_descr *devicep) { @@ -2310,15 +2538,15 @@ gomp_target_data_fallback (struct gomp_device_descr *devicep) { /* Even when doing a host fallback, if there are any active #pragma omp target data constructs, need to remember the new #pragma omp target data, otherwise GOMP_target_end_data would get out of sync. */ struct target_mem_desc *tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, - GOMP_MAP_VARS_DATA); + NULL, GOMP_MAP_VARS_DATA); tgt->prev = icv->target_data; icv->target_data = tgt; } } void GOMP_target_data (int device, const void *unused, size_t mapnum, @@ -2329,15 +2557,15 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) return gomp_target_data_fallback (devicep); struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, - GOMP_MAP_VARS_DATA); + NULL, GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; } void GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, @@ -2348,29 +2576,29 @@ GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return gomp_target_data_fallback (devicep); struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_DATA); + NULL, GOMP_MAP_VARS_DATA); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; } void GOMP_target_end_data (void) { struct gomp_task_icv *icv = gomp_icv (false); if (icv->target_data) { struct target_mem_desc *tgt = icv->target_data; icv->target_data = tgt->prev; - gomp_unmap_vars (tgt, true); + gomp_unmap_vars (tgt, true, NULL); } } void GOMP_target_update (int device, const void *unused, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { @@ -2461,15 +2689,16 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, } gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true); } static void gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned short *kinds) + void **hostaddrs, size_t *sizes, unsigned short *kinds, + htab_t *refcount_set) { const int typemask = 0xff; size_t i; gomp_mutex_lock (&devicep->lock); if (devicep->state == GOMP_DEVICE_FINALIZED) { gomp_mutex_unlock (&devicep->lock); @@ -2485,14 +2714,17 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); if (n) gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i], false, NULL); } + int nrmvars = 0; + splay_tree_key remove_vars[mapnum]; + for (i = 0; i < mapnum; i++) { struct splay_tree_key_s cur_node; unsigned char kind = kinds[i] & typemask; switch (kind) { case GOMP_MAP_FROM: @@ -2506,41 +2738,54 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node) : splay_tree_lookup (&devicep->mem_map, &cur_node); if (!k) continue; - if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY) - k->refcount--; - if ((kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION) - && k->refcount != REFCOUNT_INFINITY) - k->refcount = 0; + bool delete_p = (kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION); + bool do_copy, do_remove; + gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy, + &do_remove); - if ((kind == GOMP_MAP_FROM && k->refcount == 0) + if ((kind == GOMP_MAP_FROM && do_copy) || kind == GOMP_MAP_ALWAYS_FROM) gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start, (void *) (k->tgt->tgt_start + k->tgt_offset + cur_node.host_start - k->host_start), cur_node.host_end - cur_node.host_start); - if (k->refcount == 0) - gomp_remove_var (devicep, k); + + /* Structure elements lists are removed altogether at once, which + may cause immediate deallocation of the target_mem_desc, causing + errors if we still have following element siblings to copy back. + While we're at it, it also seems more disciplined to simply + queue all removals together for processing below. + + Structured block unmapping (i.e. gomp_unmap_vars_internal) should + not have this problem, since they maintain an additional + tgt->refcount = 1 reference to the target_mem_desc to start with. + */ + if (do_remove) + remove_vars[nrmvars++] = k; break; case GOMP_MAP_DETACH: break; default: gomp_mutex_unlock (&devicep->lock); gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", kind); } } + for (int i = 0; i < nrmvars; i++) + gomp_remove_var (devicep, remove_vars[i]); + gomp_mutex_unlock (&devicep->lock); } void GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, unsigned int flags, void **depend) @@ -2612,48 +2857,53 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, if (thr->task->taskgroup->workshare && thr->task->taskgroup->prev && thr->task->taskgroup->prev->cancelled) return; } } + htab_t refcount_set = htab_create (mapnum); + /* The variables are mapped separately such that they can be released independently. */ size_t i, j; if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) for (i = 0; i < mapnum; i++) if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) { gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], - &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + &kinds[i], true, &refcount_set, + GOMP_MAP_VARS_ENTER_DATA); i += sizes[i]; } else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET) { for (j = i + 1; j < mapnum; j++) if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff) && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff)) break; gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i], - &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); + &kinds[i], true, &refcount_set, + GOMP_MAP_VARS_ENTER_DATA); i += j - i - 1; } else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH) { /* An attach operation must be processed together with the mapped base-pointer list item. */ gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i], - true, GOMP_MAP_VARS_ENTER_DATA); + true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); i += 1; } else gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], - true, GOMP_MAP_VARS_ENTER_DATA); + true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); else - gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds); + gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set); + htab_free (refcount_set); } bool gomp_target_task_fn (void *data) { struct gomp_target_task *ttask = (struct gomp_target_task *) data; struct gomp_device_descr *devicep = ttask->devicep; @@ -2670,29 +2920,29 @@ gomp_target_task_fn (void *data) gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep); return false; } if (ttask->state == GOMP_TARGET_TASK_FINISHED) { if (ttask->tgt) - gomp_unmap_vars (ttask->tgt, true); + gomp_unmap_vars (ttask->tgt, true, NULL); return false; } void *actual_arguments; if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { ttask->tgt = NULL; actual_arguments = ttask->hostaddrs; } else { ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL, ttask->sizes, ttask->kinds, true, - GOMP_MAP_VARS_TARGET); + NULL, GOMP_MAP_VARS_TARGET); actual_arguments = (void *) ttask->tgt->tgt_start; } ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; assert (devicep->async_run_func); devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, ttask->args, (void *) ttask); @@ -2703,29 +2953,35 @@ gomp_target_task_fn (void *data) || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return false; size_t i; if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, ttask->kinds, true); - else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) - for (i = 0; i < ttask->mapnum; i++) - if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) - { - gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], - NULL, &ttask->sizes[i], &ttask->kinds[i], true, - GOMP_MAP_VARS_ENTER_DATA); - i += ttask->sizes[i]; - } - else - gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], - &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); else - gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, - ttask->kinds); + { + htab_t refcount_set = htab_create (ttask->mapnum); + if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) + for (i = 0; i < ttask->mapnum; i++) + if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) + { + gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], + NULL, &ttask->sizes[i], &ttask->kinds[i], true, + &refcount_set, GOMP_MAP_VARS_ENTER_DATA); + i += ttask->sizes[i]; + } + else + gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], + &ttask->kinds[i], true, &refcount_set, + GOMP_MAP_VARS_ENTER_DATA); + else + gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, + ttask->kinds, &refcount_set); + htab_free (refcount_set); + } return false; } void GOMP_teams (unsigned int num_teams, unsigned int thread_limit) { if (thread_limit) diff --git a/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c b/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c new file mode 100644 index 00000000000..5b7c31406c6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/refcount-1.c @@ -0,0 +1,52 @@ +#include +#include + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + unsigned int a = 0xcdcdcdcd; + #pragma omp target enter data map (to:a) + + a = 0xabababab; + unsigned char *p = (unsigned char *) &a; + unsigned char *q = p + 2; + + #pragma omp target enter data map (alloc:p[:1], q[:1]) + + if (!omp_target_is_present (&a, d)) + abort (); + if (!omp_target_is_present (&p[0], d)) + abort (); + if (!omp_target_is_present (&q[0], d)) + abort (); + + #pragma omp target exit data map (release:a) + + if (!omp_target_is_present (&a, d)) + abort (); + if (!omp_target_is_present (&p[0], d)) + abort (); + if (!omp_target_is_present (&q[0], d)) + abort (); + + #pragma omp target exit data map (from:q[:1]) + + if (omp_target_is_present (&a, d)) + abort (); + if (omp_target_is_present (&p[0], d)) + abort (); + if (omp_target_is_present (&q[0], d)) + abort (); + + if (q[0] != 0xcd) + abort (); + if (p[0] != 0xab) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-1.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-1.c new file mode 100644 index 00000000000..c49d8c12c05 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-1.c @@ -0,0 +1,29 @@ +#include +#include + +struct S +{ + int a, b; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s; + #pragma omp target enter data map (alloc: s.a, s.b) + #pragma omp target exit data map (release: s.b) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + "s.a" should be removed together by above 'exit data'. */ + if (omp_target_is_present (&s.a, d)) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-2.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-2.c new file mode 100644 index 00000000000..555c6e3e8e0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-2.c @@ -0,0 +1,44 @@ +#include +#include + +struct S +{ + int a, b, c, d; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s; + #pragma omp target enter data map (alloc: s.a, s.b, s.c, s.d) + #pragma omp target enter data map (alloc: s.c) + #pragma omp target enter data map (alloc: s.b, s.d) + #pragma omp target enter data map (alloc: s.a, s.c, s.b) + + #pragma omp target exit data map (release: s.a) + #pragma omp target exit data map (release: s.d) + #pragma omp target exit data map (release: s.c) + #pragma omp target exit data map (release: s.b) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + all elements of 's' should be removed together by above 'exit data's. */ + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&s.a, d)) + abort (); + if (omp_target_is_present (&s.b, d)) + abort (); + if (omp_target_is_present (&s.c, d)) + abort (); + if (omp_target_is_present (&s.d, d)) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-3.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-3.c new file mode 100644 index 00000000000..4850eabd879 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-3.c @@ -0,0 +1,63 @@ +#include +#include + +struct S +{ + int a, b, c, d; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s; + + #pragma omp target enter data map (alloc: s) + #pragma omp target enter data map (alloc: s) + + #pragma omp target exit data map (release: s.a) + #pragma omp target exit data map (release: s.b) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + all elements of 's' should be removed together by above 'exit data's. */ + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&s.a, d)) + abort (); + if (omp_target_is_present (&s.b, d)) + abort (); + if (omp_target_is_present (&s.c, d)) + abort (); + if (omp_target_is_present (&s.d, d)) + abort (); + + #pragma omp target enter data map (alloc: s.a, s.b) + #pragma omp target enter data map (alloc: s.a) + #pragma omp target enter data map (alloc: s.b) + + #pragma omp target exit data map (release: s) + #pragma omp target exit data map (release: s) + #pragma omp target exit data map (release: s) + + /* OpenMP 5.0 structure element mapping rules describe that elements of same + structure variable should allocate/deallocate in a uniform fashion, so + all elements of 's' should be removed together by above 'exit data's. */ + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&s.a, d)) + abort (); + if (omp_target_is_present (&s.b, d)) + abort (); + if (omp_target_is_present (&s.c, d)) + abort (); + if (omp_target_is_present (&s.d, d)) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c new file mode 100644 index 00000000000..d50fbf87c02 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-4.c @@ -0,0 +1,50 @@ +#include +#include + +struct S +{ + int a, b, c, d, e; +}; +typedef struct S S; + +int main (void) +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + S s = { 1, 2, 3, 4, 5 }; + #pragma omp target enter data map (to:s) + + int *p = &s.b; + int *q = &s.d; + #pragma omp target enter data map (alloc: p[:1], q[:1]) + + s.b = 88; + s.d = 99; + + #pragma omp target exit data map (release: s) + if (!omp_target_is_present (&s, d)) + abort (); + if (!omp_target_is_present (&p[0], d)) + abort (); + if (!omp_target_is_present (&q[0], d)) + abort (); + + #pragma omp target exit data map (from: q[:1]) + if (omp_target_is_present (&s, d)) + abort (); + if (omp_target_is_present (&p[0], d)) + abort (); + if (omp_target_is_present (&q[0], d)) + abort (); + + if (q[0] != 4) + abort (); + if (p[0] != 88) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/struct-elem-5.c b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-5.c new file mode 100644 index 00000000000..814c30120e5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/struct-elem-5.c @@ -0,0 +1,20 @@ +/* { dg-do run } */ + +struct S +{ + int a, b, c; +}; +typedef struct S S; + +int main (void) +{ + S s; + #pragma omp target data map (alloc: s.a, s.c) + { + #pragma omp target enter data map (alloc: s.b) + } + + return 0; +} +/* { dg-output "Trying to map into device \\\[\[0-9a-fA-FxX\]+..\[0-9a-fA-FxX\]+\\\) structure element when other mapped elements from the same structure weren't mapped together with it" } */ +/* { dg-shouldfail "" } */