Index: libgomp/oacc-async.c =================================================================== --- libgomp/oacc-async.c (revision 267226) +++ libgomp/oacc-async.c (working copy) @@ -27,10 +27,97 @@ . */ #include +#include #include "openacc.h" #include "libgomp.h" #include "oacc-int.h" +static struct goacc_thread * +get_goacc_thread (void) +{ + struct goacc_thread *thr = goacc_thread (); + + if (!thr || !thr->dev) + gomp_fatal ("no device active"); + + return thr; +} + +static struct gomp_device_descr * +get_goacc_thread_device (void) +{ + struct goacc_thread *thr = goacc_thread (); + + if (!thr || !thr->dev) + gomp_fatal ("no device active"); + + return thr->dev; +} + +attribute_hidden struct goacc_asyncqueue * +lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async) +{ + /* The special value acc_async_noval (-1) maps to the thread-specific + default async stream. */ + if (async == acc_async_noval) + async = thr->default_async; + + if (async == acc_async_sync) + return NULL; + + if (async < 0) + gomp_fatal ("bad async %d", async); + + struct gomp_device_descr *dev = thr->dev; + + gomp_mutex_lock (&dev->openacc.async.lock); + + if (!create + && (async >= dev->openacc.async.nasyncqueue + || !dev->openacc.async.asyncqueue[async])) + { + gomp_mutex_unlock (&dev->openacc.async.lock); + return NULL; + } + + if (async >= dev->openacc.async.nasyncqueue) + { + int diff = async + 1 - dev->openacc.async.nasyncqueue; + dev->openacc.async.asyncqueue + = gomp_realloc (dev->openacc.async.asyncqueue, + sizeof (goacc_aq) * (async + 1)); + memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue, + 0, sizeof (goacc_aq) * diff); + dev->openacc.async.nasyncqueue = async + 1; + } + + if (!dev->openacc.async.asyncqueue[async]) + { + dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func (); + + if (!dev->openacc.async.asyncqueue[async]) + { + gomp_mutex_unlock (&dev->openacc.async.lock); + gomp_fatal ("async %d creation failed", async); + } + + /* Link new async queue into active list. */ + goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list)); + n->aq = dev->openacc.async.asyncqueue[async]; + n->next = dev->openacc.async.active; + dev->openacc.async.active = n; + } + gomp_mutex_unlock (&dev->openacc.async.lock); + return dev->openacc.async.asyncqueue[async]; +} + +attribute_hidden struct goacc_asyncqueue * +get_goacc_asyncqueue (int async) +{ + struct goacc_thread *thr = get_goacc_thread (); + return lookup_goacc_asyncqueue (thr, true, async); +} + int acc_async_test (int async) { @@ -42,18 +129,25 @@ acc_async_test (int async) if (!thr || !thr->dev) gomp_fatal ("no device active"); - return thr->dev->openacc.async_test_func (async); + goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async); + return thr->dev->openacc.async.test_func (aq); } int acc_async_test_all (void) { - struct goacc_thread *thr = goacc_thread (); + struct goacc_thread *thr = get_goacc_thread (); - if (!thr || !thr->dev) - gomp_fatal ("no device active"); - - return thr->dev->openacc.async_test_all_func (); + int ret = 1; + gomp_mutex_lock (&thr->dev->openacc.async.lock); + for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) + if (!thr->dev->openacc.async.test_func (l->aq)) + { + ret = 0; + break; + } + gomp_mutex_unlock (&thr->dev->openacc.async.lock); + return ret; } void @@ -62,12 +156,10 @@ acc_wait (int async) if (!async_valid_p (async)) gomp_fatal ("invalid async argument: %d", async); - struct goacc_thread *thr = goacc_thread (); + struct goacc_thread *thr = get_goacc_thread (); - if (!thr || !thr->dev) - gomp_fatal ("no device active"); - - thr->dev->openacc.async_wait_func (async); + goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async); + thr->dev->openacc.async.synchronize_func (aq); } /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */ @@ -84,23 +176,28 @@ acc_async_wait (int async) void acc_wait_async (int async1, int async2) { - struct goacc_thread *thr = goacc_thread (); + struct goacc_thread *thr = get_goacc_thread (); - if (!thr || !thr->dev) - gomp_fatal ("no device active"); + goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2); + goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1); + if (!aq1) + gomp_fatal ("invalid async 1"); + if (aq1 == aq2) + gomp_fatal ("identical parameters"); - thr->dev->openacc.async_wait_async_func (async1, async2); + thr->dev->openacc.async.synchronize_func (aq1); + thr->dev->openacc.async.serialize_func (aq1, aq2); } void acc_wait_all (void) { - struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *dev = get_goacc_thread_device (); - if (!thr || !thr->dev) - gomp_fatal ("no device active"); - - thr->dev->openacc.async_wait_all_func (); + gomp_mutex_lock (&dev->openacc.async.lock); + for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next) + dev->openacc.async.synchronize_func (l->aq); + gomp_mutex_unlock (&dev->openacc.async.lock); } /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */ @@ -120,10 +217,74 @@ acc_wait_all_async (int async) if (!async_valid_p (async)) gomp_fatal ("invalid async argument: %d", async); - struct goacc_thread *thr = goacc_thread (); + struct goacc_thread *thr = get_goacc_thread (); - if (!thr || !thr->dev) - gomp_fatal ("no device active"); + goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async); - thr->dev->openacc.async_wait_all_async_func (async); + gomp_mutex_lock (&thr->dev->openacc.async.lock); + for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) + { + thr->dev->openacc.async.synchronize_func (l->aq); + if (waiting_queue) + thr->dev->openacc.async.serialize_func (l->aq, waiting_queue); + } + gomp_mutex_unlock (&thr->dev->openacc.async.lock); } + +int +acc_get_default_async (void) +{ + struct goacc_thread *thr = get_goacc_thread (); + return thr->default_async; +} + +void +acc_set_default_async (int async) +{ + if (async < acc_async_sync) + gomp_fatal ("invalid async argument: %d", async); + + struct goacc_thread *thr = get_goacc_thread (); + thr->default_async = async; +} + +attribute_hidden void +goacc_async_free (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, void *ptr) +{ + if (!aq) + free (ptr); + else + devicep->openacc.async.queue_callback_func (aq, free, ptr); +} + +attribute_hidden void +goacc_init_asyncqueues (struct gomp_device_descr *devicep) +{ + gomp_mutex_init (&devicep->openacc.async.lock); + devicep->openacc.async.nasyncqueue = 0; + devicep->openacc.async.asyncqueue = NULL; + devicep->openacc.async.active = NULL; +} + +attribute_hidden bool +goacc_fini_asyncqueues (struct gomp_device_descr *devicep) +{ + bool ret = true; + if (devicep->openacc.async.nasyncqueue > 0) + { + goacc_aq_list next; + for (goacc_aq_list l = devicep->openacc.async.active; l; l = next) + { + ret &= devicep->openacc.async.destruct_func (l->aq); + next = l->next; + free (l); + } + free (devicep->openacc.async.asyncqueue); + devicep->openacc.async.nasyncqueue = 0; + devicep->openacc.async.asyncqueue = NULL; + devicep->openacc.async.active = NULL; + } + gomp_mutex_destroy (&devicep->openacc.async.lock); + return ret; +} Index: libgomp/oacc-cuda.c =================================================================== --- libgomp/oacc-cuda.c (revision 267226) +++ libgomp/oacc-cuda.c (working copy) @@ -62,7 +62,11 @@ acc_get_cuda_stream (int async) return NULL; if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func) - return thr->dev->openacc.cuda.get_stream_func (async); + { + goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); + if (aq) + return thr->dev->openacc.cuda.get_stream_func (aq); + } return NULL; } @@ -79,8 +83,14 @@ acc_set_cuda_stream (int async, void *stream) thr = goacc_thread (); + int ret = -1; if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func) - return thr->dev->openacc.cuda.set_stream_func (async, stream); + { + goacc_aq aq = get_goacc_asyncqueue (async); + gomp_mutex_lock (&thr->dev->openacc.async.lock); + ret = thr->dev->openacc.cuda.set_stream_func (aq, stream); + gomp_mutex_unlock (&thr->dev->openacc.async.lock); + } - return -1; + return ret; } Index: libgomp/oacc-host.c =================================================================== --- libgomp/oacc-host.c (revision 267226) +++ libgomp/oacc-host.c (working copy) @@ -140,8 +140,7 @@ host_openacc_exec (void (*fn) (void *), size_t mapnum __attribute__ ((unused)), void **hostaddrs, void **devaddrs __attribute__ ((unused)), - int async __attribute__ ((unused)), - unsigned *dims __attribute ((unused)), + unsigned *dims __attribute__ ((unused)), void *targ_mem_desc __attribute__ ((unused))) { fn (hostaddrs); @@ -148,49 +147,81 @@ host_openacc_exec (void (*fn) (void *), } static void -host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)), - int async __attribute__ ((unused))) +host_openacc_async_exec (void (*fn) (void *), + size_t mapnum __attribute__ ((unused)), + void **hostaddrs, + void **devaddrs __attribute__ ((unused)), + unsigned *dims __attribute__ ((unused)), + void *targ_mem_desc __attribute__ ((unused)), + struct goacc_asyncqueue *aq __attribute__ ((unused))) { + fn (hostaddrs); } static int -host_openacc_async_test (int async __attribute__ ((unused))) +host_openacc_async_test (struct goacc_asyncqueue *aq __attribute__ ((unused))) { return 1; } -static int -host_openacc_async_test_all (void) +static void +host_openacc_async_synchronize (struct goacc_asyncqueue *aq + __attribute__ ((unused))) { - return 1; } static void -host_openacc_async_wait (int async __attribute__ ((unused))) +host_openacc_async_serialize (struct goacc_asyncqueue *aq1 + __attribute__ ((unused)), + struct goacc_asyncqueue *aq2 + __attribute__ ((unused))) { } -static void -host_openacc_async_wait_async (int async1 __attribute__ ((unused)), - int async2 __attribute__ ((unused))) +static bool +host_openacc_async_host2dev (int ord __attribute__ ((unused)), + void *dst __attribute__ ((unused)), + const void *src __attribute__ ((unused)), + size_t n __attribute__ ((unused)), + struct goacc_asyncqueue *aq + __attribute__ ((unused))) { + return true; } -static void -host_openacc_async_wait_all (void) +static bool +host_openacc_async_dev2host (int ord __attribute__ ((unused)), + void *dst __attribute__ ((unused)), + const void *src __attribute__ ((unused)), + size_t n __attribute__ ((unused)), + struct goacc_asyncqueue *aq + __attribute__ ((unused))) { + return true; } static void -host_openacc_async_wait_all_async (int async __attribute__ ((unused))) +host_openacc_async_queue_callback (struct goacc_asyncqueue *aq + __attribute__ ((unused)), + void (*callback_fn)(void *) + __attribute__ ((unused)), + void *userptr __attribute__ ((unused))) { } -static void -host_openacc_async_set_async (int async __attribute__ ((unused))) +static struct goacc_asyncqueue * +host_openacc_async_construct (void) { + return NULL; } +static bool +host_openacc_async_destruct (struct goacc_asyncqueue *aq + __attribute__ ((unused))) +{ + return true; +} + static void * host_openacc_create_thread_data (int ord __attribute__ ((unused))) { @@ -235,19 +266,21 @@ static struct gomp_device_descr host_dispatch = .exec_func = host_openacc_exec, - .register_async_cleanup_func = host_openacc_register_async_cleanup, - - .async_test_func = host_openacc_async_test, - .async_test_all_func = host_openacc_async_test_all, - .async_wait_func = host_openacc_async_wait, - .async_wait_async_func = host_openacc_async_wait_async, - .async_wait_all_func = host_openacc_async_wait_all, - .async_wait_all_async_func = host_openacc_async_wait_all_async, - .async_set_async_func = host_openacc_async_set_async, - .create_thread_data_func = host_openacc_create_thread_data, .destroy_thread_data_func = host_openacc_destroy_thread_data, + .async = { + .construct_func = host_openacc_async_construct, + .destruct_func = host_openacc_async_destruct, + .test_func = host_openacc_async_test, + .synchronize_func = host_openacc_async_synchronize, + .serialize_func = host_openacc_async_serialize, + .queue_callback_func = host_openacc_async_queue_callback, + .exec_func = host_openacc_async_exec, + .dev2host_func = host_openacc_async_dev2host, + .host2dev_func = host_openacc_async_host2dev, + }, + .cuda = { .get_current_device_func = NULL, .get_current_context_func = NULL, Index: libgomp/oacc-init.c =================================================================== --- libgomp/oacc-init.c (revision 267226) +++ libgomp/oacc-init.c (working copy) @@ -309,7 +309,7 @@ acc_shutdown_1 (acc_device_t d) if (acc_dev->state == GOMP_DEVICE_INITIALIZED) { devices_active = true; - ret &= acc_dev->fini_device_func (acc_dev->target_id); + ret &= gomp_fini_device (acc_dev); acc_dev->state = GOMP_DEVICE_UNINITIALIZED; } gomp_mutex_unlock (&acc_dev->lock); @@ -426,8 +426,8 @@ goacc_attach_host_thread_to_device (int ord) thr->target_tls = acc_dev->openacc.create_thread_data_func (ord); - - acc_dev->openacc.async_set_async_func (acc_async_sync); + + thr->default_async = acc_async_default; } /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of Index: libgomp/oacc-int.h =================================================================== --- libgomp/oacc-int.h (revision 267226) +++ libgomp/oacc-int.h (working copy) @@ -73,6 +73,9 @@ struct goacc_thread /* Target-specific data (used by plugin). */ void *target_tls; + + /* Default OpenACC async queue for current thread, exported to plugin. */ + int default_async; }; #if defined HAVE_TLS || defined USE_EMUTLS @@ -99,6 +102,14 @@ void goacc_restore_bind (void); void goacc_lazy_initialize (void); void goacc_host_init (void); +void goacc_init_asyncqueues (struct gomp_device_descr *); +bool goacc_fini_asyncqueues (struct gomp_device_descr *); +void goacc_async_free (struct gomp_device_descr *, struct goacc_asyncqueue *, + void *); +struct goacc_asyncqueue *get_goacc_asyncqueue (int); +struct goacc_asyncqueue *lookup_goacc_asyncqueue (struct goacc_thread *, bool, + int); + static inline bool async_valid_stream_id_p (int async) { Index: libgomp/oacc-mem.c =================================================================== --- libgomp/oacc-mem.c (revision 267226) +++ libgomp/oacc-mem.c (working copy) @@ -172,18 +172,11 @@ memcpy_tofrom_device (bool from, void *d, void *h, return; } - if (async > acc_async_sync) - thr->dev->openacc.async_set_async_func (async); - - bool ret = (from - ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s) - : thr->dev->host2dev_func (thr->dev->target_id, d, h, s)); - - if (async > acc_async_sync) - thr->dev->openacc.async_set_async_func (acc_async_sync); - - if (!ret) - gomp_fatal ("error in %s", libfnname); + goacc_aq aq = get_goacc_asyncqueue (async); + if (from) + gomp_copy_dev2host (thr->dev, aq, h, d, s); + else + gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); } void @@ -509,17 +502,13 @@ present_create_copy (unsigned f, void *h, size_t s gomp_mutex_unlock (&acc_dev->lock); - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (async); + goacc_aq aq = get_goacc_asyncqueue (async); - tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, - GOMP_MAP_VARS_OPENACC); + tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, + &kinds, true, GOMP_MAP_VARS_OPENACC); /* Initialize dynamic refcount. */ tgt->list[0].key->dynamic_refcount = 1; - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (acc_async_sync); - gomp_mutex_lock (&acc_dev->lock); d = tgt->to_free; @@ -676,13 +665,9 @@ delete_copyout (unsigned f, void *h, size_t s, int if (f & FLAG_COPYOUT) { - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (async); - acc_dev->dev2host_func (acc_dev->target_id, h, d, s); - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (acc_async_sync); + goacc_aq aq = get_goacc_asyncqueue (async); + gomp_copy_dev2host (acc_dev, aq, h, d, s); } - gomp_remove_var (acc_dev, n); } @@ -765,17 +750,13 @@ update_dev_host (int is_dev, void *h, size_t s, in d = (void *) (n->tgt->tgt_start + n->tgt_offset + (uintptr_t) h - n->host_start); - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (async); + goacc_aq aq = get_goacc_asyncqueue (async); if (is_dev) - acc_dev->host2dev_func (acc_dev->target_id, d, h, s); + gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL); else - acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + gomp_copy_dev2host (acc_dev, aq, h, d, s); - if (async > acc_async_sync) - acc_dev->openacc.async_set_async_func (acc_async_sync); - gomp_mutex_unlock (&acc_dev->lock); } @@ -805,7 +786,7 @@ acc_update_self_async (void *h, size_t s, int asyn void gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, - void *kinds) + void *kinds, int async) { struct target_mem_desc *tgt; struct goacc_thread *thr = goacc_thread (); @@ -835,8 +816,9 @@ gomp_acc_insert_pointer (size_t mapnum, void **hos } gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); - tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, - NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); + goacc_aq aq = get_goacc_asyncqueue (async); + tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, + NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); /* Initialize dynamic refcount. */ @@ -930,7 +912,10 @@ gomp_acc_remove_pointer (void *h, size_t s, bool f if (async < acc_async_noval) gomp_unmap_vars (t, true); else - t->device_descr->openacc.register_async_cleanup_func (t, async); + { + goacc_aq aq = get_goacc_asyncqueue (async); + gomp_unmap_vars_async (t, true, aq); + } } gomp_mutex_unlock (&acc_dev->lock); Index: libgomp/oacc-parallel.c =================================================================== --- libgomp/oacc-parallel.c (revision 267226) +++ libgomp/oacc-parallel.c (working copy) @@ -208,8 +208,6 @@ GOACC_parallel_keyed (int device, void (*fn) (void } va_end (ap); - acc_dev->openacc.async_set_async_func (async); - if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) { k.host_start = (uintptr_t) fn; @@ -226,44 +224,29 @@ GOACC_parallel_keyed (int device, void (*fn) (void else tgt_fn = (void (*)) fn; - tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_OPENACC); + goacc_aq aq = get_goacc_asyncqueue (async); + tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, + true, GOMP_MAP_VARS_OPENACC); + devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start + tgt->list[i].key->tgt_offset + tgt->list[i].offset); - - acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, - async, dims, tgt); - - /* If running synchronously, unmap immediately. */ - bool copyfrom = true; - if (async_synchronous_p (async)) - gomp_unmap_vars (tgt, true); + if (aq == NULL) + { + acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, + dims, tgt); + /* If running synchronously, unmap immediately. */ + gomp_unmap_vars (tgt, true); + } else { - bool async_unmap = false; - for (size_t i = 0; i < tgt->list_count; i++) - { - splay_tree_key k = tgt->list[i].key; - if (k && k->refcount == 1) - { - async_unmap = true; - break; - } - } - if (async_unmap) - tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); - else - { - copyfrom = false; - gomp_unmap_vars (tgt, copyfrom); - } + acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, + dims, tgt, aq); + gomp_unmap_vars_async (tgt, true, aq); } - - acc_dev->openacc.async_set_async_func (acc_async_sync); } /* Legacy entry point, only provide host execution. */ @@ -372,8 +355,6 @@ GOACC_enter_exit_data (int device, size_t mapnum, finalize = true; } - acc_dev->openacc.async_set_async_func (async); - /* Determine if this is an "acc enter data". */ for (i = 0; i < mapnum; ++i) { @@ -441,7 +422,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, else { gomp_acc_insert_pointer (pointer, &hostaddrs[i], - &sizes[i], &kinds[i]); + &sizes[i], &kinds[i], async); /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and @@ -466,17 +447,17 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (acc_is_present (hostaddrs[i], sizes[i])) { if (finalize) - acc_delete_finalize (hostaddrs[i], sizes[i]); + acc_delete_finalize_async (hostaddrs[i], sizes[i], async); else - acc_delete (hostaddrs[i], sizes[i]); + acc_delete_async (hostaddrs[i], sizes[i], async); } break; case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: if (finalize) - acc_copyout_finalize (hostaddrs[i], sizes[i]); + acc_copyout_finalize_async (hostaddrs[i], sizes[i], async); else - acc_copyout (hostaddrs[i], sizes[i]); + acc_copyout_async (hostaddrs[i], sizes[i], async); break; default: gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -494,8 +475,6 @@ GOACC_enter_exit_data (int device, size_t mapnum, i += pointer - 1; } } - - acc_dev->openacc.async_set_async_func (acc_async_sync); } static void @@ -508,17 +487,22 @@ goacc_wait (int async, int num_waits, va_list *ap) { int qid = va_arg (*ap, int); - if (acc_async_test (qid)) + goacc_aq aq = get_goacc_asyncqueue (qid); + if (acc_dev->openacc.async.test_func (aq)) continue; - if (async == acc_async_sync) - acc_wait (qid); + acc_dev->openacc.async.synchronize_func (aq); else if (qid == async) - ;/* If we're waiting on the same asynchronous queue as we're - launching on, the queue itself will order work as - required, so there's no need to wait explicitly. */ + /* If we're waiting on the same asynchronous queue as we're + launching on, the queue itself will order work as + required, so there's no need to wait explicitly. */ + ; else - acc_dev->openacc.async_wait_async_func (qid, async); + { + goacc_aq aq2 = get_goacc_asyncqueue (async); + acc_dev->openacc.async.synchronize_func (aq); + acc_dev->openacc.async.serialize_func (aq, aq2); + } } } @@ -548,8 +532,6 @@ GOACC_update (int device, size_t mapnum, va_end (ap); } - acc_dev->openacc.async_set_async_func (async); - bool update_device = false; for (i = 0; i < mapnum; ++i) { @@ -589,7 +571,7 @@ GOACC_update (int device, size_t mapnum, /* Fallthru */ case GOMP_MAP_FORCE_TO: update_device = true; - acc_update_device (hostaddrs[i], sizes[i]); + acc_update_device_async (hostaddrs[i], sizes[i], async); break; case GOMP_MAP_FROM: @@ -601,7 +583,7 @@ GOACC_update (int device, size_t mapnum, /* Fallthru */ case GOMP_MAP_FORCE_FROM: update_device = false; - acc_update_self (hostaddrs[i], sizes[i]); + acc_update_self_async (hostaddrs[i], sizes[i], async); break; default: @@ -609,8 +591,6 @@ GOACC_update (int device, size_t mapnum, break; } } - - acc_dev->openacc.async_set_async_func (acc_async_sync); } void Index: libgomp/oacc-plugin.c =================================================================== --- libgomp/oacc-plugin.c (revision 267226) +++ libgomp/oacc-plugin.c (working copy) @@ -31,14 +31,10 @@ #include "oacc-int.h" void -GOMP_PLUGIN_async_unmap_vars (void *ptr, int async) +GOMP_PLUGIN_async_unmap_vars (void *ptr __attribute__((unused)), + int async __attribute__((unused))) { - struct target_mem_desc *tgt = ptr; - struct gomp_device_descr *devicep = tgt->device_descr; - - devicep->openacc.async_set_async_func (async); - gomp_unmap_vars (tgt, true); - devicep->openacc.async_set_async_func (acc_async_sync); + gomp_fatal ("invalid plugin function"); } /* Return the target-specific part of the TLS data for the current thread. */