diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index a4e1863..68aaf19 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -27,10 +27,111 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <string.h>
 #include "openacc.h"
 #include "libgomp.h"
 #include "oacc-int.h"
 
+/* Return the calling thread's OpenACC state.  Abort with gomp_fatal if the
+   thread has no such state or has no device attached.  */
+
+static struct goacc_thread *
+get_goacc_thread (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  return thr;
+}
+
+/* Return the device attached to the calling thread, with the same checks
+   as get_goacc_thread.  */
+
+static struct gomp_device_descr *
+get_goacc_thread_device (void)
+{
+  return get_goacc_thread ()->dev;
+}
+
+/* Map ASYNC to an async queue of THR's device.  acc_async_noval selects
+   THR's default_async, acc_async_sync has no queue (NULL is returned), and
+   any other negative value is fatal.  When ASYNC has no queue yet, one is
+   constructed if CREATE is true; otherwise NULL is returned.  */
+
+attribute_hidden struct goacc_asyncqueue *
+lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
+{
+  /* The special value acc_async_noval (-1) maps to the thread-specific
+     default async stream.  */
+  if (async == acc_async_noval)
+    async = thr->default_async;
+
+  if (async == acc_async_sync)
+    return NULL;
+
+  if (async < 0)
+    gomp_fatal ("bad async %d", async);
+
+  struct gomp_device_descr *dev = thr->dev;
+  goacc_aq ret_aq = NULL;
+
+  /* Another thread may be reallocating the queue table, so every access to
+     it (the lookup-only test and the final fetch included) is made with
+     the lock held.  */
+  gomp_mutex_lock (&dev->openacc.async.lock);
+
+  if (!create
+      && (async >= dev->openacc.async.nasyncqueue
+          || !dev->openacc.async.asyncqueue[async]))
+    goto end;
+
+  if (async >= dev->openacc.async.nasyncqueue)
+    {
+      int diff = async + 1 - dev->openacc.async.nasyncqueue;
+      dev->openacc.async.asyncqueue
+        = gomp_realloc (dev->openacc.async.asyncqueue,
+                        sizeof (goacc_aq) * (async + 1));
+      memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
+              0, sizeof (goacc_aq) * diff);
+      dev->openacc.async.nasyncqueue = async + 1;
+    }
+
+  if (!dev->openacc.async.asyncqueue[async])
+    {
+      goacc_aq new_aq = dev->openacc.async.construct_func ();
+
+      /* Only link queues that were really constructed: a NULL result is
+         neither recorded nor put on the active list, so repeated lookups
+         cannot grow the list with unusable entries.  */
+      if (new_aq)
+        {
+          goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
+          n->aq = new_aq;
+          n->next = dev->openacc.async.active;
+          dev->openacc.async.active = n;
+          dev->openacc.async.asyncqueue[async] = new_aq;
+        }
+    }
+
+  ret_aq = dev->openacc.async.asyncqueue[async];
+
+ end:
+  gomp_mutex_unlock (&dev->openacc.async.lock);
+  return ret_aq;
+}
+
+/* Return the async queue for ASYNC on the calling thread's device, creating
+   it if necessary.  Fatal if the thread has no device.  */
+
+attribute_hidden struct goacc_asyncqueue *
+get_goacc_asyncqueue (int async)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+  return lookup_goacc_asyncqueue (thr, true, async);
+}
+
 int
 acc_async_test (int async)
 {
@@ -42,18 +143,28 @@ acc_async_test (int async)
   if (!thr || !thr->dev)
     gomp_fatal ("no device active");
 
-  return thr->dev->openacc.async_test_func (async);
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
+  /* No queue (acc_async_sync, or none constructed): nothing is pending.  */
+  if (!aq)
+    return 1;
+  return thr->dev->openacc.async.test_func (aq);
 }
 
 int
 acc_async_test_all (void)
 {
-  struct goacc_thread *thr = goacc_thread ();
-
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  return thr->dev->openacc.async_test_all_func ();
+  int ret = 1;
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    if (!thr->dev->openacc.async.test_func (l->aq))
+      {
+        ret = 0;
+        break;
+      }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+  return ret;
 }
 
 void
@@ -62,12 +173,12 @@ acc_wait (int async)
   if (!async_valid_p (async))
     gomp_fatal ("invalid async argument: %d", async);
 
-  struct goacc_thread *thr = goacc_thread ();
-
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  thr->dev->openacc.async_wait_func (async);
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
+  /* No queue (acc_async_sync, or none constructed): nothing to wait for.  */
+  if (aq)
+    thr->dev->openacc.async.synchronize_func (aq);
 }
 
 /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.
*/
@@ -84,23 +166,35 @@ acc_async_wait (int async)
 void
 acc_wait_async (int async1, int async2)
 {
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  /* AQ2 is created on demand; AQ1 is only looked up, and it is fatal if no
+     queue exists for ASYNC1.  */
+  goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
+  goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
+  if (!aq1)
+    gomp_fatal ("invalid async 1");
+  if (aq1 == aq2)
+    gomp_fatal ("identical parameters");
 
-  thr->dev->openacc.async_wait_async_func (async1, async2);
+  thr->dev->openacc.async.synchronize_func (aq1);
+  /* AQ2 is NULL when ASYNC2 resolves to acc_async_sync.  The host has just
+     waited for AQ1, so there is no second queue to order against.  */
+  if (aq2)
+    thr->dev->openacc.async.serialize_func (aq1, aq2);
 }
 
 void
 acc_wait_all (void)
 {
-  struct goacc_thread *thr = goacc_thread ();
-
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  struct gomp_device_descr *dev = get_goacc_thread_device ();
 
-  thr->dev->openacc.async_wait_all_func ();
+  /* Synchronize every queue on the device's active list; the list is walked
+     with the async lock held.  */
+  gomp_mutex_lock (&dev->openacc.async.lock);
+  for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next)
+    dev->openacc.async.synchronize_func (l->aq);
+  gomp_mutex_unlock (&dev->openacc.async.lock);
 }
 
 /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.
*/
@@ -120,10 +207,118 @@ acc_wait_all_async (int async)
   if (!async_valid_p (async))
     gomp_fatal ("invalid async argument: %d", async);
 
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
 
-  thr->dev->openacc.async_wait_all_async_func (async);
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    {
+      thr->dev->openacc.async.synchronize_func (l->aq);
+      if (waiting_queue)
+        thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
+    }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+}
+
+/* Return the calling thread's default async value.  */
+
+int
+acc_get_default_async (void)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+  return thr->default_async;
+}
+
+/* Set the calling thread's default async value to ASYNC.  Values below
+   acc_async_sync are rejected with gomp_fatal.  */
+
+void
+acc_set_default_async (int async)
+{
+  if (async < acc_async_sync)
+    gomp_fatal ("invalid async argument: %d", async);
+
+  struct goacc_thread *thr = get_goacc_thread ();
+  thr->default_async = async;
+}
+
+/* Queue callback: PTR is a target_mem_desc.  Drop one reference, calling
+   gomp_unmap_tgt when no other reference remains.  */
+
+static void
+goacc_async_unmap_tgt (void *ptr)
+{
+  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+
+  if (tgt->refcount > 1)
+    tgt->refcount--;
+  else
+    gomp_unmap_tgt (tgt);
+}
+
+/* Take an extra reference on TGT, call gomp_unmap_vars_async on it for AQ,
+   then queue goacc_async_unmap_tgt on AQ to drop that reference.  */
+
+attribute_hidden void
+goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
+                                struct goacc_asyncqueue *aq)
+{
+  struct gomp_device_descr *devicep = tgt->device_descr;
+
+  /* Increment reference to delay freeing of device memory until callback
+     has triggered.  */
+  tgt->refcount++;
+  gomp_unmap_vars_async (tgt, true, aq);
+  devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
+                                              (void *) tgt);
+}
+
+/* Free PTR: immediately when AQ is NULL, otherwise via a callback queued on
+   AQ.  */
+
+attribute_hidden void
+goacc_async_free (struct gomp_device_descr *devicep,
+                  struct goacc_asyncqueue *aq, void *ptr)
+{
+  if (!aq)
+    free (ptr);
+  else
+    devicep->openacc.async.queue_callback_func (aq, free, ptr);
+}
+
+/* Initialize DEVICEP's async queue bookkeeping to the empty state.  */
+
+attribute_hidden void
+goacc_init_asyncqueues (struct gomp_device_descr *devicep)
+{
+  gomp_mutex_init (&devicep->openacc.async.lock);
+  devicep->openacc.async.nasyncqueue = 0;
+  devicep->openacc.async.asyncqueue = NULL;
+  devicep->openacc.async.active = NULL;
+}
+
+/* Destroy every queue on DEVICEP's active list and release the bookkeeping.
+   Return the conjunction of the destruct_func results.  */
+
+attribute_hidden bool
+goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
+{
+  bool ret = true;
+  if (devicep->openacc.async.nasyncqueue > 0)
+    {
+      goacc_aq_list next;
+      for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
+        {
+          ret &= devicep->openacc.async.destruct_func (l->aq);
+          next = l->next;
+          free (l);
+        }
+      free (devicep->openacc.async.asyncqueue);
+      devicep->openacc.async.nasyncqueue = 0;
+      devicep->openacc.async.asyncqueue = NULL;
+      devicep->openacc.async.active = NULL;
+    }
+  gomp_mutex_destroy (&devicep->openacc.async.lock);
+  return ret;
 }
diff --git a/libgomp/oacc-cuda.c b/libgomp/oacc-cuda.c
index 20774c1..0a842ea 100644
--- a/libgomp/oacc-cuda.c
+++ b/libgomp/oacc-cuda.c
@@ -62,7 +62,11 @@ acc_get_cuda_stream (int async)
     return NULL;
 
   if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
-    return thr->dev->openacc.cuda.get_stream_func (async);
+    {
+      goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
+      if (aq)
+        return thr->dev->openacc.cuda.get_stream_func (aq);
+    }
 
   return NULL;
 }
@@ -79,8 +83,19 @@ acc_set_cuda_stream (int async, void *stream)
 
   thr = goacc_thread ();
 
+  int ret = -1;
   if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func)
-    return thr->dev->openacc.cuda.set_stream_func (async, stream);
-
-  return -1;
+    {
+      goacc_aq aq = get_goacc_asyncqueue (async);
+      /* acc_async_sync has no queue, so there is no stream to replace;
+         report failure rather than handing NULL to the plugin.  */
+      if (aq)
+        {
+          gomp_mutex_lock (&thr->dev->openacc.async.lock);
+          ret = thr->dev->openacc.cuda.set_stream_func (aq, stream);
+          gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+        }
+    }
+
+  return ret;
 }
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 2de3c37..53658c8 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -140,55 +140,86 @@ host_openacc_exec (void (*fn) (void *),
                    size_t mapnum __attribute__ ((unused)),
                    void **hostaddrs,
                    void **devaddrs __attribute__ ((unused)),
-                   int async __attribute__ ((unused)),
-                   unsigned *dims __attribute ((unused)),
+                   unsigned *dims __attribute__ ((unused)),
                    void *targ_mem_desc __attribute__ ((unused)))
 {
   fn (hostaddrs);
 }
 
 static void
-host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)),
-                                     int async __attribute__ ((unused)))
+host_openacc_async_exec (void (*fn) (void *),
+                         size_t mapnum __attribute__ ((unused)),
+                         void **hostaddrs,
+                         void **devaddrs __attribute__ ((unused)),
+                         unsigned *dims __attribute__ ((unused)),
+                         void *targ_mem_desc __attribute__ ((unused)),
+                         struct goacc_asyncqueue *aq __attribute__ ((unused)))
 {
+  fn (hostaddrs);
 }
 
 static int
-host_openacc_async_test (int async __attribute__ ((unused)))
+host_openacc_async_test (struct goacc_asyncqueue *aq __attribute__ ((unused)))
 {
   return 1;
 }
 
-static int
-host_openacc_async_test_all (void)
+static void
+host_openacc_async_synchronize (struct goacc_asyncqueue *aq
+                                __attribute__ ((unused)))
 {
-  return 1;
 }
 
 static void
-host_openacc_async_wait (int async __attribute__ ((unused)))
+host_openacc_async_serialize (struct goacc_asyncqueue *aq1
+                              __attribute__ ((unused)),
+                              struct goacc_asyncqueue *aq2
+                              __attribute__ ((unused)))
 {
 }
 
-static void
-host_openacc_async_wait_async (int async1 __attribute__ ((unused)),
-                               int async2 __attribute__ ((unused)))
+static bool
+host_openacc_async_host2dev (int ord __attribute__ ((unused)),
+                             void *dst __attribute__ ((unused)),
+                             const void *src __attribute__ ((unused)),
+                             size_t n __attribute__ ((unused)),
+                             struct goacc_asyncqueue *aq
+                             __attribute__ ((unused)))
 {
+  return true;
 }
 
-static void
-host_openacc_async_wait_all (void)
+static bool
+host_openacc_async_dev2host (int ord __attribute__ ((unused)),
+                             void *dst __attribute__ ((unused)),
+                             const void *src __attribute__ ((unused)),
+                             size_t n __attribute__ ((unused)),
+                             struct goacc_asyncqueue *aq
+                             __attribute__ ((unused)))
 {
+  return true;
 }
 
 static void
-host_openacc_async_wait_all_async (int async __attribute__ ((unused)))
+host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
+                                   __attribute__ ((unused)),
+                                   void (*callback_fn)(void *)
+                                   __attribute__ ((unused)),
+                                   void *userptr __attribute__ ((unused)))
 {
 }
 
-static void
-host_openacc_async_set_async (int async __attribute__ ((unused)))
+static struct goacc_asyncqueue *
+host_openacc_async_construct (void)
 {
+  return NULL;
+}
+
+static bool
+host_openacc_async_destruct (struct goacc_asyncqueue *aq
+                             __attribute__ ((unused)))
+{
+  return true;
 }
 
 static void *
@@ -235,15 +266,17 @@ static struct gomp_device_descr host_dispatch =
 
       .exec_func = host_openacc_exec,
 
-      .register_async_cleanup_func = host_openacc_register_async_cleanup,
-
-      .async_test_func = host_openacc_async_test,
-      .async_test_all_func = host_openacc_async_test_all,
-      .async_wait_func = host_openacc_async_wait,
-      .async_wait_async_func = host_openacc_async_wait_async,
-      .async_wait_all_func = host_openacc_async_wait_all,
-      .async_wait_all_async_func = host_openacc_async_wait_all_async,
-      .async_set_async_func = host_openacc_async_set_async,
+      .async = {
+        .construct_func = host_openacc_async_construct,
+        .destruct_func = host_openacc_async_destruct,
+        .test_func = host_openacc_async_test,
+        .synchronize_func = host_openacc_async_synchronize,
+        .serialize_func = host_openacc_async_serialize,
+        .queue_callback_func = host_openacc_async_queue_callback,
+        .exec_func = host_openacc_async_exec,
+        .dev2host_func = host_openacc_async_dev2host,
+        .host2dev_func = host_openacc_async_host2dev,
+      },
 
       .create_thread_data_func = host_openacc_create_thread_data,
       .destroy_thread_data_func = host_openacc_destroy_thread_data,
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 8db24b1..2c2f91c 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -309,7 +309,7 @@ acc_shutdown_1 (acc_device_t d)
       if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
         {
           devices_active = true;
-          ret &= acc_dev->fini_device_func (acc_dev->target_id);
+          ret &= gomp_fini_device (acc_dev);
           acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
         }
       gomp_mutex_unlock (&acc_dev->lock);
@@ -426,8 +426,8 @@ goacc_attach_host_thread_to_device (int ord)
 
   thr->target_tls
     = acc_dev->openacc.create_thread_data_func (ord);
-  
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
+
+  thr->default_async = acc_async_default;
 }
 
 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 72414b7..07a2524 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -172,18 +172,11 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
       return;
     }
 
-  if (async > acc_async_sync)
-    thr->dev->openacc.async_set_async_func (async);
-
-  bool ret = (from
-              ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
-              : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
-
-  if (async > acc_async_sync)
-    thr->dev->openacc.async_set_async_func (acc_async_sync);
-
-  if (!ret)
-    gomp_fatal ("error in %s", libfnname);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+  if (from)
+    gomp_copy_dev2host (thr->dev, aq, h, d, s);
+  else
+    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf?  */ NULL);
 }
 
 void
@@ -509,17 +502,13 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
 
       gomp_mutex_unlock (&acc_dev->lock);
 
-      if (async > acc_async_sync)
-        acc_dev->openacc.async_set_async_func (async);
+      goacc_aq aq = get_goacc_asyncqueue (async);
 
-      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
-                           GOMP_MAP_VARS_OPENACC);
+      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
+                                 &kinds, true, GOMP_MAP_VARS_OPENACC);
       /* Initialize dynamic refcount.  */
       tgt->list[0].key->dynamic_refcount = 1;
 
-      if (async > acc_async_sync)
-        acc_dev->openacc.async_set_async_func (acc_async_sync);
-
       gomp_mutex_lock (&acc_dev->lock);
 
       d = tgt->to_free;
@@ -673,13 +662,9 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 
       if (f & FLAG_COPYOUT)
         {
-          if (async > acc_async_sync)
-            acc_dev->openacc.async_set_async_func (async);
-          acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
-          if (async > acc_async_sync)
-            acc_dev->openacc.async_set_async_func (acc_async_sync);
+          goacc_aq aq = get_goacc_asyncqueue (async);
+          gomp_copy_dev2host (acc_dev, aq, h, d, s);
         }
-
       gomp_remove_var (acc_dev, n);
     }
 
@@ -762,16 +747,12 @@ update_dev_host (int is_dev, void *h, size_t s, int async)
   d = (void *) (n->tgt->tgt_start + n->tgt_offset
                 + (uintptr_t) h - n->host_start);
 
-  if (async > acc_async_sync)
-    acc_dev->openacc.async_set_async_func (async);
+  goacc_aq aq = get_goacc_asyncqueue (async);
 
   if (is_dev)
-    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
+    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf?  */ NULL);
   else
-    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
-
-  if (async > acc_async_sync)
-    acc_dev->openacc.async_set_async_func (acc_async_sync);
+    gomp_copy_dev2host (acc_dev, aq, h, d, s);
 
   gomp_mutex_unlock (&acc_dev->lock);
 }
@@ -802,7 +783,7 @@ acc_update_self_async (void *h, size_t s, int async)
 
 void
 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
-                         void *kinds)
+                         void *kinds, int async)
 {
   struct target_mem_desc *tgt;
   struct goacc_thread *thr = goacc_thread ();
@@ -832,8 +813,9 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
     }
 
   gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
-                       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
+                             NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
   gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
 
   /* Initialize dynamic refcount.  */
@@ -927,7 +909,10 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
       if (async < acc_async_noval)
         gomp_unmap_vars (t, true);
       else
-        t->device_descr->openacc.register_async_cleanup_func (t, async);
+        {
+          goacc_aq aq = get_goacc_asyncqueue (async);
+          goacc_async_copyout_unmap_vars (t, aq);
+        }
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index bfe8876..07d0338 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -212,8 +212,6 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
     }
   va_end (ap);
 
-  acc_dev->openacc.async_set_async_func (async);
-
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -230,43 +228,28 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
   else
     tgt_fn = (void (*)) fn;
 
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
-                       GOMP_MAP_VARS_OPENACC);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
+  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
+                             true, GOMP_MAP_VARS_OPENACC);
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
                             + tgt->list[i].key->tgt_offset);
-
-  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
-                              async, dims, tgt);
-
-  /* If running synchronously, unmap immediately.  */
-  bool copyfrom = true;
-  if (async_synchronous_p (async))
-    gomp_unmap_vars (tgt, true);
+  if (aq == NULL)
+    {
+      acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+                                  dims, tgt);
+      /* If running synchronously, unmap immediately.  */
+      gomp_unmap_vars (tgt, true);
+    }
   else
     {
-      bool async_unmap = false;
-      for (size_t i = 0; i < tgt->list_count; i++)
-        {
-          splay_tree_key k = tgt->list[i].key;
-          if (k && k->refcount == 1)
-            {
-              async_unmap = true;
-              break;
-            }
-        }
-      if (async_unmap)
-        tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
-      else
-        {
-          copyfrom = false;
-          gomp_unmap_vars (tgt, copyfrom);
-        }
+      acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+                                        dims, tgt, aq);
+      goacc_async_copyout_unmap_vars (tgt, aq);
     }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 /* Legacy entry point, only provide host execution.  */
@@ -377,8 +360,6 @@ GOACC_enter_exit_data (int device, size_t mapnum,
         finalize = true;
     }
 
-  acc_dev->openacc.async_set_async_func (async);
-
   /* Determine if this is an "acc enter data".  */
   for (i = 0; i < mapnum; ++i)
     {
@@ -450,7 +431,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
           else
             {
               gomp_acc_insert_pointer (pointer, &hostaddrs[i],
-                                       &sizes[i], &kinds[i]);
+                                       &sizes[i], &kinds[i], async);
               /* Increment 'i' by two because OpenACC requires fortran
                  arrays to be contiguous, so each PSET is associated with
                  one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
@@ -475,17 +456,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
                 if (acc_is_present (hostaddrs[i], sizes[i]))
                   {
                     if (finalize)
-                      acc_delete_finalize (hostaddrs[i], sizes[i]);
+                      acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
                     else
-                      acc_delete (hostaddrs[i], sizes[i]);
+                      acc_delete_async (hostaddrs[i], sizes[i], async);
                   }
                 break;
               case GOMP_MAP_FROM:
               case GOMP_MAP_FORCE_FROM:
                 if (finalize)
-                  acc_copyout_finalize (hostaddrs[i], sizes[i]);
+                  acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
                 else
-                  acc_copyout (hostaddrs[i], sizes[i]);
+                  acc_copyout_async (hostaddrs[i], sizes[i], async);
                 break;
               default:
                 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -503,8 +484,6 @@ GOACC_enter_exit_data (int device, size_t mapnum,
             i += pointer - 1;
           }
       }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 static void
@@ -517,17 +496,22 @@ goacc_wait (int async, int num_waits, va_list *ap)
     {
       int qid = va_arg (*ap, int);
 
-      if (acc_async_test (qid))
+      goacc_aq aq = get_goacc_asyncqueue (qid);
+      if (!aq || acc_dev->openacc.async.test_func (aq))
         continue;
-
       if (async == acc_async_sync)
-        acc_wait (qid);
+        acc_dev->openacc.async.synchronize_func (aq);
       else if (qid == async)
-        ;/* If we're waiting on the same asynchronous queue as we're
-            launching on, the queue itself will order work as
-            required, so there's no need to wait explicitly.  */
+        /* If we're waiting on the same asynchronous queue as we're
+           launching on, the queue itself will order work as
+           required, so there's no need to wait explicitly.  */
+        ;
       else
-        acc_dev->openacc.async_wait_async_func (qid, async);
+        {
+          goacc_aq aq2 = get_goacc_asyncqueue (async);
+          acc_dev->openacc.async.synchronize_func (aq);
+          acc_dev->openacc.async.serialize_func (aq, aq2);
+        }
     }
 }
 
@@ -559,8 +543,6 @@ GOACC_update (int device, size_t mapnum,
   else if (num_waits == acc_async_noval)
     acc_wait_all_async (async);
 
-  acc_dev->openacc.async_set_async_func (async);
-
   bool update_device = false;
   for (i = 0; i < mapnum; ++i)
     {
@@ -600,7 +582,7 @@ GOACC_update (int device, size_t mapnum,
           /* Fallthru  */
         case GOMP_MAP_FORCE_TO:
           update_device = true;
-          acc_update_device (hostaddrs[i], sizes[i]);
+          acc_update_device_async (hostaddrs[i], sizes[i], async);
           break;
 
         case GOMP_MAP_FROM:
@@ -612,7 +594,7 @@ GOACC_update (int device, size_t mapnum,
           /* Fallthru  */
         case GOMP_MAP_FORCE_FROM:
           update_device = false;
-          acc_update_self (hostaddrs[i], sizes[i]);
+          acc_update_self_async (hostaddrs[i], sizes[i], async);
           break;
 
         default:
@@ -620,8 +602,6 @@ GOACC_update (int device, size_t mapnum,
           break;
         }
     }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 void
@@ -638,7 +618,7 @@ GOACC_wait (int async, int num_waits, ...)
   else if (async == acc_async_sync)
     acc_wait_all ();
   else if (async == acc_async_noval)
-    goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
+    acc_wait_all_async (async);
 }
 
 int
diff --git a/libgomp/oacc-plugin.c b/libgomp/oacc-plugin.c
index c04db90..a114cc7 100644
--- a/libgomp/oacc-plugin.c
+++ b/libgomp/oacc-plugin.c
@@ -30,17 +30,6 @@
 #include "oacc-plugin.h"
 #include "oacc-int.h"
 
-void
-GOMP_PLUGIN_async_unmap_vars (void *ptr, int async)
-{
-  struct target_mem_desc *tgt = ptr;
-  struct gomp_device_descr *devicep = tgt->device_descr;
-
-  devicep->openacc.async_set_async_func (async);
-  gomp_unmap_vars (tgt, true);
-  devicep->openacc.async_set_async_func (acc_async_sync);
-}
-
 /* Return the target-specific part of the TLS data for the current thread.  */
 
 void *
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index f61bb77..ede59d7 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -63,6 +63,7 @@ typedef enum acc_device_t {
 
 typedef enum acc_async_t {
   /* Keep in sync with include/gomp-constants.h.  */
+  acc_async_default = 0,
   acc_async_noval = -1,
   acc_async_sync = -2
 } acc_async_t;
@@ -72,6 +73,8 @@ void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
 acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
 void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
 int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
+void acc_set_default_async (int) __GOACC_NOTHROW;
+int acc_get_default_async (void) __GOACC_NOTHROW;
 int acc_async_test (int) __GOACC_NOTHROW;
 int acc_async_test_all (void) __GOACC_NOTHROW;
 void acc_wait (int) __GOACC_NOTHROW;