Index: libgomp/libgomp.map =================================================================== --- libgomp/libgomp.map (revision 264192) +++ libgomp/libgomp.map (working copy) @@ -388,14 +388,48 @@ OACC_2.0.1 { OACC_2.5 { global: + acc_copyin_async; + acc_copyin_async_32_h_; + acc_copyin_async_64_h_; + acc_copyin_async_array_h_; + acc_copyout_async; + acc_copyout_async_32_h_; + acc_copyout_async_64_h_; + acc_copyout_async_array_h_; acc_copyout_finalize; acc_copyout_finalize_32_h_; acc_copyout_finalize_64_h_; acc_copyout_finalize_array_h_; + acc_copyout_finalize_async; + acc_copyout_finalize_async_32_h_; + acc_copyout_finalize_async_64_h_; + acc_copyout_finalize_async_array_h_; + acc_create_async; + acc_create_async_32_h_; + acc_create_async_64_h_; + acc_create_async_array_h_; + acc_delete_async; + acc_delete_async_32_h_; + acc_delete_async_64_h_; + acc_delete_async_array_h_; acc_delete_finalize; acc_delete_finalize_32_h_; acc_delete_finalize_64_h_; acc_delete_finalize_array_h_; + acc_delete_finalize_async; + acc_delete_finalize_async_32_h_; + acc_delete_finalize_async_64_h_; + acc_delete_finalize_async_array_h_; + acc_memcpy_from_device_async; + acc_memcpy_to_device_async; + acc_update_device_async; + acc_update_device_async_32_h_; + acc_update_device_async_64_h_; + acc_update_device_async_array_h_; + acc_update_self_async; + acc_update_self_async_32_h_; + acc_update_self_async_64_h_; + acc_update_self_async_array_h_; } OACC_2.0.1; GOACC_2.0 { Index: libgomp/oacc-mem.c =================================================================== --- libgomp/oacc-mem.c (revision 264192) +++ libgomp/oacc-mem.c (working copy) @@ -153,8 +153,9 @@ acc_free (void *d) gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); } -void -acc_memcpy_to_device (void *d, void *h, size_t s) +static void +memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, + const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained 
from a routine that did that. */ @@ -164,31 +165,49 @@ acc_free (void *d) if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - memmove (d, h, s); + if (from) + memmove (h, d, s); + else + memmove (d, h, s); return; } - if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s)) - gomp_fatal ("error in %s", __FUNCTION__); + if (async > acc_async_sync) + thr->dev->openacc.async_set_async_func (async); + + bool ret = (from + ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s) + : thr->dev->host2dev_func (thr->dev->target_id, d, h, s)); + + if (async > acc_async_sync) + thr->dev->openacc.async_set_async_func (acc_async_sync); + + if (!ret) + gomp_fatal ("error in %s", libfnname); } void -acc_memcpy_from_device (void *h, void *d, size_t s) +acc_memcpy_to_device (void *d, void *h, size_t s) { - /* No need to call lazy open here, as the device pointer must have - been obtained from a routine that did that. */ - struct goacc_thread *thr = goacc_thread (); + memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); +} - assert (thr && thr->dev); +void +acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) +{ + memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); +} - if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) - { - memmove (h, d, s); - return; - } +void +acc_memcpy_from_device (void *h, void *d, size_t s) +{ + memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); +} - if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s)) - gomp_fatal ("error in %s", __FUNCTION__); +void +acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) +{ + memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. 
Or NULL @@ -428,7 +447,7 @@ acc_unmap_data (void *h) #define FLAG_COPY (1 << 2) static void * -present_create_copy (unsigned f, void *h, size_t s) +present_create_copy (unsigned f, void *h, size_t s, int async) { void *d; splay_tree_key n; @@ -490,11 +509,17 @@ static void * gomp_mutex_unlock (&acc_dev->lock); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, GOMP_MAP_VARS_OPENACC); /* Initialize dynamic refcount. */ tgt->list[0].key->dynamic_refcount = 1; + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); + gomp_mutex_lock (&acc_dev->lock); d = tgt->to_free; @@ -510,19 +535,32 @@ static void * void * acc_create (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync); } +void +acc_create_async (void *h, size_t s, int async) +{ + present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async); +} + void * acc_copyin (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, + acc_async_sync); } +void +acc_copyin_async (void *h, size_t s, int async) +{ + present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async); +} + void * acc_present_or_create (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync); } /* acc_pcreate is acc_present_or_create by a different name. 
*/ @@ -539,7 +577,8 @@ acc_pcreate (void *h, size_t s) void * acc_present_or_copyin (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, + acc_async_sync); } /* acc_pcopyin is acc_present_or_copyin by a different name. */ @@ -557,7 +596,7 @@ acc_pcopyin (void *h, size_t s) #define FLAG_FINALIZE (1 << 1) static void -delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) +delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) { size_t host_size; splay_tree_key n; @@ -633,7 +672,13 @@ static void } if (f & FLAG_COPYOUT) - acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + { + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); + } gomp_remove_var (acc_dev, n); } @@ -644,41 +689,54 @@ static void void acc_delete (void *h , size_t s) { - delete_copyout (0, h, s, __FUNCTION__); + delete_copyout (0, h, s, acc_async_sync, __FUNCTION__); } void +acc_delete_async (void *h , size_t s, int async) +{ + delete_copyout (0, h, s, async, __FUNCTION__); +} + +void acc_delete_finalize (void *h , size_t s) { - delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__); } void acc_delete_finalize_async (void *h , size_t s, int async) { - delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__); } void acc_copyout (void *h, size_t s) { - delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); + delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__); } void +acc_copyout_async (void *h, size_t s, int async) +{ + delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__); +} + +void acc_copyout_finalize (void *h, size_t s) { - 
delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync, + __FUNCTION__); } void acc_copyout_finalize_async (void *h, size_t s, int async) { - delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__); } static void -update_dev_host (int is_dev, void *h, size_t s) +update_dev_host (int is_dev, void *h, size_t s, int async) { splay_tree_key n; void *d; @@ -704,11 +762,17 @@ static void d = (void *) (n->tgt->tgt_start + n->tgt_offset + (uintptr_t) h - n->host_start); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + if (is_dev) acc_dev->host2dev_func (acc_dev->target_id, d, h, s); else acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); + gomp_mutex_unlock (&acc_dev->lock); } @@ -715,16 +779,28 @@ static void void acc_update_device (void *h, size_t s) { - update_dev_host (1, h, s); + update_dev_host (1, h, s, acc_async_sync); } void +acc_update_device_async (void *h, size_t s, int async) +{ + update_dev_host (1, h, s, async); +} + +void acc_update_self (void *h, size_t s) { - update_dev_host (0, h, s); + update_dev_host (0, h, s, acc_async_sync); } void +acc_update_self_async (void *h, size_t s, int async) +{ + update_dev_host (0, h, s, async); +} + +void gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds) { Index: libgomp/openacc.f90 =================================================================== --- libgomp/openacc.f90 (revision 264192) +++ libgomp/openacc.f90 (working copy) @@ -332,6 +332,150 @@ module openacc_internal logical acc_is_present_array_h type (*), dimension (..), contiguous :: a end function + + subroutine acc_copyin_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ 
ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyin_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyin_async_array_h (a, async) + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine + + subroutine acc_create_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_create_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_create_async_array_h (a, async) + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_async_array_h (a, async) + use openacc_kinds, only: 
acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_async_array_h (a, async) + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_device_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_device_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_device_async_array_h (a, async) + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_self_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_self_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t 
+ use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_self_async_array_h (a, async) + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + end subroutine end interface interface @@ -510,6 +654,60 @@ module openacc_internal type (*), dimension (*) :: a integer (c_size_t), value :: len end function + + subroutine acc_copyin_async_l (a, len, async) & + bind (C, name = "acc_copyin_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine + + subroutine acc_create_async_l (a, len, async) & + bind (C, name = "acc_create_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine + + subroutine acc_copyout_async_l (a, len, async) & + bind (C, name = "acc_copyout_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine + + subroutine acc_delete_async_l (a, len, async) & + bind (C, name = "acc_delete_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine + + subroutine acc_update_device_async_l (a, len, async) & + bind (C, name = "acc_update_device_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine + + subroutine 
acc_update_self_async_l (a, len, async) & + bind (C, name = "acc_update_self_async") + use iso_c_binding, only: c_size_t, c_int + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_size_t), value :: len + integer (c_int), value :: async + end subroutine end interface end module @@ -529,6 +727,8 @@ module openacc public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete public :: acc_update_device, acc_update_self, acc_is_present + public :: acc_copyin_async, acc_create_async, acc_copyout_async + public :: acc_delete_async, acc_update_device_async, acc_update_self_async integer, parameter :: openacc_version = 201306 @@ -694,6 +894,42 @@ module openacc ! acc_memcpy_to_device: Only available in C/C++ ! acc_memcpy_from_device: Only available in C/C++ + interface acc_copyin_async + procedure :: acc_copyin_async_32_h + procedure :: acc_copyin_async_64_h + procedure :: acc_copyin_async_array_h + end interface + + interface acc_create_async + procedure :: acc_create_async_32_h + procedure :: acc_create_async_64_h + procedure :: acc_create_async_array_h + end interface + + interface acc_copyout_async + procedure :: acc_copyout_async_32_h + procedure :: acc_copyout_async_64_h + procedure :: acc_copyout_async_array_h + end interface + + interface acc_delete_async + procedure :: acc_delete_async_32_h + procedure :: acc_delete_async_64_h + procedure :: acc_delete_async_array_h + end interface + + interface acc_update_device_async + procedure :: acc_update_device_async_32_h + procedure :: acc_update_device_async_64_h + procedure :: acc_update_device_async_array_h + end interface + + interface acc_update_self_async + procedure :: acc_update_self_async_32_h + procedure :: acc_update_self_async_64_h + procedure :: acc_update_self_async_array_h + end interface + end module function acc_get_num_devices_h (d) @@ -1078,3 +1314,189 @@ function acc_is_present_array_h (a) type (*), 
dimension (..), contiguous :: a acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1 end function + +subroutine acc_copyin_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_copyin_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_copyin_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_copyin_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_copyin_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_copyin_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + call acc_copyin_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine + +subroutine acc_create_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_create_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_create_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_create_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + 
integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_create_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_create_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + call acc_create_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine + +subroutine acc_copyout_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_copyout_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_copyout_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_copyout_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_copyout_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_copyout_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + call acc_copyout_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine + +subroutine acc_delete_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_delete_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer 
(acc_handle_kind) async + call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_delete_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_delete_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_delete_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_delete_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + call acc_delete_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine + +subroutine acc_update_device_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_update_device_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_update_device_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_update_device_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_update_device_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_update_device_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension 
(..), contiguous :: a + integer (acc_handle_kind) async + call acc_update_device_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine + +subroutine acc_update_self_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t, c_size_t, c_int + use openacc_internal, only: acc_update_self_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_update_self_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t, c_size_t, c_int + use openacc_internal, only: acc_update_self_async_l + use openacc_kinds, only: acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int)) +end subroutine + +subroutine acc_update_self_async_array_h (a, async) + use iso_c_binding, only: c_int + use openacc_internal, only: acc_update_self_async_l + use openacc_kinds, only: acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async + call acc_update_self_async_l (a, sizeof (a), int (async, kind = c_int)) +end subroutine Index: libgomp/openacc.h =================================================================== --- libgomp/openacc.h (revision 264192) +++ libgomp/openacc.h (working copy) @@ -115,6 +115,16 @@ void acc_copyout_finalize_async (void *, size_t, i void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW; void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; +/* Async functions, specified in OpenACC 2.5. 
*/ +void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_create_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_delete_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; +void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; + /* CUDA-specific routines. */ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; void *acc_get_current_cuda_context (void) __GOACC_NOTHROW; Index: libgomp/openacc_lib.h =================================================================== --- libgomp/openacc_lib.h (revision 264192) +++ libgomp/openacc_lib.h (working copy) @@ -403,3 +403,159 @@ ! acc_memcpy_to_device: Only available in C/C++ ! acc_memcpy_from_device: Only available in C/C++ + + interface acc_copyin_async + subroutine acc_copyin_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyin_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyin_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface + + interface acc_create_async + subroutine acc_create_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer 
(acc_handle_kind) async + end subroutine + + subroutine acc_create_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_create_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface + + interface acc_copyout_async + subroutine acc_copyout_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_copyout_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface + + interface acc_delete_async + subroutine acc_delete_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_delete_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface + + interface 
acc_update_device_async + subroutine acc_update_device_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_device_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_device_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface + + interface acc_update_self_async + subroutine acc_update_self_async_32_h (a, len, async) + use iso_c_binding, only: c_int32_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int32_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_self_async_64_h (a, len, async) + use iso_c_binding, only: c_int64_t + import acc_handle_kind + !GCC$ ATTRIBUTES NO_ARG_CHECK :: a + type (*), dimension (*) :: a + integer (c_int64_t) len + integer (acc_handle_kind) async + end subroutine + + subroutine acc_update_self_async_array_h (a, async_) + import acc_handle_kind + type (*), dimension (..), contiguous :: a + integer (acc_handle_kind) async_ + end subroutine + end interface Index: libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (nonexistent) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (working copy) @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + int
async = 8; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + acc_copyin_async (h, N, async); + + memset (h, 0, N); + + acc_wait (async); + + acc_copyout_async (h, N, async + 1); + + acc_wait (async + 1); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (nonexistent) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (working copy) @@ -0,0 +1,45 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include <string.h> +#include <stdlib.h> +#include <openacc.h> + +int +main (int argc, char **argv) +{ + const int N = 256; + int i, q = 5; + unsigned char *h, *g; + void *d; + + h = (unsigned char *) malloc (N); + g = (unsigned char *) malloc (N); + for (i = 0; i < N; i++) + { + g[i] = i; + } + + acc_create_async (h, N, q); + + acc_memcpy_to_device_async (acc_deviceptr (h), g, N, q); + memset (&h[0], 0, N); + + acc_wait (q); + + acc_update_self_async (h, N, q + 1); + acc_delete_async (h, N, q + 1); + + acc_wait (q + 1); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + free (g); + + return 0; +} Index: libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (nonexistent) +++ libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (working copy) @@ -0,0 +1,57 @@ +! { dg-do run } +!
{ dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } + +program main + use openacc + implicit none + + integer, parameter :: N = 256 + integer, allocatable :: h(:) + integer :: i + integer :: async = 5 + + allocate (h(N)) + + do i = 1, N + h(i) = i + end do + + call acc_copyin (h) + + do i = 1, N + h(i) = i + i + end do + + call acc_update_device_async (h, sizeof (h), async) + + if (acc_is_present (h) .neqv. .TRUE.) call abort + + h(:) = 0 + + call acc_copyout_async (h, sizeof (h), async) + + call acc_wait (async) + + do i = 1, N + if (h(i) /= i + i) call abort + end do + + call acc_copyin (h, sizeof (h)) + + h(:) = 0 + + call acc_update_self_async (h, sizeof (h), async) + + if (acc_is_present (h) .neqv. .TRUE.) call abort + + do i = 1, N + if (h(i) /= i + i) call abort + end do + + call acc_delete_async (h, async) + + call acc_wait (async) + + if (acc_is_present (h) .neqv. .FALSE.) call abort + +end program