public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4] Async related additions to OpenACC runtime library
@ 2017-02-13 10:15 Chung-Lin Tang
  2017-02-14 11:29 ` Thomas Schwinge
  2018-11-18  1:37 ` OpenACC ICV acc-default-async-var (was: [gomp4] Async related additions to OpenACC runtime library) Thomas Schwinge
  0 siblings, 2 replies; 9+ messages in thread
From: Chung-Lin Tang @ 2017-02-13 10:15 UTC (permalink / raw)
  To: gcc-patches; +Cc: Thomas Schwinge

[-- Attachment #1: Type: text/plain, Size: 2913 bytes --]

This patch adds:

// New functions to set/get the current default async queue
void acc_set_default_async (int);
int acc_get_default_async (void);

and _async versions of a few existing API functions:

void acc_copyin_async (void *, size_t, int);
void acc_create_async (void *, size_t, int);
void acc_copyout_async (void *, size_t, int);
void acc_delete_async (void *, size_t, int);
void acc_update_device_async (void *, size_t, int);
void acc_update_self_async (void *, size_t, int);
void acc_memcpy_to_device_async (void *, void *, size_t, int);
void acc_memcpy_from_device_async (void *, void *, size_t, int);

These implement part of the additional requirements for OpenACC 2.5
Tested and committed to gomp-4_0-branch.

Chung-Lin

2017-02-13  Chung-Lin Tang  <cltang@codesourcery.com>

        libgomp/
        * oacc-async.c (acc_get_default_async): New API function.
        (acc_set_default_async): Likewise.
        * oacc-init.c ():
        * oacc-int.h (struct goacc_thread): Add default_async field.
        * oacc-mem.c (memcpy_tofrom_device): New function, combined from
        acc_memcpy_to/from_device functions, now with async parameter.
        (acc_memcpy_to_device): Modify to use memcpy_tofrom_device.
        (acc_memcpy_from_device): Likewise.
        (acc_memcpy_to_device_async): New API function.
        (acc_memcpy_from_device_async): Likewise.
        (present_create_copy): Add async parameter.
        (acc_create): Adjust present_create_copy call.
        (acc_copyin): Likewise.
        (acc_present_or_create): Likewise.
        (acc_present_or_copyin): Likewise.
        (acc_create_async): New API function.
        (acc_copyin_async): New API function.
        (delete_copyout): Add async parameter.
        (acc_delete): Adjust delete_copyout call.
        (acc_copyout): Likewise.
        (acc_delete_async): New API function.
        (acc_copyout_async): Likewise.
        (update_dev_host): Add async parameter.
        (acc_update_device): Adjust update_dev_host call.
        (acc_update_self): Likewise.
        (acc_update_device_async): New API function.
        (acc_update_self_async): Likewise.
        * oacc-plugin.c (GOMP_PLUGIN_acc_thread_default_async): New function.
        * oacc-plugin.h (GOMP_PLUGIN_acc_thread_default_async): Declare.
        * openacc.f90 (acc_async_default): Declare.
        (acc_set_default_async): Likewise.
        (acc_get_default_async): Likewise.
        * openacc_lib.h (acc_async_default): Declare.
        (acc_set_default_async): Likewise.
        (acc_get_default_async): Likewise.
        * testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c: New test.
        * testsuite/libgomp.oacc-c-c++-common/lib-94.c: New test.
        * testsuite/libgomp.oacc-c-c++-common/lib-95.c: New test.
        * testsuite/libgomp.oacc-fortran/lib-16.f90: New test.

        include/
        * gomp-constants.h (GOMP_ASYNC_DEFAULT): Define.

[-- Attachment #2: x.diff --]
[-- Type: text/plain, Size: 57309 bytes --]

Index: libgomp/oacc-async.c
===================================================================
--- libgomp/oacc-async.c	(revision 245382)
+++ libgomp/oacc-async.c	(working copy)
@@ -105,3 +105,28 @@ acc_wait_all_async (int async)
 
   thr->dev->openacc.async_wait_all_async_func (async);
 }
+
+int
+acc_get_default_async (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  return thr->default_async;
+}
+
+void
+acc_set_default_async (int async)
+{
+  if (async < acc_async_sync)
+    gomp_fatal ("invalid async argument: %d", async);
+
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  thr->default_async = async;
+}
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c	(revision 245382)
+++ libgomp/oacc-init.c	(working copy)
@@ -437,6 +437,8 @@ goacc_attach_host_thread_to_device (int ord)
   
   thr->target_tls
     = acc_dev->openacc.create_thread_data_func (ord);
+
+  thr->default_async = acc_async_default;
   
   acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
Index: libgomp/oacc-int.h
===================================================================
--- libgomp/oacc-int.h	(revision 245382)
+++ libgomp/oacc-int.h	(working copy)
@@ -73,6 +73,9 @@ struct goacc_thread
 
   /* Target-specific data (used by plugin).  */
   void *target_tls;
+
+  /* Default OpenACC async queue for current thread, exported to plugin.  */
+  int default_async;
 };
 
 #if defined HAVE_TLS || defined USE_EMUTLS
Index: libgomp/oacc-mem.c
===================================================================
--- libgomp/oacc-mem.c	(revision 245382)
+++ libgomp/oacc-mem.c	(working copy)
@@ -153,8 +153,9 @@ acc_free (void *d)
     gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
 }
 
-void
-acc_memcpy_to_device (void *d, void *h, size_t s)
+static void
+memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
+		      const char *libfnname)
 {
   /* No need to call lazy open here, as the device pointer must have
      been obtained from a routine that did that.  */
@@ -164,31 +165,49 @@ acc_free (void *d)
 
   if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
     {
-      memmove (d, h, s);
+      if (from)
+	memmove (h, d, s);
+      else
+	memmove (d, h, s);
       return;
     }
 
-  if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
-    gomp_fatal ("error in %s", __FUNCTION__);
+  if (async > acc_async_sync)
+    thr->dev->openacc.async_set_async_func (async);
+
+  bool ret = (from
+	      ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
+	      : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
+
+  if (async > acc_async_sync)
+    thr->dev->openacc.async_set_async_func (acc_async_sync);
+
+  if (!ret)
+    gomp_fatal ("error in %s", libfnname);
 }
 
 void
-acc_memcpy_from_device (void *h, void *d, size_t s)
+acc_memcpy_to_device (void *d, void *h, size_t s)
 {
-  /* No need to call lazy open here, as the device pointer must have
-     been obtained from a routine that did that.  */
-  struct goacc_thread *thr = goacc_thread ();
+  memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
+}
 
-  assert (thr && thr->dev);
+void
+acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
+{
+  memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
+}
 
-  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
-    {
-      memmove (h, d, s);
-      return;
-    }
+void
+acc_memcpy_from_device (void *h, void *d, size_t s)
+{
+  memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
+}
 
-  if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
-    gomp_fatal ("error in %s", __FUNCTION__);
+void
+acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
+{
+  memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
 }
 
 /* Return the device pointer that corresponds to host data H.  Or NULL
@@ -424,7 +443,7 @@ acc_unmap_data (void *h)
 #define FLAG_COPY (1 << 2)
 
 static void *
-present_create_copy (unsigned f, void *h, size_t s)
+present_create_copy (unsigned f, void *h, size_t s, int async)
 {
   void *d;
   splay_tree_key n;
@@ -481,9 +500,15 @@ static void *
 
       gomp_mutex_unlock (&acc_dev->lock);
 
+      if (async > acc_async_sync)
+	acc_dev->openacc.async_set_async_func (async);
+
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
 			   GOMP_MAP_VARS_OPENACC);
 
+      if (async > acc_async_sync)
+	acc_dev->openacc.async_set_async_func (acc_async_sync);
+
       gomp_mutex_lock (&acc_dev->lock);
 
       d = tgt->to_free;
@@ -499,31 +524,44 @@ static void *
 void *
 acc_create (void *h, size_t s)
 {
-  return present_create_copy (FLAG_CREATE, h, s);
+  return present_create_copy (FLAG_CREATE, h, s, acc_async_sync);
 }
 
+void
+acc_create_async (void *h, size_t s, int async)
+{
+  present_create_copy (FLAG_CREATE, h, s, async);
+}
+
 void *
 acc_copyin (void *h, size_t s)
 {
-  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
+  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s, acc_async_sync);
 }
 
+void
+acc_copyin_async (void *h, size_t s, int async)
+{
+  present_create_copy (FLAG_CREATE | FLAG_COPY, h, s, async);
+}
+
 void *
 acc_present_or_create (void *h, size_t s)
 {
-  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
+  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync);
 }
 
 void *
 acc_present_or_copyin (void *h, size_t s)
 {
-  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
+  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s,
+			      acc_async_sync);
 }
 
 #define FLAG_COPYOUT (1 << 0)
 
 static void
-delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
+delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 {
   size_t host_size;
   splay_tree_key n;
@@ -561,11 +599,17 @@ static void
 
   gomp_mutex_unlock (&acc_dev->lock);
 
+  if (async > acc_async_sync)
+    acc_dev->openacc.async_set_async_func (async);
+
   if (f & FLAG_COPYOUT)
     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
 
   acc_unmap_data (h);
 
+  if (async > acc_async_sync)
+    acc_dev->openacc.async_set_async_func (acc_async_sync);
+
   if (!acc_dev->free_func (acc_dev->target_id, d))
     gomp_fatal ("error in freeing device memory in %s", libfnname);
 }
@@ -573,17 +617,29 @@ static void
 void
 acc_delete (void *h , size_t s)
 {
-  delete_copyout (0, h, s, __FUNCTION__);
+  delete_copyout (0, h, s, acc_async_sync, __FUNCTION__);
 }
 
 void
+acc_delete_async (void *h , size_t s, int async)
+{
+  delete_copyout (0, h, s, async, __FUNCTION__);
+}
+
+void
 acc_copyout (void *h, size_t s)
 {
-  delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
+  delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__);
 }
 
+void
+acc_copyout_async (void *h, size_t s, int async)
+{
+  delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__);
+}
+
 static void
-update_dev_host (int is_dev, void *h, size_t s)
+update_dev_host (int is_dev, void *h, size_t s, int async)
 {
   splay_tree_key n;
   void *d;
@@ -609,27 +665,45 @@ static void
   d = (void *) (n->tgt->tgt_start + n->tgt_offset
 		+ (uintptr_t) h - n->host_start);
 
+  if (async > acc_async_sync)
+    acc_dev->openacc.async_set_async_func (async);
+
   if (is_dev)
     acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
   else
     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
 
+  if (async > acc_async_sync)
+    acc_dev->openacc.async_set_async_func (acc_async_sync);
+
   gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
 acc_update_device (void *h, size_t s)
 {
-  update_dev_host (1, h, s);
+  update_dev_host (1, h, s, acc_async_sync);
 }
 
 void
+acc_update_device_async (void *h, size_t s, int async)
+{
+  update_dev_host (1, h, s, async);
+}
+
+void
 acc_update_self (void *h, size_t s)
 {
-  update_dev_host (0, h, s);
+  update_dev_host (0, h, s, acc_async_sync);
 }
 
 void
+acc_update_self_async (void *h, size_t s, int async)
+{
+  update_dev_host (0, h, s, async);
+}
+
+void
 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
 			 void *kinds)
 {
Index: libgomp/oacc-plugin.c
===================================================================
--- libgomp/oacc-plugin.c	(revision 245382)
+++ libgomp/oacc-plugin.c	(working copy)
@@ -49,3 +49,12 @@ GOMP_PLUGIN_acc_thread (void)
   struct goacc_thread *thr = goacc_thread ();
   return thr ? thr->target_tls : NULL;
 }
+
+/* Return the default async number from the TLS data for the current thread.  */
+
+int
+GOMP_PLUGIN_acc_thread_default_async (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+  return thr ? thr->default_async : acc_async_default;
+}
Index: libgomp/oacc-plugin.h
===================================================================
--- libgomp/oacc-plugin.h	(revision 245382)
+++ libgomp/oacc-plugin.h	(working copy)
@@ -29,5 +29,6 @@
 
 extern void GOMP_PLUGIN_async_unmap_vars (void *, int);
 extern void *GOMP_PLUGIN_acc_thread (void);
+extern int GOMP_PLUGIN_acc_thread_default_async (void);
 
 #endif
Index: libgomp/openacc.f90
===================================================================
--- libgomp/openacc.f90	(revision 245382)
+++ libgomp/openacc.f90	(working copy)
@@ -51,9 +51,10 @@ module openacc_kinds
 
   integer, parameter :: acc_handle_kind = int32
 
-  public :: acc_async_noval, acc_async_sync
+  public :: acc_async_default, acc_async_noval, acc_async_sync
 
   ! Keep in sync with include/gomp-constants.h.
+  integer (acc_handle_kind), parameter :: acc_async_default = 0
   integer (acc_handle_kind), parameter :: acc_async_noval = -1
   integer (acc_handle_kind), parameter :: acc_async_sync = -2
 
@@ -92,6 +93,16 @@ module openacc_internal
       integer (acc_device_kind) d
     end function
 
+    subroutine acc_set_default_async_h (a)
+      import
+      integer a
+    end subroutine
+
+    function acc_get_default_async_h ()
+      import
+      integer acc_get_default_async_h
+    end function
+
     function acc_async_test_h (a)
       logical acc_async_test_h
       integer a
@@ -296,6 +307,150 @@ module openacc_internal
       logical acc_is_present_array_h
       type (*), dimension (..), contiguous :: a
     end function
+
+    subroutine acc_copyin_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_copyin_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_copyin_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_create_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_create_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_create_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_copyout_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_copyout_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_copyout_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_delete_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_delete_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_delete_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_update_device_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_update_device_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_update_device_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_update_self_async_32_h (a, len, async)
+      use iso_c_binding, only: c_int32_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_update_self_async_64_h (a, len, async)
+      use iso_c_binding, only: c_int64_t
+      use openacc_kinds, only: acc_handle_kind
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+      integer (acc_handle_kind) async
+    end subroutine
+
+    subroutine acc_update_self_async_array_h (a, async)
+      use openacc_kinds, only: acc_handle_kind
+      type (*), dimension (..), contiguous :: a
+      integer (acc_handle_kind) async
+    end subroutine
   end interface
 
   interface
@@ -458,6 +613,60 @@ module openacc_internal
       type (*), dimension (*) :: a
       integer (c_size_t), value :: len
     end function
+
+    subroutine acc_copyin_async_l (a, len, async) &
+        bind (C, name = "acc_copyin_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
+
+    subroutine acc_create_async_l (a, len, async) &
+        bind (C, name = "acc_create_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
+
+    subroutine acc_copyout_async_l (a, len, async) &
+        bind (C, name = "acc_copyout_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
+
+    subroutine acc_delete_async_l (a, len, async) &
+        bind (C, name = "acc_delete_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
+
+    subroutine acc_update_device_async_l (a, len, async) &
+        bind (C, name = "acc_update_device_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
+
+    subroutine acc_update_self_async_l (a, len, async) &
+        bind (C, name = "acc_update_self_async")
+      use iso_c_binding, only: c_size_t, c_int
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+      integer (c_int), value :: async
+    end subroutine
   end interface
 end module
 
@@ -470,11 +679,14 @@ module openacc
 
   public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type
   public :: acc_set_device_num, acc_get_device_num, acc_async_test
+  public :: acc_set_default_async, acc_get_default_async
   public :: acc_async_test_all, acc_wait, acc_wait_async, acc_wait_all
   public :: acc_wait_all_async, acc_init, acc_shutdown, acc_on_device
   public :: acc_copyin, acc_present_or_copyin, acc_pcopyin, acc_create
   public :: acc_present_or_create, acc_pcreate, acc_copyout, acc_delete
   public :: acc_update_device, acc_update_self, acc_is_present
+  public :: acc_copyin_async, acc_create_async, acc_copyout_async
+  public :: acc_delete_async, acc_update_device_async, acc_update_self_async
 
   integer, parameter :: openacc_version = 201306
 
@@ -498,6 +710,14 @@ module openacc
     procedure :: acc_get_device_num_h
   end interface
 
+  interface acc_set_default_async
+    procedure :: acc_set_default_async_h
+  end interface
+
+  interface acc_get_default_async
+    procedure :: acc_get_default_async_h
+  end interface
+
   interface acc_async_test
     procedure :: acc_async_test_h
   end interface
@@ -618,6 +838,42 @@ module openacc
   ! acc_memcpy_to_device: Only available in C/C++
   ! acc_memcpy_from_device: Only available in C/C++
 
+  interface acc_copyin_async
+    procedure :: acc_copyin_async_32_h
+    procedure :: acc_copyin_async_64_h
+    procedure :: acc_copyin_async_array_h
+  end interface
+
+  interface acc_create_async
+    procedure :: acc_create_async_32_h
+    procedure :: acc_create_async_64_h
+    procedure :: acc_create_async_array_h
+  end interface
+
+  interface acc_copyout_async
+    procedure :: acc_copyout_async_32_h
+    procedure :: acc_copyout_async_64_h
+    procedure :: acc_copyout_async_array_h
+  end interface
+
+  interface acc_delete_async
+    procedure :: acc_delete_async_32_h
+    procedure :: acc_delete_async_64_h
+    procedure :: acc_delete_async_array_h
+  end interface
+
+  interface acc_update_device_async
+    procedure :: acc_update_device_async_32_h
+    procedure :: acc_update_device_async_64_h
+    procedure :: acc_update_device_async_array_h
+  end interface
+
+  interface acc_update_self_async
+    procedure :: acc_update_self_async_32_h
+    procedure :: acc_update_self_async_64_h
+    procedure :: acc_update_self_async_array_h
+  end interface
+
 end module
 
 function acc_get_num_devices_h (d)
@@ -954,3 +1210,189 @@ function acc_is_present_array_h (a)
   type (*), dimension (..), contiguous :: a
   acc_is_present_array_h = acc_is_present_l (a, sizeof (a)) == 1
 end function
+
+subroutine acc_copyin_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_copyin_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_copyin_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_copyin_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_copyin_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_copyin_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_copyin_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_copyin_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_create_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_create_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_create_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_create_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_create_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_create_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_create_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_create_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_copyout_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_copyout_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_copyout_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_copyout_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_copyout_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_copyout_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_copyout_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_copyout_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_delete_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_delete_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_delete_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_delete_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_delete_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_delete_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_delete_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_delete_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_update_device_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_update_device_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_update_device_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_update_device_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_update_device_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_update_device_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_update_device_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_update_device_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_update_self_async_32_h (a, len, async)
+  use iso_c_binding, only: c_int32_t, c_size_t, c_int
+  use openacc_internal, only: acc_update_self_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  integer (acc_handle_kind) async
+  call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_update_self_async_64_h (a, len, async)
+  use iso_c_binding, only: c_int64_t, c_size_t, c_int
+  use openacc_internal, only: acc_update_self_async_l
+  use openacc_kinds, only: acc_handle_kind
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  integer (acc_handle_kind) async
+  call acc_update_self_async_l (a, int (len, kind = c_size_t), int (async, kind = c_int))
+end subroutine
+
+subroutine acc_update_self_async_array_h (a, async)
+  use iso_c_binding, only: c_int
+  use openacc_internal, only: acc_update_self_async_l
+  use openacc_kinds, only: acc_handle_kind
+  type (*), dimension (..), contiguous :: a
+  integer (acc_handle_kind) async
+  call acc_update_self_async_l (a, sizeof (a), int (async, kind = c_int))
+end subroutine
Index: libgomp/openacc.h
===================================================================
--- libgomp/openacc.h	(revision 245382)
+++ libgomp/openacc.h	(working copy)
@@ -63,6 +63,7 @@ typedef enum acc_device_t {
 
 typedef enum acc_async_t {
   /* Keep in sync with include/gomp-constants.h.  */
+  acc_async_default = 0,
   acc_async_noval = -1,
   acc_async_sync  = -2
 } acc_async_t;
@@ -72,6 +73,8 @@ void acc_set_device_type (acc_device_t) __GOACC_NO
 acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
 void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
 int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
+void acc_set_default_async (int) __GOACC_NOTHROW;
+int acc_get_default_async (void) __GOACC_NOTHROW;
 int acc_async_test (int) __GOACC_NOTHROW;
 int acc_async_test_all (void) __GOACC_NOTHROW;
 void acc_wait (int) __GOACC_NOTHROW;
@@ -105,6 +108,16 @@ int acc_is_present (void *, size_t) __GOACC_NOTHRO
 void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
 
+/* Async functions, specified in OpenACC 2.5.  */
+void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_delete_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
+void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
+
 /* Old names.  OpenACC does not specify whether these can or must
    not be macros, inlines or aliases for the new names.  */
 #define acc_pcreate acc_present_or_create
Index: libgomp/openacc_lib.h
===================================================================
--- libgomp/openacc_lib.h	(revision 245382)
+++ libgomp/openacc_lib.h	(working copy)
@@ -46,6 +46,7 @@
       integer, parameter :: acc_handle_kind = 4
 
 !     Keep in sync with include/gomp-constants.h.
+      integer (acc_handle_kind), parameter :: acc_async_default = 0
       integer (acc_handle_kind), parameter :: acc_async_noval = -1
       integer (acc_handle_kind), parameter :: acc_async_sync = -2
 
@@ -89,6 +90,18 @@
         end function
       end interface
 
+      interface acc_set_default_async
+        subroutine acc_set_default_async_h (a)
+          integer a
+        end subroutine
+      end interface
+
+      interface acc_get_default_async
+        function acc_get_default_async_h ()
+          integer acc_get_default_async_h
+        end function
+      end interface
+
       interface acc_async_test
         function acc_async_test_h (a)
           logical acc_async_test_h
@@ -380,3 +393,159 @@
 
       ! acc_memcpy_to_device: Only available in C/C++
       ! acc_memcpy_from_device: Only available in C/C++
+
+      interface acc_copyin_async
+        subroutine acc_copyin_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_copyin_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_copyin_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
+
+      interface acc_create_async
+        subroutine acc_create_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_create_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_create_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
+
+      interface acc_copyout_async
+        subroutine acc_copyout_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_copyout_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_copyout_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
+
+      interface acc_delete_async
+        subroutine acc_delete_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_delete_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_delete_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
+
+      interface acc_update_device_async
+        subroutine acc_update_device_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_update_device_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_update_device_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
+
+      interface acc_update_self_async
+        subroutine acc_update_self_async_32_h (a, len, async)
+          use iso_c_binding, only: c_int32_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_update_self_async_64_h (a, len, async)
+          use iso_c_binding, only: c_int64_t
+          import acc_handle_kind
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+          integer (acc_handle_kind) async
+        end subroutine
+
+        subroutine acc_update_self_async_array_h (a, async_)
+          import acc_handle_kind
+          type (*), dimension (..), contiguous :: a
+          integer (acc_handle_kind) async_
+        end subroutine
+      end interface
Index: libgomp/plugin/plugin-nvptx.c
===================================================================
--- libgomp/plugin/plugin-nvptx.c	(revision 245382)
+++ libgomp/plugin/plugin-nvptx.c	(working copy)
@@ -414,13 +414,10 @@ select_stream_for_async (int async, pthread_t thre
   struct ptx_stream *stream = NULL;
   int orig_async = async;
 
-  /* The special value acc_async_noval (-1) maps (for now) to an
-     implicitly-created stream, which is then handled the same as any other
-     numbered async stream.  Other options are available, e.g. using the null
-     stream for anonymous async operations, or choosing an idle stream from an
-     active set.  But, stick with this for now.  */
-  if (async > acc_async_sync)
-    async++;
+  /* The special value acc_async_noval (-1) maps to the thread-specific
+     default async stream.  */
+  if (async == acc_async_noval)
+    async = GOMP_PLUGIN_acc_thread_default_async ();
 
   if (create)
     pthread_mutex_lock (&ptx_dev->stream_lock);
Index: libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90
===================================================================
--- libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90	(revision 0)
@@ -0,0 +1,57 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+program main
+  use openacc
+  implicit none
+
+  integer, parameter :: N = 256
+  integer, allocatable :: h(:)
+  integer :: i
+  integer :: async = 5
+
+  allocate (h(N))
+
+  do i = 1, N
+    h(i) = i
+  end do 
+
+  call acc_copyin (h)
+
+  do i = 1, N
+    h(i) = i + i
+  end do 
+
+  call acc_update_device_async (h, sizeof (h), async)
+
+  if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+  h(:) = 0
+
+  call acc_copyout_async (h, sizeof (h), async)
+
+  call acc_wait (async)
+
+  do i = 1, N
+    if (h(i) /= i + i) call abort
+  end do 
+
+  call acc_copyin (h, sizeof (h))
+
+  h(:) = 0
+
+  call acc_update_self_async (h, sizeof (h), async)
+  
+  if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+  do i = 1, N
+    if (h(i) /= i + i) call abort
+  end do 
+
+  call acc_delete_async (h, async)
+
+  call acc_wait (async)
+
+  if (acc_is_present (h) .neqv. .FALSE.) call abort
+  
+end program
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c	(revision 0)
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i;
+  int async = 8;
+  unsigned char *h;
+
+  h = (unsigned char *) malloc (N);
+
+  for (i = 0; i < N; i++)
+    {
+      h[i] = i;
+    }
+
+  acc_copyin_async (h, N, async);
+
+  memset (h, 0, N);
+
+  acc_wait (async);
+
+  acc_copyout_async (h, N, async + 1);
+
+  acc_wait (async + 1);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c	(revision 0)
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  int i, q = 5;
+  unsigned char *h, *g;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+  g = (unsigned char *) malloc (N);
+  for (i = 0; i < N; i++)
+    {
+      g[i] = i;
+    }
+
+  acc_create_async (h, N, q);
+
+  acc_memcpy_to_device_async (acc_deviceptr (h), g, N, q);
+  memset (&h[0], 0, N);
+
+  acc_wait (q);
+
+  acc_update_self_async (h, N, q + 1);
+  acc_delete_async (h, N, q + 1);
+
+  acc_wait (q + 1);
+
+  for (i = 0; i < N; i++)
+    {
+      if (h[i] != i)
+	abort ();
+    }
+
+  free (h);
+  free (g);
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c	(revision 0)
@@ -0,0 +1,904 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include "cuda.h"
+
+#include <stdio.h>
+#include <time.h>
+#include <sys/time.h>
+
+int
+main (int argc, char **argv)
+{
+    CUresult r;
+    CUstream stream1;
+    int N = 128; //1024 * 1024;
+    float *a, *b, *c, *d, *e;
+    int i;
+    int nbytes;
+
+    srand (time (NULL));
+    int s = rand () % 100;
+
+    acc_init (acc_device_nvidia);
+
+    nbytes = N * sizeof (float);
+
+    a = (float *) malloc (nbytes);
+    b = (float *) malloc (nbytes);
+    c = (float *) malloc (nbytes);
+    d = (float *) malloc (nbytes);
+    e = (float *) malloc (nbytes);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+    acc_set_default_async (s);
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc parallel wait (s) async (s)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 11.0)
+            abort ();
+    }
+
+
+    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+        abort ();
+    }
+
+    acc_set_cuda_stream (1, stream1);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 7.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 7.0)
+            abort ();
+
+        if (b[i] != 49.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc parallel wait (s) async (s)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 17.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 16.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc parallel async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 25.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 3.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 2.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 2.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc kernels wait (s) async (s)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 2.0)
+            abort ();
+
+        if (b[i] != 4.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 11.0)
+            abort ();
+    }
+
+
+    r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+    if (r != CUDA_SUCCESS)
+    {
+        fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+        abort ();
+    }
+
+    acc_set_cuda_stream (1, stream1);
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 5.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 7.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 7.0)
+            abort ();
+
+        if (b[i] != 49.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 3.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+    }
+
+#pragma acc kernels wait (s) async (s)
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+    }
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 3.0)
+            abort ();
+
+        if (b[i] != 9.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+
+        if (d[i] != 1.0)
+            abort ();
+
+        if (e[i] != 17.0)
+            abort ();
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 4.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 4.0)
+            abort ();
+
+        if (b[i] != 16.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
+
+    for (i = 0; i < N; i++)
+    {
+        a[i] = 5.0;
+        b[i] = 0.0;
+        c[i] = 0.0;
+        d[i] = 0.0;
+        e[i] = 0.0;
+    }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+    {
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+    }
+
+#pragma acc kernels async
+    {
+        int ii;
+
+        for (ii = 0; ii < N; ii++)
+            c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+    }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async
+
+#pragma acc wait (s)
+
+    }
+
+    for (i = 0; i < N; i++)
+    {
+        if (a[i] != 5.0)
+            abort ();
+
+        if (b[i] != 25.0)
+            abort ();
+
+        if (c[i] != 4.0)
+            abort ();
+    }
+
+    acc_shutdown (acc_device_nvidia);
+
+    return 0;
+}
Index: include/gomp-constants.h
===================================================================
--- include/gomp-constants.h	(revision 245382)
+++ include/gomp-constants.h	(working copy)
@@ -182,6 +182,7 @@ enum gomp_map_kind
 /* Asynchronous behavior.  Keep in sync with
    libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t.  */
 
+#define GOMP_ASYNC_DEFAULT		0
 #define GOMP_ASYNC_NOVAL		-1
 #define GOMP_ASYNC_SYNC			-2
 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-12-14 21:07 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-13 10:15 [gomp4] Async related additions to OpenACC runtime library Chung-Lin Tang
2017-02-14 11:29 ` Thomas Schwinge
2017-02-14 13:05   ` Chung-Lin Tang
2017-02-15 20:04     ` Thomas Schwinge
2018-11-18  1:37 ` OpenACC ICV acc-default-async-var (was: [gomp4] Async related additions to OpenACC runtime library) Thomas Schwinge
2018-11-19  7:33   ` OpenACC ICV acc-default-async-var Chung-Lin Tang
2018-12-05 14:14     ` [PR88370] acc_get_cuda_stream/acc_set_cuda_stream: acc_async_sync, acc_async_noval (was: OpenACC ICV acc-default-async-var) Thomas Schwinge
2018-12-14 21:07       ` [PR88370] acc_get_cuda_stream/acc_set_cuda_stream: acc_async_sync, acc_async_noval Thomas Schwinge
2018-12-05 14:25     ` OpenACC ICV acc-default-async-var Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).