public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts
@ 2018-09-25 13:11 Chung-Lin Tang
  2018-12-07 11:33 ` Thomas Schwinge
                   ` (5 more replies)
  0 siblings, 6 replies; 28+ messages in thread
From: Chung-Lin Tang @ 2018-09-25 13:11 UTC (permalink / raw)
  To: gcc-patches, Thomas Schwinge

[-- Attachment #1: Type: text/plain, Size: 2888 bytes --]

Hi Thomas,
These are the OpenACC specific changes, mostly the re-implementation of async-related acc_* runtime
library API functions to use the new backend plugin interfaces, in a non-target specific way.

Thanks,
Chung-Lin


	* oacc-async.c (get_goacc_thread): New function.
	(get_goacc_thread_device): New function.
	(lookup_goacc_asyncqueue): New function.
	(get_goacc_asyncqueue): New function.
	(acc_async_test): Adjust code to use new async design.
	(acc_async_test_all): Likewise.
	(acc_wait): Likewise.
	(acc_wait_async): Likewise.
	(acc_wait_all): Likewise.
	(acc_wait_all_async): Likewise.
	(acc_get_default_async): New API function.
	(acc_set_default_async): Likewise.
	(goacc_async_unmap_tgt): New function.
	(goacc_async_copyout_unmap_vars): Likewise.
	(goacc_async_free): Likewise.
	(goacc_init_asyncqueues): Likewise.
	(goacc_fini_asyncqueues): Likewise.
	* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async
	design.
	(acc_set_cuda_stream): Likewise.
	* oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'.
	(host_openacc_register_async_cleanup): Remove.
	(host_openacc_async_exec): New function.
	(host_openacc_async_test): Adjust parameters.
	(host_openacc_async_test_all): Remove.
	(host_openacc_async_wait): Remove.
	(host_openacc_async_wait_async): Remove.
	(host_openacc_async_wait_all): Remove.
	(host_openacc_async_wait_all_async): Remove.
	(host_openacc_async_set_async): Remove.
	(host_openacc_async_synchronize): New function.
	(host_openacc_async_serialize): New function.
	(host_openacc_async_host2dev): New function.
	(host_openacc_async_dev2host): New function.
	(host_openacc_async_queue_callback): New function.
	(host_openacc_async_construct): New function.
	(host_openacc_async_destruct): New function.
	(struct gomp_device_descr host_dispatch): Remove initialization of old
	interface, add intialization of new async sub-struct.
	* oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device.
	(goacc_attach_host_thread_to_device): Remove old async code usage, add
	initialization of per-thread default_async.
	* oacc-int.h (struct goacc_thread): Add default_async field.
	(goacc_init_asyncqueues): New declaration.
	(goacc_fini_asyncqueues): Likewise.
	(goacc_async_copyout_unmap_vars): Likewise.
	(goacc_async_free): Likewise.
	(get_goacc_asyncqueue): Likewise.
	(lookup_goacc_asyncqueue): Likewise.

	* oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async
	design.
	(present_create_copy): Likewise.
	(delete_copyout): Likewise.
	(update_dev_host): Likewise.
	(gomp_acc_insert_pointer): Add async parameter, adjust code to use new
	async design.
	(gomp_acc_remove_pointer): Adjust code to use new async design.
	* oacc-parallel.c (GOACC_parallel_keyed): Likewise.
	(GOACC_enter_exit_data): Likewise.
	(goacc_wait): Likewise.
	(GOACC_update): Likewise.
	* oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Remove.


[-- Attachment #2: async-02.openacc-parts.patch --]
[-- Type: text/plain, Size: 25670 bytes --]

diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index a4e1863..68aaf19 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -27,10 +27,87 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <string.h>
 #include "openacc.h"
 #include "libgomp.h"
 #include "oacc-int.h"
 
+static struct goacc_thread *
+get_goacc_thread (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  return thr;
+}
+
+static struct gomp_device_descr *
+get_goacc_thread_device (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  return thr->dev;
+}
+
+attribute_hidden struct goacc_asyncqueue *
+lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
+{
+  /* The special value acc_async_noval (-1) maps to the thread-specific
+     default async stream.  */
+  if (async == acc_async_noval)
+    async = thr->default_async;
+
+  if (async == acc_async_sync)
+    return NULL;
+
+  if (async < 0)
+    gomp_fatal ("bad async %d", async);
+
+  struct gomp_device_descr *dev = thr->dev;
+
+  if (!create
+      && (async >= dev->openacc.async.nasyncqueue
+	  || !dev->openacc.async.asyncqueue[async]))
+    return NULL;
+
+  gomp_mutex_lock (&dev->openacc.async.lock);
+  if (async >= dev->openacc.async.nasyncqueue)
+    {
+      int diff = async + 1 - dev->openacc.async.nasyncqueue;
+      dev->openacc.async.asyncqueue
+	= gomp_realloc (dev->openacc.async.asyncqueue,
+			sizeof (goacc_aq) * (async + 1));
+      memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
+	      0, sizeof (goacc_aq) * diff);
+      dev->openacc.async.nasyncqueue = async + 1;
+    }
+
+  if (!dev->openacc.async.asyncqueue[async])
+    {
+      dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func ();
+
+      /* Link new async queue into active list.  */
+      goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
+      n->aq = dev->openacc.async.asyncqueue[async];
+      n->next = dev->openacc.async.active;
+      dev->openacc.async.active = n;
+    }
+  gomp_mutex_unlock (&dev->openacc.async.lock);
+  return dev->openacc.async.asyncqueue[async];
+}
+
+attribute_hidden struct goacc_asyncqueue *
+get_goacc_asyncqueue (int async)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+  return lookup_goacc_asyncqueue (thr, true, async);
+}
+
 int
 acc_async_test (int async)
 {
@@ -42,18 +119,25 @@ acc_async_test (int async)
   if (!thr || !thr->dev)
     gomp_fatal ("no device active");
 
-  return thr->dev->openacc.async_test_func (async);
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
+  return thr->dev->openacc.async.test_func (aq);
 }
 
 int
 acc_async_test_all (void)
 {
-  struct goacc_thread *thr = goacc_thread ();
-
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  return thr->dev->openacc.async_test_all_func ();
+  int ret = 1;
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    if (!thr->dev->openacc.async.test_func (l->aq))
+      {
+	ret = 0;
+	break;
+      }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+  return ret;
 }
 
 void
@@ -62,12 +146,10 @@ acc_wait (int async)
   if (!async_valid_p (async))
     gomp_fatal ("invalid async argument: %d", async);
 
-  struct goacc_thread *thr = goacc_thread ();
-
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  thr->dev->openacc.async_wait_func (async);
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
+  thr->dev->openacc.async.synchronize_func (aq);
 }
 
 /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.  */
@@ -84,23 +166,28 @@ acc_async_wait (int async)
 void
 acc_wait_async (int async1, int async2)
 {
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
+  goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
+  if (!aq1)
+    gomp_fatal ("invalid async 1");
+  if (aq1 == aq2)
+    gomp_fatal ("identical parameters");
 
-  thr->dev->openacc.async_wait_async_func (async1, async2);
+  thr->dev->openacc.async.synchronize_func (aq1);
+  thr->dev->openacc.async.serialize_func (aq1, aq2);
 }
 
 void
 acc_wait_all (void)
 {
-  struct goacc_thread *thr = goacc_thread ();
-
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  struct gomp_device_descr *dev = get_goacc_thread_device ();
 
-  thr->dev->openacc.async_wait_all_func ();
+  gomp_mutex_lock (&dev->openacc.async.lock);
+  for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next)
+    dev->openacc.async.synchronize_func (l->aq);
+  gomp_mutex_unlock (&dev->openacc.async.lock);
 }
 
 /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.  */
@@ -120,10 +207,99 @@ acc_wait_all_async (int async)
   if (!async_valid_p (async))
     gomp_fatal ("invalid async argument: %d", async);
 
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
 
-  thr->dev->openacc.async_wait_all_async_func (async);
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    {
+      thr->dev->openacc.async.synchronize_func (l->aq);
+      if (waiting_queue)
+	thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
+    }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+}
+
+int
+acc_get_default_async (void)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+  return thr->default_async;
+}
+
+void
+acc_set_default_async (int async)
+{
+  if (async < acc_async_sync)
+    gomp_fatal ("invalid async argument: %d", async);
+
+  struct goacc_thread *thr = get_goacc_thread ();
+  thr->default_async = async;
+}
+
+static void
+goacc_async_unmap_tgt (void *ptr)
+{
+  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+
+  if (tgt->refcount > 1)
+    tgt->refcount--;
+  else
+    gomp_unmap_tgt (tgt);
+}
+
+attribute_hidden void
+goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
+				struct goacc_asyncqueue *aq)
+{
+  struct gomp_device_descr *devicep = tgt->device_descr;
+
+  /* Increment reference to delay freeing of device memory until callback
+     has triggered.  */
+  tgt->refcount++;
+  gomp_unmap_vars_async (tgt, true, aq);
+  devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
+					      (void *) tgt);
+}
+
+attribute_hidden void
+goacc_async_free (struct gomp_device_descr *devicep,
+		  struct goacc_asyncqueue *aq, void *ptr)
+{
+  if (!aq)
+    free (ptr);
+  else
+    devicep->openacc.async.queue_callback_func (aq, free, ptr);
+}
+
+attribute_hidden void
+goacc_init_asyncqueues (struct gomp_device_descr *devicep)
+{
+  gomp_mutex_init (&devicep->openacc.async.lock);
+  devicep->openacc.async.nasyncqueue = 0;
+  devicep->openacc.async.asyncqueue = NULL;
+  devicep->openacc.async.active = NULL;
+}
+
+attribute_hidden bool
+goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
+{
+  bool ret = true;
+  if (devicep->openacc.async.nasyncqueue > 0)
+    {
+      goacc_aq_list next;
+      for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
+	{
+	  ret &= devicep->openacc.async.destruct_func (l->aq);
+	  next = l->next;
+	  free (l);
+	}
+      free (devicep->openacc.async.asyncqueue);
+      devicep->openacc.async.nasyncqueue = 0;
+      devicep->openacc.async.asyncqueue = NULL;
+      devicep->openacc.async.active = NULL;
+    }
+  gomp_mutex_destroy (&devicep->openacc.async.lock);
+  return ret;
 }
diff --git a/libgomp/oacc-cuda.c b/libgomp/oacc-cuda.c
index 20774c1..0a842ea 100644
--- a/libgomp/oacc-cuda.c
+++ b/libgomp/oacc-cuda.c
@@ -62,7 +62,11 @@ acc_get_cuda_stream (int async)
     return NULL;
 
   if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
-    return thr->dev->openacc.cuda.get_stream_func (async);
+    {
+      goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
+      if (aq)
+	return thr->dev->openacc.cuda.get_stream_func (aq);
+    }
  
   return NULL;
 }
@@ -79,8 +83,14 @@ acc_set_cuda_stream (int async, void *stream)
 
   thr = goacc_thread ();
 
+  int ret = -1;
   if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func)
-    return thr->dev->openacc.cuda.set_stream_func (async, stream);
-
-  return -1;
+    {
+      goacc_aq aq = get_goacc_asyncqueue (async);
+      gomp_mutex_lock (&thr->dev->openacc.async.lock);
+      ret = thr->dev->openacc.cuda.set_stream_func (aq, stream);
+      gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+    }
+
+  return ret;
 }
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 2de3c37..53658c8 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -140,55 +140,86 @@ host_openacc_exec (void (*fn) (void *),
 		   size_t mapnum __attribute__ ((unused)),
 		   void **hostaddrs,
 		   void **devaddrs __attribute__ ((unused)),
-		   int async __attribute__ ((unused)),
-		   unsigned *dims __attribute ((unused)),
+		   unsigned *dims __attribute__ ((unused)),
 		   void *targ_mem_desc __attribute__ ((unused)))
 {
   fn (hostaddrs);
 }
 
 static void
-host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ ((unused)),
-				     int async __attribute__ ((unused)))
+host_openacc_async_exec (void (*fn) (void *),
+			 size_t mapnum __attribute__ ((unused)),
+			 void **hostaddrs,
+			 void **devaddrs __attribute__ ((unused)),
+			 unsigned *dims __attribute__ ((unused)),
+			 void *targ_mem_desc __attribute__ ((unused)),
+			 struct goacc_asyncqueue *aq __attribute__ ((unused)))
 {
+  fn (hostaddrs);
 }
 
 static int
-host_openacc_async_test (int async __attribute__ ((unused)))
+host_openacc_async_test (struct goacc_asyncqueue *aq __attribute__ ((unused)))
 {
   return 1;
 }
 
-static int
-host_openacc_async_test_all (void)
+static void
+host_openacc_async_synchronize (struct goacc_asyncqueue *aq
+				__attribute__ ((unused)))
 {
-  return 1;
 }
 
 static void
-host_openacc_async_wait (int async __attribute__ ((unused)))
+host_openacc_async_serialize (struct goacc_asyncqueue *aq1
+			      __attribute__ ((unused)),
+			      struct goacc_asyncqueue *aq2
+			      __attribute__ ((unused)))
 {
 }
 
-static void
-host_openacc_async_wait_async (int async1 __attribute__ ((unused)),
-			       int async2 __attribute__ ((unused)))
+static bool
+host_openacc_async_host2dev (int ord __attribute__ ((unused)),
+			     void *dst __attribute__ ((unused)),
+			     const void *src __attribute__ ((unused)),
+			     size_t n __attribute__ ((unused)),
+			     struct goacc_asyncqueue *aq
+			     __attribute__ ((unused)))
 {
+  return true;
 }
 
-static void
-host_openacc_async_wait_all (void)
+static bool
+host_openacc_async_dev2host (int ord __attribute__ ((unused)),
+			     void *dst __attribute__ ((unused)),
+			     const void *src __attribute__ ((unused)),
+			     size_t n __attribute__ ((unused)),
+			     struct goacc_asyncqueue *aq
+			     __attribute__ ((unused)))
 {
+  return true;
 }
 
 static void
-host_openacc_async_wait_all_async (int async __attribute__ ((unused)))
+host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
+				   __attribute__ ((unused)),
+				   void (*callback_fn)(void *)
+				   __attribute__ ((unused)),
+				   void *userptr __attribute__ ((unused)))
 {
 }
 
-static void
-host_openacc_async_set_async (int async __attribute__ ((unused)))
+static struct goacc_asyncqueue *
+host_openacc_async_construct (void)
 {
+  return NULL;
+}
+
+static bool
+host_openacc_async_destruct (struct goacc_asyncqueue *aq
+			     __attribute__ ((unused)))
+{
+  return true;
 }
 
 static void *
@@ -235,15 +266,17 @@ static struct gomp_device_descr host_dispatch =
 
       .exec_func = host_openacc_exec,
 
-      .register_async_cleanup_func = host_openacc_register_async_cleanup,
-
-      .async_test_func = host_openacc_async_test,
-      .async_test_all_func = host_openacc_async_test_all,
-      .async_wait_func = host_openacc_async_wait,
-      .async_wait_async_func = host_openacc_async_wait_async,
-      .async_wait_all_func = host_openacc_async_wait_all,
-      .async_wait_all_async_func = host_openacc_async_wait_all_async,
-      .async_set_async_func = host_openacc_async_set_async,
+      .async = {
+	.construct_func = host_openacc_async_construct,
+	.destruct_func = host_openacc_async_destruct,
+	.test_func = host_openacc_async_test,
+	.synchronize_func = host_openacc_async_synchronize,
+	.serialize_func = host_openacc_async_serialize,
+	.queue_callback_func = host_openacc_async_queue_callback,
+	.exec_func = host_openacc_async_exec,
+	.dev2host_func = host_openacc_async_dev2host,
+	.host2dev_func = host_openacc_async_host2dev,
+      },
 
       .create_thread_data_func = host_openacc_create_thread_data,
       .destroy_thread_data_func = host_openacc_destroy_thread_data,
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 8db24b1..2c2f91c 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -309,7 +309,7 @@ acc_shutdown_1 (acc_device_t d)
       if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
         {
 	  devices_active = true;
-	  ret &= acc_dev->fini_device_func (acc_dev->target_id);
+	  ret &= gomp_fini_device (acc_dev);
 	  acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
 	}
       gomp_mutex_unlock (&acc_dev->lock);
@@ -426,8 +426,8 @@ goacc_attach_host_thread_to_device (int ord)
   
   thr->target_tls
     = acc_dev->openacc.create_thread_data_func (ord);
-  
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
+
+  thr->default_async = acc_async_default;
 }
 
 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 72414b7..07a2524 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -172,18 +172,11 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
       return;
     }
 
-  if (async > acc_async_sync)
-    thr->dev->openacc.async_set_async_func (async);
-
-  bool ret = (from
-	      ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
-	      : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
-
-  if (async > acc_async_sync)
-    thr->dev->openacc.async_set_async_func (acc_async_sync);
-
-  if (!ret)
-    gomp_fatal ("error in %s", libfnname);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+  if (from)
+    gomp_copy_dev2host (thr->dev, aq, h, d, s);
+  else
+    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
 }
 
 void
@@ -509,17 +502,13 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
 
       gomp_mutex_unlock (&acc_dev->lock);
 
-      if (async > acc_async_sync)
-	acc_dev->openacc.async_set_async_func (async);
+      goacc_aq aq = get_goacc_asyncqueue (async);
 
-      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
-			   GOMP_MAP_VARS_OPENACC);
+      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
+				 &kinds, true, GOMP_MAP_VARS_OPENACC);
       /* Initialize dynamic refcount.  */
       tgt->list[0].key->dynamic_refcount = 1;
 
-      if (async > acc_async_sync)
-	acc_dev->openacc.async_set_async_func (acc_async_sync);
-
       gomp_mutex_lock (&acc_dev->lock);
 
       d = tgt->to_free;
@@ -673,13 +662,9 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 
       if (f & FLAG_COPYOUT)
 	{
-	  if (async > acc_async_sync)
-	    acc_dev->openacc.async_set_async_func (async);
-	  acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
-	  if (async > acc_async_sync)
-	    acc_dev->openacc.async_set_async_func (acc_async_sync);
+	  goacc_aq aq = get_goacc_asyncqueue (async);
+	  gomp_copy_dev2host (acc_dev, aq, h, d, s);
 	}
-
       gomp_remove_var (acc_dev, n);
     }
 
@@ -762,16 +747,12 @@ update_dev_host (int is_dev, void *h, size_t s, int async)
   d = (void *) (n->tgt->tgt_start + n->tgt_offset
 		+ (uintptr_t) h - n->host_start);
 
-  if (async > acc_async_sync)
-    acc_dev->openacc.async_set_async_func (async);
+  goacc_aq aq = get_goacc_asyncqueue (async);
 
   if (is_dev)
-    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
+    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
   else
-    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
-
-  if (async > acc_async_sync)
-    acc_dev->openacc.async_set_async_func (acc_async_sync);
+    gomp_copy_dev2host (acc_dev, aq, h, d, s);
 
   gomp_mutex_unlock (&acc_dev->lock);
 }
@@ -802,7 +783,7 @@ acc_update_self_async (void *h, size_t s, int async)
 
 void
 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
-			 void *kinds)
+			 void *kinds, int async)
 {
   struct target_mem_desc *tgt;
   struct goacc_thread *thr = goacc_thread ();
@@ -832,8 +813,9 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
     }
 
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
-		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
+			     NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
 
   /* Initialize dynamic refcount.  */
@@ -927,7 +909,10 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
       if (async < acc_async_noval)
 	gomp_unmap_vars (t, true);
       else
-	t->device_descr->openacc.register_async_cleanup_func (t, async);
+	{
+	  goacc_aq aq = get_goacc_asyncqueue (async);
+	  goacc_async_copyout_unmap_vars (t, aq);
+	}
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index bfe8876..07d0338 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -212,8 +212,6 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
     }
   va_end (ap);
   
-  acc_dev->openacc.async_set_async_func (async);
-
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -230,43 +228,28 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
   else
     tgt_fn = (void (*)) fn;
 
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		       GOMP_MAP_VARS_OPENACC);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
+  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
+			     true, GOMP_MAP_VARS_OPENACC);
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
 			    + tgt->list[i].key->tgt_offset);
-
-  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
-			      async, dims, tgt);
-
-  /* If running synchronously, unmap immediately.  */
-  bool copyfrom = true;
-  if (async_synchronous_p (async))
-    gomp_unmap_vars (tgt, true);
+  if (aq == NULL)
+    {
+      acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+				  dims, tgt);
+      /* If running synchronously, unmap immediately.  */
+      gomp_unmap_vars (tgt, true);
+    }
   else
     {
-      bool async_unmap = false;
-      for (size_t i = 0; i < tgt->list_count; i++)
-	{
-	  splay_tree_key k = tgt->list[i].key;
-	  if (k && k->refcount == 1)
-	    {
-	      async_unmap = true;
-	      break;
-	    }
-	}
-      if (async_unmap)
-	tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
-      else
-	{
-	  copyfrom = false;
-	  gomp_unmap_vars (tgt, copyfrom);
-	}
+      acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+					dims, tgt, aq);
+      goacc_async_copyout_unmap_vars (tgt, aq);
     }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 /* Legacy entry point, only provide host execution.  */
@@ -377,8 +360,6 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 	finalize = true;
     }
 
-  acc_dev->openacc.async_set_async_func (async);
-
   /* Determine if this is an "acc enter data".  */
   for (i = 0; i < mapnum; ++i)
     {
@@ -450,7 +431,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 	  else
 	    {
 	      gomp_acc_insert_pointer (pointer, &hostaddrs[i],
-				       &sizes[i], &kinds[i]);
+				       &sizes[i], &kinds[i], async);
 	      /* Increment 'i' by two because OpenACC requires fortran
 		 arrays to be contiguous, so each PSET is associated with
 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
@@ -475,17 +456,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		if (acc_is_present (hostaddrs[i], sizes[i]))
 		  {
 		    if (finalize)
-		      acc_delete_finalize (hostaddrs[i], sizes[i]);
+		      acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
 		    else
-		      acc_delete (hostaddrs[i], sizes[i]);
+		      acc_delete_async (hostaddrs[i], sizes[i], async);
 		  }
 		break;
 	      case GOMP_MAP_FROM:
 	      case GOMP_MAP_FORCE_FROM:
 		if (finalize)
-		  acc_copyout_finalize (hostaddrs[i], sizes[i]);
+		  acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
 		else
-		  acc_copyout (hostaddrs[i], sizes[i]);
+		  acc_copyout_async (hostaddrs[i], sizes[i], async);
 		break;
 	      default:
 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -503,8 +484,6 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 	    i += pointer - 1;
 	  }
       }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 static void
@@ -517,17 +496,22 @@ goacc_wait (int async, int num_waits, va_list *ap)
     {
       int qid = va_arg (*ap, int);
       
-      if (acc_async_test (qid))
+      goacc_aq aq = get_goacc_asyncqueue (qid);
+      if (acc_dev->openacc.async.test_func (aq))
 	continue;
-
       if (async == acc_async_sync)
-	acc_wait (qid);
+	acc_dev->openacc.async.synchronize_func (aq);
       else if (qid == async)
-	;/* If we're waiting on the same asynchronous queue as we're
-	    launching on, the queue itself will order work as
-	    required, so there's no need to wait explicitly.  */
+	/* If we're waiting on the same asynchronous queue as we're
+	   launching on, the queue itself will order work as
+	   required, so there's no need to wait explicitly.  */
+	;
       else
-	acc_dev->openacc.async_wait_async_func (qid, async);
+	{
+	  goacc_aq aq2 = get_goacc_asyncqueue (async);
+	  acc_dev->openacc.async.synchronize_func (aq);
+	  acc_dev->openacc.async.serialize_func (aq, aq2);
+	}
     }
 }
 
@@ -559,8 +543,6 @@ GOACC_update (int device, size_t mapnum,
   else if (num_waits == acc_async_noval)
     acc_wait_all_async (async);
 
-  acc_dev->openacc.async_set_async_func (async);
-
   bool update_device = false;
   for (i = 0; i < mapnum; ++i)
     {
@@ -600,7 +582,7 @@ GOACC_update (int device, size_t mapnum,
 	  /* Fallthru  */
 	case GOMP_MAP_FORCE_TO:
 	  update_device = true;
-	  acc_update_device (hostaddrs[i], sizes[i]);
+	  acc_update_device_async (hostaddrs[i], sizes[i], async);
 	  break;
 
 	case GOMP_MAP_FROM:
@@ -612,7 +594,7 @@ GOACC_update (int device, size_t mapnum,
 	  /* Fallthru  */
 	case GOMP_MAP_FORCE_FROM:
 	  update_device = false;
-	  acc_update_self (hostaddrs[i], sizes[i]);
+	  acc_update_self_async (hostaddrs[i], sizes[i], async);
 	  break;
 
 	default:
@@ -620,8 +602,6 @@ GOACC_update (int device, size_t mapnum,
 	  break;
 	}
     }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 void
@@ -638,7 +618,7 @@ GOACC_wait (int async, int num_waits, ...)
   else if (async == acc_async_sync)
     acc_wait_all ();
   else if (async == acc_async_noval)
-    goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
+    acc_wait_all_async (async);
 }
 
 int
diff --git a/libgomp/oacc-plugin.c b/libgomp/oacc-plugin.c
index c04db90..a114cc7 100644
--- a/libgomp/oacc-plugin.c
+++ b/libgomp/oacc-plugin.c
@@ -30,17 +30,6 @@
 #include "oacc-plugin.h"
 #include "oacc-int.h"
 
-void
-GOMP_PLUGIN_async_unmap_vars (void *ptr, int async)
-{
-  struct target_mem_desc *tgt = ptr;
-  struct gomp_device_descr *devicep = tgt->device_descr;
-
-  devicep->openacc.async_set_async_func (async);
-  gomp_unmap_vars (tgt, true);
-  devicep->openacc.async_set_async_func (acc_async_sync);
-}
-
 /* Return the target-specific part of the TLS data for the current thread.  */
 
 void *
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index f61bb77..ede59d7 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -63,6 +63,7 @@ typedef enum acc_device_t {
 
 typedef enum acc_async_t {
   /* Keep in sync with include/gomp-constants.h.  */
+  acc_async_default = 0,
   acc_async_noval = -1,
   acc_async_sync  = -2
 } acc_async_t;
@@ -72,6 +73,8 @@ void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
 acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
 void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
 int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
+void acc_set_default_async (int) __GOACC_NOTHROW;
+int acc_get_default_async (void) __GOACC_NOTHROW;
 int acc_async_test (int) __GOACC_NOTHROW;
 int acc_async_test_all (void) __GOACC_NOTHROW;
 void acc_wait (int) __GOACC_NOTHROW;

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2019-01-08 14:04 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-25 13:11 [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts Chung-Lin Tang
2018-12-07 11:33 ` Thomas Schwinge
2018-12-07 14:19   ` Chung-Lin Tang
2018-12-14 14:11     ` Thomas Schwinge
2018-12-14 14:17 ` Thomas Schwinge
2018-12-14 14:52   ` Chung-Lin Tang
2018-12-17 13:52     ` Thomas Schwinge
2018-12-18  9:35       ` Chung-Lin Tang
2018-12-14 14:32 ` Thomas Schwinge
2018-12-14 14:42   ` Chung-Lin Tang
2018-12-17 13:56     ` Thomas Schwinge
2018-12-14 14:54 ` Thomas Schwinge
2018-12-14 15:01   ` Chung-Lin Tang
2018-12-17 14:11     ` Thomas Schwinge
2018-12-14 14:56 ` Thomas Schwinge
2018-12-17 11:03   ` Chung-Lin Tang
2018-12-17 14:32     ` Thomas Schwinge
2018-12-18 10:03       ` Chung-Lin Tang
2018-12-18 11:44         ` Thomas Schwinge
2018-12-18 15:06 ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Chung-Lin Tang
2018-12-18 21:04   ` Thomas Schwinge
2018-12-21 16:25     ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v3) Chung-Lin Tang
2018-12-28 14:52       ` Thomas Schwinge
2019-01-02 12:46     ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Chung-Lin Tang
2019-01-05  9:47       ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v4) Chung-Lin Tang
2019-01-07 14:16         ` Thomas Schwinge
2019-01-08 14:04           ` Chung-Lin Tang
2019-01-07 14:15       ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).