public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-11] Fix OpenACC "ephemeral" asynchronous host-to-device copies
@ 2021-05-13 16:13 Kwok Yeung
  0 siblings, 0 replies; only message in thread
From: Kwok Yeung @ 2021-05-13 16:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0e72dbd0ba257ff2c88ec50395c3008327d66cc5

commit 0e72dbd0ba257ff2c88ec50395c3008327d66cc5
Author: Julian Brown <julian@codesourcery.com>
Date:   Wed Sep 11 13:22:03 2019 -0700

    Fix OpenACC "ephemeral" asynchronous host-to-device copies
    
            libgomp/
            * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update
            prototype.
            * libgomp.h (gomp_copy_host2dev): Update prototype.
            * oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter.
            * oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev.
            (update_dev_host): Likewise.
            * oacc-parallel.c (GOACC_enter_exit_data): Call async versions of
            acc_attach/acc_detach/acc_detach_finalize functions.
            * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): Add
            ephemeral parameter.  Copy source data to temporary space immediately
            if true, and pass to queue_push_copy.
            * plugin/plugin-nvptx.c (GOMP_OFFLOAD_openacc_async_host2dev):
            Add EPHEMERAL parameter, and FIXME function comment.
            * target.c (goacc_device_copy_async): Remove.
            (gomp_copy_host2dev): Add ephemeral parameter. Update function comment.
            Call async host2dev plugin hook directly.
            (gomp_copy_dev2host): Call async dev2host plugin hook directly.
            (gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer,
            gomp_detach_pointer): Update calls to gomp_copy_host2dev.
            (gomp_map_vars_internal): Don't use coalescing buffer for asynchronous
            copies. Update calls to gomp_copy_host2dev.
            (gomp_update): Update calls to gomp_copy_host2dev.
            * testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix
            async-safety issue. Increase number of iterations.
            * testsuite/libgomp.oacc-fortran/lib-16.f90: Fix async-safety issue.
            * testsuite/libgomp.oacc-fortran/lib-16-2.f90: Likewise.

Diff:
---
 libgomp/ChangeLog.omp                              | 30 +++++++
 libgomp/libgomp-plugin.h                           |  3 +-
 libgomp/libgomp.h                                  |  2 +-
 libgomp/oacc-host.c                                |  1 +
 libgomp/oacc-mem.c                                 | 10 ++-
 libgomp/plugin/plugin-gcn.c                        | 23 ++---
 libgomp/plugin/plugin-nvptx.c                      | 13 ++-
 libgomp/target.c                                   | 98 ++++++++++++----------
 .../libgomp.oacc-c-c++-common/deep-copy-10.c       | 14 ++--
 .../testsuite/libgomp.oacc-fortran/lib-16-2.f90    |  5 ++
 libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90  |  5 ++
 11 files changed, 137 insertions(+), 67 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 8c5acc10f1c..bcb1b5b1518 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,33 @@
+2019-09-17  Julian Brown  <julian@codesourcery.com>
+	    Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+	* libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update
+	prototype.
+	* libgomp.h (gomp_copy_host2dev): Update prototype.
+	* oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter.
+	* oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev.
+	(update_dev_host): Likewise.
+	* oacc-parallel.c (GOACC_enter_exit_data): Call async versions of
+	acc_attach/acc_detach/acc_detach_finalize functions.
+	* plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): Add
+	ephemeral parameter.  Copy source data to temporary space immediately
+	if true, and pass to queue_push_copy.
+	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_openacc_async_host2dev):
+	Add EPHEMERAL parameter, and FIXME function comment.
+	* target.c (goacc_device_copy_async): Remove.
+	(gomp_copy_host2dev): Add ephemeral parameter. Update function comment.
+	Call async host2dev plugin hook directly.
+	(gomp_copy_dev2host): Call async dev2host plugin hook directly.
+	(gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer,
+	gomp_detach_pointer): Update calls to gomp_copy_host2dev.
+	(gomp_map_vars_internal): Don't use coalescing buffer for asynchronous
+	copies. Update calls to gomp_copy_host2dev.
+	(gomp_update): Update calls to gomp_copy_host2dev.
+	* testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix
+	async-safety issue. Increase number of iterations.
+	* testsuite/libgomp.oacc-fortran/lib-16.f90: Fix async-safety issue.
+	* testsuite/libgomp.oacc-fortran/lib-16-2.f90: Likewise.
+
 2019-05-20  Julian Brown  <julian@codesourcery.com>
 
 	* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Expect
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 62645ce9954..bff2193dd3a 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -152,7 +152,8 @@ extern void GOMP_OFFLOAD_openacc_async_exec (void (*) (void *), size_t, void **,
 					     struct goacc_asyncqueue *);
 extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size_t,
 						 struct goacc_asyncqueue *);
-extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
+extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *,
+						 size_t, bool,
 						 struct goacc_asyncqueue *);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 38aa589c8c3..d22210b4cbd 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1198,7 +1198,7 @@ enum gomp_map_vars_kind
 struct gomp_coalesce_buf;
 extern void gomp_copy_host2dev (struct gomp_device_descr *,
 				struct goacc_asyncqueue *, void *, const void *,
-				size_t, struct gomp_coalesce_buf *);
+				size_t, bool, struct gomp_coalesce_buf *);
 extern void gomp_copy_dev2host (struct gomp_device_descr *,
 				struct goacc_asyncqueue *, void *, const void *,
 				size_t);
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 1cbff4caace..369abfffac9 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -184,6 +184,7 @@ host_openacc_async_host2dev (int ord __attribute__ ((unused)),
 			     void *dst __attribute__ ((unused)),
 			     const void *src __attribute__ ((unused)),
 			     size_t n __attribute__ ((unused)),
+			     bool eph __attribute__ ((unused)),
 			     struct goacc_asyncqueue *aq
 			     __attribute__ ((unused)))
 {
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 3b98b65a859..685daab6341 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -202,7 +202,7 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
   if (from)
     gomp_copy_dev2host (thr->dev, aq, h, d, s);
   else
-    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
+    gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
 
   if (profiling_p)
     {
@@ -876,7 +876,7 @@ update_dev_host (int is_dev, void *h, size_t s, int async)
   goacc_aq aq = get_goacc_asyncqueue (async);
 
   if (is_dev)
-    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
+    gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
   else
     gomp_copy_dev2host (acc_dev, aq, h, d, s);
 
@@ -1435,7 +1435,8 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
 	  || kind == GOMP_MAP_ATTACH
 	  || kind == GOMP_MAP_FORCE_TO
 	  || kind == GOMP_MAP_TO
-	  || kind == GOMP_MAP_ALLOC)
+	  || kind == GOMP_MAP_ALLOC
+	  || kind == GOMP_MAP_DECLARE_ALLOCATE)
 	{
 	  data_enter = true;
 	  break;
@@ -1446,7 +1447,8 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
 	  || kind == GOMP_MAP_DETACH
 	  || kind == GOMP_MAP_FORCE_DETACH
 	  || kind == GOMP_MAP_FROM
-	  || kind == GOMP_MAP_FORCE_FROM)
+	  || kind == GOMP_MAP_FORCE_FROM
+	  || kind == GOMP_MAP_DECLARE_DEALLOCATE)
 	break;
 
       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 86263a67d32..cde04c0fe76 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3928,19 +3928,22 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
 
 bool
 GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
-				     size_t n, struct goacc_asyncqueue *aq)
+				     size_t n, bool ephemeral,
+				     struct goacc_asyncqueue *aq)
 {
   struct agent_info *agent = get_agent_info (device);
   assert (agent == aq->agent);
-  /* The source data does not necessarily remain live until the deferred
-     copy happens.  Taking a snapshot of the data here avoids reading
-     uninitialised data later, but means that (a) data is copied twice and
-     (b) modifications to the copied data between the "spawning" point of
-     the asynchronous kernel and when it is executed will not be seen.
-     But, that is probably correct.  */
-  void *src_copy = GOMP_PLUGIN_malloc (n);
-  memcpy (src_copy, src, n);
-  queue_push_copy (aq, dst, src_copy, n, true);
+
+  if (ephemeral)
+    {
+      /* The source data is on the stack or otherwise may be deallocated
+	 before the asynchronous copy takes place.  Take a copy of the source
+	 data.  */
+      void *src_copy = GOMP_PLUGIN_malloc (n);
+      memcpy (src_copy, src, n);
+      src = src_copy;
+    }
+  queue_push_copy (aq, dst, src, n, ephemeral);
   return true;
 }
 
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 1bea0bedccb..82bf97948c6 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1658,9 +1658,20 @@ GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
   return true;
 }
 
+/* FIXME: It is unknown whether the cuMemcpyHtoDAsync API call caches source
+   data before the asynchronous copy takes place.  Either way there is a data
+   race associated with ignoring the EPHEMERAL parameter here -- either if it
+   is TRUE (because we are copying uncached data that may disappear before the
+   async copy takes place) or if it is FALSE (because the source data may be
+   cached/snapshotted here before it is modified by an earlier async operation,
+   so stale data gets copied to the target).
+   Neither problem has been observed in practice, so far.  */
+
 bool
 GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src,
-				     size_t n, struct goacc_asyncqueue *aq)
+				     size_t n,
+				     bool ephemeral __attribute__((unused)),
+				     struct goacc_asyncqueue *aq)
 {
   if (!nvptx_attach_host_thread_to_device (ord)
       || !cuda_memcpy_sanity_check (src, dst, n))
diff --git a/libgomp/target.c b/libgomp/target.c
index bf7c86a8009..851609586bd 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -194,22 +194,6 @@ gomp_device_copy (struct gomp_device_descr *devicep,
     }
 }
 
-static inline void
-goacc_device_copy_async (struct gomp_device_descr *devicep,
-			 bool (*copy_func) (int, void *, const void *, size_t,
-					    struct goacc_asyncqueue *),
-			 const char *dst, void *dstaddr,
-			 const char *src, const void *srcaddr,
-			 size_t size, struct goacc_asyncqueue *aq)
-{
-  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
-    {
-      gomp_mutex_unlock (&devicep->lock);
-      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
-		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
-    }
-}
-
 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
    host to device memory transfers.  */
 
@@ -303,11 +287,18 @@ gomp_to_device_kind_p (int kind)
     }
 }
 
+ /* Copy host memory to an offload device.  In asynchronous mode (if AQ is
+   non-NULL), when the source data is stack or may otherwise be deallocated
+   before the asynchronous copy takes place, EPHEMERAL must be passed as
+   TRUE.  The CBUF isn't used for non-ephemeral asynchronous copies, because
+   the host data might not be computed yet (by an earlier asynchronous compute
+   region).  */
+
 attribute_hidden void
 gomp_copy_host2dev (struct gomp_device_descr *devicep,
 		    struct goacc_asyncqueue *aq,
 		    void *d, const void *h, size_t sz,
-		    struct gomp_coalesce_buf *cbuf)
+		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
 {
   if (cbuf)
     {
@@ -335,8 +326,15 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
 	}
     }
   if (__builtin_expect (aq != NULL, 0))
-    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
-			     "dev", d, "host", h, sz, aq);
+    {
+      if (!devicep->openacc.async.host2dev_func (devicep->target_id, d, h, sz,
+						 ephemeral, aq))
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("Copying of host object [%p..%p) to dev object [%p..%p) "
+		      "failed", h, h + sz, d, d + sz);
+	}
+    }
   else
     gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
 }
@@ -347,8 +345,15 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep,
 		    void *h, const void *d, size_t sz)
 {
   if (__builtin_expect (aq != NULL, 0))
-    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
-			     "host", h, "dev", d, sz, aq);
+    {
+      if (!devicep->openacc.async.dev2host_func (devicep->target_id, h, d, sz,
+						 aq))
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("Copying of dev object [%p..%p) to host object [%p..%p) "
+		      "failed", d, d + sz, h, h + sz);
+	}
+    }
   else
     gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
 }
@@ -578,7 +583,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
 				  + newn->host_start - oldn->host_start),
 			(void *) newn->host_start,
-			newn->host_end - newn->host_start, cbuf);
+			newn->host_end - newn->host_start, false, cbuf);
 
   if (oldn->refcount != REFCOUNT_INFINITY)
     oldn->refcount++;
@@ -607,7 +612,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
       gomp_copy_host2dev (devicep, aq,
 			  (void *) (tgt->tgt_start + target_offset),
 			  (void *) &cur_node.tgt_offset,
-			  sizeof (void *), cbuf);
+			  sizeof (void *), true, cbuf);
       return;
     }
   /* Add bias to the pointer value.  */
@@ -627,7 +632,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
      to initialize the pointer with.  */
   cur_node.tgt_offset -= bias;
   gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
-		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
+		      (void *) &cur_node.tgt_offset, sizeof (void *), true,
+		      cbuf);
 }
 
 static void
@@ -760,7 +766,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
 		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
 
       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
-			  sizeof (void *), cbufp);
+			  sizeof (void *), true, cbufp);
     }
   else
     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
@@ -813,7 +819,7 @@ gomp_detach_pointer (struct gomp_device_descr *devicep,
 		  (void *) target);
 
       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
-			  sizeof (void *), cbufp);
+			  sizeof (void *), true, cbufp);
     }
   else
     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
@@ -985,8 +991,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	      for (i = first; i <= last; i++)
 		{
 		  tgt->list[i].key = NULL;
-		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
-					     & typemask))
+		  if (!aq
+		      && gomp_to_device_kind_p (get_kind (short_mapkind, kinds,
+							  i) & typemask))
 		    gomp_coalesce_buf_add (&cbuf,
 					   tgt_size - cur_node.host_end
 					   + (uintptr_t) hostaddrs[i],
@@ -1049,8 +1056,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	  if (tgt_align < align)
 	    tgt_align = align;
 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	  gomp_coalesce_buf_add (&cbuf, tgt_size,
-				 cur_node.host_end - cur_node.host_start);
+	  if (!aq)
+	    gomp_coalesce_buf_add (&cbuf, tgt_size,
+				   cur_node.host_end - cur_node.host_start);
 	  tgt_size += cur_node.host_end - cur_node.host_start;
 	  has_firstprivate = true;
 	  continue;
@@ -1142,7 +1150,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	  if (tgt_align < align)
 	    tgt_align = align;
 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	  if (gomp_to_device_kind_p (kind & typemask))
+	  if (!aq && gomp_to_device_kind_p (kind & typemask))
 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
 				   cur_node.host_end - cur_node.host_start);
 	  tgt_size += cur_node.host_end - cur_node.host_start;
@@ -1335,7 +1343,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		len = sizes[i];
 		gomp_copy_host2dev (devicep, aq,
 				    (void *) (tgt->tgt_start + tgt_size),
-				    (void *) hostaddrs[i], len, cbufp);
+				    (void *) hostaddrs[i], len, false, cbufp);
 		tgt_size += len;
 		continue;
 	      case GOMP_MAP_FIRSTPRIVATE_INT:
@@ -1423,12 +1431,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		if (cur_node.tgt_offset)
 		  cur_node.tgt_offset -= sizes[i];
 		gomp_copy_host2dev (devicep, aq,
-				    (void *) (n->tgt->tgt_start
-					      + n->tgt_offset
+				    (void *) (n->tgt->tgt_start + n->tgt_offset
 					      + cur_node.host_start
 					      - n->host_start),
 				    (void *) &cur_node.tgt_offset,
-				    sizeof (void *), cbufp);
+				    sizeof (void *), true, cbufp);
 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
 				      + cur_node.host_start - n->host_start;
 		continue;
@@ -1548,7 +1555,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
-					k->host_end - k->host_start, cbufp);
+					k->host_end - k->host_start, false,
+					cbufp);
 		    break;
 		  case GOMP_MAP_POINTER:
 		    gomp_map_pointer (tgt, aq,
@@ -1560,7 +1568,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
-					k->host_end - k->host_start, cbufp);
+					k->host_end - k->host_start, false,
+					cbufp);
 		    tgt->list[i].has_null_ptr_assoc = false;
 
 		    for (j = i + 1; j < mapnum; j++)
@@ -1617,7 +1626,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
-					sizeof (void *), cbufp);
+					sizeof (void *), false, cbufp);
 		    break;
 		  default:
 		    gomp_mutex_unlock (&devicep->lock);
@@ -1633,7 +1642,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		    /* We intentionally do not use coalescing here, as it's not
 		       data allocated by the current call to this function.  */
 		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
-					&tgt_addr, sizeof (void *), NULL);
+					&tgt_addr, sizeof (void *), true, NULL);
 		  }
 		array++;
 	      }
@@ -1712,7 +1721,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 					    (void *) tgt->tgt_start + k->tgt_offset,
 					    (void *) k->host_start,
 					    nca->data_row_size,
-					    cbufp);
+					    true, cbufp);
 		      array++;
 		    }
 		  nca->tgt_data_rows[j]
@@ -1727,7 +1736,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 		  void *ptrblock = goacc_noncontig_array_create_ptrblock
 		    (nca, target_ptrblock);
 		  gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock,
-				      nca->ptrblock_size, cbufp);
+				      nca->ptrblock_size, true, cbufp);
 		  free (ptrblock);
 		}
 	    }
@@ -1742,7 +1751,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	  gomp_copy_host2dev (devicep, aq,
 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
 			      (void *) &cur_node.tgt_offset, sizeof (void *),
-			      cbufp);
+			      true, cbufp);
 	}
     }
 
@@ -1754,7 +1763,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
 			    (char *) cbuf.buf + (cbuf.chunks[c].start
 						 - cbuf.chunks[0].start),
-			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
+			    cbuf.chunks[c].end - cbuf.chunks[c].start, true,
+			    NULL);
       free (cbuf.buf);
       cbuf.buf = NULL;
       cbufp = NULL;
@@ -2033,7 +2043,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
 
 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
 	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
-				  NULL);
+				  false, NULL);
 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
 	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
 	  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c
index 573a8214bf0..dadb6d37942 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c
@@ -1,6 +1,8 @@
 #include <stdlib.h>
 
-/* Test asyncronous attach and detach operation.  */
+#define ITERATIONS 1023
+
+/* Test asynchronous attach and detach operation.  */
 
 typedef struct {
   int *a;
@@ -25,13 +27,13 @@ main (int argc, char* argv[])
 
 #pragma acc enter data copyin(m)
 
-  for (int i = 0; i < 99; i++)
+  for (int i = 0; i < ITERATIONS; i++)
     {
       int j;
-#pragma acc parallel loop copy(m.a[0:N]) async(i % 2)
+#pragma acc parallel loop copy(m.a[0:N]) async(0)
       for (j = 0; j < N; j++)
 	m.a[j]++;
-#pragma acc parallel loop copy(m.b[0:N]) async((i + 1) % 2)
+#pragma acc parallel loop copy(m.b[0:N]) async(1)
       for (j = 0; j < N; j++)
 	m.b[j]++;
     }
@@ -40,9 +42,9 @@ main (int argc, char* argv[])
 
   for (i = 0; i < N; i++)
     {
-      if (m.a[i] != 99)
+      if (m.a[i] != ITERATIONS)
 	abort ();
-      if (m.b[i] != 99)
+      if (m.b[i] != ITERATIONS)
 	abort ();
     }
 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90
index ddd557d3be0..e2e47c967fa 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90
@@ -27,6 +27,9 @@ program main
 
   if (acc_is_present (h) .neqv. .TRUE.) stop 1
 
+  ! We must wait for the update to be done.
+  call acc_wait (async)
+
   h(:) = 0
 
   call acc_copyout_async (h, sizeof (h), async)
@@ -45,6 +48,8 @@ program main
   
   if (acc_is_present (h) .neqv. .TRUE.) stop 3
 
+  call acc_wait (async)
+
   do i = 1, N
     if (h(i) /= i + i) stop 4
   end do 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90
index ccd1ce6ee18..ef9a6f6626c 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90
@@ -27,6 +27,9 @@ program main
 
   if (acc_is_present (h) .neqv. .TRUE.) stop 1
 
+  ! We must wait for the update to be done.
+  call acc_wait (async)
+
   h(:) = 0
 
   call acc_copyout_async (h, sizeof (h), async)
@@ -45,6 +48,8 @@ program main
   
   if (acc_is_present (h) .neqv. .TRUE.) stop 3
 
+  call acc_wait (async)
+
   do i = 1, N
     if (h(i) /= i + i) stop 4
   end do


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-05-13 16:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-13 16:13 [gcc/devel/omp/gcc-11] Fix OpenACC "ephemeral" asynchronous host-to-device copies Kwok Yeung

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).