public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-2530] Don't use libgomp 'cbuf' buffering with OpenACC 'async'
@ 2021-07-27  9:19 Thomas Schwinge
  0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2021-07-27  9:19 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d88a6951586c7229b25708f4486eaaf4bf4b5bbe

commit r12-2530-gd88a6951586c7229b25708f4486eaaf4bf4b5bbe
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Fri Jul 23 22:01:32 2021 +0200

    Don't use libgomp 'cbuf' buffering with OpenACC 'async'
    
    The host data might not be computed yet (by an earlier asynchronous compute
    region, for example.
    
            libgomp/
            * target.c (gomp_coalesce_buf_add): Update comment.
            (gomp_copy_host2dev, gomp_map_vars_internal): Don't expect to see
            'aq && cbuf'.
            (gomp_map_vars_internal): Only 'if (!aq)', do
            'gomp_coalesce_buf_add'.
            * testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c: Remove
            XFAIL.
    
    Co-Authored-By: Julian Brown <julian@codesourcery.com>

Diff:
---
 libgomp/target.c                                   | 71 ++++++++++++++--------
 .../libgomp.oacc-c-c++-common/async-data-1-2.c     |  5 +-
 2 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/libgomp/target.c b/libgomp/target.c
index 5576e57f822..453b3210e40 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -275,7 +275,14 @@ struct gomp_coalesce_buf
    host to device (e.g. map(alloc:), map(from:) etc.).  */
 #define MAX_COALESCE_BUF_GAP	(4 * 1024)
 
-/* Add region with device tgt_start relative offset and length to CBUF.  */
+/* Add region with device tgt_start relative offset and length to CBUF.
+
+   This must not be used for asynchronous copies, because the host data might
+   not be computed yet (by an earlier asynchronous compute region, for
+   example).
+   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
+   is it more performant to use libgomp CBUF buffering or individual device
+   asyncronous copying?)  */
 
 static inline void
 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
@@ -339,6 +346,30 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
 		    void *d, const void *h, size_t sz,
 		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
 {
+  if (__builtin_expect (aq != NULL, 0))
+    {
+      /* See 'gomp_coalesce_buf_add'.  */
+      assert (!cbuf);
+
+      void *h_buf = (void *) h;
+      if (ephemeral)
+	{
+	  /* We're queueing up an asynchronous copy from data that may
+	     disappear before the transfer takes place (i.e. because it is a
+	     stack local in a function that is no longer executing).  Make a
+	     copy of the data into a temporary buffer in those cases.  */
+	  h_buf = gomp_malloc (sz);
+	  memcpy (h_buf, h, sz);
+	}
+      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+			       "dev", d, "host", h_buf, h, sz, aq);
+      if (ephemeral)
+	/* Free temporary buffer once the transfer has completed.  */
+	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
+
+      return;
+    }
+
   if (cbuf)
     {
       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
@@ -364,26 +395,8 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
 	    }
 	}
     }
-  if (__builtin_expect (aq != NULL, 0))
-    {
-      void *h_buf = (void *) h;
-      if (ephemeral)
-	{
-	  /* We're queueing up an asynchronous copy from data that may
-	     disappear before the transfer takes place (i.e. because it is a
-	     stack local in a function that is no longer executing).  Make a
-	     copy of the data into a temporary buffer in those cases.  */
-	  h_buf = gomp_malloc (sz);
-	  memcpy (h_buf, h, sz);
-	}
-      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
-			       "dev", d, "host", h_buf, h, sz, aq);
-      if (ephemeral)
-	/* Free temporary buffer once the transfer has completed.  */
-	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
-    }
-  else
-    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+
+  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
 }
 
 attribute_hidden void
@@ -959,8 +972,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	      for (i = first; i <= last; i++)
 		{
 		  tgt->list[i].key = NULL;
-		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
-					     & typemask))
+		  if (!aq
+		      && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
+						& typemask))
 		    gomp_coalesce_buf_add (&cbuf,
 					   tgt_size - cur_node.host_end
 					   + (uintptr_t) hostaddrs[i],
@@ -1001,8 +1015,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	  if (tgt_align < align)
 	    tgt_align = align;
 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	  gomp_coalesce_buf_add (&cbuf, tgt_size,
-				 cur_node.host_end - cur_node.host_start);
+	  if (!aq)
+	    gomp_coalesce_buf_add (&cbuf, tgt_size,
+				   cur_node.host_end - cur_node.host_start);
 	  tgt_size += cur_node.host_end - cur_node.host_start;
 	  has_firstprivate = true;
 	  continue;
@@ -1095,7 +1110,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	  if (tgt_align < align)
 	    tgt_align = align;
 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	  if (gomp_to_device_kind_p (kind & typemask))
+	  if (!aq
+	      && gomp_to_device_kind_p (kind & typemask))
 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
 				   cur_node.host_end - cur_node.host_start);
 	  tgt_size += cur_node.host_end - cur_node.host_start;
@@ -1596,6 +1612,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 
   if (cbufp)
     {
+      /* See 'gomp_coalesce_buf_add'.  */
+      assert (!aq);
+
       long c = 0;
       for (c = 0; c < cbuf.chunk_cnt; ++c)
 	gomp_copy_host2dev (devicep, aq,
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c
index 385237698e2..3299499312f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/async-data-1-2.c
@@ -1,10 +1,9 @@
 /* Verify back to back 'async' operations, two data mappings.
 
-   Due to two data mappings, this is using the libgomp 'cbuf' buffering.
+   Make sure that despite two data mappings, this isn't using the libgomp
+   'cbuf' buffering.
 */
 
-/* { dg-xfail-run-if "TODO" { openacc_radeon_accel_selected } } */
-
 
 #include <stdlib.h>


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-07-27  9:19 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-27  9:19 [gcc r12-2530] Don't use libgomp 'cbuf' buffering with OpenACC 'async' Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).