public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data
@ 2023-03-10 15:25 Thomas Schwinge
  0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2023-03-10 15:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ac859ac4df59373db27f2b39ef37a50c51c14e64

commit ac859ac4df59373db27f2b39ef37a50c51c14e64
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Mon Feb 27 16:41:17 2023 +0100

    Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data
    
    This does *allow*, but under no circumstances is this currently going to be
    used: all potentially applicable data is non-'ephemeral', and thus not
    considered for 'gomp_coalesce_buf_add' for OpenACC 'async'.  (But a use will
    emerge later.)
    
    Follow-up to commit r12-2530-gd88a6951586c7229b25708f4486eaaf4bf4b5bbe
    "Don't use libgomp 'cbuf' buffering with OpenACC 'async'", addressing this
    TODO comment:
    
        TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
        is it more performant to use libgomp CBUF buffering or individual device
        asyncronous copying?)
    
    Ephemeral data is small, and therefore individual device asynchronous copying
    does seem dubious -- in particular given that for all those, we'd individually
    have to allocate and queue for deallocation a temporary buffer to capture the
    ephemeral data.  Instead, just let the 'cbuf' *be* the temporary buffer.
    
            libgomp/
            * target.c (gomp_copy_host2dev, gomp_map_vars_internal): Allow
            libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral'
            data.
    
    (cherry picked from commit 2b2340e236c0bba8aaca358ea25a5accd8249fbd)

Diff:
---
 libgomp/ChangeLog.omp |  7 ++++++
 libgomp/target.c      | 70 ++++++++++++++++++++++++++-------------------------
 2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 39ddd0b73ee..e3bab148f77 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -3,6 +3,13 @@
 	Backported from master:
 	2023-03-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* target.c (gomp_copy_host2dev, gomp_map_vars_internal): Allow
+	libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral'
+	data.
+
+	Backported from master:
+	2023-03-10  Thomas Schwinge  <thomas@codesourcery.com>
+
 	* target.c (gomp_map_vars_internal): Use 'OFFSET_INLINED' for
 	'GOMP_MAP_IF_PRESENT'.
 	* plugin/plugin-gcn.c (gcn_exec, GOMP_OFFLOAD_openacc_exec)
diff --git a/libgomp/target.c b/libgomp/target.c
index b5b1af64d53..60f9b432dc4 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -310,10 +310,8 @@ struct gomp_coalesce_buf
 
    This must not be used for asynchronous copies, because the host data might
    not be computed yet (by an earlier asynchronous compute region, for
-   example).
-   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
-   is it more performant to use libgomp CBUF buffering or individual device
-   asyncronous copying?)  */
+   example).  The exception is for EPHEMERAL data, that we know is available
+   already "by construction".  */
 
 static inline void
 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
@@ -379,30 +377,6 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
 		    void *d, const void *h, size_t sz,
 		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
 {
-  if (__builtin_expect (aq != NULL, 0))
-    {
-      /* See 'gomp_coalesce_buf_add'.  */
-      assert (!cbuf);
-
-      void *h_buf = (void *) h;
-      if (ephemeral)
-	{
-	  /* We're queueing up an asynchronous copy from data that may
-	     disappear before the transfer takes place (i.e. because it is a
-	     stack local in a function that is no longer executing).  Make a
-	     copy of the data into a temporary buffer in those cases.  */
-	  h_buf = gomp_malloc (sz);
-	  memcpy (h_buf, h, sz);
-	}
-      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
-			       "dev", d, "host", h_buf, h, sz, aq);
-      if (ephemeral)
-	/* Free temporary buffer once the transfer has completed.  */
-	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
-
-      return;
-    }
-
   if (cbuf)
     {
       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
@@ -422,6 +396,12 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
 		      gomp_mutex_unlock (&devicep->lock);
 		      gomp_fatal ("internal libgomp cbuf error");
 		    }
+
+		  /* In an asynchronous context, verify that CBUF isn't used
+		     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
+		  if (__builtin_expect (aq != NULL, 0))
+		    assert (ephemeral);
+
 		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
 			  h, sz);
 		  return;
@@ -432,7 +412,28 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
 	}
     }
 
-  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+  if (__builtin_expect (aq != NULL, 0))
+    {
+      void *h_buf = (void *) h;
+      if (ephemeral)
+	{
+	  /* We're queueing up an asynchronous copy from data that may
+	     disappear before the transfer takes place (i.e. because it is a
+	     stack local in a function that is no longer executing).  As we've
+	     not been able to use CBUF, make a copy of the data into a
+	     temporary buffer.  */
+	  h_buf = gomp_malloc (sz);
+	  memcpy (h_buf, h, sz);
+	}
+      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+			       "dev", d, "host", h_buf, h, sz, aq);
+      if (ephemeral)
+	/* Free once the transfer has completed.  */
+	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
+    }
+  else
+    gomp_device_copy (devicep, devicep->host2dev_func,
+		      "dev", d, "host", h, sz);
 }
 
 attribute_hidden void
@@ -1957,9 +1958,6 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 
   if (cbufp)
     {
-      /* See 'gomp_coalesce_buf_add'.  */
-      assert (!aq);
-
       long c = 0;
       for (c = 0; c < cbuf.chunk_cnt; ++c)
 	gomp_copy_host2dev (devicep, aq,
@@ -1967,8 +1965,12 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 			    (char *) cbuf.buf + (cbuf.chunks[c].start
 						 - cbuf.chunks[0].start),
 			    cbuf.chunks[c].end - cbuf.chunks[c].start,
-			    true, NULL);
-      free (cbuf.buf);
+			    false, NULL);
+      if (aq)
+	/* Free once the transfer has completed.  */
+	devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
+      else
+	free (cbuf.buf);
       cbuf.buf = NULL;
       cbufp = NULL;
     }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-03-10 15:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-10 15:25 [gcc/devel/omp/gcc-12] Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).