public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-6588] Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data
@ 2023-03-10 15:20 Thomas Schwinge
0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2023-03-10 15:20 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:2b2340e236c0bba8aaca358ea25a5accd8249fbd
commit r13-6588-g2b2340e236c0bba8aaca358ea25a5accd8249fbd
Author: Thomas Schwinge <thomas@codesourcery.com>
Date: Mon Feb 27 16:41:17 2023 +0100
Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data
This does *allow*, but under no circumstances is this currently going to be
used: all potentially applicable data is non-'ephemeral', and thus not
considered for 'gomp_coalesce_buf_add' for OpenACC 'async'. (But a use will
emerge later.)
Follow-up to commit r12-2530-gd88a6951586c7229b25708f4486eaaf4bf4b5bbe
"Don't use libgomp 'cbuf' buffering with OpenACC 'async'", addressing this
TODO comment:
TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
is it more performant to use libgomp CBUF buffering or individual device
asyncronous copying?)
Ephemeral data is small, and therefore individual device asyncronous copying
does seem dubious -- in particular given that for all those, we'd individually
have to allocate and queue for deallocation a temporary buffer to capture the
ephemeral data. Instead, just let the 'cbuf' *be* the temporary buffer.
libgomp/
* target.c (gomp_copy_host2dev, gomp_map_vars_internal): Allow
libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral'
data.
Diff:
---
libgomp/target.c | 70 +++++++++++++++++++++++++++++---------------------------
1 file changed, 36 insertions(+), 34 deletions(-)
diff --git a/libgomp/target.c b/libgomp/target.c
index 0344f68a936..074caa6a4dc 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -310,10 +310,8 @@ struct gomp_coalesce_buf
This must not be used for asynchronous copies, because the host data might
not be computed yet (by an earlier asynchronous compute region, for
- example).
- TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
- is it more performant to use libgomp CBUF buffering or individual device
- asyncronous copying?) */
+ example). The exception is for EPHEMERAL data, that we know is available
+ already "by construction". */
static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
@@ -377,30 +375,6 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
void *d, const void *h, size_t sz,
bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
- if (__builtin_expect (aq != NULL, 0))
- {
- /* See 'gomp_coalesce_buf_add'. */
- assert (!cbuf);
-
- void *h_buf = (void *) h;
- if (ephemeral)
- {
- /* We're queueing up an asynchronous copy from data that may
- disappear before the transfer takes place (i.e. because it is a
- stack local in a function that is no longer executing). Make a
- copy of the data into a temporary buffer in those cases. */
- h_buf = gomp_malloc (sz);
- memcpy (h_buf, h, sz);
- }
- goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
- "dev", d, "host", h_buf, h, sz, aq);
- if (ephemeral)
- /* Free temporary buffer once the transfer has completed. */
- devicep->openacc.async.queue_callback_func (aq, free, h_buf);
-
- return;
- }
-
if (cbuf)
{
uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
@@ -420,6 +394,12 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("internal libgomp cbuf error");
}
+
+ /* In an asynchronous context, verify that CBUF isn't used
+ with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
+ if (__builtin_expect (aq != NULL, 0))
+ assert (ephemeral);
+
memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
h, sz);
return;
@@ -430,7 +410,28 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
}
}
- gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+ if (__builtin_expect (aq != NULL, 0))
+ {
+ void *h_buf = (void *) h;
+ if (ephemeral)
+ {
+ /* We're queueing up an asynchronous copy from data that may
+ disappear before the transfer takes place (i.e. because it is a
+ stack local in a function that is no longer executing). As we've
+ not been able to use CBUF, make a copy of the data into a
+ temporary buffer. */
+ h_buf = gomp_malloc (sz);
+ memcpy (h_buf, h, sz);
+ }
+ goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+ "dev", d, "host", h_buf, h, sz, aq);
+ if (ephemeral)
+ /* Free once the transfer has completed. */
+ devicep->openacc.async.queue_callback_func (aq, free, h_buf);
+ }
+ else
+ gomp_device_copy (devicep, devicep->host2dev_func,
+ "dev", d, "host", h, sz);
}
attribute_hidden void
@@ -1751,9 +1752,6 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
if (cbufp)
{
- /* See 'gomp_coalesce_buf_add'. */
- assert (!aq);
-
long c = 0;
for (c = 0; c < cbuf.chunk_cnt; ++c)
gomp_copy_host2dev (devicep, aq,
@@ -1761,8 +1759,12 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
(char *) cbuf.buf + (cbuf.chunks[c].start
- cbuf.chunks[0].start),
cbuf.chunks[c].end - cbuf.chunks[c].start,
- true, NULL);
- free (cbuf.buf);
+ false, NULL);
+ if (aq)
+ /* Free once the transfer has completed. */
+ devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
+ else
+ free (cbuf.buf);
cbuf.buf = NULL;
cbufp = NULL;
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-03-10 15:20 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-10 15:20 [gcc r13-6588] Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data Thomas Schwinge
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).