public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] Simplify OpenACC 'no_create' clause implementation
@ 2023-03-10 15:00 Thomas Schwinge
0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2023-03-10 15:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:cb770c92219bd1b29cc366fc8f7e54eb9518a90f
commit cb770c92219bd1b29cc366fc8f7e54eb9518a90f
Author: Thomas Schwinge <thomas@codesourcery.com>
Date: Mon Feb 27 12:02:02 2023 +0100
Simplify OpenACC 'no_create' clause implementation
For 'OFFSET_INLINED', 'gomp_map_val' does the right thing, and we may then
simplify the device plugins accordingly.
This is a follow-up to
Subversion r279551 (Git commit a6163563f2ce502bd4ef444bd5de33570bb8eeb1)
"Add OpenACC 2.6's no_create",
Subversion r279622 (Git commit 5bcd470bf0749e1f56d05dd43aa9584ff2e3a090)
"Use gomp_map_val for OpenACC host-to-device address translation".
libgomp/
* target.c (gomp_map_vars_internal): Use 'OFFSET_INLINED' for
'GOMP_MAP_IF_PRESENT'.
* plugin/plugin-gcn.c (gcn_exec, GOMP_OFFLOAD_openacc_exec)
(GOMP_OFFLOAD_openacc_async_exec): Adjust.
* plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
(GOMP_OFFLOAD_openacc_async_exec): Likewise.
* testsuite/libgomp.oacc-c-c++-common/no_create-1.c: Add 'async'
testing.
* testsuite/libgomp.oacc-c-c++-common/no_create-2.c: Likewise.
(cherry picked from commit 199867d07be65cb0227a318ebf42b8376ca09313)
Diff:
---
libgomp/ChangeLog.omp | 13 ++++++++++
libgomp/plugin/plugin-gcn.c | 18 ++++++-------
libgomp/plugin/plugin-nvptx.c | 19 +++++++-------
libgomp/target.c | 2 +-
.../libgomp.oacc-c-c++-common/no_create-1.c | 30 ++++++++++++++++++----
.../libgomp.oacc-c-c++-common/no_create-2.c | 12 ++++++++-
6 files changed, 67 insertions(+), 27 deletions(-)
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index da11f40651f..39ddd0b73ee 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -3,6 +3,19 @@
Backported from master:
2023-03-10 Thomas Schwinge <thomas@codesourcery.com>
+ * target.c (gomp_map_vars_internal): Use 'OFFSET_INLINED' for
+ 'GOMP_MAP_IF_PRESENT'.
+ * plugin/plugin-gcn.c (gcn_exec, GOMP_OFFLOAD_openacc_exec)
+ (GOMP_OFFLOAD_openacc_async_exec): Adjust.
+ * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
+ (GOMP_OFFLOAD_openacc_async_exec): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/no_create-1.c: Add 'async'
+ testing.
+ * testsuite/libgomp.oacc-c-c++-common/no_create-2.c: Likewise.
+
+ Backported from master:
+ 2023-03-10 Thomas Schwinge <thomas@codesourcery.com>
+
* oacc-async.c (goacc_wait): Remove 'acc_async_test' -> skip
shortcut.
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 028fd1c1b3b..11b7f25d223 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3219,7 +3219,7 @@ usm_heap_create (size_t size)
/* Execute an OpenACC kernel, synchronously or asynchronously. */
static void
-gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
+gcn_exec (struct kernel_info *kernel, size_t mapnum,
void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async,
struct goacc_asyncqueue *aq)
{
@@ -3232,9 +3232,7 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
/* devaddrs must be double-indirect on the target. */
void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum);
for (size_t i = 0; i < mapnum; i++)
- hsa_fns.hsa_memory_copy_fn (&ind_da[i],
- devaddrs[i] ? &devaddrs[i] : &hostaddrs[i],
- sizeof (void *));
+ hsa_fns.hsa_memory_copy_fn (&ind_da[i], &devaddrs[i], sizeof (void *));
struct hsa_kernel_description *hsa_kernel_desc = NULL;
for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++)
@@ -4098,13 +4096,13 @@ GOMP_OFFLOAD_is_usm_ptr (void *ptr)
void
GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum,
- void **hostaddrs, void **devaddrs, unsigned *dims,
+ void **hostaddrs __attribute__((unused)),
+ void **devaddrs, unsigned *dims,
void *targ_mem_desc)
{
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, false,
- NULL);
+ gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, false, NULL);
}
/* Run an asynchronous OpenACC kernel on the specified queue. */
@@ -4119,14 +4117,14 @@ GOMP_OFFLOAD_openacc_exec_params (void (*fn_ptr) (void *), size_t mapnum,
void
GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
- void **hostaddrs, void **devaddrs,
+ void **hostaddrs __attribute__((unused)),
+ void **devaddrs,
unsigned *dims, void *targ_mem_desc,
struct goacc_asyncqueue *aq)
{
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, true,
- aq);
+ gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, true, aq);
}
/* Create a new asynchronous thread and queue for running future kernels. */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index a7896e4dabe..1c056592643 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -773,8 +773,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
}
static void
-nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
- unsigned *dims, void *targ_mem_desc,
+nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc,
CUdeviceptr dp, CUstream stream)
{
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
@@ -1742,7 +1741,8 @@ GOMP_OFFLOAD_page_locked_host_free (void *ptr)
void
GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
- void **hostaddrs, void **devaddrs,
+ void **hostaddrs __attribute__((unused)),
+ void **devaddrs,
unsigned *dims, void *targ_mem_desc)
{
GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
@@ -1761,7 +1761,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
size_t s = mapnum * sizeof (void *);
hp = alloca (s);
for (int i = 0; i < mapnum; i++)
- hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]);
+ hp[i] = devaddrs[i];
CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
if (profiling_p)
goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
@@ -1803,8 +1803,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
}
}
- nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc,
- dp, NULL);
+ nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL);
CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL);
const char *maybe_abort_msg = "(perhaps abort was called)";
@@ -1829,7 +1828,8 @@ cuda_free_argmem (void *ptr)
void
GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
- void **hostaddrs, void **devaddrs,
+ void **hostaddrs __attribute__((unused)),
+ void **devaddrs,
unsigned *dims, void *targ_mem_desc,
struct goacc_asyncqueue *aq)
{
@@ -1851,7 +1851,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s);
hp = block + 2;
for (int i = 0; i < mapnum; i++)
- hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]);
+ hp[i] = devaddrs[i];
CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
if (profiling_p)
goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
@@ -1900,8 +1900,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
}
}
- nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc,
- dp, aq->cuda_stream);
+ nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream);
if (mapnum > 0)
GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block);
diff --git a/libgomp/target.c b/libgomp/target.c
index e4fc7da6f07..b5b1af64d53 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1252,7 +1252,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
{
/* Not present, hence, skip entry - including its MAP_POINTER,
when existing. */
- tgt->list[i].offset = OFFSET_POINTER;
+ tgt->list[i].offset = OFFSET_INLINED;
if (i + 1 < mapnum
&& ((typemask & get_kind (short_mapkind, kinds, i + 1))
== GOMP_MAP_POINTER))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c
index 22e0c20cce9..05297d3a280 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c
@@ -22,15 +22,10 @@ main (int argc, char *argv[])
devptr[0] = &var;
devptr[1] = &arr[2];
}
-
if (acc_hostptr (devptr[0]) != (void *) &var)
__builtin_abort ();
if (acc_hostptr (devptr[1]) != (void *) &arr[2])
__builtin_abort ();
-
- acc_delete (&var, sizeof (var));
- acc_delete (arr, N * sizeof (*arr));
-
#if ACC_MEM_SHARED
if (devptr[0] != &var)
__builtin_abort ();
@@ -43,6 +38,31 @@ main (int argc, char *argv[])
__builtin_abort ();
#endif
+#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async
+ {
+ devptr[0] = &arr[N - 2];
+ devptr[1] = &var;
+ }
+#pragma acc wait
+ if (acc_hostptr (devptr[0]) != (void *) &arr[N - 2])
+ __builtin_abort ();
+ if (acc_hostptr (devptr[1]) != (void *) &var)
+ __builtin_abort ();
+#if ACC_MEM_SHARED
+ if (devptr[0] != &arr[N - 2])
+ __builtin_abort ();
+ if (devptr[1] != &var)
+ __builtin_abort ();
+#else
+ if (devptr[0] == &arr[N - 2])
+ __builtin_abort ();
+ if (devptr[1] == &var)
+ __builtin_abort ();
+#endif
+
+ acc_delete (&var, sizeof (var));
+ acc_delete (arr, N * sizeof (*arr));
+
free (arr);
return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c
index fbd01a25956..202092fe8a8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c
@@ -18,12 +18,22 @@ main (int argc, char *argv[])
devptr[0] = &var;
devptr[1] = &arr[2];
}
-
if (devptr[0] != &var)
__builtin_abort ();
if (devptr[1] != &arr[2])
__builtin_abort ();
+#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async
+ {
+ devptr[0] = &arr[N - 2];
+ devptr[1] = &var;
+ }
+#pragma acc wait
+ if (devptr[0] != &arr[N - 2])
+ __builtin_abort ();
+ if (devptr[1] != &var)
+ __builtin_abort ();
+
free (arr);
return 0;
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-03-10 15:00 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-10 15:00 [gcc/devel/omp/gcc-12] Simplify OpenACC 'no_create' clause implementation Thomas Schwinge
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).