public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-6587] Simplify OpenACC 'no_create' clause implementation
@ 2023-03-10 14:56 Thomas Schwinge
  0 siblings, 0 replies; only message in thread
From: Thomas Schwinge @ 2023-03-10 14:56 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:199867d07be65cb0227a318ebf42b8376ca09313

commit r13-6587-g199867d07be65cb0227a318ebf42b8376ca09313
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Mon Feb 27 12:02:02 2023 +0100

    Simplify OpenACC 'no_create' clause implementation
    
    For 'OFFSET_INLINED', 'gomp_map_val' does the right thing, and we may then
    simplify the device plugins accordingly.
    
    This is a follow-up to
    Subversion r279551 (Git commit a6163563f2ce502bd4ef444bd5de33570bb8eeb1)
    "Add OpenACC 2.6's no_create",
    Subversion r279622 (Git commit 5bcd470bf0749e1f56d05dd43aa9584ff2e3a090)
    "Use gomp_map_val for OpenACC host-to-device address translation".
    
            libgomp/
            * target.c (gomp_map_vars_internal): Use 'OFFSET_INLINED' for
            'GOMP_MAP_IF_PRESENT'.
            * plugin/plugin-gcn.c (gcn_exec, GOMP_OFFLOAD_openacc_exec)
            (GOMP_OFFLOAD_openacc_async_exec): Adjust.
            * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
            (GOMP_OFFLOAD_openacc_async_exec): Likewise.
            * testsuite/libgomp.oacc-c-c++-common/no_create-1.c: Add 'async'
            testing.
            * testsuite/libgomp.oacc-c-c++-common/no_create-2.c: Likewise.

Diff:
---
 libgomp/plugin/plugin-gcn.c                        | 18 ++++++-------
 libgomp/plugin/plugin-nvptx.c                      | 19 +++++++-------
 libgomp/target.c                                   |  2 +-
 .../libgomp.oacc-c-c++-common/no_create-1.c        | 30 ++++++++++++++++++----
 .../libgomp.oacc-c-c++-common/no_create-2.c        | 12 ++++++++-
 5 files changed, 54 insertions(+), 27 deletions(-)

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 96920a48d2e..954a140ba5e 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3064,7 +3064,7 @@ wait_queue (struct goacc_asyncqueue *aq)
 /* Execute an OpenACC kernel, synchronously or asynchronously.  */
 
 static void
-gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
+gcn_exec (struct kernel_info *kernel, size_t mapnum,
 	  void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async,
 	  struct goacc_asyncqueue *aq)
 {
@@ -3077,9 +3077,7 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
   /* devaddrs must be double-indirect on the target.  */
   void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum);
   for (size_t i = 0; i < mapnum; i++)
-    hsa_fns.hsa_memory_copy_fn (&ind_da[i],
-				devaddrs[i] ? &devaddrs[i] : &hostaddrs[i],
-				sizeof (void *));
+    hsa_fns.hsa_memory_copy_fn (&ind_da[i], &devaddrs[i], sizeof (void *));
 
   struct hsa_kernel_description *hsa_kernel_desc = NULL;
   for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++)
@@ -3887,27 +3885,27 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
 
 void
 GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum,
-			   void **hostaddrs, void **devaddrs, unsigned *dims,
+			   void **hostaddrs __attribute__((unused)),
+			   void **devaddrs, unsigned *dims,
 			   void *targ_mem_desc)
 {
   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
 
-  gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, false,
-	    NULL);
+  gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, false, NULL);
 }
 
 /* Run an asynchronous OpenACC kernel on the specified queue.  */
 
 void
 GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
-				 void **hostaddrs, void **devaddrs,
+				 void **hostaddrs __attribute__((unused)),
+				 void **devaddrs,
 				 unsigned *dims, void *targ_mem_desc,
 				 struct goacc_asyncqueue *aq)
 {
   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
 
-  gcn_exec (kernel, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, true,
-	    aq);
+  gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, true, aq);
 }
 
 /* Create a new asynchronous thread and queue for running future kernels.  */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 1166807f68f..13e31156d36 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -742,8 +742,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
 }
 
 static void
-nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
-	    unsigned *dims, void *targ_mem_desc,
+nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc,
 	    CUdeviceptr dp, CUstream stream)
 {
   struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
@@ -1530,7 +1529,8 @@ GOMP_OFFLOAD_free (int ord, void *ptr)
 
 void
 GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
-			   void **hostaddrs, void **devaddrs,
+			   void **hostaddrs __attribute__((unused)),
+			   void **devaddrs,
 			   unsigned *dims, void *targ_mem_desc)
 {
   GOMP_PLUGIN_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
@@ -1549,7 +1549,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
       size_t s = mapnum * sizeof (void *);
       hp = alloca (s);
       for (int i = 0; i < mapnum; i++)
-	hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]);
+	hp[i] = devaddrs[i];
       CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
       if (profiling_p)
 	goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
@@ -1591,8 +1591,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
 	}
     }
 
-  nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc,
-	      dp, NULL);
+  nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL);
 
   CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL);
   const char *maybe_abort_msg = "(perhaps abort was called)";
@@ -1617,7 +1616,8 @@ cuda_free_argmem (void *ptr)
 
 void
 GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
-				 void **hostaddrs, void **devaddrs,
+				 void **hostaddrs __attribute__((unused)),
+				 void **devaddrs,
 				 unsigned *dims, void *targ_mem_desc,
 				 struct goacc_asyncqueue *aq)
 {
@@ -1639,7 +1639,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
       block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s);
       hp = block + 2;
       for (int i = 0; i < mapnum; i++)
-	hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]);
+	hp[i] = devaddrs[i];
       CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
       if (profiling_p)
 	goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
@@ -1688,8 +1688,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
 	}
     }
 
-  nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc,
-	      dp, aq->cuda_stream);
+  nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream);
 
   if (mapnum > 0)
     GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block);
diff --git a/libgomp/target.c b/libgomp/target.c
index 483851c95ac..0344f68a936 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1207,7 +1207,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
 	    {
 	      /* Not present, hence, skip entry - including its MAP_POINTER,
 		 when existing.  */
-	      tgt->list[i].offset = OFFSET_POINTER;
+	      tgt->list[i].offset = OFFSET_INLINED;
 	      if (i + 1 < mapnum
 		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
 		      == GOMP_MAP_POINTER))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c
index 22e0c20cce9..05297d3a280 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-1.c
@@ -22,15 +22,10 @@ main (int argc, char *argv[])
     devptr[0] = &var;
     devptr[1] = &arr[2];
   }
-
   if (acc_hostptr (devptr[0]) != (void *) &var)
     __builtin_abort ();
   if (acc_hostptr (devptr[1]) != (void *) &arr[2])
     __builtin_abort ();
-
-  acc_delete (&var, sizeof (var));
-  acc_delete (arr, N * sizeof (*arr));
-
 #if ACC_MEM_SHARED
   if (devptr[0] != &var)
     __builtin_abort ();
@@ -43,6 +38,31 @@ main (int argc, char *argv[])
     __builtin_abort ();
 #endif
 
+#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async
+  {
+    devptr[0] = &arr[N - 2];
+    devptr[1] = &var;
+  }
+#pragma acc wait
+  if (acc_hostptr (devptr[0]) != (void *) &arr[N - 2])
+    __builtin_abort ();
+  if (acc_hostptr (devptr[1]) != (void *) &var)
+    __builtin_abort ();
+#if ACC_MEM_SHARED
+  if (devptr[0] != &arr[N - 2])
+    __builtin_abort ();
+  if (devptr[1] != &var)
+    __builtin_abort ();
+#else
+  if (devptr[0] == &arr[N - 2])
+    __builtin_abort ();
+  if (devptr[1] == &var)
+    __builtin_abort ();
+#endif
+
+  acc_delete (&var, sizeof (var));
+  acc_delete (arr, N * sizeof (*arr));
+
   free (arr);
 
   return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c
index fbd01a25956..202092fe8a8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/no_create-2.c
@@ -18,12 +18,22 @@ main (int argc, char *argv[])
     devptr[0] = &var;
     devptr[1] = &arr[2];
   }
-
   if (devptr[0] != &var)
     __builtin_abort ();
   if (devptr[1] != &arr[2])
     __builtin_abort ();
 
+#pragma acc parallel no_create(var, arr[0:N]) copyout(devptr) async
+  {
+    devptr[0] = &arr[N - 2];
+    devptr[1] = &var;
+  }
+#pragma acc wait
+  if (devptr[0] != &arr[N - 2])
+    __builtin_abort ();
+  if (devptr[1] != &var)
+    __builtin_abort ();
+
   free (arr);
 
   return 0;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-03-10 14:56 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-10 14:56 [gcc r13-6587] Simplify OpenACC 'no_create' clause implementation Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).