public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] openmp: fix max_vf setting for amdgcn offloading
@ 2022-07-12 14:39 Andrew Stubbs
  0 siblings, 0 replies; only message in thread
From: Andrew Stubbs @ 2022-07-12 14:39 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:55722a87dd223149dcd41ca9c8eba16ad5b3eddc

commit 55722a87dd223149dcd41ca9c8eba16ad5b3eddc
Author: Andrew Stubbs <ams@codesourcery.com>
Date:   Fri Jul 8 11:58:46 2022 +0100

    openmp: fix max_vf setting for amdgcn offloading
    
    Ensure that the "max_vf" figure used for the "safelen" attribute is large
    enough for the largest configured offload device.
    
    This change gives ~10x speed improvement on the Bablestream "dot" benchmark for
    AMD GCN.
    
    gcc/ChangeLog:
    
            * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
            comment.
            * omp-general.cc (omp_max_simd_vf): New function.
            * omp-general.h (omp_max_simd_vf): New prototype.
            * omp-low.cc (lower_rec_simd_input_clauses): Select largest from
              omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
    
    gcc/testsuite/ChangeLog:
    
            * lib/target-supports.exp
            (check_effective_target_amdgcn_offloading_enabled): New.
            (check_effective_target_nvptx_offloading_enabled): New.
            * gcc.dg/gomp/target-vf.c: New test.

Diff:
---
 gcc/ChangeLog.omp                     |  9 +++++++++
 gcc/gimple-loop-versioning.cc         |  5 ++++-
 gcc/omp-general.cc                    | 18 ++++++++++++++++++
 gcc/omp-general.h                     |  1 +
 gcc/omp-low.cc                        |  9 ++++++++-
 gcc/testsuite/ChangeLog.omp           |  7 +++++++
 gcc/testsuite/gcc.dg/gomp/target-vf.c | 21 +++++++++++++++++++++
 gcc/testsuite/lib/target-supports.exp | 10 ++++++++++
 8 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 8296bc927ea..0e840ba301d 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,12 @@
+2022-07-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	* gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
+	comment.
+	* omp-general.cc (omp_max_simd_vf): New function.
+	* omp-general.h (omp_max_simd_vf): New prototype.
+	* omp-low.cc (lower_rec_simd_input_clauses): Select largest from
+	  omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
+
 2022-07-08  Tobias Burnus  <tobias@codesourcery.com>
 
 	Backport from mainline:
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 3175a1e5249..01d2bf03cce 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn)
      unvectorizable code, since it is the largest size that can be
      handled efficiently by scalar code.  omp_max_vf calculates the
      maximum number of bytes in a vector, when such a value is relevant
-     to loop optimization.  */
+     to loop optimization.
+     FIXME: this probably needs to use omp_max_simd_vf when in a target
+     region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that
+     it doesn't actually matter.)  */
   m_maximum_scale = estimated_poly_value (omp_max_vf ());
   m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
 }
diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index c044d808afc..1cfb118225c 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1012,6 +1012,24 @@ omp_max_simt_vf (void)
   return 0;
 }
 
+/* Return maximum SIMD width if offloading may target SIMD hardware.  */
+
+int
+omp_max_simd_vf (void)
+{
+  if (!optimize)
+    return 0;
+  if (ENABLE_OFFLOADING)
+    for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
+      {
+	if (startswith (c, "amdgcn"))
+	  return 64;
+	else if ((c = strchr (c, ':')))
+	  c++;
+      }
+  return 0;
+}
+
 /* Store the construct selectors as tree codes from last to first,
    return their number.  */
 
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index dba350a28d0..fd66697ff67 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -116,6 +116,7 @@ extern gimple *omp_build_barrier (tree lhs);
 extern tree find_combined_omp_for (tree *, int *, void *);
 extern poly_uint64 omp_max_vf (void);
 extern int omp_max_simt_vf (void);
+extern int omp_max_simd_vf (void);
 extern int omp_constructor_traits_to_codes (tree, enum tree_code *);
 extern tree omp_check_context_selector (location_t loc, tree ctx);
 extern void omp_mark_declare_variant (location_t loc, tree variant,
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 8ea8cf4fd54..c69440fc91d 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -5160,7 +5160,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
 {
   if (known_eq (sctx->max_vf, 0U))
     {
-      sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
+      /* If we are compiling for multiple devices choose the largest VF.  */
+      sctx->max_vf = omp_max_vf ();
+      if (omp_maybe_offloaded_ctx (ctx))
+	{
+	  if (sctx->is_simt)
+	    sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simt_vf ());
+	  sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simd_vf ());
+	}
       if (maybe_gt (sctx->max_vf, 1U))
 	{
 	  tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 13141b9768e..2567e10231f 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2022-07-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	* lib/target-supports.exp
+	(check_effective_target_amdgcn_offloading_enabled): New.
+	(check_effective_target_nvptx_offloading_enabled): New.
+	* gcc.dg/gomp/target-vf.c: New test.
+
 2022-07-05  Tobias Burnus  <tobias@codesourcery.com>
 
 	Backport from mainline:
diff --git a/gcc/testsuite/gcc.dg/gomp/target-vf.c b/gcc/testsuite/gcc.dg/gomp/target-vf.c
new file mode 100644
index 00000000000..14cea45e53c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/target-vf.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */ 
+
+/* Ensure that the omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf are working
+   properly to set the OpenMP vectorization factor for the offload target, and
+   not just for the host.  */
+
+float
+foo (float * __restrict x, float * __restrict y)
+{
+  float sum = 0.0;
+
+#pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum)
+  for (int i=0; i<1024; i++)
+    sum += x[i] * y[i];
+
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump  "safelen\\(64\\)" "omplower" { target amdgcn_offloading_enabled } } } */
+/* { dg-final { scan-tree-dump  "safelen\\(32\\)" "omplower" { target { { nvptx_offloading_enabled } && { ! amdgcn_offloading_enabled } } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4a845d3c239..e40b06c48a5 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1025,6 +1025,16 @@ proc check_effective_target_offloading_enabled {} {
     return [check_configured_with "--enable-offload-targets"]
 }
 
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_amdgcn_offloading_enabled {} {
+    return [check_configured_with {--enable-offload-targets=[^ ]*amdgcn}]
+}
+
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_nvptx_offloading_enabled {} {
+    return [check_configured_with {--enable-offload-targets=[^ ]*nvptx}]
+}
+
 # Return 1 if compilation with -fopenacc is error-free for trivial
 # code, 0 otherwise.


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-07-12 14:39 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-12 14:39 [gcc/devel/omp/gcc-12] openmp: fix max_vf setting for amdgcn offloading Andrew Stubbs

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).