public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] openmp: fix max_vf setting for amdgcn offloading
@ 2022-07-12 14:39 Andrew Stubbs
0 siblings, 0 replies; only message in thread
From: Andrew Stubbs @ 2022-07-12 14:39 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:55722a87dd223149dcd41ca9c8eba16ad5b3eddc
commit 55722a87dd223149dcd41ca9c8eba16ad5b3eddc
Author: Andrew Stubbs <ams@codesourcery.com>
Date: Fri Jul 8 11:58:46 2022 +0100
openmp: fix max_vf setting for amdgcn offloading
Ensure that the "max_vf" figure used for the "safelen" attribute is large
enough for the largest configured offload device.
This change gives ~10x speed improvement on the Bablestream "dot" benchmark for
AMD GCN.
gcc/ChangeLog:
* gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
comment.
* omp-general.cc (omp_max_simd_vf): New function.
* omp-general.h (omp_max_simd_vf): New prototype.
* omp-low.cc (lower_rec_simd_input_clauses): Select largest from
omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp
(check_effective_target_amdgcn_offloading_enabled): New.
(check_effective_target_nvptx_offloading_enabled): New.
* gcc.dg/gomp/target-vf.c: New test.
Diff:
---
gcc/ChangeLog.omp | 9 +++++++++
gcc/gimple-loop-versioning.cc | 5 ++++-
gcc/omp-general.cc | 18 ++++++++++++++++++
gcc/omp-general.h | 1 +
gcc/omp-low.cc | 9 ++++++++-
gcc/testsuite/ChangeLog.omp | 7 +++++++
gcc/testsuite/gcc.dg/gomp/target-vf.c | 21 +++++++++++++++++++++
gcc/testsuite/lib/target-supports.exp | 10 ++++++++++
8 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 8296bc927ea..0e840ba301d 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,12 @@
+2022-07-12 Andrew Stubbs <ams@codesourcery.com>
+
+ * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
+ comment.
+ * omp-general.cc (omp_max_simd_vf): New function.
+ * omp-general.h (omp_max_simd_vf): New prototype.
+ * omp-low.cc (lower_rec_simd_input_clauses): Select largest from
+ omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
+
2022-07-08 Tobias Burnus <tobias@codesourcery.com>
Backport from mainline:
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 3175a1e5249..01d2bf03cce 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn)
unvectorizable code, since it is the largest size that can be
handled efficiently by scalar code. omp_max_vf calculates the
maximum number of bytes in a vector, when such a value is relevant
- to loop optimization. */
+ to loop optimization.
+ FIXME: this probably needs to use omp_max_simd_vf when in a target
+ region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that
+ it doesn't actually matter.) */
m_maximum_scale = estimated_poly_value (omp_max_vf ());
m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
}
diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index c044d808afc..1cfb118225c 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1012,6 +1012,24 @@ omp_max_simt_vf (void)
return 0;
}
+/* Return maximum SIMD width if offloading may target SIMD hardware. */
+
+int
+omp_max_simd_vf (void)
+{
+ if (!optimize)
+ return 0;
+ if (ENABLE_OFFLOADING)
+ for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
+ {
+ if (startswith (c, "amdgcn"))
+ return 64;
+ else if ((c = strchr (c, ':')))
+ c++;
+ }
+ return 0;
+}
+
/* Store the construct selectors as tree codes from last to first,
return their number. */
diff --git a/gcc/omp-general.h b/gcc/omp-general.h
index dba350a28d0..fd66697ff67 100644
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -116,6 +116,7 @@ extern gimple *omp_build_barrier (tree lhs);
extern tree find_combined_omp_for (tree *, int *, void *);
extern poly_uint64 omp_max_vf (void);
extern int omp_max_simt_vf (void);
+extern int omp_max_simd_vf (void);
extern int omp_constructor_traits_to_codes (tree, enum tree_code *);
extern tree omp_check_context_selector (location_t loc, tree ctx);
extern void omp_mark_declare_variant (location_t loc, tree variant,
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 8ea8cf4fd54..c69440fc91d 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -5160,7 +5160,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
{
if (known_eq (sctx->max_vf, 0U))
{
- sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
+ /* If we are compiling for multiple devices choose the largest VF. */
+ sctx->max_vf = omp_max_vf ();
+ if (omp_maybe_offloaded_ctx (ctx))
+ {
+ if (sctx->is_simt)
+ sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simt_vf ());
+ sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simd_vf ());
+ }
if (maybe_gt (sctx->max_vf, 1U))
{
tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 13141b9768e..2567e10231f 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2022-07-12 Andrew Stubbs <ams@codesourcery.com>
+
+ * lib/target-supports.exp
+ (check_effective_target_amdgcn_offloading_enabled): New.
+ (check_effective_target_nvptx_offloading_enabled): New.
+ * gcc.dg/gomp/target-vf.c: New test.
+
2022-07-05 Tobias Burnus <tobias@codesourcery.com>
Backport from mainline:
diff --git a/gcc/testsuite/gcc.dg/gomp/target-vf.c b/gcc/testsuite/gcc.dg/gomp/target-vf.c
new file mode 100644
index 00000000000..14cea45e53c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/target-vf.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */
+
+/* Ensure that the omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf are working
+ properly to set the OpenMP vectorization factor for the offload target, and
+ not just for the host. */
+
+float
+foo (float * __restrict x, float * __restrict y)
+{
+ float sum = 0.0;
+
+#pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum)
+ for (int i=0; i<1024; i++)
+ sum += x[i] * y[i];
+
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "safelen\\(64\\)" "omplower" { target amdgcn_offloading_enabled } } } */
+/* { dg-final { scan-tree-dump "safelen\\(32\\)" "omplower" { target { { nvptx_offloading_enabled } && { ! amdgcn_offloading_enabled } } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4a845d3c239..e40b06c48a5 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1025,6 +1025,16 @@ proc check_effective_target_offloading_enabled {} {
return [check_configured_with "--enable-offload-targets"]
}
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_amdgcn_offloading_enabled {} {
+ return [check_configured_with {--enable-offload-targets=[^ ]*amdgcn}]
+}
+
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_nvptx_offloading_enabled {} {
+ return [check_configured_with {--enable-offload-targets=[^ ]*nvptx}]
+}
+
# Return 1 if compilation with -fopenacc is error-free for trivial
# code, 0 otherwise.
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-07-12 14:39 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-12 14:39 [gcc/devel/omp/gcc-12] openmp: fix max_vf setting for amdgcn offloading Andrew Stubbs
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).