From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1461) id 97FC63856DE6; Tue, 12 Jul 2022 14:39:19 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 97FC63856DE6 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Andrew Stubbs To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/omp/gcc-12] openmp: fix max_vf setting for amdgcn offloading X-Act-Checkin: gcc X-Git-Author: Andrew Stubbs X-Git-Refname: refs/heads/devel/omp/gcc-12 X-Git-Oldrev: 860c76659f38a52b06099ab530de7a9b08fd65ac X-Git-Newrev: 55722a87dd223149dcd41ca9c8eba16ad5b3eddc Message-Id: <20220712143919.97FC63856DE6@sourceware.org> Date: Tue, 12 Jul 2022 14:39:19 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 12 Jul 2022 14:39:19 -0000 https://gcc.gnu.org/g:55722a87dd223149dcd41ca9c8eba16ad5b3eddc commit 55722a87dd223149dcd41ca9c8eba16ad5b3eddc Author: Andrew Stubbs Date: Fri Jul 8 11:58:46 2022 +0100 openmp: fix max_vf setting for amdgcn offloading Ensure that the "max_vf" figure used for the "safelen" attribute is large enough for the largest configured offload device. This change gives ~10x speed improvement on the Bablestream "dot" benchmark for AMD GCN. gcc/ChangeLog: * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add comment. * omp-general.cc (omp_max_simd_vf): New function. * omp-general.h (omp_max_simd_vf): New prototype. * omp-low.cc (lower_rec_simd_input_clauses): Select largest from omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_amdgcn_offloading_enabled): New. (check_effective_target_nvptx_offloading_enabled): New. * gcc.dg/gomp/target-vf.c: New test. Diff: --- gcc/ChangeLog.omp | 9 +++++++++ gcc/gimple-loop-versioning.cc | 5 ++++- gcc/omp-general.cc | 18 ++++++++++++++++++ gcc/omp-general.h | 1 + gcc/omp-low.cc | 9 ++++++++- gcc/testsuite/ChangeLog.omp | 7 +++++++ gcc/testsuite/gcc.dg/gomp/target-vf.c | 21 +++++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 10 ++++++++++ 8 files changed, 78 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index 8296bc927ea..0e840ba301d 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,12 @@ +2022-07-12 Andrew Stubbs + + * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add + comment. + * omp-general.cc (omp_max_simd_vf): New function. + * omp-general.h (omp_max_simd_vf): New prototype. + * omp-low.cc (lower_rec_simd_input_clauses): Select largest from + omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf. + 2022-07-08 Tobias Burnus Backport from mainline: diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc index 3175a1e5249..01d2bf03cce 100644 --- a/gcc/gimple-loop-versioning.cc +++ b/gcc/gimple-loop-versioning.cc @@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn) unvectorizable code, since it is the largest size that can be handled efficiently by scalar code. omp_max_vf calculates the maximum number of bytes in a vector, when such a value is relevant - to loop optimization. */ + to loop optimization. + FIXME: this probably needs to use omp_max_simd_vf when in a target + region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that + it doesn't actually matter.) */ m_maximum_scale = estimated_poly_value (omp_max_vf ()); m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); } diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index c044d808afc..1cfb118225c 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -1012,6 +1012,24 @@ omp_max_simt_vf (void) return 0; } +/* Return maximum SIMD width if offloading may target SIMD hardware. */ + +int +omp_max_simd_vf (void) +{ + if (!optimize) + return 0; + if (ENABLE_OFFLOADING) + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;) + { + if (startswith (c, "amdgcn")) + return 64; + else if ((c = strchr (c, ':'))) + c++; + } + return 0; +} + /* Store the construct selectors as tree codes from last to first, return their number. */ diff --git a/gcc/omp-general.h b/gcc/omp-general.h index dba350a28d0..fd66697ff67 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -116,6 +116,7 @@ extern gimple *omp_build_barrier (tree lhs); extern tree find_combined_omp_for (tree *, int *, void *); extern poly_uint64 omp_max_vf (void); extern int omp_max_simt_vf (void); +extern int omp_max_simd_vf (void); extern int omp_constructor_traits_to_codes (tree, enum tree_code *); extern tree omp_check_context_selector (location_t loc, tree ctx); extern void omp_mark_declare_variant (location_t loc, tree variant, diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 8ea8cf4fd54..c69440fc91d 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -5160,7 +5160,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, { if (known_eq (sctx->max_vf, 0U)) { - sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf (); + /* If we are compiling for multiple devices choose the largest VF. */ + sctx->max_vf = omp_max_vf (); + if (omp_maybe_offloaded_ctx (ctx)) + { + if (sctx->is_simt) + sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simt_vf ()); + sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simd_vf ()); + } if (maybe_gt (sctx->max_vf, 1U)) { tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index 13141b9768e..2567e10231f 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp @@ -1,3 +1,10 @@ +2022-07-12 Andrew Stubbs + + * lib/target-supports.exp + (check_effective_target_amdgcn_offloading_enabled): New. + (check_effective_target_nvptx_offloading_enabled): New. + * gcc.dg/gomp/target-vf.c: New test. + 2022-07-05 Tobias Burnus Backport from mainline: diff --git a/gcc/testsuite/gcc.dg/gomp/target-vf.c b/gcc/testsuite/gcc.dg/gomp/target-vf.c new file mode 100644 index 00000000000..14cea45e53c --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-vf.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */ + +/* Ensure that the omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf are working + properly to set the OpenMP vectorization factor for the offload target, and + not just for the host. */ + +float +foo (float * __restrict x, float * __restrict y) +{ + float sum = 0.0; + +#pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum) + for (int i=0; i<1024; i++) + sum += x[i] * y[i]; + + return sum; +} + +/* { dg-final { scan-tree-dump "safelen\\(64\\)" "omplower" { target amdgcn_offloading_enabled } } } */ +/* { dg-final { scan-tree-dump "safelen\\(32\\)" "omplower" { target { { nvptx_offloading_enabled } && { ! amdgcn_offloading_enabled } } } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 4a845d3c239..e40b06c48a5 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1025,6 +1025,16 @@ proc check_effective_target_offloading_enabled {} { return [check_configured_with "--enable-offload-targets"] } +# Return 1 if compiled with --enable-offload-targets=amdgcn +proc check_effective_target_amdgcn_offloading_enabled {} { + return [check_configured_with {--enable-offload-targets=[^ ]*amdgcn}] +} + +# Return 1 if compiled with --enable-offload-targets=amdgcn +proc check_effective_target_nvptx_offloading_enabled {} { + return [check_configured_with {--enable-offload-targets=[^ ]*nvptx}] +} + # Return 1 if compilation with -fopenacc is error-free for trivial # code, 0 otherwise.