public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-11] OpenMP: Fix SIMT for complex/float reduction with && and ||
@ 2021-05-14 8:51 Tobias Burnus
0 siblings, 0 replies; only message in thread
From: Tobias Burnus @ 2021-05-14 8:51 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:db18b80aec206bfa34e94f776030a84f16b7f0cd
commit db18b80aec206bfa34e94f776030a84f16b7f0cd
Author: Tobias Burnus <tobias@codesourcery.com>
Date: Fri May 14 10:07:27 2021 +0200
OpenMP: Fix SIMT for complex/float reduction with && and ||
2021-05-07 Tobias Burnus <tobias@codesourcery.com>
Tom de Vries <tdevries@suse.de>
gcc/ChangeLog:
* omp-low.c (lower_rec_simd_input_clauses): Set max_vf = 1 if
a truth_value_p reduction variable is nonintegral.
libgomp/ChangeLog:
* testsuite/libgomp.c-c++-common/reduction-5.c: New test, testing
complex/floating-point || + && reduction with 'omp target'.
* testsuite/libgomp.c-c++-common/reduction-6.c: Likewise.
(cherry picked from commit 33b647956caa977d1ae489f9baed9cef70b4f382)
Diff:
---
gcc/ChangeLog.omp | 10 ++
gcc/omp-low.c | 28 ++-
libgomp/ChangeLog.omp | 10 ++
.../testsuite/libgomp.c-c++-common/reduction-5.c | 193 ++++++++++++++++++++
.../testsuite/libgomp.c-c++-common/reduction-6.c | 196 +++++++++++++++++++++
5 files changed, 430 insertions(+), 7 deletions(-)
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 76f1f4a79d3..5fedea5a7ae 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,13 @@
+2021-05-14 Tobias Burnus <tobias@codesourcery.com>
+
+ Backported from master:
+
+ 2021-05-07 Tobias Burnus <tobias@codesourcery.com>
+ Tom de Vries <tdevries@suse.de>
+
+ * omp-low.c (lower_rec_simd_input_clauses): Set max_vf = 1 if
+ a truth_value_p reduction variable is nonintegral.
+
2021-05-14 Tobias Burnus <tobias@codesourcery.com>
Backported from master:
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d50a433d4c1..a5ae1091aa2 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -4714,14 +4714,28 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
{
for (tree c = gimple_omp_for_clauses (ctx->stmt); c;
c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
- && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
- {
- /* UDR reductions are not supported yet for SIMT, disable
- SIMT. */
- sctx->max_vf = 1;
- break;
+ {
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
+ continue;
+
+ if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
+ {
+ /* UDR reductions are not supported yet for SIMT, disable
+ SIMT. */
+ sctx->max_vf = 1;
+ break;
+ }
+
+ if (truth_value_p (OMP_CLAUSE_REDUCTION_CODE (c))
+ && !INTEGRAL_TYPE_P (TREE_TYPE (new_var)))
+ {
+ /* Doing boolean operations on non-integral types is
+ for conformance only, it's not worth supporting this
+ for SIMT. */
+ sctx->max_vf = 1;
+ break;
}
+ }
}
if (maybe_gt (sctx->max_vf, 1U))
{
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 1b4a2e35012..840a470dd4b 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,13 @@
+2021-05-14 Tobias Burnus <tobias@codesourcery.com>
+
+ Backported from master:
+
+ 2021-05-07 Tobias Burnus <tobias@codesourcery.com>
+ Tom de Vries <tdevries@suse.de>
+ * testsuite/libgomp.c-c++-common/reduction-5.c: New test, testing
+ complex/floating-point || + && reduction with 'omp target'.
+ * testsuite/libgomp.c-c++-common/reduction-6.c: Likewise.
+
2021-05-14 Tobias Burnus <tobias@codesourcery.com>
Backported from master:
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-5.c
new file mode 100644
index 00000000000..21540512e23
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-5.c
@@ -0,0 +1,193 @@
+/* { dg-additional-options "-foffload=-latomic" { target { offload_target_nvptx } } } */
+/* C / C++'s logical AND and OR operators take any scalar argument
+ which compares (un)equal to 0 - the result 1 or 0 and of type int.
+
+ In this testcase, the int result is again converted to a floating-poing
+ or complex type.
+
+ While having a floating-point/complex array element with || and && can make
+ sense, having a non-integer/non-bool reduction variable is odd but valid.
+
+ Test: FP reduction variable + FP array - as reduction-1.c but with target */
+
+#define N 1024
+_Complex float rcf[N];
+_Complex double rcd[N];
+float rf[N];
+double rd[N];
+
+int
+reduction_or ()
+{
+ float orf = 0;
+ double ord = 0;
+ _Complex float orfc = 0;
+ _Complex double ordc = 0;
+
+ #pragma omp target parallel reduction(||: orf) map(orf)
+ for (int i=0; i < N; ++i)
+ orf = orf || rf[i];
+
+ #pragma omp target parallel for reduction(||: ord) map(ord)
+ for (int i=0; i < N; ++i)
+ ord = ord || rcd[i];
+
+ #pragma omp target parallel for simd reduction(||: orfc) map(orfc)
+ for (int i=0; i < N; ++i)
+ orfc = orfc || rcf[i];
+
+ #pragma omp target parallel loop reduction(||: ordc) map(ordc)
+ for (int i=0; i < N; ++i)
+ ordc = ordc || rcd[i];
+
+ return orf + ord + __real__ orfc + __real__ ordc;
+}
+
+int
+reduction_or_teams ()
+{
+ float orf = 0;
+ double ord = 0;
+ _Complex float orfc = 0;
+ _Complex double ordc = 0;
+
+ #pragma omp target teams distribute parallel for reduction(||: orf) map(orf)
+ for (int i=0; i < N; ++i)
+ orf = orf || rf[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(||: ord) map(ord)
+ for (int i=0; i < N; ++i)
+ ord = ord || rcd[i];
+
+ #pragma omp target teams distribute parallel for reduction(||: orfc) map(orfc)
+ for (int i=0; i < N; ++i)
+ orfc = orfc || rcf[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(||: ordc) map(ordc)
+ for (int i=0; i < N; ++i)
+ ordc = ordc || rcd[i];
+
+ return orf + ord + __real__ orfc + __real__ ordc;
+}
+
+int
+reduction_and ()
+{
+ float andf = 1;
+ double andd = 1;
+ _Complex float andfc = 1;
+ _Complex double anddc = 1;
+
+ #pragma omp target parallel reduction(&&: andf) map(andf)
+ for (int i=0; i < N; ++i)
+ andf = andf && rf[i];
+
+ #pragma omp target parallel for reduction(&&: andd) map(andd)
+ for (int i=0; i < N; ++i)
+ andd = andd && rcd[i];
+
+ #pragma omp target parallel for simd reduction(&&: andfc) map(andfc)
+ for (int i=0; i < N; ++i)
+ andfc = andfc && rcf[i];
+
+ #pragma omp target parallel loop reduction(&&: anddc) map(anddc)
+ for (int i=0; i < N; ++i)
+ anddc = anddc && rcd[i];
+
+ return andf + andd + __real__ andfc + __real__ anddc;
+}
+
+int
+reduction_and_teams ()
+{
+ float andf = 1;
+ double andd = 1;
+ _Complex float andfc = 1;
+ _Complex double anddc = 1;
+
+ #pragma omp target teams distribute parallel for reduction(&&: andf) map(andf)
+ for (int i=0; i < N; ++i)
+ andf = andf && rf[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(&&: andd) map(andd)
+ for (int i=0; i < N; ++i)
+ andd = andd && rcd[i];
+
+ #pragma omp target teams distribute parallel for reduction(&&: andfc) map(andfc)
+ for (int i=0; i < N; ++i)
+ andfc = andfc && rcf[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(&&: anddc) map(anddc)
+ for (int i=0; i < N; ++i)
+ anddc = anddc && rcd[i];
+
+ return andf + andd + __real__ andfc + __real__ anddc;
+}
+
+int
+main ()
+{
+ for (int i = 0; i < N; ++i)
+ {
+ rf[i] = 0;
+ rd[i] = 0;
+ rcf[i] = 0;
+ rcd[i] = 0;
+ }
+
+ if (reduction_or () != 0)
+ __builtin_abort ();
+ if (reduction_or_teams () != 0)
+ __builtin_abort ();
+ if (reduction_and () != 0)
+ __builtin_abort ();
+ if (reduction_and_teams () != 0)
+ __builtin_abort ();
+
+ rf[10] = 1.0;
+ rd[15] = 1.0;
+ rcf[10] = 1.0;
+ rcd[15] = 1.0i;
+
+ if (reduction_or () != 4)
+ __builtin_abort ();
+ if (reduction_or_teams () != 4)
+ __builtin_abort ();
+ if (reduction_and () != 0)
+ __builtin_abort ();
+ if (reduction_and_teams () != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i < N; ++i)
+ {
+ rf[i] = 1;
+ rd[i] = 1;
+ rcf[i] = 1;
+ rcd[i] = 1;
+ }
+
+ if (reduction_or () != 4)
+ __builtin_abort ();
+ if (reduction_or_teams () != 4)
+ __builtin_abort ();
+ if (reduction_and () != 4)
+ __builtin_abort ();
+ if (reduction_and_teams () != 4)
+ __builtin_abort ();
+
+ rf[10] = 0.0;
+ rd[15] = 0.0;
+ rcf[10] = 0.0;
+ rcd[15] = 0.0;
+
+ if (reduction_or () != 4)
+ __builtin_abort ();
+ if (reduction_or_teams () != 4)
+ __builtin_abort ();
+ if (reduction_and () != 0)
+ __builtin_abort ();
+ if (reduction_and_teams () != 0)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-6.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-6.c
new file mode 100644
index 00000000000..27d9ef6b635
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-6.c
@@ -0,0 +1,196 @@
+/* { dg-additional-options "-foffload=-latomic" { target { offload_target_nvptx } } } */
+/* C / C++'s logical AND and OR operators take any scalar argument
+ which compares (un)equal to 0 - the result 1 or 0 and of type int.
+
+ In this testcase, the int result is again converted to an integer complex
+ type.
+
+ While having a floating-point/complex array element with || and && can make
+ sense, having a complex reduction variable is odd but valid.
+
+ Test: int complex reduction variable + int complex array.
+ as reduction-4.c but with target. */
+
+#define N 1024
+_Complex char rcc[N];
+_Complex short rcs[N];
+_Complex int rci[N];
+_Complex long long rcl[N];
+
+int
+reduction_or ()
+{
+ _Complex char orc = 0;
+ _Complex short ors = 0;
+ _Complex int ori = 0;
+ _Complex long orl = 0;
+
+ #pragma omp target parallel reduction(||: orc) map(orc)
+ for (int i=0; i < N; ++i)
+ orc = orc || rcl[i];
+
+ #pragma omp target parallel for reduction(||: ors) map(ors)
+ for (int i=0; i < N; ++i)
+ ors = ors || rci[i];
+
+ #pragma omp target parallel for simd reduction(||: ori) map(ori)
+ for (int i=0; i < N; ++i)
+ ori = ori || rcs[i];
+
+ #pragma omp target parallel loop reduction(||: orl) map(orl)
+ for (int i=0; i < N; ++i)
+ orl = orl || rcc[i];
+
+ return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl);
+}
+
+int
+reduction_or_teams ()
+{
+ _Complex char orc = 0;
+ _Complex short ors = 0;
+ _Complex int ori = 0;
+ _Complex long orl = 0;
+
+ #pragma omp target teams distribute parallel for reduction(||: orc) map(orc)
+ for (int i=0; i < N; ++i)
+ orc = orc || rcc[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(||: ors) map(ors)
+ for (int i=0; i < N; ++i)
+ ors = ors || rcs[i];
+
+ #pragma omp target teams distribute parallel for reduction(||: ori) map(ori)
+ for (int i=0; i < N; ++i)
+ ori = ori || rci[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(||: orl) map(orl)
+ for (int i=0; i < N; ++i)
+ orl = orl || rcl[i];
+
+ return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl);
+}
+
+int
+reduction_and ()
+{
+ _Complex char andc = 1;
+ _Complex short ands = 1;
+ _Complex int andi = 1;
+ _Complex long andl = 1;
+
+ #pragma omp target parallel reduction(&&: andc) map(andc)
+ for (int i=0; i < N; ++i)
+ andc = andc && rcc[i];
+
+ #pragma omp target parallel for reduction(&&: ands) map(ands)
+ for (int i=0; i < N; ++i)
+ ands = ands && rcs[i];
+
+ #pragma omp target parallel for simd reduction(&&: andi) map(andi)
+ for (int i=0; i < N; ++i)
+ andi = andi && rci[i];
+
+ #pragma omp target parallel loop reduction(&&: andl) map(andl)
+ for (int i=0; i < N; ++i)
+ andl = andl && rcl[i];
+
+ return __real__ (andc + ands + andi + andl)
+ + __imag__ (andc + ands + andi + andl);
+}
+
+int
+reduction_and_teams ()
+{
+ _Complex char andc = 1;
+ _Complex short ands = 1;
+ _Complex int andi = 1;
+ _Complex long andl = 1;
+
+ #pragma omp target teams distribute parallel for reduction(&&: andc) map(andc)
+ for (int i=0; i < N; ++i)
+ andc = andc && rcl[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(&&: ands) map(ands)
+ for (int i=0; i < N; ++i)
+ ands = ands && rci[i];
+
+ #pragma omp target teams distribute parallel for reduction(&&: andi) map(andi)
+ for (int i=0; i < N; ++i)
+ andi = andi && rcs[i];
+
+ #pragma omp target teams distribute parallel for simd reduction(&&: andl) map(andl)
+ for (int i=0; i < N; ++i)
+ andl = andl && rcc[i];
+
+ return __real__ (andc + ands + andi + andl)
+ + __imag__ (andc + ands + andi + andl);
+}
+
+int
+main ()
+{
+ for (int i = 0; i < N; ++i)
+ {
+ rcc[i] = 0;
+ rcs[i] = 0;
+ rci[i] = 0;
+ rcl[i] = 0;
+ }
+
+ if (reduction_or () != 0)
+ __builtin_abort ();
+ if (reduction_or_teams () != 0)
+ __builtin_abort ();
+ if (reduction_and () != 0)
+ __builtin_abort ();
+ if (reduction_and_teams () != 0)
+ __builtin_abort ();
+
+ rcc[10] = 1.0;
+ rcs[15] = 1.0i;
+ rci[10] = 1.0;
+ rcl[15] = 1.0i;
+
+ if (reduction_or () != 4)
+ __builtin_abort ();
+ if (reduction_or_teams () != 4)
+ __builtin_abort ();
+ if (reduction_and () != 0)
+ __builtin_abort ();
+ if (reduction_and_teams () != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i < N; ++i)
+ {
+ rcc[i] = 1;
+ rcs[i] = 1i;
+ rci[i] = 1;
+ rcl[i] = 1 + 1i;
+ }
+
+ if (reduction_or () != 4)
+ __builtin_abort ();
+ if (reduction_or_teams () != 4)
+ __builtin_abort ();
+ if (reduction_and () != 4)
+ __builtin_abort ();
+ if (reduction_and_teams () != 4)
+ __builtin_abort ();
+
+ rcc[10] = 0.0;
+ rcs[15] = 0.0;
+ rci[10] = 0.0;
+ rcl[15] = 0.0;
+
+ if (reduction_or () != 4)
+ __builtin_abort ();
+ if (reduction_or_teams () != 4)
+ __builtin_abort ();
+ if (reduction_and () != 0)
+ __builtin_abort ();
+ if (reduction_and_teams () != 0)
+ __builtin_abort ();
+
+ return 0;
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-05-14 8:51 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-14 8:51 [gcc/devel/omp/gcc-11] OpenMP: Fix SIMT for complex/float reduction with && and || Tobias Burnus
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).