public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4] Some additional OpenACC reduction tests
@ 2015-07-29 17:42 Julian Brown
  2016-03-24 17:03 ` Thomas Schwinge
  0 siblings, 1 reply; 3+ messages in thread
From: Julian Brown @ 2015-07-29 17:42 UTC (permalink / raw)
  To: gcc-patches, nathan, Thomas Schwinge, Cesar Philippidis

[-- Attachment #1: Type: text/plain, Size: 672 bytes --]

Hi,

This is a set of 19 new tests for OpenACC reductions, covering several
ways of performing reductions over the parallel and loop directives
using gang or worker/vector level parallelism. (The semantics are quite
subtle in some places, but I believe the tests follow the specification
to the letter at least, E&OE.)

Several of these do not pass yet, so they have been marked with XFAILs.

I will apply to gomp4 branch shortly.

Cheers,

Julian

ChangeLog

    libgomp/
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-*.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/par-reduction-*.c: New tests.
    * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-*.c:
    New tests.

[-- Attachment #2: openacc-reduction-tests-1.diff --]
[-- Type: text/x-patch, Size: 22482 bytes --]

commit d6cb22b11bbe6f536bd11110f6d5ce8349266040
Author: Julian Brown <julian@codesourcery.com>
Date:   Wed Jul 29 10:04:36 2015 -0700

    Some new OpenACC reduction tests.

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c
new file mode 100644
index 0000000..52f9a8f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c
@@ -0,0 +1,43 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, non-private reduction
+   variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  res = hres = 1;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(*:res)
+    for (i = 0; i < 12; i++)
+      res *= arr[i];
+  }
+
+  for (i = 0; i < 12; i++)
+    hres *= arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c
new file mode 100644
index 0000000..b5e3b2f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c
@@ -0,0 +1,28 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs and vectors, non-private
+   reduction variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang vector reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c
new file mode 100644
index 0000000..d724680
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c
@@ -0,0 +1,28 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs and workers, non-private
+   reduction variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang worker reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c
new file mode 100644
index 0000000..d610373
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c
@@ -0,0 +1,28 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, non-private
+   reduction variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang worker vector reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c
new file mode 100644
index 0000000..3e5c707
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c
@@ -0,0 +1,36 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, non-private
+   reduction variable: separate gang and worker/vector loops).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(+:res)
+    for (j = 0; j < 32; j++)
+      {
+        #pragma acc loop worker vector reduction(+:res)
+        for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+      }
+    /* "res" is non-private, and is not available until after the parallel
+       region.  */
+  }
+
+  for (i = 0; i < 32768; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c
new file mode 100644
index 0000000..44d7f0f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c
@@ -0,0 +1,35 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, non-private
+   reduction variable: separate gang and worker/vector loops).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  double arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copyin(arr) copy(res)
+  {
+    #pragma acc loop gang reduction(+:res)
+    for (j = 0; j < 32; j++)
+      {
+        #pragma acc loop worker vector reduction(+:res)
+        for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+      }
+  }
+
+  for (i = 0; i < 32768; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c
new file mode 100644
index 0000000..8bc18f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c
@@ -0,0 +1,57 @@
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, multiple
+   non-private reduction variables, float type).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  float arr[32768];
+  float res = 0, mres = 0, hres = 0, hmres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res, mres)
+  {
+    #pragma acc loop gang reduction(+:res) reduction(max:mres)
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + i];
+	    if (arr[j * 1024 + i] > mres)
+	      mres = arr[j * 1024 + i];
+	  }
+
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + (1023 - i)];
+	    if (arr[j * 1024 + (1023 - i)] > mres)
+	      mres = arr[j * 1024 + (1023 - i)];
+	  }
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres += arr[j * 1024 + i];
+	hres += arr[j * 1024 + (1023 - i)];
+	if (arr[j * 1024 + i] > hmres)
+	  hmres = arr[j * 1024 + i];
+	if (arr[j * 1024 + (1023 - i)] > hmres)
+	  hmres = arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+  assert (mres == hmres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c
new file mode 100644
index 0000000..ce8cb38
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c
@@ -0,0 +1,41 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (vectors, private reduction
+   variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = 0;
+
+	#pragma acc loop vector reduction(+:res)
+	for (i = 0; i < 32; i++)
+	  res += arr[j * 32 + i];
+	
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = 0;
+      
+      for (i = 0; i < 32; i++)
+	hres += arr[j * 32 + i];
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c
new file mode 100644
index 0000000..63f3fef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c
@@ -0,0 +1,43 @@
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (vector reduction in
+   gang-partitioned/worker-partitioned mode, private reduction variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, k;
+  double ina[1024], inb[1024], out[1024], acc;
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 32; i++)
+      {
+        ina[j * 32 + i] = (i == j) ? 2.0 : 0.0;
+	inb[j * 32 + i] = (double) (i + j);
+      }
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(acc) copyin(ina, inb) copyout(out)
+  {
+    #pragma acc loop gang worker
+    for (k = 0; k < 32; k++)
+      for (j = 0; j < 32; j++)
+        {
+	  acc = 0;
+
+	  #pragma acc loop vector reduction(+:acc)
+	  for (i = 0; i < 32; i++)
+	    acc += ina[k * 32 + i] * inb[i * 32 + j];
+
+	  out[k * 32 + j] = acc;
+	}
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 32; i++)
+      assert (out[j * 32 + i] == (i + j) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c
new file mode 100644
index 0000000..78f6be0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c
@@ -0,0 +1,41 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers, private reduction
+   variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = 0;
+
+	#pragma acc loop worker reduction(+:res)
+	for (i = 0; i < 32; i++)
+	  res += arr[j * 32 + i];
+	
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = 0;
+      
+      for (i = 0; i < 32; i++)
+	hres += arr[j * 32 + i];
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c
new file mode 100644
index 0000000..2765908
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c
@@ -0,0 +1,41 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers and vectors, private reduction
+   variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = 0;
+
+	#pragma acc loop worker vector reduction(+:res)
+	for (i = 0; i < 32; i++)
+	  res += arr[j * 32 + i];
+	
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = 0;
+      
+      for (i = 0; i < 32; i++)
+	hres += arr[j * 32 + i];
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c
new file mode 100644
index 0000000..c30b0e7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c
@@ -0,0 +1,45 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers and vectors, private reduction
+   variable).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyout(out)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+        res = j;
+
+	#pragma acc loop worker reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+
+	#pragma acc loop vector reduction(+:res)
+	for (i = 1023; i >= 0; i--)
+	  res += arr[j * 1024 + i];
+
+	out[j] = res;
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    {
+      hres = j;
+      
+      for (i = 0; i < 1024; i++)
+	hres += arr[j * 1024 + i] * 2;
+
+      assert (out[j] == hres);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c
new file mode 100644
index 0000000..ac96525
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c
@@ -0,0 +1,37 @@
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers and vectors, private reduction
+   variable: gang-redundant mode).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, arr[1024], out[32], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i ^ 33;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       private(res) copyin(arr) copyout(out)
+  {
+    /* "res" should be available at the end of the following loop (and should
+       have the same value redundantly in each gang).  */
+    #pragma acc loop worker vector reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+    
+    #pragma acc loop gang (static: 1)
+    for (i = 0; i < 32; i++)
+      out[i] = res;
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  for (i = 0; i < 32; i++)
+    assert (out[i] == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c
new file mode 100644
index 0000000..0e0ce96
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (worker and
+   vector-partitioned loops individually in gang-partitioned mode, int
+   type).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(+:res)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+
+	#pragma acc loop vector reduction(+:res)
+	for (i = 1023; i >= 0; i--)
+	  res += arr[j * 1024 + i];
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      hres += arr[j * 1024 + i] * 2;
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
new file mode 100644
index 0000000..a7a75a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
@@ -0,0 +1,40 @@
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+   in gang-partitioned mode, int type with XOR).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j, arr[32768], res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(^:res)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(^:res)
+	for (i = 0; i < 1024; i++)
+	  res ^= arr[j * 1024 + i];
+
+	#pragma acc loop worker vector reduction(^:res)
+	for (i = 0; i < 1024; i++)
+	  res ^= arr[j * 1024 + (1023 - i)];
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres ^= arr[j * 1024 + i];
+	hres ^= arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c
new file mode 100644
index 0000000..860e56d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c
@@ -0,0 +1,44 @@
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+   together in gang-partitioned mode, float type).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  float arr[32768];
+  float res = 0, hres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(+:res)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + i];
+
+	#pragma acc loop worker vector reduction(+:res)
+	for (i = 0; i < 1024; i++)
+	  res += arr[j * 1024 + (1023 - i)];
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres += arr[j * 1024 + i];
+	hres += arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
new file mode 100644
index 0000000..41e0f71
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
@@ -0,0 +1,57 @@
+/* { dg-xfail-run-if "TODO" { *-*-* } { "*" } { "" } } */
+
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+   together in gang-partitioned mode, float type, multiple reductions).  */
+
+int
+main (int argc, char *argv[])
+{
+  int i, j;
+  float arr[32768];
+  float res = 0, mres = 0, hres = 0, hmres = 0;
+
+  for (i = 0; i < 32768; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       reduction(+:res) reduction(max:mres)
+  {
+    #pragma acc loop gang
+    for (j = 0; j < 32; j++)
+      {
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + i];
+	    if (arr[j * 1024 + i] > mres)
+	      mres = arr[j * 1024 + i];
+	  }
+
+	#pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+	for (i = 0; i < 1024; i++)
+	  {
+	    res += arr[j * 1024 + (1023 - i)];
+	    if (arr[j * 1024 + (1023 - i)] > mres)
+	      mres = arr[j * 1024 + (1023 - i)];
+	  }
+      }
+  }
+
+  for (j = 0; j < 32; j++)
+    for (i = 0; i < 1024; i++)
+      {
+        hres += arr[j * 1024 + i];
+	hres += arr[j * 1024 + (1023 - i)];
+	if (arr[j * 1024 + i] > hmres)
+	  hmres = arr[j * 1024 + i];
+	if (arr[j * 1024 + (1023 - i)] > hmres)
+	  hmres = arr[j * 1024 + (1023 - i)];
+      }
+
+  assert (res == hres);
+  assert (mres == hmres);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
new file mode 100644
index 0000000..1172ca7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
@@ -0,0 +1,37 @@
+#include <assert.h>
+
+/* Test of reduction on parallel directive.  */
+
+#define ACTUAL_GANGS 256
+
+int
+main (int argc, char *argv[])
+{
+  int res = 0, res2 = 0;
+
+  #pragma acc parallel num_gangs(ACTUAL_GANGS) num_workers(32) \
+		       vector_length(32) reduction(+:res) copy(res2)
+  {
+    res += 5;
+
+    #pragma acc atomic
+    res2 += 5;
+  }
+
+  assert (res == res2);
+
+  res = res2 = 1;
+  
+  #pragma acc parallel num_gangs(8) num_workers(32)  vector_length(32) \
+		       reduction(*:res) copy(res2)
+  {
+    res *= 5;
+
+    #pragma acc atomic
+    res2 *= 5;
+  }
+  
+  assert (res == res2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
new file mode 100644
index 0000000..92451ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
@@ -0,0 +1,42 @@
+#include <assert.h>
+#include <openacc.h>
+
+/* Test of reduction on parallel directive (with async).  */
+
+#define ACTUAL_GANGS 256
+
+int
+main (int argc, char *argv[])
+{
+  int res = 0, res2 = 0;
+
+  #pragma acc parallel num_gangs(ACTUAL_GANGS) num_workers(32) \
+		       vector_length(32) reduction(+:res) copy(res2) async(1)
+  {
+    res += 5;
+
+    #pragma acc atomic
+    res2 += 5;
+  }
+
+  acc_wait (1);
+
+  assert (res == res2);
+
+  res = res2 = 1;
+
+  #pragma acc parallel num_gangs(8) num_workers(32) vector_length(32) \
+		       reduction(*:res) copy(res2) async(1)
+  {
+    res *= 5;
+
+    #pragma acc atomic
+    res2 *= 5;
+  }
+
+  acc_wait (1);
+
+  assert (res == res2);
+
+  return 0;
+}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [gomp4] Some additional OpenACC reduction tests
  2015-07-29 17:42 [gomp4] Some additional OpenACC reduction tests Julian Brown
@ 2016-03-24 17:03 ` Thomas Schwinge
  2016-04-12 10:45   ` Improve libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c (was: [gomp4] Some additional OpenACC reduction tests) Thomas Schwinge
  0 siblings, 1 reply; 3+ messages in thread
From: Thomas Schwinge @ 2016-03-24 17:03 UTC (permalink / raw)
  To: gcc-patches; +Cc: Julian Brown, nathan, Cesar Philippidis

[-- Attachment #1: Type: text/plain, Size: 4408 bytes --]

Hi!

On Wed, 29 Jul 2015 18:23:12 +0100, Julian Brown <julian@codesourcery.com> wrote:
> This is a set of 19 new tests for OpenACC reductions, covering several
> ways of performing reductions over the parallel and loop directives
> using gang or worker/vector level parallelism.

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
> @@ -0,0 +1,40 @@
> +#include <assert.h>
> +
> +/* Test of reduction on both parallel and loop directives (workers and vectors
> +   in gang-partitioned mode, int type with XOR).  */
> +
> +int
> +main (int argc, char *argv[])
> +{
> +  int i, j, arr[32768], res = 0, hres = 0;
> +
> +  for (i = 0; i < 32768; i++)
> +    arr[i] = i;
> +
> +  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
> +		       reduction(^:res)
> +  {
> +    #pragma acc loop gang
> +    for (j = 0; j < 32; j++)
> +      {
> +	#pragma acc loop worker vector reduction(^:res)
> +	for (i = 0; i < 1024; i++)
> +	  res ^= arr[j * 1024 + i];
> +
> +	#pragma acc loop worker vector reduction(^:res)
> +	for (i = 0; i < 1024; i++)
> +	  res ^= arr[j * 1024 + (1023 - i)];
> +      }
> +  }
> +
> +  for (j = 0; j < 32; j++)
> +    for (i = 0; i < 1024; i++)
> +      {
> +        hres ^= arr[j * 1024 + i];
> +	hres ^= arr[j * 1024 + (1023 - i)];
> +      }
> +
> +  assert (res == hres);
> +
> +  return 0;
> +}

Given the interpretation of the OpenACC specification that the current
implementation of OpenACC reductions in GCC is based upon (which we're
currently re-visiting), it had been necessary to add data clauses next
to parallel constructs' reduction clauses -- but not for this test case.
I now found why; it just happened to ;-) always pass, because apparently
the two XOR loops' iterations just cancelled their values, so in the end,
we'd always get an "unremarkable" result of zero for both res and hres.
In gomp-4_0-branch r234461, I have now committed the following:

commit 8fff8ae7117c21d6b4a701a63cdd4634950418d1
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Thu Mar 24 16:54:55 2016 +0000

    Improve libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
    
    	libgomp/
    	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c:
    	Make failure observable.  Add data clause next to parallel
    	construct's reduction clause.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@234461 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog.gomp                                              | 6 ++++++
 .../testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c | 6 +++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 53ae315..b10ae94 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,9 @@
+2016-03-24  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c:
+	Make failure observable.  Add data clause next to parallel
+	construct's reduction clause.
+
 2016-03-11  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* testsuite/libgomp.oacc-c-c++-common/vprop.c: New test.
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
index a7a75a9..5e4590f 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
@@ -12,14 +12,14 @@ main (int argc, char *argv[])
     arr[i] = i;
 
   #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
-		       reduction(^:res)
+    reduction(^:res) copy(res)
   {
     #pragma acc loop gang
     for (j = 0; j < 32; j++)
       {
 	#pragma acc loop worker vector reduction(^:res)
 	for (i = 0; i < 1024; i++)
-	  res ^= arr[j * 1024 + i];
+	  res ^= 3 * arr[j * 1024 + i];
 
 	#pragma acc loop worker vector reduction(^:res)
 	for (i = 0; i < 1024; i++)
@@ -30,7 +30,7 @@ main (int argc, char *argv[])
   for (j = 0; j < 32; j++)
     for (i = 0; i < 1024; i++)
       {
-        hres ^= arr[j * 1024 + i];
+	hres ^= 3 * arr[j * 1024 + i];
 	hres ^= arr[j * 1024 + (1023 - i)];
       }
 


Grüße
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Improve libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c (was: [gomp4] Some additional OpenACC reduction tests)
  2016-03-24 17:03 ` Thomas Schwinge
@ 2016-04-12 10:45   ` Thomas Schwinge
  0 siblings, 0 replies; 3+ messages in thread
From: Thomas Schwinge @ 2016-04-12 10:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: Julian Brown, nathan, Cesar Philippidis

Hi!

Cesar didn't pick up my gomp-4_0-branch change in his recent trunk
commit, so here goes:

On Thu, 24 Mar 2016 17:57:55 +0100, I wrote:
> On Wed, 29 Jul 2015 18:23:12 +0100, Julian Brown <julian@codesourcery.com> wrote:
> > This is a set of 19 new tests for OpenACC reductions, covering several
> > ways of performing reductions over the parallel and loop directives
> > using gang or worker/vector level parallelism.
> 
> > --- /dev/null
> > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
> > @@ -0,0 +1,40 @@
> > +#include <assert.h>
> > +
> > +/* Test of reduction on both parallel and loop directives (workers and vectors
> > +   in gang-partitioned mode, int type with XOR).  */
> > +
> > +int
> > +main (int argc, char *argv[])
> > +{
> > +  int i, j, arr[32768], res = 0, hres = 0;
> > +
> > +  for (i = 0; i < 32768; i++)
> > +    arr[i] = i;
> > +
> > +  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
> > +		       reduction(^:res)
> > +  {
> > +    #pragma acc loop gang
> > +    for (j = 0; j < 32; j++)
> > +      {
> > +	#pragma acc loop worker vector reduction(^:res)
> > +	for (i = 0; i < 1024; i++)
> > +	  res ^= arr[j * 1024 + i];
> > +
> > +	#pragma acc loop worker vector reduction(^:res)
> > +	for (i = 0; i < 1024; i++)
> > +	  res ^= arr[j * 1024 + (1023 - i)];
> > +      }
> > +  }
> > +
> > +  for (j = 0; j < 32; j++)
> > +    for (i = 0; i < 1024; i++)
> > +      {
> > +        hres ^= arr[j * 1024 + i];
> > +	hres ^= arr[j * 1024 + (1023 - i)];
> > +      }
> > +
> > +  assert (res == hres);
> > +
> > +  return 0;
> > +}
> 
> [...] this test case [...]
> just happened to ;-) always pass, because apparently
> the two XOR loops' iterations just cancelled their values, so in the end,
> we'd always get an "unremarkable" result of zero for both res and hres.

Committed in r234897:

commit f87ba9cde3958ccbb1f2c8b9efec997a458efc16
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue Apr 12 10:40:22 2016 +0000

    Improve libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
    
    	libgomp/
    	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c:
    	Make failure observable.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@234897 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog                                                    | 5 +++++
 .../testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c  | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git libgomp/ChangeLog libgomp/ChangeLog
index b53dc6b..6071b23 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,3 +1,8 @@
+2016-04-12  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c:
+	Make failure observable.
+
 2016-04-12  Jakub Jelinek  <jakub@redhat.com>
 
 	* libgomp.h (struct gomp_target_task): Remove firstprivate_copies
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
index a7a75a9..a339f32 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
@@ -19,7 +19,7 @@ main (int argc, char *argv[])
       {
 	#pragma acc loop worker vector reduction(^:res)
 	for (i = 0; i < 1024; i++)
-	  res ^= arr[j * 1024 + i];
+	  res ^= 3 * arr[j * 1024 + i];
 
 	#pragma acc loop worker vector reduction(^:res)
 	for (i = 0; i < 1024; i++)
@@ -30,7 +30,7 @@ main (int argc, char *argv[])
   for (j = 0; j < 32; j++)
     for (i = 0; i < 1024; i++)
       {
-        hres ^= arr[j * 1024 + i];
+	hres ^= 3 * arr[j * 1024 + i];
 	hres ^= arr[j * 1024 + (1023 - i)];
       }
 


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-04-12 10:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-29 17:42 [gomp4] Some additional OpenACC reduction tests Julian Brown
2016-03-24 17:03 ` Thomas Schwinge
2016-04-12 10:45   ` Improve libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c (was: [gomp4] Some additional OpenACC reduction tests) Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).