public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Cesar Philippidis <cesar@codesourcery.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	Nathan Sidwell	<nathan_sidwell@mentor.com>,
	Jakub Jelinek <jakub@redhat.com>
Subject: [gomp4] OpenACC reduction tests
Date: Fri, 17 Jul 2015 18:26:00 -0000	[thread overview]
Message-ID: <55A945E7.2050005@codesourcery.com> (raw)
In-Reply-To: <55A945A1.4090109@codesourcery.com>

[-- Attachment #1: Type: text/plain, Size: 442 bytes --]

This patch updates the libgomp OpenACC reduction test cases to check
worker, vector and combined gang worker vector reductions. I tried to
use some macros to simplify the C test cases a bit. I probably could
have made them more generic with an additional header file/macro, but
that would make them too confusing to debug. The Fortran tests are a bit
of a lost cause, unless someone knows how to use the preprocessor with
!$acc loops.

Cesar

[-- Attachment #2: vector-reduction-tests.diff --]
[-- Type: text/x-patch, Size: 69485 bytes --]

2015-07-17  Cesar Philippidis  <cesar@codesourcery.com>

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/reduction.h: New file.
	* testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Update tests
	with worker, vector and combined reductions.
	* testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-6.c: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.


diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
index bb81759..8738927 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
@@ -3,44 +3,54 @@
 /* Integer reductions.  */
 
 #include <stdlib.h>
-#include <stdbool.h>
-
-#define ng 32
-
-#define DO_PRAGMA(x) _Pragma (#x)
-
-#define check_reduction_op(type, op, init, b)	\
-  {						\
-    type res, vres;				\
-    res = (init);				\
-DO_PRAGMA (acc parallel num_gangs (ng) copy (res)) \
-DO_PRAGMA (acc loop gang reduction (op:res))	\
-    for (i = 0; i < n; i++)			\
-      res = res op (b);				\
-						\
-    vres = (init);				\
-    for (i = 0; i < n; i++)			\
-      vres = vres op (b);			\
-						\
-    if (res != vres)				\
-      abort ();					\
-  }
+#include "reduction.h"
+
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
 
 static void
-test_reductions_int (void)
+test_reductions (void)
 {
-  const int n = 1000;
+  const int n = 100;
   int i;
   int array[n];
 
   for (i = 0; i < n; i++)
-    array[i] = i;
-
-  check_reduction_op (int, +, 0, array[i]);
-  check_reduction_op (int, *, 1, array[i]);
-  check_reduction_op (int, &, -1, array[i]);
-  check_reduction_op (int, |, 0, array[i]);
-  check_reduction_op (int, ^, 0, array[i]);
+    array[i] = i+1;
+
+  /* Gang reductions.  */
+  check_reduction_op (int, +, 0, array[i], num_gangs (ng), gang);
+  check_reduction_op (int, *, 1, array[i], num_gangs (ng), gang);
+  check_reduction_op (int, &, -1, array[i], num_gangs (ng), gang);
+  check_reduction_op (int, |, 0, array[i], num_gangs (ng), gang);
+  check_reduction_op (int, ^, 0, array[i], num_gangs (ng), gang);
+
+  /* Worker reductions.  */
+  check_reduction_op (int, +, 0, array[i], num_workers (nw), worker);
+  check_reduction_op (int, *, 1, array[i], num_workers (nw), worker);
+  check_reduction_op (int, &, -1, array[i], num_workers (nw), worker);
+  check_reduction_op (int, |, 0, array[i], num_workers (nw), worker);
+  check_reduction_op (int, ^, 0, array[i], num_workers (nw), worker);
+
+  /* Vector reductions.  */
+  check_reduction_op (int, +, 0, array[i], vector_length (vl), vector);
+  check_reduction_op (int, *, 1, array[i], vector_length (vl), vector);
+  check_reduction_op (int, &, -1, array[i], vector_length (vl), vector);
+  check_reduction_op (int, |, 0, array[i], vector_length (vl), vector);
+  check_reduction_op (int, ^, 0, array[i], vector_length (vl), vector);
+
+  /* Combined reductions.  */
+  check_reduction_op (int, +, 0, array[i], num_gangs (ng) num_workers (nw)
+		      vector_length (vl), gang worker vector);
+  check_reduction_op (int, *, 1, array[i], num_gangs (ng) num_workers (nw)
+		      vector_length (vl), gang worker vector);
+  check_reduction_op (int, &, -1, array[i], num_gangs (ng) num_workers (nw)
+		      vector_length (vl), gang worker vector);
+  check_reduction_op (int, |, 0, array[i], num_gangs (ng) num_workers (nw)
+		      vector_length (vl), gang worker vector);
+  check_reduction_op (int, ^, 0, array[i], num_gangs (ng) num_workers (nw)
+		      vector_length (vl), gang worker vector);
 }
 
 static void
@@ -55,32 +65,31 @@ test_reductions_bool (void)
     array[i] = i;
 
   cmp_val = 5;
-#if 0
-  // TODO
-  check_reduction_op (bool, &&, true, (cmp_val > array[i]));
-  check_reduction_op (bool, ||, false, (cmp_val > array[i]));
-#endif
-}
 
-#define check_reduction_macro(type, op, init, b)	\
-  {							\
-    type res, vres;					\
-    res = (init);					\
-DO_PRAGMA (acc parallel num_gangs (ng) copy(res))	\
-DO_PRAGMA (acc loop gang reduction (op:res))		\
-    for (i = 0; i < n; i++)				\
-      res = op (res, (b));				\
-							\
-    vres = (init);					\
-    for (i = 0; i < n; i++)				\
-      vres = op (vres, (b));				\
-							\
-    if (res != vres)					\
-      abort ();						\
-  }
-
-#define max(a, b) (((a) > (b)) ? (a) : (b))
-#define min(a, b) (((a) < (b)) ? (a) : (b))
+  /* Gang reductions.  */
+  check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng),
+		      gang);
+  check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng),
+		      gang);
+
+  /* Worker reductions.  */
+  check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_workers (nw),
+		      worker);
+  check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_workers (nw),
+		      worker);
+
+  /* Vector reductions.  */
+  check_reduction_op (int, &&, 1, (cmp_val > array[i]), vector_length (vl),
+		      vector);
+  check_reduction_op (int, ||, 0, (cmp_val > array[i]), vector_length (vl),
+		      vector);
+
+  /* Combined reductions.  */
+  check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng)
+		      num_workers (nw) vector_length (vl), gang worker vector);
+  check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng)
+		      num_workers (nw) vector_length (vl), gang worker vector);
+}
 
 static void
 test_reductions_minmax (void)
@@ -92,14 +101,32 @@ test_reductions_minmax (void)
   for (i = 0; i < n; i++)
     array[i] = i;
 
-  check_reduction_macro (int, min, n + 1, array[i]);
-  check_reduction_macro (int, max, -1, array[i]);
+  /* Gang reductions.  */
+  check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng), gang);
+  check_reduction_macro (int, max, -1, array[i], num_gangs (ng), gang);
+
+  /* Worker reductions.  */
+  check_reduction_macro (int, min, n + 1, array[i], num_workers (nw), worker);
+  check_reduction_macro (int, max, -1, array[i], num_workers (nw), worker);
+
+  /* Vector reductions.  */
+  check_reduction_macro (int, min, n + 1, array[i], vector_length (vl),
+			 vector);
+  check_reduction_macro (int, max, -1, array[i], vector_length (vl), vector);
+
+  /* Combined reductions.  */
+  check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng)
+			 num_workers (nw) vector_length (vl), gang worker
+			 vector);
+  check_reduction_macro (int, max, -1, array[i], num_gangs (ng)
+			 num_workers (nw) vector_length (vl), gang worker
+			 vector);
 }
 
 int
 main (void)
 {
-  test_reductions_int ();
+  test_reductions ();
   test_reductions_bool ();
   test_reductions_minmax ();
   return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
index ba6eb27..2465ddd 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
@@ -3,123 +3,78 @@
 /* float reductions.  */
 
 #include <stdlib.h>
-#include <stdbool.h>
-#include <math.h>
+#include "reduction.h"
 
-#define ng 32
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
 
-int
-main(void)
+static void
+test_reductions (void)
 {
-  const int n = 1000;
+  const int n = 100;
   int i;
-  float vresult, result, array[n];
-  int lvresult, lresult;
+  float array[n];
 
   for (i = 0; i < n; i++)
-    array[i] = i;
+    array[i] = i+1;
 
-  result = 0;
-  vresult = 0;
+  /* Gang reductions.  */
+  check_reduction_op (float, +, 0, array[i], num_gangs (ng), gang);
+  check_reduction_op (float, *, 1, array[i], num_gangs (ng), gang);
 
-  /* '+' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (+:result) gang
-  for (i = 0; i < n; i++)
-    result += array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult += array[i];
+  /* Worker reductions.  */
+  check_reduction_op (float, +, 0, array[i], num_workers (nw), worker);
+  check_reduction_op (float, *, 1, array[i], num_workers (nw), worker);
 
-  if (result != vresult)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* '*' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (*:result) gang
-  for (i = 0; i < n; i++)
-    result *= array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult *= array[i];
-
-  if (fabs(result - vresult) > .0001)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* 'max' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (max:result) gang
-  for (i = 0; i < n; i++)
-    result = result > array[i] ? result : array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult = vresult > array[i] ? vresult : array[i];
-
-  if (result != vresult)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* 'min' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (min:result) gang
-  for (i = 0; i < n; i++)
-    result = result < array[i] ? result : array[i];
+  /* Vector reductions.  */
+  check_reduction_op (float, +, 0, array[i], vector_length (vl), vector);
+  check_reduction_op (float, *, 1, array[i], vector_length (vl), vector);
 
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult = vresult < array[i] ? vresult : array[i];
-
-  if (result != vresult)
-    abort ();
-
-  result = 5;
-  vresult = 5;
-
-  lresult = 0;
-  lvresult = 0;
-
-  /* '&&' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (&&:lresult) gang
-  for (i = 0; i < n; i++)
-    lresult = lresult && (result > array[i]);
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    lvresult = lresult && (result > array[i]);
-
-  if (lresult != lvresult)
-    abort ();
-
-  result = 5;
-  vresult = 5;
-
-  lresult = 0;
-  lvresult = 0;
+  /* Combined reductions.  */
+  check_reduction_op (float, +, 0, array[i], num_gangs (ng) num_workers (nw)
+		      vector_length (vl), gang worker vector);
+  check_reduction_op (float, *, 1, array[i], num_gangs (ng) num_workers (nw)
+		      vector_length (vl), gang worker vector);
+}
 
-  /* '||' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (||:lresult) gang
-  for (i = 0; i < n; i++)
-    lresult = lresult || (result > array[i]);
+static void
+test_reductions_minmax (void)
+{
+  const int n = 1000;
+  int i;
+  float array[n];
 
-  /* Verify the reduction.  */
   for (i = 0; i < n; i++)
-    lvresult = lresult || (result > array[i]);
+    array[i] = i;
 
-  if (lresult != lvresult)
-    abort ();
+  /* Gang reductions.  */
+  check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng), gang);
+  check_reduction_macro (float, max, -1, array[i], num_gangs (ng), gang);
+
+  /* Worker reductions.  */
+  check_reduction_macro (float, min, n + 1, array[i], num_workers (nw),
+			 worker);
+  check_reduction_macro (float, max, -1, array[i], num_workers (nw), worker);
+
+  /* Vector reductions.  */
+  check_reduction_macro (float, min, n + 1, array[i], vector_length (vl),
+			 vector);
+  check_reduction_macro (float, max, -1, array[i], vector_length (vl), vector);
+
+  /* Combined reductions.  */
+  check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng)
+			 num_workers (nw) vector_length (vl), gang worker
+			 vector);
+  check_reduction_macro (float, max, -1, array[i], num_gangs (ng)
+			 num_workers (nw)vector_length (vl), gang worker
+			 vector);
+}
 
+int
+main (void)
+{
+  test_reductions ();
+  test_reductions_minmax ();
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
index 5ecc651..091421f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
@@ -3,123 +3,79 @@
 /* double reductions.  */
 
 #include <stdlib.h>
-#include <stdbool.h>
-#include <math.h>
+#include "reduction.h"
 
-#define ng 32
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
 
-int
-main(void)
+static void
+test_reductions (void)
 {
-  const int n = 1000;
+  const int n = 10;
   int i;
-  double vresult, result, array[n];
-  int lvresult, lresult;
+  double array[n];
 
   for (i = 0; i < n; i++)
-    array[i] = i;
+    array[i] = i+1;
 
-  result = 0;
-  vresult = 0;
+  /* Gang reductions.  */
+  check_reduction_op (double, +, 0, array[i], num_gangs (ng), gang);
+  check_reduction_op (double, *, 1, array[i], num_gangs (ng), gang);
 
-  /* '+' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (+:result) gang
-  for (i = 0; i < n; i++)
-    result += array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult += array[i];
+  /* Worker reductions.  */
+  check_reduction_op (double, +, 0, array[i], num_workers (nw), worker);
+  check_reduction_op (double, *, 1, array[i], num_workers (nw), worker);
 
-  if (result != vresult)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* '*' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (*:result) gang
-  for (i = 0; i < n; i++)
-    result *= array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult *= array[i];
-
-  if (fabs(result - vresult) > .0001)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* 'max' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (max:result) gang
-  for (i = 0; i < n; i++)
-      result = result > array[i] ? result : array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-      vresult = vresult > array[i] ? vresult : array[i];
-
-  if (result != vresult)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* 'min' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (min:result) gang
-  for (i = 0; i < n; i++)
-      result = result < array[i] ? result : array[i];
+  /* Vector reductions.  */
+  check_reduction_op (double, +, 0, array[i], vector_length (vl), vector);
+  check_reduction_op (double, *, 1, array[i], vector_length (vl), vector);
 
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-      vresult = vresult < array[i] ? vresult : array[i];
-
-  if (result != vresult)
-    abort ();
-
-  result = 5;
-  vresult = 5;
-
-  lresult = 0;
-  lvresult = 0;
-
-  /* '&&' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (&&:lresult) gang
-  for (i = 0; i < n; i++)
-    lresult = lresult && (result > array[i]);
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    lvresult = lresult && (result > array[i]);
-
-  if (lresult != lvresult)
-    abort ();
-
-  result = 5;
-  vresult = 5;
-
-  lresult = 0;
-  lvresult = 0;
+  /* Combined reductions.  */
+  check_reduction_op (double, +, 0, array[i], num_gangs (ng)  num_workers (nw)
+		      vector_length (vl), gang worker vector);
+  check_reduction_op (double, *, 1, array[i], num_gangs (ng)  num_workers (nw)
+		      vector_length (vl), gang worker vector);
+}
 
-  /* '||' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (||:lresult) gang
-  for (i = 0; i < n; i++)
-    lresult = lresult || (result > array[i]);
+static void
+test_reductions_minmax (void)
+{
+  const int n = 1000;
+  int i;
+  double array[n];
 
-  /* Verify the reduction.  */
   for (i = 0; i < n; i++)
-    lvresult = lresult || (result > array[i]);
+    array[i] = i;
 
-  if (lresult != lvresult)
-    abort ();
+  /* Gang reductions.  */
+  check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng), gang);
+  check_reduction_macro (double, max, -1, array[i], num_gangs (ng), gang);
+
+  /* Worker reductions.  */
+  check_reduction_macro (double, min, n + 1, array[i], num_workers (nw),
+			 worker);
+  check_reduction_macro (double, max, -1, array[i], num_workers (nw), worker);
+
+  /* Vector reductions.  */
+  check_reduction_macro (double, min, n + 1, array[i], vector_length (vl),
+			 vector);
+  check_reduction_macro (double, max, -1, array[i], vector_length (vl),
+			 vector);
+
+  /* Combined reductions.  */
+  check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng)
+			 num_workers (nw) vector_length (vl), gang worker
+			 vector);
+  check_reduction_macro (double, max, -1, array[i], num_gangs (ng)
+			 num_workers (nw) vector_length (vl), gang worker
+			 vector);
+}
 
+int
+main (void)
+{
+  test_reductions ();
+  test_reductions_minmax ();
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
index c7069e9..816b09f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
@@ -1,95 +1,53 @@
 /* { dg-do run { target { ! { hppa*-*-hpux* } } } } */
-/* { dg-xfail-run-if "libgomp: cuStreamSynchronize error: launch timeout" { openacc_nvidia_accel_selected } } */
 
 /* complex reductions.  */
 
 #include <stdlib.h>
-#include <stdbool.h>
-#include <math.h>
 #include <complex.h>
+#include "reduction.h"
 
-#define ng 32
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
 
-int
-main(void)
+static void
+test_reductions (void)
 {
-  const int n = 1000;
+  const int n = 10;
   int i;
-  double _Complex vresult, result, array[n];
-  bool lvresult, lresult;
-
-  for (i = 0; i < n; i++)
-    array[i] = i;
-
-  result = 0;
-  vresult = 0;
-
-  /* '+' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (+:result) gang
-  for (i = 0; i < n; i++)
-    result += array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult += array[i];
-
-  if (result != vresult)
-    abort ();
-
-  result = 0;
-  vresult = 0;
-
-  /* '*' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (result)
-#pragma acc loop reduction (*:result) gang
-  for (i = 0; i < n; i++)
-    result *= array[i];
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    vresult *= array[i];
-
-  if (cabsf (result - vresult) > .0001)
-    abort ();
-
-  result = 5;
-  vresult = 5;
-
-  lresult = false;
-  lvresult = false;
-
-  /* '&&' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (lresult)
-#pragma acc loop reduction (&&:lresult) gang
-  for (i = 0; i < n; i++)
-    lresult = lresult && (creal(result) > creal(array[i]));
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    lvresult = lvresult && (creal(result) > creal(array[i]));
-
-  if (lresult != lvresult)
-    abort ();
-
-  result = 5;
-  vresult = 5;
-
-  lresult = false;
-  lvresult = false;
-
-  /* '||' reductions.  */
-#pragma acc parallel num_gangs (ng) copy (lresult)
-#pragma acc loop reduction (||:lresult) gang
-  for (i = 0; i < n; i++)
-    lresult = lresult || (creal(result) > creal(array[i]));
-
-  /* Verify the reduction.  */
-  for (i = 0; i < n; i++)
-    lvresult = lvresult || (creal(result) > creal(array[i]));
-
-  if (lresult != lvresult)
-    abort ();
+  double _Complex array[n];
+
+  for (i = 0; i < n; i++)
+    array[i] = i+1;
+
+  /* Gang reductions.  */
+  check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng), gang);
+  check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng), gang);
+
+  /* Worker reductions.  */
+  check_reduction_op (double, +, 0, creal (array[i]), num_workers (nw),
+		      worker);
+  check_reduction_op (double, *, 1, creal (array[i]), num_workers (nw),
+		      worker);
+
+  /* Vector reductions.  */
+  check_reduction_op (double, +, 0, creal (array[i]), vector_length (vl),
+		      vector);
+  check_reduction_op (double, *, 1, creal (array[i]), vector_length (vl),
+		      vector);
+
+  /* Combined reductions.  */
+  check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng)
+			 num_workers (nw) vector_length (vl), gang worker
+			 vector);
+  check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng)
+			 num_workers (nw) vector_length (vl), gang worker
+			 vector);
+}
 
+int
+main (void)
+{
+  test_reductions ();
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
index 23a194c..e979ab6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
@@ -1,32 +1,53 @@
+/* { dg-do run } */
+
+/* Multiple reductions.  */
+
 #include <stdio.h>
 #include <stdlib.h>
 
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
+
+const int n = 100;
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#define check_reduction(gwv_par, gwv_loop)		\
+  {							\
+  s1 = 2; s2 = 5;					\
+DO_PRAGMA (acc parallel gwv_par copy (s1, s2))		\
+DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2))	\
+    for (i = 0; i < n; i++)				\
+      {							\
+         s1 = s1 + 3;					\
+         s2 = s2 + 5;					\
+      }							\
+							\
+    if (s1 != v1 && s2 != v2)				\
+      abort ();						\
+  }
+
 int
 main (void)
 {
   int s1 = 2, s2 = 5, v1 = 2, v2 = 5;
-  int n = 100;
   int i;
 
-#pragma acc parallel num_gangs (1000)  copy (s1, s2)
-#pragma acc loop reduction (+:s1, s2) gang
-  for (i = 0; i < n; i++)
-    {
-      s1 = s1 + 3;
-      s2 = s2 + 2;
-    }
-
   for (i = 0; i < n; i++)
     {
       v1 = v1 + 3;
       v2 = v2 + 2;
     }
-  
-  if (s1 != v1)
-    abort ();
-  
-  if (s2 != v2)
-    abort ();
-    
+
+  check_reduction (num_gangs (ng), gang);
+
+  /* Nvptx targets require a vector_length of 32 to allow spinlocks with
+     gangs.  */
+  check_reduction (num_workers (nw) vector_length (vl), worker);
+  check_reduction (vector_length (vl), vector);
+  check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang
+		   worker vector);
+
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c
new file mode 100644
index 0000000..17fa951
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+/* Check nested reductions.  */
+
+#include <assert.h>
+
+#define n 1000
+
+int
+main ()
+{
+  int i, j, red = 0, vred = 0;
+  int chunksize = 10;
+
+#pragma acc parallel num_gangs (10) vector_length (32) copy (red)
+#pragma acc loop reduction (+:red) gang
+  for (i = 0; i < n/chunksize; i++)
+#pragma acc loop reduction (+:red) vector
+    for (j = 0; j < chunksize; j++)
+      red += j;
+
+  for (i = 0; i < n/chunksize; i++)
+    for (j = 0; j < chunksize; j++)
+      vred += j;
+
+  assert (red == vred);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h
new file mode 100644
index 0000000..1b3f8d4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h
@@ -0,0 +1,43 @@
+#ifndef REDUCTION_H
+#define REDUCTION_H
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#define check_reduction_op(type, op, init, b, gwv_par, gwv_loop)	\
+  {									\
+    type res, vres;							\
+    res = (init);							\
+DO_PRAGMA (acc parallel gwv_par copy (res))				\
+DO_PRAGMA (acc loop gwv_loop reduction (op:res))			\
+    for (i = 0; i < n; i++)						\
+      res = res op (b);							\
+									\
+    vres = (init);							\
+    for (i = 0; i < n; i++)						\
+      vres = vres op (b);						\
+									\
+    if (res != vres)							\
+      abort ();								\
+  }
+
+#define check_reduction_macro(type, op, init, b, gwv_par, gwv_loop)	\
+  {									\
+    type res, vres;							\
+    res = (init);							\
+    DO_PRAGMA (acc parallel gwv_par copy(res))				\
+DO_PRAGMA (acc loop gwv_loop reduction (op:res))			\
+    for (i = 0; i < n; i++)						\
+      res = op (res, (b));						\
+									\
+    vres = (init);							\
+    for (i = 0; i < n; i++)						\
+      vres = op (vres, (b));						\
+									\
+    if (res != vres)							\
+      abort ();								\
+  }
+
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#endif
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
index 3419ffd..03cca04 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
@@ -5,24 +5,50 @@
 program reduction_1
   implicit none
 
-  integer, parameter    :: n = 10, gangs = 20
-  integer               :: i, vresult, result
-  logical               :: lresult, lvresult
+  integer, parameter    :: n = 10, ng = 8, nw = 4, vl = 32
+  integer               :: i, vresult, rg, rw, rv, rc
+  logical               :: lrg, lrw, lrv, lrc, lvresult
   integer, dimension (n) :: array
 
   do i = 1, n
      array(i) = i
   end do
 
-  result = 0
+  !
+  ! '+' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! '+' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+     rg = rg + array(i)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(+:result) gang
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
   do i = 1, n
-     result = result + array(i)
+     rw = rw + array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+     rv = rv + array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
+  do i = 1, n
+     rc = rc + array(i)
   end do
   !$acc end parallel
 
@@ -31,17 +57,46 @@ program reduction_1
      vresult = vresult + array(i)
   end do
 
-  if (result.ne.vresult) call abort
-
-  result = 0
-  vresult = 0
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
 
+  !
   ! '*' reductions
+  !
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(*:result) gang
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
+  vresult = 1
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
   do i = 1, n
-     result = result * array(i)
+     rg = rg * array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
+  do i = 1, n
+     rw = rw * array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+     rv = rv * array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
+  do i = 1, n
+     rc = rc * array(i)
   end do
   !$acc end parallel
 
@@ -50,17 +105,46 @@ program reduction_1
      vresult = vresult * array(i)
   end do
 
-  if (result.ne.vresult) call abort
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
+
+  !
+  ! 'max' reductions
+  !
 
-  result = 0
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! 'max' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(max:rg) gang
+  do i = 1, n
+     rg = max (rg, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(max:rw) worker
+  do i = 1, n
+     rw = max (rw, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(max:rv) vector
+  do i = 1, n
+     rv = max (rv, array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(max:result) gang
+  !$acc parallel num_gangs(ng) Num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(max:rc) gang worker vector
   do i = 1, n
-     result = max (result, array(i))
+     rc = max (rc, array(i))
   end do
   !$acc end parallel
 
@@ -69,17 +153,46 @@ program reduction_1
      vresult = max (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
-
-  result = 1
-  vresult = 1
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
 
+  !
   ! 'min' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(min:rg) gang
+  do i = 1, n
+     rg = min (rg, array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(min:result) gang
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(min:rw) worker
   do i = 1, n
-     result = min (result, array(i))
+     rw = min (rw, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(min:rv) vector
+  do i = 1, n
+     rv = min (rv, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(min:rc) gang worker vector
+  do i = 1, n
+     rc = min (rc, array(i))
   end do
   !$acc end parallel
 
@@ -88,17 +201,46 @@ program reduction_1
      vresult = min (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
+
+  !
+  ! 'iand' reductions
+  !
 
-  result = 1
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
   vresult = 1
 
-  ! 'iand' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(iand:rg) gang
+  do i = 1, n
+     rg = iand (rg, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(iand:rw) worker
+  do i = 1, n
+     rw = iand (rw, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(iand:rv) vector
+  do i = 1, n
+     rv = iand (rv, array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(iand:result) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(iand:rc) gang worker vector
   do i = 1, n
-     result = iand (result, array(i))
+     rc = iand (rc, array(i))
   end do
   !$acc end parallel
 
@@ -107,17 +249,46 @@ program reduction_1
      vresult = iand (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
-
-  result = 1
-  vresult = 1
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
 
+  !
   ! 'ior' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(ior:rg) gang
+  do i = 1, n
+     rg = ior (rg, array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(ior:result) gang
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(ior:rw) worker
   do i = 1, n
-     result = ior (result, array(i))
+     rw = ior (rw, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(ior:rv) vector
+  do i = 1, n
+     rv = ior (rv, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(ior:rc) gang worker vector
+  do i = 1, n
+     rc = ior (rc, array(i))
   end do
   !$acc end parallel
 
@@ -126,17 +297,46 @@ program reduction_1
      vresult = ior (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
 
-  result = 0
+  !
+  ! 'ieor' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! 'ieor' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(ieor:rg) gang
+  do i = 1, n
+     rg = ieor (rg, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(ieor:rw) worker
+  do i = 1, n
+     rw = ieor (rw, array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(ieor:result) gang
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(ieor:rv) vector
   do i = 1, n
-     result = ieor (result, array(i))
+     rv = ieor (rv, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(ieor:rc) gang worker vector
+  do i = 1, n
+     rc = ieor (rc, array(i))
   end do
   !$acc end parallel
 
@@ -145,17 +345,46 @@ program reduction_1
      vresult = ieor (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
 
+  !
   ! '.and.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.and.:lrg) gang
+  do i = 1, n
+     lrg = lrg .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.and.:lrw) worker
+  do i = 1, n
+     lrw = lrw .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.and.:lrv) vector
+  do i = 1, n
+     lrv = lrv .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.and.:lresult) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.and.:lrc) gang worker vector
   do i = 1, n
-     lresult = lresult .and. (array(i) .ge. 5)
+     lrc = lrc .and. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -164,17 +393,46 @@ program reduction_1
      lvresult = lvresult .and. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
+
+  !
+  ! '.or.' reductions
+  !
 
-  lresult = .false.
+  lrg = .false.
+  lrw = .false.
+  lrv = .false.
+  lrc = .false.
   lvresult = .false.
 
-  ! '.or.' reductions
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.or.:lrg) gang
+  do i = 1, n
+     lrg = lrg .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.or.:lrw) worker
+  do i = 1, n
+     lrw = lrw .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.or.:lrv) vector
+  do i = 1, n
+     lrv = lrv .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.or.:lresult) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.or.:lrc) gang worker vector
   do i = 1, n
-     lresult = lresult .or. (array(i) .ge. 5)
+     lrc = lrc .or. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -183,17 +441,46 @@ program reduction_1
      lvresult = lvresult .or. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
+  !
   ! '.eqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.eqv.:lrg) gang
+  do i = 1, n
+     lrg = lrg .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.eqv.:lrw) worker
+  do i = 1, n
+     lrw = lrw .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.eqv.:lrv) vector
+  do i = 1, n
+     lrv = lrv .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.eqv.:lresult) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.eqv.:lrc) gang worker vector
   do i = 1, n
-     lresult = lresult .eqv. (array(i) .ge. 5)
+     lrc = lrc .eqv. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -202,17 +489,46 @@ program reduction_1
      lvresult = lvresult .eqv. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
+  !
   ! '.neqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.neqv.:lrg) gang
+  do i = 1, n
+     lrg = lrg .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.neqv.:lrw) worker
+  do i = 1, n
+     lrw = lrw .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.neqv.:lrv) vector
+  do i = 1, n
+     lrv = lrv .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.neqv.:lresult) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.neqv.:lrc) gang worker vector
   do i = 1, n
-     lresult = lresult .neqv. (array(i) .ge. 5)
+     lrc = lrc .neqv. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -221,5 +537,8 @@ program reduction_1
      lvresult = lvresult .neqv. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 end program reduction_1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
index fe6a9c3..cd09099 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
@@ -5,26 +5,52 @@
 program reduction_2
   implicit none
 
-  integer, parameter    :: n = 10, gangs = 20
+  integer, parameter    :: n = 10, ng = 8, nw = 4, vl = 32
   integer               :: i
-  real, parameter       :: e = .001
-  real                  :: vresult, result
-  logical               :: lresult, lvresult
-  real, dimension (n) :: array
+  real                  :: vresult, rg, rw, rv, rc
+  real, parameter       :: e = 0.001
+  logical               :: lrg, lrw, lrv, lrc, lvresult
+  real, dimension (n)   :: array
 
   do i = 1, n
      array(i) = i
   end do
 
-  result = 0
+  !
+  ! '+' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! '+' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+     rg = rg + array(i)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(+:result) gang
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
   do i = 1, n
-     result = result + array(i)
+     rw = rw + array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+     rv = rv + array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
+  do i = 1, n
+     rc = rc + array(i)
   end do
   !$acc end parallel
 
@@ -33,17 +59,46 @@ program reduction_2
      vresult = vresult + array(i)
   end do
 
-  if (abs (result - vresult) .ge. e) call abort
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
+
+  !
+  ! '*' reductions
+  !
 
-  result = 1
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
   vresult = 1
 
-  ! '*' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
+  do i = 1, n
+     rg = rg * array(i)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(*:result) gang
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
   do i = 1, n
-     result = result * array(i)
+     rw = rw * array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+     rv = rv * array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
+  do i = 1, n
+     rc = rc * array(i)
   end do
   !$acc end parallel
 
@@ -52,17 +107,46 @@ program reduction_2
      vresult = vresult * array(i)
   end do
 
-  if (result.ne.vresult) call abort
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
+
+  !
+  ! 'max' reductions
+  !
 
-  result = 0
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! 'max' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(max:rg) gang
+  do i = 1, n
+     rg = max (rg, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(max:rw) worker
+  do i = 1, n
+     rw = max (rw, array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(max:result) gang
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(max:rv) vector
   do i = 1, n
-     result = max (result, array(i))
+     rv = max (rv, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(max:rc) gang worker vector
+  do i = 1, n
+     rc = max (rc, array(i))
   end do
   !$acc end parallel
 
@@ -71,17 +155,46 @@ program reduction_2
      vresult = max (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
-
-  result = 1
-  vresult = 1
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
 
+  !
   ! 'min' reductions
+  !
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(min:result) gang
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(min:rg) gang
+  do i = 1, n
+     rg = min (rg, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(min:rw) worker
+  do i = 1, n
+     rw = min (rw, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(min:rv) vector
   do i = 1, n
-     result = min (result, array(i))
+     rv = min (rv, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(min:rc) gang worker vector
+  do i = 1, n
+     rc = min (rc, array(i))
   end do
   !$acc end parallel
 
@@ -90,17 +203,46 @@ program reduction_2
      vresult = min (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
 
+  !
   ! '.and.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.and.:lrg) gang
+  do i = 1, n
+     lrg = lrg .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.and.:lresult) gang
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.and.:lrw) worker
   do i = 1, n
-     lresult = lresult .and. (array(i) .ge. 5)
+     lrw = lrw .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.and.:lrv) vector
+  do i = 1, n
+     lrv = lrv .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.and.:lrc) gang worker vector
+  do i = 1, n
+     lrc = lrc .and. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -109,17 +251,46 @@ program reduction_2
      lvresult = lvresult .and. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
-  lresult = .false.
+  !
+  ! '.or.' reductions
+  !
+
+  lrg = .false.
+  lrw = .false.
+  lrv = .false.
+  lrc = .false.
   lvresult = .false.
 
-  ! '.or.' reductions
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.or.:lrg) gang
+  do i = 1, n
+     lrg = lrg .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.or.:lrw) worker
+  do i = 1, n
+     lrw = lrw .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.or.:lresult) gang
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.or.:lrv) vector
   do i = 1, n
-     lresult = lresult .or. (array(i) .ge. 5)
+     lrv = lrv .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.or.:lrc) gang worker vector
+  do i = 1, n
+     lrc = lrc .or. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -128,17 +299,46 @@ program reduction_2
      lvresult = lvresult .or. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
+  !
   ! '.eqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.eqv.:lrg) gang
+  do i = 1, n
+     lrg = lrg .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.eqv.:lresult) gang
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.eqv.:lrw) worker
   do i = 1, n
-     lresult = lresult .eqv. (array(i) .ge. 5)
+     lrw = lrw .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.eqv.:lrv) vector
+  do i = 1, n
+     lrv = lrv .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.eqv.:lrc) gang worker vector
+  do i = 1, n
+     lrc = lrc .eqv. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -147,17 +347,46 @@ program reduction_2
      lvresult = lvresult .eqv. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
+  !
   ! '.neqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.neqv.:lrg) gang
+  do i = 1, n
+     lrg = lrg .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.neqv.:lrw) worker
+  do i = 1, n
+     lrw = lrw .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.neqv.:lrv) vector
+  do i = 1, n
+     lrv = lrv .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.neqv.:lresult) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.neqv.:lrc) gang worker vector
   do i = 1, n
-     lresult = lresult .neqv. (array(i) .ge. 5)
+     lrc = lrc .neqv. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -166,5 +395,8 @@ program reduction_2
      lvresult = lvresult .neqv. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 end program reduction_2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
index 155b903..a7dbf2b 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
@@ -5,26 +5,52 @@
 program reduction_3
   implicit none
 
-  integer, parameter    :: n = 10, gangs = 20
+  integer, parameter    :: n = 10, ng = 8, nw = 4, vl = 32
   integer               :: i
-  double precision, parameter :: e = .001
-  double precision      :: vresult, result
-  logical               :: lresult, lvresult
+  double precision      :: vresult, rg, rw, rv, rc
+  double precision, parameter :: e = 0.001
+  logical               :: lrg, lrw, lrv, lrc, lvresult
   double precision, dimension (n) :: array
 
   do i = 1, n
      array(i) = i
   end do
 
-  result = 0
+  !
+  ! '+' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! '+' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+     rg = rg + array(i)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(+:result) gang
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
   do i = 1, n
-     result = result + array(i)
+     rw = rw + array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+     rv = rv + array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
+  do i = 1, n
+     rc = rc + array(i)
   end do
   !$acc end parallel
 
@@ -33,17 +59,46 @@ program reduction_3
      vresult = vresult + array(i)
   end do
 
-  if (abs (result - vresult) .ge. e) call abort
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
+
+  !
+  ! '*' reductions
+  !
 
-  result = 1
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
   vresult = 1
 
-  ! '*' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
+  do i = 1, n
+     rg = rg * array(i)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(*:result) gang
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
   do i = 1, n
-     result = result * array(i)
+     rw = rw * array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+     rv = rv * array(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
+  do i = 1, n
+     rc = rc * array(i)
   end do
   !$acc end parallel
 
@@ -52,17 +107,46 @@ program reduction_3
      vresult = vresult * array(i)
   end do
 
-  if (result.ne.vresult) call abort
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
+
+  !
+  ! 'max' reductions
+  !
 
-  result = 0
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! 'max' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(max:rg) gang
+  do i = 1, n
+     rg = max (rg, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(max:rw) worker
+  do i = 1, n
+     rw = max (rw, array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(max:result) gang
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(max:rv) vector
   do i = 1, n
-     result = max (result, array(i))
+     rv = max (rv, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(max:rc) gang worker vector
+  do i = 1, n
+     rc = max (rc, array(i))
   end do
   !$acc end parallel
 
@@ -71,17 +155,46 @@ program reduction_3
      vresult = max (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
-
-  result = 1
-  vresult = 1
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
 
+  !
   ! 'min' reductions
+  !
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(min:result) gang
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(min:rg) gang
+  do i = 1, n
+     rg = min (rg, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(min:rw) worker
+  do i = 1, n
+     rw = min (rw, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(min:rv) vector
   do i = 1, n
-     result = min (result, array(i))
+     rv = min (rv, array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(min:rc) gang worker vector
+  do i = 1, n
+     rc = min (rc, array(i))
   end do
   !$acc end parallel
 
@@ -90,17 +203,46 @@ program reduction_3
      vresult = min (vresult, array(i))
   end do
 
-  if (result.ne.vresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (abs (rg - vresult) .ge. e) call abort
+  if (abs (rw - vresult) .ge. e) call abort
+  if (abs (rv - vresult) .ge. e) call abort
+  if (abs (rc - vresult) .ge. e) call abort
 
+  !
   ! '.and.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.and.:lrg) gang
+  do i = 1, n
+     lrg = lrg .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.and.:lresult) gang
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.and.:lrw) worker
   do i = 1, n
-     lresult = lresult .and. (array(i) .ge. 5)
+     lrw = lrw .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.and.:lrv) vector
+  do i = 1, n
+     lrv = lrv .and. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.and.:lrc) gang worker vector
+  do i = 1, n
+     lrc = lrc .and. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -109,17 +251,46 @@ program reduction_3
      lvresult = lvresult .and. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
-  lresult = .false.
+  !
+  ! '.or.' reductions
+  !
+
+  lrg = .false.
+  lrw = .false.
+  lrv = .false.
+  lrc = .false.
   lvresult = .false.
 
-  ! '.or.' reductions
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.or.:lrg) gang
+  do i = 1, n
+     lrg = lrg .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.or.:lrw) worker
+  do i = 1, n
+     lrw = lrw .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.or.:lresult) gang
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.or.:lrv) vector
   do i = 1, n
-     lresult = lresult .or. (array(i) .ge. 5)
+     lrv = lrv .or. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.or.:lrc) gang worker vector
+  do i = 1, n
+     lrc = lrc .or. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -128,17 +299,46 @@ program reduction_3
      lvresult = lvresult .or. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
+  !
   ! '.eqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.eqv.:lrg) gang
+  do i = 1, n
+     lrg = lrg .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.eqv.:lresult) gang
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.eqv.:lrw) worker
   do i = 1, n
-     lresult = lresult .eqv. (array(i) .ge. 5)
+     lrw = lrw .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.eqv.:lrv) vector
+  do i = 1, n
+     lrv = lrv .eqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.eqv.:lrc) gang worker vector
+  do i = 1, n
+     lrc = lrc .eqv. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -147,17 +347,46 @@ program reduction_3
      lvresult = lvresult .eqv. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
-
-  lresult = .false.
-  lvresult = .false.
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 
+  !
   ! '.neqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.neqv.:lrg) gang
+  do i = 1, n
+     lrg = lrg .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.neqv.:lrw) worker
+  do i = 1, n
+     lrw = lrw .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.neqv.:lrv) vector
+  do i = 1, n
+     lrv = lrv .neqv. (array(i) .ge. 5)
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(lresult)
-  !$acc loop reduction(.neqv.:lresult) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.neqv.:lrc) gang worker vector
   do i = 1, n
-     lresult = lresult .neqv. (array(i) .ge. 5)
+     lrc = lrc .neqv. (array(i) .ge. 5)
   end do
   !$acc end parallel
 
@@ -166,5 +395,8 @@ program reduction_3
      lvresult = lvresult .neqv. (array(i) .ge. 5)
   end do
 
-  if (lresult .neqv. lvresult) call abort
+  if (lrg .neqv. lvresult) call abort
+  if (lrw .neqv. lvresult) call abort
+  if (lrv .neqv. lvresult) call abort
+  if (lrc .neqv. lvresult) call abort
 end program reduction_3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
index 8d4f6c1..c3bdaf6 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
@@ -5,50 +5,108 @@
 program reduction_4
   implicit none
 
-  integer, parameter    :: n = 10, gangs = 20
+  integer, parameter    :: n = 10, ng = 8, nw = 4, vl = 32
   integer               :: i
-  complex               :: vresult, result
+  real                  :: vresult, rg, rw, rv, rc
   complex, dimension (n) :: array
 
   do i = 1, n
      array(i) = i
   end do
 
-  result = 0
+  ! rg/rw/rv/rc hold the gang, worker, vector and combined results.
+  ! '+' reductions
+  ! Only REAL(array(i)) is accumulated; vresult is the reference value.
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
   vresult = 0
 
-  ! '+' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+     rg = rg + REAL(array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
+  do i = 1, n
+     rw = rw + REAL(array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+     rv = rv + REAL(array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs(gangs) copy(result)
-  !$acc loop reduction(+:result) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
   do i = 1, n
-     result = result + array(i)
+     rc = rc + REAL(array(i))
   end do
   !$acc end parallel
 
   ! Verify the results
   do i = 1, n
-     vresult = vresult + array(i)
+     vresult = vresult + REAL(array(i))
   end do
 
-  if (result .ne. vresult) call abort
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
 
-  result = 1
+  ! Same four variants, multiplying instead of adding.
+  ! '*' reductions
+  ! All accumulators (and the reference vresult) start at 1.
+
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
   vresult = 1
 
-  ! '*' reductions
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
+  do i = 1, n
+     rg = rg * REAL(array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
+  do i = 1, n
+     rw = rw * REAL(array(i))
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+     rv = rv * REAL(array(i))
+  end do
+  !$acc end parallel
 
-  !$acc parallel num_gangs (gangs) copy(result)
-  !$acc loop reduction(*:result) gang
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
   do i = 1, n
-     result = result * array(i)
+     rc = rc * REAL(array(i))
   end do
   !$acc end parallel
 
   ! Verify the results
   do i = 1, n
-     vresult = vresult * array(i)
+     vresult = vresult * REAL(array(i))
   end do
 
-  if (result .ne. vresult) call abort
+  if (rg .ne. vresult) call abort
+  if (rw .ne. vresult) call abort
+  if (rv .ne. vresult) call abort
+  if (rc .ne. vresult) call abort
 end program reduction_4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
index 1066fa7..304fe7f 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
@@ -4,9 +4,13 @@
 
 program reduction
   integer, parameter    :: n = 40, c = 10
-  integer               :: i, vsum, sum
+  integer               :: i, vsum, gs, ws, vs, cs
 
-  call redsub (sum, n, c)
+  ! One result variable per redsub_* variant so each is checked separately.
+  call redsub_gang (gs, n, c)
+  call redsub_worker (ws, n, c)
+  call redsub_vector (vs, n, c)
+  call redsub_combined (cs, n, c)
 
   vsum = 0
 
@@ -15,10 +19,14 @@ program reduction
      vsum = vsum + c
   end do
 
-  if (sum.ne.vsum) call abort ()
+  ! Every variant must reproduce the reference sum.
+  if (gs .ne. vsum) call abort ()
+  if (ws .ne. vsum) call abort ()
+  if (vs .ne. vsum) call abort ()
+  if (cs .ne. vsum) call abort ()
 end program reduction
 
-subroutine redsub(sum, n, c)
+subroutine redsub_gang(sum, n, c)
   integer :: sum, n, c
 
   sum = 0
@@ -29,4 +37,46 @@ subroutine redsub(sum, n, c)
      sum = sum + c
   end do
   !$acc end parallel
-end subroutine redsub
+end subroutine redsub_gang
+
+! Add C to SUM N times using a worker-level '+' reduction.
+subroutine redsub_worker(sum, n, c)
+  integer :: sum, n, c
+
+  sum = 0
+
+  !$acc parallel copyin (n, c) num_workers(4) vector_length (32) copy(sum)
+  !$acc loop reduction(+:sum) worker
+  do i = 1, n
+     sum = sum + c
+  end do
+  !$acc end parallel
+end subroutine redsub_worker
+
+! Add C to SUM N times using a vector-level '+' reduction.
+subroutine redsub_vector(sum, n, c)
+  integer :: sum, n, c
+
+  sum = 0
+
+  !$acc parallel copyin (n, c) vector_length(32) copy(sum)
+  !$acc loop reduction(+:sum) vector
+  do i = 1, n
+     sum = sum + c
+  end do
+  !$acc end parallel
+end subroutine redsub_vector
+
+! Add C to SUM N times using a combined gang/worker/vector '+' reduction.
+subroutine redsub_combined(sum, n, c)
+  integer :: sum, n, c
+
+  sum = 0
+
+  !$acc parallel num_gangs (8) num_workers (4) vector_length(32) copy(sum)
+  !$acc loop reduction(+:sum) gang worker vector
+  do i = 1, n
+     sum = sum + c
+  end do
+  !$acc end parallel
+end subroutine redsub_combined
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
index 2733968..990faac 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
@@ -3,28 +3,91 @@
 program reduction
   implicit none
 
-  integer, parameter    :: n = 100
-  integer               :: i, s1, s2, vs1, vs2
+  integer, parameter    :: n = 100, chunksize = 10
+  integer               :: i, gs1, gs2, ws1, ws2, vs1, vs2, cs1, cs2, hs1, hs2
+  integer               :: j, red, vred
 
-  s1 = 0
-  s2 = 0
+  gs1 = 0
+  gs2 = 0
+  ws1 = 0
+  ws2 = 0
   vs1 = 0
   vs2 = 0
+  cs1 = 0
+  cs2 = 0
+  hs1 = 0
+  hs2 = 0
 
-  !$acc parallel num_gangs (1000) copy(s1, s2)
-  !$acc loop reduction(+:s1, s2) gang
+  !$acc parallel num_gangs (1000) copy(gs1, gs2)
+  !$acc loop reduction(+:gs1, gs2) gang
   do i = 1, n
-     s1 = s1 + 1
-     s2 = s2 + 2
+     gs1 = gs1 + 1
+     gs2 = gs2 + 2
   end do
   !$acc end parallel
 
-  ! Verify the results
+  !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2)
+  !$acc loop reduction(+:ws1, ws2) worker
+  do i = 1, n
+     ws1 = ws1 + 1
+     ws2 = ws2 + 2
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length (32) copy(vs1, vs2)
+  !$acc loop reduction(+:vs1, vs2) vector
   do i = 1, n
      vs1 = vs1 + 1
      vs2 = vs2 + 2
   end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2)
+  !$acc loop reduction(+:cs1, cs2) gang worker vector
+  do i = 1, n
+     cs1 = cs1 + 1
+     cs2 = cs2 + 2
+  end do
+  !$acc end parallel
+
+  ! Verify the results on the host: hs1/hs2 come from a plain loop
+  do i = 1, n
+     hs1 = hs1 + 1
+     hs2 = hs2 + 2
+  end do
+
+  if (gs1 .ne. hs1) call abort ()
+  if (gs2 .ne. hs2) call abort ()
+
+  if (ws1 .ne. hs1) call abort ()
+  if (ws2 .ne. hs2) call abort ()
+
+  if (vs1 .ne. hs1) call abort ()
+  if (vs2 .ne. hs2) call abort ()
+
+  if (cs1 .ne. hs1) call abort ()
+  if (cs2 .ne. hs2) call abort ()
+
+  ! Nested reductions: gang loop over n/chunksize chunks, vector loop inside.
+
+  red = 0
+  vred = 0
+
+  !$acc parallel num_gangs(10) vector_length(32) copy(red)
+  !$acc loop reduction(+:red) gang
+  do i = 1, n/chunksize
+     !$acc loop reduction(+:red) vector
+     do j = 1, chunksize
+        red = red + chunksize
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 1, n/chunksize
+     do j = 1, chunksize
+        vred = vred + chunksize
+     end do
+  end do
 
-  if (s1.ne.vs1) call abort ()
-  if (s2.ne.vs2) call abort ()
+  if (red .ne. vred) call abort ()
 end program reduction

  reply	other threads:[~2015-07-17 18:14 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-17 18:26 [gomp4] OpenACC vector and worker reductions Cesar Philippidis
2015-07-17 18:26 ` Cesar Philippidis [this message]
2015-09-18  8:29   ` [gomp4] OpenACC reduction tests Thomas Schwinge
2015-09-23  8:50     ` Thomas Schwinge
2015-11-07 11:15       ` Thomas Schwinge
2015-09-18 13:40   ` Thomas Schwinge
2016-04-12 11:39     ` [PR testsuite/68242] FAIL: libgomp.oacc-c-c++-common/reduction-2.c, and other OpenACC reduction test case "oddities" (was: [gomp4] OpenACC reduction tests) Thomas Schwinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55A945E7.2050005@codesourcery.com \
    --to=cesar@codesourcery.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jakub@redhat.com \
    --cc=nathan_sidwell@mentor.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).