public inbox for fortran@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, OpenACC 2.7] struct/array reductions for Fortran
@ 2024-02-08 14:47 ` Chung-Lin Tang
  2024-03-13 18:59   ` Tobias Burnus
  2024-03-18 16:39   ` Thomas Schwinge
  0 siblings, 2 replies; 3+ messages in thread
From: Chung-Lin Tang @ 2024-02-08 14:47 UTC (permalink / raw)
  To: gcc-patches, gfortran, Tobias Burnus, Thomas Schwinge

[-- Attachment #1: Type: text/plain, Size: 2157 bytes --]

Hi Tobias, Thomas,
this patch adds support for Fortran to use arrays and struct(record) types in OpenACC reductions.

There is still some shortcomings in the current state, mainly that only explicit-shaped arrays can be used (like its C counterpart). Anything else is currently a bit more complicated in the middle-end, since the existing reduction code creates an "init-op" (literal of initial values) which can't be done when say TYPE_MAX_VALUE (TYPE_DOMAIN (array_type)) is not a tree constant. I think we'll be on the hook to solve this later, but I think the current state is okay to submit.

Tested without regressions on mainline (on top of first struct/array reduction patch[1])

Thanks,
Chung-Lin

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641669.html

2024-02-08  Chung-Lin Tang  <cltang@baylibre.com>

gcc/fortran/ChangeLog:
	* openmp.cc (oacc_reduction_defined_type_p): New function.
	(resolve_omp_clauses): Adjust OpenACC array reduction error case. Use
	oacc_reduction_defined_type_p for OpenACC.
	* trans-openmp.cc (gfc_trans_omp_array_reduction_or_udr):
	Add 'bool openacc' parameter, adjust part of function to be !openacc
	only.
	(gfc_trans_omp_reduction_list): Add 'bool openacc' parameter, pass to
	calls to gfc_trans_omp_array_reduction_or_udr.
	(gfc_trans_omp_clauses): Add 'openacc' argument to calls to
	gfc_trans_omp_reduction_list.
	(gfc_trans_omp_do): Pass 'op == EXEC_OACC_LOOP' as 'bool openacc'
	parameter in call to gfc_trans_omp_clauses.

gcc/ChangeLog:
	* omp-low.cc (omp_reduction_init_op): Add checking if reduced array
	has constant bounds.
	(lower_oacc_reductions): Add handling of error_mark_node.

gcc/testsuite/ChangeLog:
	* gfortran.dg/goacc/array-reduction.f90: Adjust testcase.
	* gfortran.dg/goacc/reduction.f95: Likewise.

libgomp/ChangeLog:
	* libgomp/testsuite/libgomp.oacc-fortran/reduction-9.f90: New testcase.
	* libgomp/testsuite/libgomp.oacc-fortran/reduction-10.f90: Likewise.
	* libgomp/testsuite/libgomp.oacc-fortran/reduction-11.f90: Likewise.
	* libgomp/testsuite/libgomp.oacc-fortran/reduction-12.f90: Likewise.
	* libgomp/testsuite/libgomp.oacc-fortran/reduction-13.f90: Likewise.

[-- Attachment #2: openacc-2.7b-struct-array-reduction-fortran.patch --]
[-- Type: text/plain, Size: 57199 bytes --]

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 0af80d54fad..4bba9e666d6 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -7047,6 +7047,72 @@ oacc_is_loop (gfc_code *code)
 	 || code->op == EXEC_OACC_LOOP;
 }
 
+static bool
+oacc_reduction_defined_type_p (enum gfc_omp_reduction_op rop, gfc_typespec *ts)
+{
+  if (rop == OMP_REDUCTION_USER || rop == OMP_REDUCTION_NONE)
+    return false;
+
+  if (ts->type == BT_INTEGER)
+    switch (rop)
+      {
+      case OMP_REDUCTION_AND:
+      case OMP_REDUCTION_OR:
+      case OMP_REDUCTION_EQV:
+      case OMP_REDUCTION_NEQV:
+	return false;
+      default:
+	return true;
+      }
+
+  if (ts->type == BT_LOGICAL)
+    switch (rop)
+      {
+      case OMP_REDUCTION_AND:
+      case OMP_REDUCTION_OR:
+      case OMP_REDUCTION_EQV:
+      case OMP_REDUCTION_NEQV:
+	return true;
+      default:
+	return false;
+      }
+
+  if (ts->type == BT_REAL || ts->type == BT_COMPLEX)
+    switch (rop)
+      {
+      case OMP_REDUCTION_PLUS:
+      case OMP_REDUCTION_TIMES:
+      case OMP_REDUCTION_MINUS:
+	return true;
+
+      case OMP_REDUCTION_AND:
+      case OMP_REDUCTION_OR:
+      case OMP_REDUCTION_EQV:
+      case OMP_REDUCTION_NEQV:
+	return false;
+
+      case OMP_REDUCTION_MAX:
+      case OMP_REDUCTION_MIN:
+	return ts->type != BT_COMPLEX;
+      case OMP_REDUCTION_IAND:
+      case OMP_REDUCTION_IOR:
+      case OMP_REDUCTION_IEOR:
+	return false;
+      default:
+	gcc_unreachable ();
+      }
+
+  if (ts->type == BT_DERIVED)
+    {
+      for (gfc_component *p = ts->u.derived->components; p; p = p->next)
+	if (!oacc_reduction_defined_type_p (rop, &p->ts))
+	  return false;
+      return true;
+    }
+
+  return false;
+}
+
 static void
 resolve_scalar_int_expr (gfc_expr *expr, const char *clause)
 {
@@ -8137,13 +8203,15 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 	  else
 	    n->sym->mark = 1;
 
-	  /* OpenACC does not support reductions on arrays.  */
-	  if (n->sym->as)
+	  /* OpenACC current only supports array reductions on explicit-shape
+	     arrays.  */
+	  if ((n->sym->as && n->sym->as->type != AS_EXPLICIT)
+	      || n->sym->attr.codimension)
 	    gfc_error ("Array %qs is not permitted in reduction at %L",
 		       n->sym->name, &n->where);
 	}
     }
-  
+
   for (n = omp_clauses->lists[OMP_LIST_TO]; n; n = n->next)
     n->sym->mark = 0;
   for (n = omp_clauses->lists[OMP_LIST_FROM]; n; n = n->next)
@@ -8797,39 +8865,46 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 		  case OMP_LIST_IN_REDUCTION:
 		  case OMP_LIST_TASK_REDUCTION:
 		  case OMP_LIST_REDUCTION_INSCAN:
-		    switch (n->u.reduction_op)
+		    if (openacc)
 		      {
-		      case OMP_REDUCTION_PLUS:
-		      case OMP_REDUCTION_TIMES:
-		      case OMP_REDUCTION_MINUS:
-			if (!gfc_numeric_ts (&n->sym->ts))
+			if (!oacc_reduction_defined_type_p (n->u.reduction_op,
+							    &n->sym->ts))
 			  bad = true;
-			break;
-		      case OMP_REDUCTION_AND:
-		      case OMP_REDUCTION_OR:
-		      case OMP_REDUCTION_EQV:
-		      case OMP_REDUCTION_NEQV:
-			if (n->sym->ts.type != BT_LOGICAL)
-			  bad = true;
-			break;
-		      case OMP_REDUCTION_MAX:
-		      case OMP_REDUCTION_MIN:
-			if (n->sym->ts.type != BT_INTEGER
-			    && n->sym->ts.type != BT_REAL)
-			  bad = true;
-			break;
-		      case OMP_REDUCTION_IAND:
-		      case OMP_REDUCTION_IOR:
-		      case OMP_REDUCTION_IEOR:
-			if (n->sym->ts.type != BT_INTEGER)
-			  bad = true;
-			break;
-		      case OMP_REDUCTION_USER:
-			bad = true;
-			break;
-		      default:
-			break;
 		      }
+		    else
+		      switch (n->u.reduction_op)
+			{
+			case OMP_REDUCTION_PLUS:
+			case OMP_REDUCTION_TIMES:
+			case OMP_REDUCTION_MINUS:
+			  if (!gfc_numeric_ts (&n->sym->ts))
+			    bad = true;
+			  break;
+			case OMP_REDUCTION_AND:
+			case OMP_REDUCTION_OR:
+			case OMP_REDUCTION_EQV:
+			case OMP_REDUCTION_NEQV:
+			  if (n->sym->ts.type != BT_LOGICAL)
+			    bad = true;
+			  break;
+			case OMP_REDUCTION_MAX:
+			case OMP_REDUCTION_MIN:
+			  if (n->sym->ts.type != BT_INTEGER
+			      && n->sym->ts.type != BT_REAL)
+			    bad = true;
+			  break;
+			case OMP_REDUCTION_IAND:
+			case OMP_REDUCTION_IOR:
+			case OMP_REDUCTION_IEOR:
+			  if (n->sym->ts.type != BT_INTEGER)
+			    bad = true;
+			  break;
+			case OMP_REDUCTION_USER:
+			  bad = true;
+			  break;
+			default:
+			  break;
+			}
 		    if (!bad)
 		      n->u2.udr = NULL;
 		    else
diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc
index 9599521b97c..29ad880a30c 100644
--- a/gcc/fortran/trans-openmp.cc
+++ b/gcc/fortran/trans-openmp.cc
@@ -1996,7 +1996,8 @@ omp_udr_find_orig (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
 }
 
 static void
-gfc_trans_omp_array_reduction_or_udr (tree c, gfc_omp_namelist *n, locus where)
+gfc_trans_omp_array_reduction_or_udr (tree c, gfc_omp_namelist *n, locus where,
+				      bool openacc)
 {
   gfc_symbol *sym = n->sym;
   gfc_symtree *root1 = NULL, *root2 = NULL, *root3 = NULL, *root4 = NULL;
@@ -2251,21 +2252,24 @@ gfc_trans_omp_array_reduction_or_udr (tree c, gfc_omp_namelist *n, locus where)
     poplevel (0, 0);
   OMP_CLAUSE_REDUCTION_INIT (c) = stmt;
 
-  /* Create the merge statement list.  */
-  pushlevel ();
-  if (e4)
-    stmt = gfc_trans_assignment (e3, e4, false, true);
-  else
-    stmt = gfc_trans_call (n->u2.udr->combiner, false,
-			   NULL_TREE, NULL_TREE, false);
-  if (TREE_CODE (stmt) != BIND_EXPR)
-    stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
-  else
-    poplevel (0, 0);
-  OMP_CLAUSE_REDUCTION_MERGE (c) = stmt;
+  if (!openacc)
+    {
+      /* Create the merge statement list.  */
+      pushlevel ();
+      if (e4)
+	stmt = gfc_trans_assignment (e3, e4, false, true);
+      else
+	stmt = gfc_trans_call (n->u2.udr->combiner, false,
+			       NULL_TREE, NULL_TREE, false);
+      if (TREE_CODE (stmt) != BIND_EXPR)
+	stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+      else
+	poplevel (0, 0);
+      OMP_CLAUSE_REDUCTION_MERGE (c) = stmt;
 
-  /* And stick the placeholder VAR_DECL into the clause as well.  */
-  OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = outer_decl;
+      /* And stick the placeholder VAR_DECL into the clause as well.  */
+      OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = outer_decl;
+    }
 
   gfc_current_locus = old_loc;
 
@@ -2296,7 +2300,7 @@ gfc_trans_omp_array_reduction_or_udr (tree c, gfc_omp_namelist *n, locus where)
 
 static tree
 gfc_trans_omp_reduction_list (int kind, gfc_omp_namelist *namelist, tree list,
-			      locus where, bool mark_addressable)
+			      locus where, bool mark_addressable, bool openacc)
 {
   omp_clause_code clause = OMP_CLAUSE_REDUCTION;
   switch (kind)
@@ -2376,7 +2380,8 @@ gfc_trans_omp_reduction_list (int kind, gfc_omp_namelist *namelist, tree list,
 	    if (namelist->sym->attr.dimension
 		|| namelist->u.reduction_op == OMP_REDUCTION_USER
 		|| namelist->sym->attr.allocatable)
-	      gfc_trans_omp_array_reduction_or_udr (node, namelist, where);
+	      gfc_trans_omp_array_reduction_or_udr (node, namelist, where,
+						    openacc);
 	    list = gfc_trans_add_clause (node, list);
 	  }
       }
@@ -2715,7 +2720,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
 	  /* An OpenACC async clause indicates the need to set reduction
 	     arguments addressable, to allow asynchronous copy-out.  */
 	  omp_clauses = gfc_trans_omp_reduction_list (list, n, omp_clauses,
-						      where, clauses->async);
+						      where, clauses->async,
+						      openacc);
 	  break;
 	case OMP_LIST_PRIVATE:
 	  clause_code = OMP_CLAUSE_PRIVATE;
@@ -5757,7 +5763,8 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
      on the simd construct and DO's clauses are translated elsewhere.  */
   do_clauses->sched_simd = false;
 
-  omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, code->loc);
+  omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, code->loc, false,
+				       op == EXEC_OACC_LOOP);
 
   for (i = 0; i < collapse; i++)
     {
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index f3a056df8f2..4bbf30627c3 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -4426,9 +4426,16 @@ omp_reduction_init_op (location_t loc, enum tree_code op, tree type)
 {
   if (TREE_CODE (type) == ARRAY_TYPE)
     {
+      tree min_tree = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
+      tree max_tree = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
+      if (!TREE_CONSTANT (min_tree) || !TREE_CONSTANT (max_tree))
+	{
+	  error_at (loc, "array in reduction must be of constant size");
+	  return error_mark_node;
+	}
       vec<constructor_elt, va_gc> *v = NULL;
-      HOST_WIDE_INT min = tree_to_shwi (TYPE_MIN_VALUE (TYPE_DOMAIN (type)));
-      HOST_WIDE_INT max = tree_to_shwi (TYPE_MAX_VALUE (TYPE_DOMAIN (type)));
+      HOST_WIDE_INT min = tree_to_shwi (min_tree);
+      HOST_WIDE_INT max = tree_to_shwi (max_tree);
       tree t = omp_reduction_init_op (loc, op, TREE_TYPE (type));
       for (HOST_WIDE_INT i = min; i <= max; i++)
 	CONSTRUCTOR_APPEND_ELT (v, size_int (i), t);
@@ -7559,6 +7566,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 	  has_outer_reduction:;
 	  }
 
+	if (incoming == error_mark_node)
+	  continue;
+
 	if (!ref_to_res)
 	  ref_to_res = integer_zero_node;
 
diff --git a/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90 b/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90
index d71c400a5bf..f9a3b43e7f3 100644
--- a/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90
+++ b/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90
@@ -1,74 +1,80 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-gimple" }
+
 program test
   implicit none
   integer a(10), i
 
   a(:) = 0
-  
+
   ! Array reductions.
-  
-  !$acc parallel reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" }
+
+  !$acc parallel reduction (+:a)
   do i = 1, 10
      a = a + 1
   end do
   !$acc end parallel
 
   !$acc parallel
-  !$acc loop reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" }
+  !$acc loop reduction (+:a)
   do i = 1, 10
      a = a + 1
   end do
   !$acc end parallel
 
   !$acc kernels
-  !$acc loop reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" }
+  !$acc loop reduction (+:a)
   do i = 1, 10
      a = a + 1
   end do
   !$acc end kernels
 
   ! Subarray reductions.
-  
-  !$acc parallel reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" }
+
+  !$acc parallel reduction (+:a(1:5))
   do i = 1, 10
      a = a + 1
   end do
   !$acc end parallel
 
   !$acc parallel
-  !$acc loop reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  !$acc loop reduction (+:a(1:5))
   do i = 1, 10
      a = a + 1
   end do
   !$acc end parallel
 
   !$acc kernels
-  !$acc loop reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  !$acc loop reduction (+:a(1:5))
   do i = 1, 10
      a = a + 1
   end do
   !$acc end kernels
 
   ! Reductions on array elements.
-  
-  !$acc parallel reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" }
+
+  !$acc parallel reduction (+:a(1))
   do i = 1, 10
      a(1) = a(1) + 1
   end do
   !$acc end parallel
 
   !$acc parallel
-  !$acc loop reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  !$acc loop reduction (+:a(1))
   do i = 1, 10
      a(1) = a(1) + 1
   end do
   !$acc end parallel
 
   !$acc kernels
-  !$acc loop reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  !$acc loop reduction (+:a(1))
   do i = 1, 10
      a(1) = a(1) + 1
   end do
   !$acc end kernels
-  
+
   print *, a
 end program test
+
+! { dg-final { scan-tree-dump-times "(?n)#pragma acc loop private\\(i\\) reduction\\(\\+:a\\)" 6 "gimple" } }
+! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_parallel reduction\\(\\+:a\\) map\\(tofrom:a \\\[len: \[0-9\]+\\\]\\)" 3 "gimple" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/reduction.f95 b/gcc/testsuite/gfortran.dg/goacc/reduction.f95
index a13574b150c..c425f00d87f 100644
--- a/gcc/testsuite/gfortran.dg/goacc/reduction.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/reduction.f95
@@ -72,9 +72,9 @@ common /blk/ i1
 !$acc end parallel
 !$acc parallel reduction (-:a1)		! { dg-error "OMP DECLARE REDUCTION - not found for type CHARACTER" }
 !$acc end parallel
-!$acc parallel reduction (+:t1)		! { dg-error "OMP DECLARE REDUCTION \\+ not found for type TYPE" }
+!$acc parallel reduction (+:t1)
 !$acc end parallel
-!$acc parallel reduction (*:ta1)	! { dg-error "OMP DECLARE REDUCTION \\* not found for type TYPE" }
+!$acc parallel reduction (*:ta1)
 !$acc end parallel
 !$acc parallel reduction (.and.:i3)	! { dg-error "OMP DECLARE REDUCTION \\.and\\. not found for type INTEGER" }
 !$acc end parallel
@@ -108,9 +108,9 @@ common /blk/ i1
 !$acc end parallel
 !$acc parallel reduction (max:a1)	! { dg-error "OMP DECLARE REDUCTION max not found for type CHARACTER" }
 !$acc end parallel
-!$acc parallel reduction (min:t1)	! { dg-error "OMP DECLARE REDUCTION min not found for type TYPE" }
+!$acc parallel reduction (min:t1)
 !$acc end parallel
-!$acc parallel reduction (max:ta1)	! { dg-error "OMP DECLARE REDUCTION max not found for type TYPE" }
+!$acc parallel reduction (max:ta1)
 !$acc end parallel
 !$acc parallel reduction (iand:r1)	! { dg-error "OMP DECLARE REDUCTION iand not found for type REAL" }
 !$acc end parallel
@@ -130,32 +130,12 @@ common /blk/ i1
 !$acc end parallel
 !$acc parallel reduction (ior:a1)	! { dg-error "OMP DECLARE REDUCTION ior not found for type CHARACTER" }
 !$acc end parallel
-!$acc parallel reduction (ieor:t1)	! { dg-error "OMP DECLARE REDUCTION ieor not found for type TYPE" }
+!$acc parallel reduction (ieor:t1)
 !$acc end parallel
-!$acc parallel reduction (iand:ta1)	! { dg-error "OMP DECLARE REDUCTION iand not found for type TYPE" }
+!$acc parallel reduction (iand:ta1)
 !$acc end parallel
 
 end subroutine
 
-! { dg-error "Array 'ia2' is not permitted in reduction" "" { target "*-*-*" } 27 }
-! { dg-error "Array 'ra1' is not permitted in reduction" "" { target "*-*-*" } 29 }
-! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 31 }
-! { dg-error "Array 'da1' is not permitted in reduction" "" { target "*-*-*" } 33 }
-! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 35 }
 ! { dg-error "Array 'aa1' is not permitted in reduction" "" { target "*-*-*" } 65 }
 ! { dg-error "Array 'ia1' is not permitted in reduction" "" { target "*-*-*" } 67 }
-! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 71 }
-! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 77 }
-! { dg-error "Array 'ia2' is not permitted in reduction" "" { target "*-*-*" } 81 }
-! { dg-error "Array 'ra1' is not permitted in reduction" "" { target "*-*-*" } 85 }
-! { dg-error "Array 'da1' is not permitted in reduction" "" { target "*-*-*" } 89 }
-! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 93 }
-! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 99 }
-! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 103 }
-! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 107 }
-! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 113 }
-! { dg-error "Array 'ra1' is not permitted in reduction" "" { target "*-*-*" } 117 }
-! { dg-error "Array 'da1' is not permitted in reduction" "" { target "*-*-*" } 121 }
-! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 125 }
-! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 129 }
-! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 135 }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-10.f90
new file mode 100644
index 00000000000..506dfaf29f6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-10.f90
@@ -0,0 +1,483 @@
+! { dg-do run }
+
+! real array reductions
+
+program reduction_10
+  implicit none
+
+  integer, parameter     :: n = 10, ng = 8, nw = 4, vl = 32
+  integer                :: i, j
+  real, dimension (n) :: vresult, rg, rw, rv, rc
+  logical, dimension (n) :: lrg, lrw, lrv, lrc, lvresult
+  real, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  !
+  ! '+' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) + array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 1
+  if (count (rw .ne. vresult) .ne. 0) STOP 2
+  if (count (rv .ne. vresult) .ne. 0) STOP 3
+  if (count (rc .ne. vresult) .ne. 0) STOP 4
+
+  !
+  ! '*' reductions
+  !
+
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
+  vresult = 1
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) * array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 5
+  if (count (rw .ne. vresult) .ne. 0) STOP 6
+  if (count (rv .ne. vresult) .ne. 0) STOP 7
+  if (count (rc .ne. vresult) .ne. 0) STOP 8
+
+  !
+  ! 'max' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(max:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = max (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(max:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = max (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(max:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = max (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(max:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = max (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = max (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 9
+  if (count (rw .ne. vresult) .ne. 0) STOP 10
+  if (count (rv .ne. vresult) .ne. 0) STOP 11
+  if (count (rc .ne. vresult) .ne. 0) STOP 12
+
+  !
+  ! 'min' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(min:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = min (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(min:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = min (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(min:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = min (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(min:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = min (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = min (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 13
+  if (count (rw .ne. vresult) .ne. 0) STOP 14
+  if (count (rv .ne. vresult) .ne. 0) STOP 15
+  if (count (rc .ne. vresult) .ne. 0) STOP 16
+
+  !
+  ! '.and.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.and.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.and.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.and.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.and.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 17
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 18
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 19
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 20
+
+  !
+  ! '.or.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.or.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.or.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.or.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.or.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 21
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 22
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 23
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 24
+
+  !
+  ! '.eqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.eqv.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.eqv.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.eqv.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.eqv.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 25
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 26
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 27
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 28
+
+  !
+  ! '.neqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.neqv.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.neqv.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.neqv.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.neqv.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 29
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 30
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 31
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 32
+
+end program reduction_10
+
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-11.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-11.f90
new file mode 100644
index 00000000000..4bec1c797cd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-11.f90
@@ -0,0 +1,483 @@
+! { dg-do run }
+
+! double precision array reductions
+
+program reduction_11
+  implicit none
+
+  integer, parameter     :: n = 10, ng = 8, nw = 4, vl = 32
+  integer                :: i, j
+  double precision, dimension (n) :: vresult, rg, rw, rv, rc
+  logical, dimension (n) :: lrg, lrw, lrv, lrc, lvresult
+  double precision, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  !
+  ! '+' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) + array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 1
+  if (count (rw .ne. vresult) .ne. 0) STOP 2
+  if (count (rv .ne. vresult) .ne. 0) STOP 3
+  if (count (rc .ne. vresult) .ne. 0) STOP 4
+
+  !
+  ! '*' reductions
+  !
+
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
+  vresult = 1
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) * array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 5
+  if (count (rw .ne. vresult) .ne. 0) STOP 6
+  if (count (rv .ne. vresult) .ne. 0) STOP 7
+  if (count (rc .ne. vresult) .ne. 0) STOP 8
+
+  !
+  ! 'max' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(max:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = max (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(max:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = max (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(max:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = max (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(max:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = max (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = max (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 9
+  if (count (rw .ne. vresult) .ne. 0) STOP 10
+  if (count (rv .ne. vresult) .ne. 0) STOP 11
+  if (count (rc .ne. vresult) .ne. 0) STOP 12
+
+  !
+  ! 'min' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(min:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = min (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(min:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = min (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(min:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = min (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(min:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = min (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = min (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 13
+  if (count (rw .ne. vresult) .ne. 0) STOP 14
+  if (count (rv .ne. vresult) .ne. 0) STOP 15
+  if (count (rc .ne. vresult) .ne. 0) STOP 16
+
+  !
+  ! '.and.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.and.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.and.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.and.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.and.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 17
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 18
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 19
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 20
+
+  !
+  ! '.or.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.or.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.or.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.or.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.or.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 21
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 22
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 23
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 24
+
+  !
+  ! '.eqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.eqv.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.eqv.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.eqv.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.eqv.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 25
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 26
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 27
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 28
+
+  !
+  ! '.neqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.neqv.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.neqv.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.neqv.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.neqv.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 29
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 30
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 31
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 32
+
+end program reduction_11
+
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-12.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-12.f90
new file mode 100644
index 00000000000..b609c7a294e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-12.f90
@@ -0,0 +1,135 @@
+! { dg-do run }
+
+! complex array reductions
+
+program reduction_12
+  implicit none
+
+  integer, parameter     :: n = 10, ng = 8, nw = 4, vl = 32
+  integer                :: i, j
+  complex, dimension (n) :: vresult, rg, rw, rv, rc
+  logical, dimension (n) :: lrg, lrw, lrv, lrc, lvresult
+  complex, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  !
+  ! '+' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) + array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 1
+  if (count (rw .ne. vresult) .ne. 0) STOP 2
+  if (count (rv .ne. vresult) .ne. 0) STOP 3
+  if (count (rc .ne. vresult) .ne. 0) STOP 4
+
+  !
+  ! '*' reductions
+  !
+
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
+  vresult = 1
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) * array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 5
+  if (count (rw .ne. vresult) .ne. 0) STOP 6
+  if (count (rv .ne. vresult) .ne. 0) STOP 7
+  if (count (rc .ne. vresult) .ne. 0) STOP 8
+
+end program reduction_12
+
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-13.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-13.f90
new file mode 100644
index 00000000000..088c5cd3b04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-13.f90
@@ -0,0 +1,66 @@
+! { dg-do run }
+
+! record type reductions
+
+program reduction_13
+  implicit none
+
+  type t1
+     integer :: i
+     real :: r
+  end type t1
+
+  type t2
+     real :: r
+     integer :: i
+     double precision :: d
+  end type t2
+
+  integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
+  integer :: i
+  type(t1) :: v1, a1
+  type (t2) :: v2, a2
+
+  v1%i = 0
+  v1%r = 0
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(v1)
+  !$acc loop reduction (+:v1)
+  do i = 1, n
+     v1%i = v1%i + 1
+     v1%r = v1%r + 2
+  end do
+  !$acc end parallel
+  a1%i = 0
+  a1%r = 0
+  do i = 1, n
+     a1%i = a1%i + 1
+     a1%r = a1%r + 2
+  end do
+  if (v1%i .ne. a1%i) STOP 1
+  if (v1%r .ne. a1%r) STOP 2
+
+  v2%i = 1
+  v2%r = 1
+  v2%d = 1
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(v2)
+  !$acc loop reduction (*:v2)
+  do i = 1, n
+     v2%i = v2%i * 2
+     v2%r = v2%r * 1.1
+     v2%d = v2%d * 1.3
+  end do
+  !$acc end parallel
+  a2%i = 1
+  a2%r = 1
+  a2%d = 1
+  do i = 1, n
+     a2%i = a2%i * 2
+     a2%r = a2%r * 1.1
+     a2%d = a2%d * 1.3
+  end do
+
+  if (v2%i .ne. a2%i) STOP 3
+  if (v2%r .ne. a2%r) STOP 4
+  if (v2%d .ne. a2%d) STOP 5
+
+end program reduction_13
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-9.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-9.f90
new file mode 100644
index 00000000000..43ab155aa73
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-9.f90
@@ -0,0 +1,657 @@
+! { dg-do run }
+
+! integer array reductions
+
+program reduction_9
+  implicit none
+
+  integer, parameter     :: n = 10, ng = 8, nw = 4, vl = 32
+  integer                :: i, j
+  integer, dimension (n) :: vresult, rg, rw, rv, rc
+  logical, dimension (n) :: lrg, lrw, lrv, lrc, lvresult
+  integer, dimension (n) :: array
+
+  do i = 1, n
+     array(i) = i
+  end do
+
+  !
+  ! '+' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(+:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(+:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(+:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(+:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) + array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) + array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 1
+  if (count (rw .ne. vresult) .ne. 0) STOP 2
+  if (count (rv .ne. vresult) .ne. 0) STOP 3
+  if (count (rc .ne. vresult) .ne. 0) STOP 4
+
+  !
+  ! '*' reductions
+  !
+
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
+  vresult = 1
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(*:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = rg(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(*:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = rw(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(*:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = rv(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(*:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = rc(j) * array(i)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = vresult(j) * array(i)
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 5
+  if (count (rw .ne. vresult) .ne. 0) STOP 6
+  if (count (rv .ne. vresult) .ne. 0) STOP 7
+  if (count (rc .ne. vresult) .ne. 0) STOP 8
+
+  !
+  ! 'max' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(max:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = max (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(max:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = max (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(max:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = max (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(max:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = max (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = max (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 9
+  if (count (rw .ne. vresult) .ne. 0) STOP 10
+  if (count (rv .ne. vresult) .ne. 0) STOP 11
+  if (count (rc .ne. vresult) .ne. 0) STOP 12
+
+  !
+  ! 'min' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(min:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = min (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(min:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = min (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(min:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = min (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(min:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = min (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = min (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 13
+  if (count (rw .ne. vresult) .ne. 0) STOP 14
+  if (count (rv .ne. vresult) .ne. 0) STOP 15
+  if (count (rc .ne. vresult) .ne. 0) STOP 16
+
+  !
+  ! 'iand' reductions
+  !
+
+  rg = 1
+  rw = 1
+  rv = 1
+  rc = 1
+  vresult = 1
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(iand:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = iand (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(iand:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = iand (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(iand:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = iand (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(iand:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = iand (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = iand (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 17
+  if (count (rw .ne. vresult) .ne. 0) STOP 18
+  if (count (rv .ne. vresult) .ne. 0) STOP 19
+  if (count (rc .ne. vresult) .ne. 0) STOP 20
+
+  !
+  ! 'ior' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(ior:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = ior (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(ior:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = ior (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(ior:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = ior (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(ior:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = ior (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = ior (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 21
+  if (count (rw .ne. vresult) .ne. 0) STOP 22
+  if (count (rv .ne. vresult) .ne. 0) STOP 23
+  if (count (rc .ne. vresult) .ne. 0) STOP 24
+
+  !
+  ! 'ieor' reductions
+  !
+
+  rg = 0
+  rw = 0
+  rv = 0
+  rc = 0
+  vresult = 0
+
+  !$acc parallel num_gangs(ng) copy(rg)
+  !$acc loop reduction(ieor:rg) gang
+  do i = 1, n
+    do j = 1, n
+      rg(j) = ieor (rg(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(rw)
+  !$acc loop reduction(ieor:rw) worker
+  do i = 1, n
+    do j = 1, n
+      rw(j) = ieor (rw(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(ieor:rv) vector
+  do i = 1, n
+    do j = 1, n
+      rv(j) = ieor (rv(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+  !$acc loop reduction(ieor:rc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      rc(j) = ieor (rc(j), array(i))
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      vresult(j) = ieor (vresult(j), array(i))
+    end do
+  end do
+
+  if (count (rg .ne. vresult) .ne. 0) STOP 25
+  if (count (rw .ne. vresult) .ne. 0) STOP 26
+  if (count (rv .ne. vresult) .ne. 0) STOP 27
+  if (count (rc .ne. vresult) .ne. 0) STOP 28
+
+  !
+  ! '.and.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.and.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.and.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.and.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.and.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .and. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 29
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 30
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 31
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 32
+
+  !
+  ! '.or.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.or.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.or.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.or.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.or.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .or. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 33
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 34
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 35
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 36
+
+  !
+  ! '.eqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.eqv.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.eqv.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.eqv.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.eqv.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .eqv. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 37
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 38
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 39
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 40
+
+  !
+  ! '.neqv.' reductions
+  !
+
+  lrg = .true.
+  lrw = .true.
+  lrv = .true.
+  lrc = .true.
+  lvresult = .true.
+
+  !$acc parallel num_gangs(ng) copy(lrg)
+  !$acc loop reduction(.neqv.:lrg) gang
+  do i = 1, n
+    do j = 1, n
+      lrg(j) = lrg(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_workers(nw) copy(lrw)
+  !$acc loop reduction(.neqv.:lrw) worker
+  do i = 1, n
+    do j = 1, n
+      lrw(j) = lrw(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel vector_length(vl) copy(lrv)
+  !$acc loop reduction(.neqv.:lrv) vector
+  do i = 1, n
+    do j = 1, n
+      lrv(j) = lrv(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+  !$acc loop reduction(.neqv.:lrc) gang worker vector
+  do i = 1, n
+    do j = 1, n
+      lrc(j) = lrc(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+  !$acc end parallel
+
+  ! Verify the results
+  do i = 1, n
+    do j = 1, n
+      lvresult(j) = lvresult(j) .neqv. (array(i) .ge. 5)
+    end do
+  end do
+
+  if (count (lrg .neqv. lvresult) .ne. 0) STOP 41
+  if (count (lrw .neqv. lvresult) .ne. 0) STOP 42
+  if (count (lrv .neqv. lvresult) .ne. 0) STOP 43
+  if (count (lrc .neqv. lvresult) .ne. 0) STOP 44
+
+end program reduction_9
+

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, OpenACC 2.7] struct/array reductions for Fortran
  2024-02-08 14:47 ` [PATCH, OpenACC 2.7] struct/array reductions for Fortran Chung-Lin Tang
@ 2024-03-13 18:59   ` Tobias Burnus
  2024-03-18 16:39   ` Thomas Schwinge
  1 sibling, 0 replies; 3+ messages in thread
From: Tobias Burnus @ 2024-03-13 18:59 UTC (permalink / raw)
  To: Chung-Lin Tang, gcc-patches, gfortran, Thomas Schwinge

Hi Chung-Lin, hi Thomas, hello world,

some thoughts glancing at the patch.

Chung-Lin Tang wrote:
> There is still some shortcomings in the current state, mainly that only explicit-shaped arrays can be used (like its C counterpart). Anything else is currently a bit more complicated in the middle-end, since the existing reduction code creates an "init-op" (literal of initial values) which can't be done when say TYPE_MAX_VALUE (TYPE_DOMAIN (array_type)) is not a tree constant. I think we'll be on the hook to solve this later, but I think the current state is okay to submit.

I think having some initial support is fine, but it needs an 
understandable and somewhat complete error diagnostic and testcases. 
More to this below.

> +      if (!TREE_CONSTANT (min_tree) || !TREE_CONSTANT (max_tree))
> +	{
> +	  error_at (loc, "array in reduction must be of constant size");
> +	  return error_mark_node;
> +	}
Shouldn't this use a sorry_at instead?

> +	  /* OpenACC current only supports array reductions on explicit-shape
> +	     arrays.  */
> +	  if ((n->sym->as && n->sym->as->type != AS_EXPLICIT)
> +	      || n->sym->attr.codimension)
>   	    gfc_error ("Array %qs is not permitted in reduction at %L",
>   		       n->sym->name, &n->where);
[Coarray excursion. I am in favor of allowing it for the reasons above, 
but it could be also rejected but I would prefer to have a proper error 
message in that case.]

While coarrays are unspecified, I do not see a reason why a corray 
shouldn't be permitted here – as long as it is not coindexed. At the 
end, it is just a normal array with some additional properties, which 
make it possible to remotely access it.

Note: For coarray scalars, we have 'sym->as', thus the check should be 
'(n->sym->as && n->sym->as->rank)' to permit scalar coarrays.

* * *

Coarray excursion: A coarray variables exists in multiple processes 
("images", e.g. MPI processes). If 'caf' and 'caf2' are coarrays, then 
'caf = 5' and 'i = caf2' refer to the local variable.

On the other hand, 'caf[n] = 5' or 'i = caf[3,m]' refers to the 'caf' 
variable on image 'n' or [3,m]', respectively, which implies in general 
some function call to read or set the remote data, unless the memory is 
directly accessible (→ e.g. some offset calculation) and the compiler 
already knows how to handle this.

While a coarrary might be allocated in some special memory, as long as 
one uses the local version (i.e. not coindexed / without the image index 
in brackets).

Assume for the example above, e.g., integer :: caf[*], caf2[3:6, 7:*].

* * *

Thus, in terms of OpenACC or OpenMP, there is no reason to fret a 
coarray as long as it is not coindexed and as long as OpenMP/OpenACC 
does not interfere with the memory allocation – either directly ('!$omp 
allocators') or indirectly by placing it into special memory (pinned, 
pseudo-unified-shared memory → OG13's -foffload-memory=pinned/unified).

In the meanwhile, OpenMP actually explicitly allows coarrays with few 
exceptions while OpenACC talks about unspecified behavior.

* * *

Back to generic comments:

If I look at the existing code, I see at gfc_match_omp_clause_reduction:

>  if (gfc_match_omp_variable_list (" :", &c->lists[list_idx], false, NULL,
>                                   &head, openacc, allow_derived) != 
> MATCH_YES)

If 'openacc' is true, array sections are permitted - but the code added 
(see quote above) does not handle n->expr at all and only n->sym.

I think there needs to be at least a "gfc_error ("Sorry, subarrays/array 
sections not yet handled" [subarray is the OpenACC wording, 'array 
section' is the Fortran one, which might be clearer.

But you could consider to handle at least array elements, i.e. 
n->expr->rank == 0.

Additionally, I think the current error message is completely unhelpful 
given that some arrays are supported but most are not.

I think there should be also some testcases for the not-yet-supported 
case. I think the following will trigger the omp-low.cc 'sorry_at' (or 
currently 'error' - but I think it should be a sorry):

subroutine foo(n)

integer :: n, A(n)

... reduction(+:A)

And most others will trigger in openmp.cc; for those, you should have an 
allocatable/pointer and assumed-shape arrays for the diagnostic testcase 
as well.

* * *

I have not really experimented with the code, but does it handle 
multi-dimensional constant arrays like 'integer :: a(3:6,10,-1:1)' ? — I 
bet it does, at least after handling my example [2] for the C patch [1].

Thanks,

Tobias

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641669.html

[2] https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647704.html


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, OpenACC 2.7] struct/array reductions for Fortran
  2024-02-08 14:47 ` [PATCH, OpenACC 2.7] struct/array reductions for Fortran Chung-Lin Tang
  2024-03-13 18:59   ` Tobias Burnus
@ 2024-03-18 16:39   ` Thomas Schwinge
  1 sibling, 0 replies; 3+ messages in thread
From: Thomas Schwinge @ 2024-03-18 16:39 UTC (permalink / raw)
  To: Chung-Lin Tang; +Cc: gcc-patches, fortran, Tobias Burnus

Hi Chung-Lin!

Thanks for your work here, which I'm beginning to look into (prerequisite
"[PATCH, OpenACC 2.7] Implement reductions for arrays and structs",
first, of course); it'll take me some time.


In non-offloading testing, I noticed for x86_64-pc-linux-gnu '-m32':

    +PASS: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O0  (test for excess errors)
    +PASS: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O0  execution test
    +PASS: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O1  (test for excess errors)
    +FAIL: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O1  execution test
    +PASS: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O2  (test for excess errors)
    +FAIL: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O2  execution test
    +PASS: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
    +FAIL: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
    +PASS: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O3 -g  (test for excess errors)
    +FAIL: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -O3 -g  execution test
    +PASS: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -Os  (test for excess errors)
    +FAIL: libgomp.oacc-fortran/reduction-13.f90 -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -foffload=disable  -Os  execution test

With optimizations enabled, it runs into 'STOP 4'.

Per '-Wextra':

    [...]/libgomp.oacc-fortran/reduction-13.f90:40:6: Warning: Inequality comparison for REAL(4) at (1) [-Wcompare-reals]
    [...]/libgomp.oacc-fortran/reduction-13.f90:63:6: Warning: Inequality comparison for REAL(4) at (1) [-Wcompare-reals]
    [...]/libgomp.oacc-fortran/reduction-13.f90:64:6: Warning: Inequality comparison for REAL(8) at (1) [-Wcompare-reals]

Do we need to allow for some epsilon (generally in such test cases), or
is there another problem?

For reference:

On 2024-02-08T22:47:13+0800, Chung-Lin Tang <cltang@baylibre.com> wrote:
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-13.f90
> @@ -0,0 +1,66 @@
> +! { dg-do run }
> +
> +! record type reductions
> +
> +program reduction_13
> +  implicit none
> +
> +  type t1
> +     integer :: i
> +     real :: r
> +  end type t1
> +
> +  type t2
> +     real :: r
> +     integer :: i
> +     double precision :: d
> +  end type t2
> +
> +  integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
> +  integer :: i
> +  type(t1) :: v1, a1
> +  type (t2) :: v2, a2
> +
> +  v1%i = 0
> +  v1%r = 0
> +  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(v1)
> +  !$acc loop reduction (+:v1)
> +  do i = 1, n
> +     v1%i = v1%i + 1
> +     v1%r = v1%r + 2
> +  end do
> +  !$acc end parallel
> +  a1%i = 0
> +  a1%r = 0
> +  do i = 1, n
> +     a1%i = a1%i + 1
> +     a1%r = a1%r + 2
> +  end do
> +  if (v1%i .ne. a1%i) STOP 1
> +  if (v1%r .ne. a1%r) STOP 2
> +
> +  v2%i = 1
> +  v2%r = 1
> +  v2%d = 1
> +  !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(v2)
> +  !$acc loop reduction (*:v2)
> +  do i = 1, n
> +     v2%i = v2%i * 2
> +     v2%r = v2%r * 1.1
> +     v2%d = v2%d * 1.3
> +  end do
> +  !$acc end parallel
> +  a2%i = 1
> +  a2%r = 1
> +  a2%d = 1
> +  do i = 1, n
> +     a2%i = a2%i * 2
> +     a2%r = a2%r * 1.1
> +     a2%d = a2%d * 1.3
> +  end do
> +
> +  if (v2%i .ne. a2%i) STOP 3
> +  if (v2%r .ne. a2%r) STOP 4
> +  if (v2%d .ne. a2%d) STOP 5
> +
> +end program reduction_13


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-03-18 16:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1ee7eb45-6bf1-40e5-9aec-48f2a8d28196@pllab.cs.nthu.edu.tw>
2024-02-08 14:47 ` [PATCH, OpenACC 2.7] struct/array reductions for Fortran Chung-Lin Tang
2024-03-13 18:59   ` Tobias Burnus
2024-03-18 16:39   ` Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).