public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4.1] fold ordered depend(sink) clauses
@ 2015-07-30  4:21 Aldy Hernandez
  2015-07-30 10:30 ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Aldy Hernandez @ 2015-07-30  4:21 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 600 bytes --]

The attached patch canonicalizes sink dependence clauses into one folded 
clause if possible (as discussed in the paper "Expressing DOACROSS Loop 
Dependences in OpenMP").

The basic algorithm is to create a sink vector whose first element is 
the GCD of all the first elements, and whose remaining elements are the 
minimum of the subsequent columns.  Further explanations are included in 
the code.

I have also added further warnings/errors for incompatible and 
nonsensical sink offsets.

I suggest you start with the tests and first see if you agree with the 
folded cases.

How does this look?

[-- Attachment #2: curr --]
[-- Type: text/plain, Size: 17994 bytes --]

commit 3f0a9dc6ceca690a77c74549a42040c52bc02fdc
Author: Aldy Hernandez <aldyh@redhat.com>
Date:   Wed Jul 29 13:39:06 2015 -0700

    	* wide-int.h (wi::gcd): New.
    	* gimplify.c (struct gimplify_omp_ctx): Rename iter_vars to
    	loop_iter_var.
    	Add loop_dir and loop_const_step fields.
    	(delete_omp_context): Free loop_dir and loop_const_step.
    	(gimplify_omp_for): Set loop_dir and loop_const_step.
    	(gimplify_expr): Move code handling OMP_ORDERED into...
    	(gimplify_omp_ordered): ...here.  New.

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 2331001..5262233 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -153,7 +153,18 @@ struct gimplify_omp_ctx
   splay_tree variables;
   hash_set<tree> *privatized_types;
   /* Iteration variables in an OMP_FOR.  */
-  vec<tree> iter_vars;
+  vec<tree> loop_iter_var;
+
+  /* Direction of loop in an OMP_FOR.  */
+  enum dir {
+    DIR_UNKNOWN,
+    DIR_FORWARD,
+    DIR_BACKWARD
+  };
+  vec<dir> loop_dir;
+
+  /* Absolute value of step.  NULL_TREE if non-constant.  */
+  vec<tree> loop_const_step;
   location_t location;
   enum omp_clause_default_kind default_kind;
   enum omp_region_type region_type;
@@ -392,7 +403,9 @@ delete_omp_context (struct gimplify_omp_ctx *c)
 {
   splay_tree_delete (c->variables);
   delete c->privatized_types;
-  c->iter_vars.release ();
+  c->loop_iter_var.release ();
+  c->loop_dir.release ();
+  c->loop_const_step.release ();
   XDELETE (c);
 }
 
@@ -7490,8 +7503,12 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
 	      == TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
-  gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
-				       (OMP_FOR_INIT (for_stmt)));
+  gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
+					   (OMP_FOR_INIT (for_stmt)));
+  gimplify_omp_ctxp->loop_dir.create (TREE_VEC_LENGTH
+				      (OMP_FOR_INIT (for_stmt)));
+  gimplify_omp_ctxp->loop_const_step.create (TREE_VEC_LENGTH
+					     (OMP_FOR_INIT (for_stmt)));
   for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
     {
       t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
@@ -7501,10 +7518,10 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
 		  || POINTER_TYPE_P (TREE_TYPE (decl)));
       if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
-	gimplify_omp_ctxp->iter_vars.quick_push
+	gimplify_omp_ctxp->loop_iter_var.quick_push
 	  (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
       else
-	gimplify_omp_ctxp->iter_vars.quick_push (decl);
+	gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
 
       /* Make sure the iteration variable is private.  */
       tree c = NULL_TREE;
@@ -7670,6 +7687,23 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       t = TREE_VEC_ELT (OMP_FOR_COND (for_stmt), i);
       gcc_assert (COMPARISON_CLASS_P (t));
       gcc_assert (TREE_OPERAND (t, 0) == decl);
+      switch (TREE_CODE (t))
+	{
+	case LT_EXPR:
+	case LE_EXPR:
+	  gimplify_omp_ctxp->loop_dir.quick_push
+	    (gimplify_omp_ctx::DIR_FORWARD);
+	  break;
+	case GT_EXPR:
+	case GE_EXPR:
+	  gimplify_omp_ctxp->loop_dir.quick_push
+	    (gimplify_omp_ctx::DIR_BACKWARD);
+	  break;
+	default:
+	  gimplify_omp_ctxp->loop_dir.quick_push
+	    (gimplify_omp_ctx::DIR_UNKNOWN);
+	  break;
+	}
 
       tret = gimplify_expr (&TREE_OPERAND (t, 1), &for_pre_body, NULL,
 			    is_gimple_val, fb_rvalue);
@@ -7687,6 +7721,11 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	       called to massage things appropriately.  */
 	    gcc_assert (!POINTER_TYPE_P (TREE_TYPE (decl)));
 
+	    /* Pointer increments have been adjusted by now, so p++
+	       should be p += SIZE, and handled by the MODIFY_EXPR
+	       case below.  */
+	    gimplify_omp_ctxp->loop_const_step.quick_push (integer_one_node);
+
 	    if (orig_for_stmt != for_stmt)
 	      break;
 	    t = build_int_cst (TREE_TYPE (decl), 1);
@@ -7703,6 +7742,7 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	  /* c_omp_for_incr_canonicalize_ptr() should have been
 	     called to massage things appropriately.  */
 	  gcc_assert (!POINTER_TYPE_P (TREE_TYPE (decl)));
+	  gimplify_omp_ctxp->loop_const_step.quick_push (integer_one_node);
 	  if (orig_for_stmt != for_stmt)
 	    break;
 	  t = build_int_cst (TREE_TYPE (decl), -1);
@@ -7738,6 +7778,14 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	      gcc_unreachable ();
 	    }
 
+	  /* We only care about steps that can be determined to be
+	     constant at compile time.  */
+	  if (TREE_CODE (TREE_OPERAND (t, 1)) == INTEGER_CST)
+	    gimplify_omp_ctxp->loop_const_step.quick_push
+	      (TREE_OPERAND (t, 1));
+	  else
+	    gimplify_omp_ctxp->loop_const_step.quick_push (NULL_TREE);
+
 	  tret = gimplify_expr (&TREE_OPERAND (t, 1), &for_pre_body, NULL,
 				is_gimple_val, fb_rvalue);
 	  ret = MIN (ret, tret);
@@ -8387,6 +8435,228 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
   return GS_ALL_DONE;
 }
 
+static gimple
+gimplify_omp_ordered (tree expr, gimple_seq body)
+{
+  tree c, decls;
+  int failures = 0;
+  unsigned int i;
+
+  if (gimplify_omp_ctxp)
+    for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+	  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
+	{
+	  bool fail = false;
+	  for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	       decls && TREE_CODE (decls) == TREE_LIST;
+	       decls = TREE_CHAIN (decls), ++i)
+	    if (i < gimplify_omp_ctxp->loop_iter_var.length ()
+		&& TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
+	      {
+		error_at (OMP_CLAUSE_LOCATION (c),
+			  "variable %qE is not an iteration "
+			  "of outermost loop %d, expected %qE",
+			  TREE_VALUE (decls), i + 1,
+			  gimplify_omp_ctxp->loop_iter_var[i]);
+		fail = true;
+		failures++;
+	      }
+	  /* Avoid being too redundant.  */
+	  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
+	    {
+	      error_at (OMP_CLAUSE_LOCATION (c),
+			"number of variables in depend(sink) "
+			"clause does not match number of "
+			"iteration variables");
+	      failures++;
+	    }
+	}
+
+  if (failures
+      || !gimplify_omp_ctxp
+      || gimplify_omp_ctxp->loop_iter_var.length () == 0)
+    return gimple_build_omp_ordered (body, OMP_ORDERED_CLAUSES (expr));
+
+  /* Canonicalize sink dependence clauses into one folded clause if
+     possible.
+
+     The basic algorithm is to create a sink vector whose first
+     element is the GCD of all the first elements, and whose remaining
+     elements are the minimum of the subsequent columns.
+
+     We ignore dependence vectors whose first element is zero because
+     such dependencies are known to be executed by the same thread.
+
+     ?? ^^^^ Is this only applicable for collapse(1) loops?  If so, how
+     ?? to handle collapse(N) loops where N > 1?
+
+     We take into account the direction of the loop, so a minimum
+     becomes a maximum if the loop is iterating backwards.  We also
+     ignore sink clauses where the loop direction is unknown, or where
+     the offsets are clearly invalid because they are not a multiple
+     of the loop increment.
+
+     For example:
+
+	     for (i=0; i < N; ++i)
+		depend(sink:i-8,j-1)
+		depend(sink:i,j-2)	// Completely ignored because i+0.
+		depend(sink:i-4,j+3)
+		depend(sink:i-6,j+2)
+
+     Folded clause is:
+
+        depend(sink:-gcd(8,4,6),min(-1,3,2))
+	  -or-
+	depend(sink:-2,-1)
+  */
+
+  unsigned int len = gimplify_omp_ctxp->loop_iter_var.length ();
+  vec<wide_int> folded_deps;
+  folded_deps.create (len);
+  folded_deps.quick_grow_cleared (len);
+  /* TRUE if the first dimension's offset is negative.  */
+  bool neg_offset_p = false;
+
+  /* Bitmap representing dimensions in the final dependency vector that
+     have been set.  */
+  sbitmap folded_deps_used = sbitmap_alloc (len);
+  bitmap_clear (folded_deps_used);
+
+  tree *list_p = &OMP_ORDERED_CLAUSES (expr);
+  while ((c = *list_p) != NULL)
+    {
+      bool remove = false;
+
+      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
+	  || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
+	goto next_ordered_clause;
+
+      for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	   decls && TREE_CODE (decls) == TREE_LIST;
+	   decls = TREE_CHAIN (decls), ++i)
+	{
+	  gcc_assert (i < len);
+
+	  /* While the committee makes up its mind, bail if we have any
+	     non-constant steps.  Also, bail if any loop has an unknown
+	     direction.  */
+	  if (!gimplify_omp_ctxp->loop_const_step[i]
+	      || (gimplify_omp_ctxp->loop_dir[i]
+		  == gimplify_omp_ctx::DIR_UNKNOWN))
+	    goto gimplify_omp_ordered_ret;
+
+	  wide_int offset = TREE_PURPOSE (decls);
+
+	  /* Ignore invalid offsets that are not multiples of the step.  */
+	  if (!wi::multiple_of_p
+	      (wi::abs (offset),
+	       wi::abs ((wide_int)
+			gimplify_omp_ctxp->loop_const_step[i]),
+	       UNSIGNED))
+	    {
+	      warning_at (OMP_CLAUSE_LOCATION (c), 0,
+			  "ignoring sink clause with offset that is not "
+			  "a multiple of the loop step");
+	      remove = true;
+	      goto next_ordered_clause;
+	    }
+
+	  /* Calculate the first dimension.  The first dimension of
+	     the folded dependency vector is the GCD of the first
+	     elements, while ignoring any first elements whose offset
+	     is 0.  */
+	  if (i == 0)
+	    {
+	      /* Ignore dependence vectors whose first dimension is 0.  */
+	      if (offset == 0)
+		goto next_ordered_clause;
+	      else
+		{
+		  neg_offset_p =
+		    wi::neg_p (offset,
+			       TYPE_SIGN (TREE_TYPE (TREE_PURPOSE (decls))));
+		  if ((gimplify_omp_ctxp->loop_dir[0]
+		       == gimplify_omp_ctx::DIR_FORWARD && !neg_offset_p)
+		      || (gimplify_omp_ctxp->loop_dir[0]
+			  == gimplify_omp_ctx::DIR_BACKWARD && neg_offset_p))
+		    {
+		      error_at (OMP_CLAUSE_LOCATION (c),
+				"first offset must be in opposite direction "
+				"of loop iterations");
+		      goto gimplify_omp_ordered_ret;
+		    }
+		  /* Initialize the first time around.  */
+		  if (!bitmap_bit_p (folded_deps_used, 0))
+		    {
+		      bitmap_set_bit (folded_deps_used, 0);
+		      folded_deps[0] = wi::abs (offset);
+		    }
+		  else
+		    folded_deps[i] = wi::gcd (folded_deps[0], offset, UNSIGNED);
+		}
+	    }
+	  /* Calculate minimum for the remaining dimensions.  */
+	  else
+	    {
+	      if (!bitmap_bit_p (folded_deps_used, i))
+		{
+		  bitmap_set_bit (folded_deps_used, i);
+		  folded_deps[i] = offset;
+		}
+	      else if ((gimplify_omp_ctxp->loop_dir[i]
+			== gimplify_omp_ctx::DIR_FORWARD
+			&& wi::lts_p (offset, folded_deps[i]))
+		       || (gimplify_omp_ctxp->loop_dir[i]
+			   == gimplify_omp_ctx::DIR_BACKWARD
+			   && wi::gts_p (offset, folded_deps[i])))
+		folded_deps[i] = offset;
+	    }
+	}
+
+      remove = true;
+
+    next_ordered_clause:
+      if (remove)
+	*list_p = OMP_CLAUSE_CHAIN (c);
+      else
+	list_p = &OMP_CLAUSE_CHAIN (c);
+    }
+
+  for (i = 0; i < len; ++i)
+    if (!bitmap_bit_p (folded_deps_used, i))
+      break;
+  if (i == len)
+    {
+      if (neg_offset_p)
+	folded_deps[0] = -folded_deps[0];
+
+      tree vec = NULL;
+      i = len;
+      do
+	{
+	  i--;
+	  tree var = gimplify_omp_ctxp->loop_iter_var[i];
+	  vec = tree_cons (wide_int_to_tree (TREE_TYPE (var), folded_deps[i]),
+			   var, vec);
+	}
+      while (i);
+
+      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
+      OMP_CLAUSE_DEPEND_KIND (c) = OMP_CLAUSE_DEPEND_SINK;
+      OMP_CLAUSE_DECL (c) = vec;
+      OMP_CLAUSE_CHAIN (c) = OMP_ORDERED_CLAUSES (expr);
+      OMP_ORDERED_CLAUSES (expr) = c;
+    }
+
+ gimplify_omp_ordered_ret:
+  sbitmap_free (folded_deps_used);
+  folded_deps.release ();
+
+  return gimple_build_omp_ordered (body, OMP_ORDERED_CLAUSES (expr));
+}
+
 /* Convert the GENERIC expression tree *EXPR_P to GIMPLE.  If the
    expression produces a value to be used as an operand inside a GIMPLE
    statement, the value will be stored back in *EXPR_P.  This value will
@@ -9200,38 +9470,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 		}
 		break;
 	      case OMP_ORDERED:
-		if (gimplify_omp_ctxp)
-		  for (tree c = OMP_ORDERED_CLAUSES (*expr_p);
-		       c; c = OMP_CLAUSE_CHAIN (c))
-		    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
-			&& OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
-		      {
-			unsigned int n = 0;
-			bool fail = false;
-			for (tree decls = OMP_CLAUSE_DECL (c);
-			     decls && TREE_CODE (decls) == TREE_LIST;
-			     decls = TREE_CHAIN (decls), ++n)
-			  if (n < gimplify_omp_ctxp->iter_vars.length ()
-			      && TREE_VALUE (decls)
-			      != gimplify_omp_ctxp->iter_vars[n])
-			    {
-			      error_at (OMP_CLAUSE_LOCATION (c),
-					"variable %qE is not an iteration "
-					"of outermost loop %d, expected %qE",
-					TREE_VALUE (decls), n + 1,
-					gimplify_omp_ctxp->iter_vars[n]);
-			      fail = true;
-			    }
-			/* Avoid being too redundant.  */
-			if (!fail
-			    && n != gimplify_omp_ctxp->iter_vars.length ())
-			  error_at (OMP_CLAUSE_LOCATION (c),
-			     "number of variables in depend(sink) clause "
-			     "does not match number of iteration variables");
-		      }
-
-		g = gimple_build_omp_ordered (body,
-					      OMP_ORDERED_CLAUSES (*expr_p));
+		g = gimplify_omp_ordered (*expr_p, body);
 		break;
 	      case OMP_CRITICAL:
 		gimplify_scan_omp_clauses (&OMP_CRITICAL_CLAUSES (*expr_p),
diff --git a/gcc/testsuite/c-c++-common/gomp/sink-4.c b/gcc/testsuite/c-c++-common/gomp/sink-4.c
index 7934de2..a34f081 100644
--- a/gcc/testsuite/c-c++-common/gomp/sink-4.c
+++ b/gcc/testsuite/c-c++-common/gomp/sink-4.c
@@ -22,4 +22,4 @@ funk (foo *begin, foo *end)
     }
 }
 
-/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400.\\)" 1 "gimple" } } */
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
new file mode 100644
index 0000000..64ddc92
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+
+/* Test depend(sink) clause folding.  */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i++)
+    for (j=0; j < N; ++j)
+    {
+/* We should keep the (sink:i,j-2) by virtue of it the i+0.  The
+   remaining clauses get folded with a GCD of -2 for `i' and a minimum
+   of -1 for 'j'.  */
+#pragma omp ordered \
+  depend(sink:i-8,j-1) \
+  depend(sink:i, j-2) \
+  depend(sink:i-4,j+3) \
+  depend(sink:i-6,j+2)
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "omp ordered depend\\(sink:i-2,j-1\\) depend\\(sink:i,j-2\\)" 1 "gimple" } } */
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
new file mode 100644
index 0000000..9b3c442
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i += 3)
+    for (j=0; j < N; ++j)
+    {
+#pragma omp ordered depend(sink:i-8,j-1) /* { dg-warning "ignoring sink clause with offset that is not a multiple" } */
+#pragma omp ordered depend(sink:i+3,j-1) /* { dg-error "first offset must be in opposite direction" } */
+        bar();
+#pragma omp ordered depend(source)
+    }
+
+#pragma omp parallel for ordered(2)
+  for (i=N; i > 0; --i)
+    for (j=0; j < N; ++j)
+      {
+#pragma omp ordered depend(sink:i-1,j-1) /* { dg-error "first offset must be in opposite direction" } */
+	bar();
+#pragma omp ordered depend(source)
+      }
+}
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
new file mode 100644
index 0000000..79cb5f3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+
+/* Test that we fold sink offsets correctly while taking into account
+   pointer sizes.  */
+
+typedef struct {
+    char stuff[400];
+} foo;
+
+void
+funk (foo *begin, foo *end)
+{
+  foo *p;
+#pragma omp parallel for ordered(1)
+  for (p=end; p > begin; p--)
+    {
+#pragma omp ordered depend(sink:p+2) depend(sink:p+4)
+      void bar ();
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+800B\\)" 1 "gimple" } } */
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index d8f7b46..c20db61 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -514,6 +514,7 @@ namespace wi
   BINARY_FUNCTION div_round (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION divmod_trunc (const T1 &, const T2 &, signop,
 				WI_BINARY_RESULT (T1, T2) *);
+  BINARY_FUNCTION gcd (const T1 &, const T2 &, signop = UNSIGNED);
   BINARY_FUNCTION mod_trunc (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION smod_trunc (const T1 &, const T2 &);
   BINARY_FUNCTION umod_trunc (const T1 &, const T2 &);
@@ -2653,6 +2654,27 @@ wi::divmod_trunc (const T1 &x, const T2 &y, signop sgn,
   return quotient;
 }
 
+/* Compute the greatest common divisor of two numbers A and B using
+   Euclid's algorithm.  */
+template <typename T1, typename T2>
+inline WI_BINARY_RESULT (T1, T2)
+wi::gcd (const T1 &a, const T2 &b, signop sgn)
+{
+  T1 x, y, z;
+
+  x = wi::abs (a);
+  y = wi::abs (b);
+
+  while (gt_p (x, 0, sgn))
+    {
+      z = mod_trunc (y, x, sgn);
+      y = x;
+      x = z;
+    }
+
+  return y;
+}
+
 /* Compute X / Y, rouding towards 0, and return the remainder.
    Treat X and Y as having the signedness given by SGN.  Indicate
    in *OVERFLOW if the division overflows.  */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [gomp4.1] fold ordered depend(sink) clauses
  2015-07-30  4:21 [gomp4.1] fold ordered depend(sink) clauses Aldy Hernandez
@ 2015-07-30 10:30 ` Jakub Jelinek
  2015-07-31  2:56   ` Aldy Hernandez
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2015-07-30 10:30 UTC (permalink / raw)
  To: Aldy Hernandez; +Cc: gcc-patches

On Wed, Jul 29, 2015 at 04:48:23PM -0700, Aldy Hernandez wrote:
> @@ -7490,8 +7503,12 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
>  	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
>    gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
>  	      == TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
> -  gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
> -				       (OMP_FOR_INIT (for_stmt)));
> +  gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
> +					   (OMP_FOR_INIT (for_stmt)));
> +  gimplify_omp_ctxp->loop_dir.create (TREE_VEC_LENGTH
> +				      (OMP_FOR_INIT (for_stmt)));
> +  gimplify_omp_ctxp->loop_const_step.create (TREE_VEC_LENGTH
> +					     (OMP_FOR_INIT (for_stmt)));
>    for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
>      {
>        t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);

I think the above should be guarded with
  tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED);
  if (c && OMP_CLAUSE_ORDERED_EXPR (c))
The most common case is that ordered(n) is not present, so we should
optimize for that.

> @@ -7501,10 +7518,10 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
>        gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
>  		  || POINTER_TYPE_P (TREE_TYPE (decl)));
>        if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
> -	gimplify_omp_ctxp->iter_vars.quick_push
> +	gimplify_omp_ctxp->loop_iter_var.quick_push
>  	  (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
>        else
> -	gimplify_omp_ctxp->iter_vars.quick_push (decl);
> +	gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
>  
>        /* Make sure the iteration variable is private.  */
>        tree c = NULL_TREE;

And all these etc. pushes too, simply remember is_doacross in some bool
variable.

> @@ -8387,6 +8435,228 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
>    return GS_ALL_DONE;
>  }
>  

Function comment missing.

> +static gimple
> +gimplify_omp_ordered (tree expr, gimple_seq body)
> +{

> +     The basic algorithm is to create a sink vector whose first
> +     element is the GCD of all the first elements, and whose remaining
> +     elements are the minimum of the subsequent columns.
> +
> +     We ignore dependence vectors whose first element is zero because
> +     such dependencies are known to be executed by the same thread.
> +
> +     ?? ^^^^ Is this only applicable for collapse(1) loops?  If so, how
> +     ?? to handle collapse(N) loops where N > 1?

For collapse(N) N > 1, you can't ignore first iter var with 0 offset, you can only
ignore if N first iter vars have 0 offset.

Pretty much for the purpose of the algorithm, you compute "first element"
for collapse(N) N > 1 and ordered(5-N)
	for (iv1 = ..; iv1 < ..; iv1 += step1)
	  for (iv2 = ..; iv2 < ..; iv2 += step2)
	    for (iv3 = ..; iv3 < ..; iv3 += step3)
	      for (iv4 = ..; iv4 < ..; iv4 += step4)
		depend(iv1 + off1, iv2 + off2, iv3 + off3, iv4 + off4)
as: off(N) + off(N-1)*step(N) + iv(N-2)*step(N)*step(N-1)...
(to be checked the case if the directions differ).
So basically, you want to precompute if you'd add some loop counter
in between loop(N) and loop(N+1) that would be initially zero and incremented
by step(N).  The GCD is then performed on this, it is compared to zero,
and then finally split again into the several offsets.
The "is this invalid iteration" check (is offN divisible by stepN)
needs to be done before the merging of course.

Except, now that I think, it is not that easy.  Because we have another
test for "is this invalid iteration", in particularly one performed at
runtime on each of the depends.  Say if offN is negative and loop(N)
is forward loop (thus stepN is positive (note, for backward loop stepN
still might be huge positive value for unsigned iterators)), the check
would be if (ivN + offN >= initialN), for forward loop with positive offN
if (ivN + offN < endvalueN) etc.  Now, not sure if by computing the GCD and
merging several depend clauses into one we preserve those tests or not.

All in all, I think it might be really better to do the depend clause
merging during omp lowering in omp-low.c, where you can call
extract_omp_for_data and inspect the iteration variables, have the steps
computed for you etc.  That is the spot where we probably want to emit some
GOMP_depend_sink call (and GOMP_depend_source) and prepare arguments for it.

> +void
> +funk ()
> +{
> +#pragma omp parallel for ordered(2)
> +  for (i=0; i < N; i++)
> +    for (j=0; j < N; ++j)
> +    {
> +/* We should keep the (sink:i,j-2) by virtue of it the i+0.  The
> +   remaining clauses get folded with a GCD of -2 for `i' and a minimum
> +   of -1 for 'j'.  */

I think we shouldn't keep the useless one (sink: i, j-2) (or keep invalid
ones).  Of course, if we remove all depend clauses, we need to remove the
GIMPLE_OMP_ORDERED stmt altogether.

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [gomp4.1] fold ordered depend(sink) clauses
  2015-07-30 10:30 ` Jakub Jelinek
@ 2015-07-31  2:56   ` Aldy Hernandez
  2015-07-31 18:18     ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Aldy Hernandez @ 2015-07-31  2:56 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5290 bytes --]

On 07/30/2015 03:01 AM, Jakub Jelinek wrote:
> On Wed, Jul 29, 2015 at 04:48:23PM -0700, Aldy Hernandez wrote:
>> @@ -7490,8 +7503,12 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
>>   	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
>>     gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
>>   	      == TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
>> -  gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
>> -				       (OMP_FOR_INIT (for_stmt)));
>> +  gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
>> +					   (OMP_FOR_INIT (for_stmt)));
>> +  gimplify_omp_ctxp->loop_dir.create (TREE_VEC_LENGTH
>> +				      (OMP_FOR_INIT (for_stmt)));
>> +  gimplify_omp_ctxp->loop_const_step.create (TREE_VEC_LENGTH
>> +					     (OMP_FOR_INIT (for_stmt)));
>>     for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
>>       {
>>         t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
>
> I think the above should be guarded with
>    tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED);
>    if (c && OMP_CLAUSE_ORDERED_EXPR (c))
> The most common case is that ordered(n) is not present, so we should
> optimize for that.

Done.

>
>> @@ -7501,10 +7518,10 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
>>         gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
>>   		  || POINTER_TYPE_P (TREE_TYPE (decl)));
>>         if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
>> -	gimplify_omp_ctxp->iter_vars.quick_push
>> +	gimplify_omp_ctxp->loop_iter_var.quick_push
>>   	  (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
>>         else
>> -	gimplify_omp_ctxp->iter_vars.quick_push (decl);
>> +	gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
>>
>>         /* Make sure the iteration variable is private.  */
>>         tree c = NULL_TREE;
>
> And all these etc. pushes too, simply remember is_doacross in some bool
> variable.

Not applicable.  I've moved the code to omp-low, as suggested, where the 
omp_for_data is available, thus simplifying everything.

>
>> @@ -8387,6 +8435,228 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
>>     return GS_ALL_DONE;
>>   }
>>
>
> Function comment missing.

Fixed.

>
>> +static gimple
>> +gimplify_omp_ordered (tree expr, gimple_seq body)
>> +{
>
>> +     The basic algorithm is to create a sink vector whose first
>> +     element is the GCD of all the first elements, and whose remaining
>> +     elements are the minimum of the subsequent columns.
>> +
>> +     We ignore dependence vectors whose first element is zero because
>> +     such dependencies are known to be executed by the same thread.
>> +
>> +     ?? ^^^^ Is this only applicable for collapse(1) loops?  If so, how
>> +     ?? to handle collapse(N) loops where N > 1?
>
> For collapse(N) N > 1, you can't ignore first iter var with 0 offset, you can only
> ignore if N first iter vars have 0 offset.
>
> Pretty much for the purpose of the algorithm, you compute "first element"
> for collapse(N) N > 1 and ordered(5-N)
> 	for (iv1 = ..; iv1 < ..; iv1 += step1)
> 	  for (iv2 = ..; iv2 < ..; iv2 += step2)
> 	    for (iv3 = ..; iv3 < ..; iv3 += step3)
> 	      for (iv4 = ..; iv4 < ..; iv4 += step4)
> 		depend(iv1 + off1, iv2 + off2, iv3 + off3, iv4 + off4)
> as: off(N) + off(N-1)*step(N) + iv(N-2)*step(N)*step(N-1)...
> (to be checked the case if the directions differ).
> So basically, you want to precompute if you'd add some loop counter
> in between loop(N) and loop(N+1) that would be initially zero and incremented
> by step(N).  The GCD is then performed on this, it is compared to zero,
> and then finally split again into the several offsets.
> The "is this invalid iteration" check (is offN divisible by stepN)
> needs to be done before the merging of course.
>
> Except, now that I think, it is not that easy.  Because we have another
> test for "is this invalid iteration", in particularly one performed at
> runtime on each of the depends.  Say if offN is negative and loop(N)
> is forward loop (thus stepN is positive (note, for backward loop stepN
> still might be huge positive value for unsigned iterators)), the check
> would be if (ivN + offN >= initialN), for forward loop with positive offN
> if (ivN + offN < endvalueN) etc.  Now, not sure if by computing the GCD and
> merging several depend clauses into one we preserve those tests or not.

Ughh... as a followup?  For now I'm going to bail on collapse > 1 and 
add a big FIXME.

>
> All in all, I think it might be really better to do the depend clause
> merging during omp lowering in omp-low.c, where you can call
> extract_omp_for_data and inspect the iteration variables, have the steps
> computed for you etc.  That is the spot where we probably want to emit some
> GOMP_depend_sink call (and GOMP_depend_source) and prepare arguments for it.

Excellent suggestion.  Done.

>
>> +void
>> +funk ()
>> +{
>> +#pragma omp parallel for ordered(2)
>> +  for (i=0; i < N; i++)
>> +    for (j=0; j < N; ++j)
>> +    {
>> +/* We should keep the (sink:i,j-2) by virtue of it the i+0.  The
>> +   remaining clauses get folded with a GCD of -2 for `i' and a minimum
>> +   of -1 for 'j'.  */
>
> I think we shouldn't keep the useless one (sink: i, j-2) (or keep invalid
> ones).

Done.

How's this?

Aldy

[-- Attachment #2: curr --]
[-- Type: text/plain, Size: 18414 bytes --]

commit 9c979c4c4cd53092affbf98c05ddd8c9a60915c7
Author: Aldy Hernandez <aldyh@redhat.com>
Date:   Wed Jul 29 13:39:06 2015 -0700

    	* wide-int.h (wi::gcd): New.
    	* gimplify.c (struct gimplify_omp_ctx): Rename iter_vars to
    	loop_iter_var.
    	(delete_omp_context): Same.
    	(gimplify_expr): Move code handling OMP_ORDERED into...
    	(gimplify_omp_ordered): ...here.  New.
    	* omp-low.c (lower_omp_ordered_clauses): New.
    	(lower_omp_ordered): Call lower_omp_ordered_clauses.
    testsuite/
    	* gcc.dg/gomp/sink-fold-1.c: New.
    	* gcc.dg/gomp/sink-fold-2.c: New.
    	* gcc.dg/gomp/sink-fold-3.c: New.
    	* c-c++-common/gomp/sink-4.c: Look in omplower dump file instead.
    	* g++.dg/gomp/sink-3.C: Have sink offset be in the opposite
    	direction.  Make variables more readable.

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 2331001..83eb6a1 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -153,7 +153,7 @@ struct gimplify_omp_ctx
   splay_tree variables;
   hash_set<tree> *privatized_types;
   /* Iteration variables in an OMP_FOR.  */
-  vec<tree> iter_vars;
+  vec<tree> loop_iter_var;
   location_t location;
   enum omp_clause_default_kind default_kind;
   enum omp_region_type region_type;
@@ -392,7 +392,7 @@ delete_omp_context (struct gimplify_omp_ctx *c)
 {
   splay_tree_delete (c->variables);
   delete c->privatized_types;
-  c->iter_vars.release ();
+  c->loop_iter_var.release ();
   XDELETE (c);
 }
 
@@ -7490,8 +7490,15 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
 	      == TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
-  gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
-				       (OMP_FOR_INIT (for_stmt)));
+
+  tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED);
+  bool is_doacross = false;
+  if (c && OMP_CLAUSE_ORDERED_EXPR (c))
+    {
+      is_doacross = true;
+      gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
+					       (OMP_FOR_INIT (for_stmt)));
+    }
   for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
     {
       t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
@@ -7500,11 +7507,14 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gcc_assert (DECL_P (decl));
       gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
 		  || POINTER_TYPE_P (TREE_TYPE (decl)));
-      if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
-	gimplify_omp_ctxp->iter_vars.quick_push
-	  (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
-      else
-	gimplify_omp_ctxp->iter_vars.quick_push (decl);
+      if (is_doacross)
+	{
+	  if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
+	    gimplify_omp_ctxp->loop_iter_var.quick_push
+	      (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
+	  else
+	    gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
+	}
 
       /* Make sure the iteration variable is private.  */
       tree c = NULL_TREE;
@@ -8387,6 +8397,53 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
   return GS_ALL_DONE;
 }
 
+/* Gimplify an OMP_ORDERED construct.  EXPR is the tree version.  BODY
+   is the OMP_BODY of the original EXPR (which has already been
+   gimplified so it's not present in the EXPR).
+
+   Return the gimplified GIMPLE_OMP_ORDERED tuple.  */
+
+static gimple
+gimplify_omp_ordered (tree expr, gimple_seq body)
+{
+  tree c, decls;
+  int failures = 0;
+  unsigned int i;
+
+  if (gimplify_omp_ctxp)
+    for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+	  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
+	{
+	  bool fail = false;
+	  for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	       decls && TREE_CODE (decls) == TREE_LIST;
+	       decls = TREE_CHAIN (decls), ++i)
+	    if (i < gimplify_omp_ctxp->loop_iter_var.length ()
+		&& TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
+	      {
+		error_at (OMP_CLAUSE_LOCATION (c),
+			  "variable %qE is not an iteration "
+			  "of outermost loop %d, expected %qE",
+			  TREE_VALUE (decls), i + 1,
+			  gimplify_omp_ctxp->loop_iter_var[i]);
+		fail = true;
+		failures++;
+	      }
+	  /* Avoid being too redundant.  */
+	  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
+	    {
+	      error_at (OMP_CLAUSE_LOCATION (c),
+			"number of variables in depend(sink) "
+			"clause does not match number of "
+			"iteration variables");
+	      failures++;
+	    }
+	}
+
+  return gimple_build_omp_ordered (body, OMP_ORDERED_CLAUSES (expr));
+}
+
 /* Convert the GENERIC expression tree *EXPR_P to GIMPLE.  If the
    expression produces a value to be used as an operand inside a GIMPLE
    statement, the value will be stored back in *EXPR_P.  This value will
@@ -9200,38 +9257,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 		}
 		break;
 	      case OMP_ORDERED:
-		if (gimplify_omp_ctxp)
-		  for (tree c = OMP_ORDERED_CLAUSES (*expr_p);
-		       c; c = OMP_CLAUSE_CHAIN (c))
-		    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
-			&& OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
-		      {
-			unsigned int n = 0;
-			bool fail = false;
-			for (tree decls = OMP_CLAUSE_DECL (c);
-			     decls && TREE_CODE (decls) == TREE_LIST;
-			     decls = TREE_CHAIN (decls), ++n)
-			  if (n < gimplify_omp_ctxp->iter_vars.length ()
-			      && TREE_VALUE (decls)
-			      != gimplify_omp_ctxp->iter_vars[n])
-			    {
-			      error_at (OMP_CLAUSE_LOCATION (c),
-					"variable %qE is not an iteration "
-					"of outermost loop %d, expected %qE",
-					TREE_VALUE (decls), n + 1,
-					gimplify_omp_ctxp->iter_vars[n]);
-			      fail = true;
-			    }
-			/* Avoid being too redundant.  */
-			if (!fail
-			    && n != gimplify_omp_ctxp->iter_vars.length ())
-			  error_at (OMP_CLAUSE_LOCATION (c),
-			     "number of variables in depend(sink) clause "
-			     "does not match number of iteration variables");
-		      }
-
-		g = gimple_build_omp_ordered (body,
-					      OMP_ORDERED_CLAUSES (*expr_p));
+		g = gimplify_omp_ordered (*expr_p, body);
 		break;
 	      case OMP_CRITICAL:
 		gimplify_scan_omp_clauses (&OMP_CRITICAL_CLAUSES (*expr_p),
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e7d21ea..a63bf60 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11969,6 +11969,230 @@ lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 }
 
 
+/* Fold the OMP_ORDERED_CLAUSES for the OMP_ORDERED in STMT if possible.  */
+
+static void
+lower_omp_ordered_clauses (gomp_ordered *ord_stmt, omp_context *ctx)
+{
+  struct omp_for_data fd;
+  if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
+    return;
+
+  /* ?? This is stupid.  We need to call extract_omp_for_data just
+     to get the number of ordered loops... */
+  extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, NULL);
+  if (!fd.ordered)
+    return;
+  struct omp_for_data_loop *loops
+    = (struct omp_for_data_loop *)
+    alloca (fd.ordered * sizeof (struct omp_for_data_loop));
+  /* ?? ...and then again to get the actual loops.  */
+  extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
+
+  /* Canonicalize sink dependence clauses into one folded clause if
+     possible.
+
+     The basic algorithm is to create a sink vector whose first
+     element is the GCD of all the first elements, and whose remaining
+     elements are the minimum of the subsequent columns.
+
+     We ignore dependence vectors whose first element is zero because
+     such dependencies are known to be executed by the same thread.
+
+     We take into account the direction of the loop, so a minimum
+     becomes a maximum if the loop is iterating backwards.  We also
+     ignore sink clauses where the loop direction is unknown, or where
+     the offsets are clearly invalid because they are not a multiple
+     of the loop increment.
+
+     For example:
+
+	     for (i=0; i < N; ++i)
+		depend(sink:i-8,j-1)
+		depend(sink:i,j-2)	// Completely ignored because i+0.
+		depend(sink:i-4,j+3)
+		depend(sink:i-6,j+2)
+
+     Folded clause is:
+
+        depend(sink:-gcd(8,4,6),min(-1,3,2))
+	  -or-
+	depend(sink:-2,-1)
+  */
+
+  /* FIXME: Computing GCD's where the first element is zero is
+     non-trivial in the presence of collapsed loops.  Do this later.  */
+  if (fd.collapse > 1)
+    return;
+
+  unsigned int len = fd.ordered;
+  vec<wide_int> folded_deps;
+  folded_deps.create (len);
+  folded_deps.quick_grow_cleared (len);
+  /* Bitmap representing dimensions in the final dependency vector that
+     have been set.  */
+  sbitmap folded_deps_used = sbitmap_alloc (len);
+  bitmap_clear (folded_deps_used);
+  /* TRUE if the first dimension's offset is negative.  */
+  bool neg_offset_p = false;
+
+  /* ?? We need to save the original iteration variables stored in the
+     depend clauses, because those in fd.loops[].v have already been
+     gimplified.  Perhaps we should use the gimplified versions. ??  */
+  tree *iter_vars = (tree *) alloca (sizeof (tree) * len);
+  memset (iter_vars, 0, sizeof (tree) * len);
+
+  tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
+  tree c;
+  unsigned int i;
+  while ((c = *list_p) != NULL)
+    {
+      bool remove = false;
+
+      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
+	  || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
+	goto next_ordered_clause;
+
+      tree decls;
+      for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	   decls && TREE_CODE (decls) == TREE_LIST;
+	   decls = TREE_CHAIN (decls), ++i)
+	{
+	  gcc_assert (i < len);
+
+	  enum {
+	    DIR_UNKNOWN,
+	    DIR_FORWARD,
+	    DIR_BACKWARD
+	  } loop_dir;
+	  switch (fd.loops[i].cond_code)
+	    {
+	    case LT_EXPR:
+	    case LE_EXPR:
+	      loop_dir = DIR_FORWARD;
+	      break;
+	    case GT_EXPR:
+	    case GE_EXPR:
+	      loop_dir = DIR_BACKWARD;
+	      break;
+	    default:
+	      loop_dir = DIR_UNKNOWN;
+	      gcc_unreachable ();
+	    }
+
+	  /* While the committee makes up its mind, bail if we have any
+	     non-constant steps.  */
+	  if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
+	    goto gimplify_omp_ordered_ret;
+
+	  wide_int offset = TREE_PURPOSE (decls);
+	  if (!iter_vars[i])
+	    iter_vars[i] = TREE_VALUE (decls);
+
+	  /* Ignore invalid offsets that are not multiples of the step.  */
+	  if (!wi::multiple_of_p
+	      (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
+	       UNSIGNED))
+	    {
+	      warning_at (OMP_CLAUSE_LOCATION (c), 0,
+			  "ignoring sink clause with offset that is not "
+			  "a multiple of the loop step");
+	      remove = true;
+	      goto next_ordered_clause;
+	    }
+
+	  /* Calculate the first dimension.  The first dimension of
+	     the folded dependency vector is the GCD of the first
+	     elements, while ignoring any first elements whose offset
+	     is 0.  */
+	  if (i == 0)
+	    {
+	      /* Ignore dependence vectors whose first dimension is 0.  */
+	      if (offset == 0)
+		{
+		  remove = true;
+		  goto next_ordered_clause;
+		}
+	      else
+		{
+		  neg_offset_p =
+		    wi::neg_p (offset,
+			       TYPE_SIGN (TREE_TYPE (TREE_PURPOSE (decls))));
+		  if ((loop_dir == DIR_FORWARD && !neg_offset_p)
+		      || (loop_dir == DIR_BACKWARD && neg_offset_p))
+		    {
+		      error_at (OMP_CLAUSE_LOCATION (c),
+				"first offset must be in opposite direction "
+				"of loop iterations");
+		      goto gimplify_omp_ordered_ret;
+		    }
+		  /* Initialize the first time around.  */
+		  if (!bitmap_bit_p (folded_deps_used, 0))
+		    {
+		      bitmap_set_bit (folded_deps_used, 0);
+		      folded_deps[0] = wi::abs (offset);
+		    }
+		  else
+		    folded_deps[i] = wi::gcd (folded_deps[0], offset, UNSIGNED);
+		}
+	    }
+	  /* Calculate minimum for the remaining dimensions.  */
+	  else
+	    {
+	      if (!bitmap_bit_p (folded_deps_used, i))
+		{
+		  bitmap_set_bit (folded_deps_used, i);
+		  folded_deps[i] = offset;
+		}
+	      else if ((loop_dir == DIR_FORWARD
+			&& wi::lts_p (offset, folded_deps[i]))
+		       || (loop_dir == DIR_BACKWARD
+			   && wi::gts_p (offset, folded_deps[i])))
+		folded_deps[i] = offset;
+	    }
+	}
+
+      remove = true;
+
+    next_ordered_clause:
+      if (remove)
+	*list_p = OMP_CLAUSE_CHAIN (c);
+      else
+	list_p = &OMP_CLAUSE_CHAIN (c);
+    }
+
+  for (i = 0; i < len; ++i)
+    if (!bitmap_bit_p (folded_deps_used, i))
+      break;
+  if (i == len)
+    {
+      if (neg_offset_p)
+	folded_deps[0] = -folded_deps[0];
+
+      tree vec = NULL;
+      i = len;
+      do
+	{
+	  i--;
+	  vec = tree_cons (wide_int_to_tree (TREE_TYPE (fd.loops[i].v),
+					     folded_deps[i]),
+			   iter_vars[i], vec);
+	}
+      while (i);
+
+      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
+      OMP_CLAUSE_DEPEND_KIND (c) = OMP_CLAUSE_DEPEND_SINK;
+      OMP_CLAUSE_DECL (c) = vec;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_ordered_clauses (ord_stmt);
+      *gimple_omp_ordered_clauses_ptr (ord_stmt) = c;
+    }
+
+ gimplify_omp_ordered_ret:
+  sbitmap_free (folded_deps_used);
+  folded_deps.release ();
+}
+
+
 /* Expand code for an OpenMP ordered directive.  */
 
 static void
@@ -11979,6 +12203,8 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gcall *x;
   gbind *bind;
 
+  lower_omp_ordered_clauses (as_a <gomp_ordered *> (stmt), ctx);
+
   push_gimplify_context ();
 
   block = make_node (BLOCK);
diff --git a/gcc/testsuite/c-c++-common/gomp/sink-4.c b/gcc/testsuite/c-c++-common/gomp/sink-4.c
index 7934de2..111178b 100644
--- a/gcc/testsuite/c-c++-common/gomp/sink-4.c
+++ b/gcc/testsuite/c-c++-common/gomp/sink-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
 
 /* Test that we adjust pointer offsets for sink variables
    correctly.  */
@@ -22,4 +22,4 @@ funk (foo *begin, foo *end)
     }
 }
 
-/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400.\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/g++.dg/gomp/sink-3.C b/gcc/testsuite/g++.dg/gomp/sink-3.C
index 83a742e..4271d66 100644
--- a/gcc/testsuite/g++.dg/gomp/sink-3.C
+++ b/gcc/testsuite/g++.dg/gomp/sink-3.C
@@ -8,7 +8,7 @@ typedef struct {
     char stuff[400];
 } foo;
 
-foo *p, *q, *r;
+foo *end, *begin, *p;
 
 template<int N>
 void
@@ -16,7 +16,7 @@ funk ()
 {
   int i,j;
 #pragma omp parallel for ordered(1)
-  for (p=q; p < q; p--)
+  for (p=end; p > begin; p--)
     {
 #pragma omp ordered depend(sink:p+1)
       void bar ();
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
new file mode 100644
index 0000000..f2961b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test depend(sink) clause folding.  */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i++)
+    for (j=0; j < N; ++j)
+    {
+/* We remove the (sink:i,j-2) by virtue of it the i+0.  The remaining
+   clauses get folded with a GCD of -2 for `i' and a minimum of -1 for
+   'j'.  */
+#pragma omp ordered \
+  depend(sink:i-8,j-1) \
+  depend(sink:i, j-2) \
+  depend(sink:i-4,j+3) \
+  depend(sink:i-6,j+2)
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "omp ordered depend\\(sink:i-2,j-1\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
new file mode 100644
index 0000000..b3b4ac7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i += 3)
+    for (j=0; j < N; ++j)
+    {
+#pragma omp ordered depend(sink:i-8,j-1) /* { dg-warning "ignoring sink clause with offset that is not a multiple" } */
+#pragma omp ordered depend(sink:i+3,j-1) /* { dg-error "first offset must be in opposite direction" } */
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
new file mode 100644
index 0000000..4d6293c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test that we fold sink offsets correctly while taking into account
+   pointer sizes.  */
+
+typedef struct {
+    char stuff[400];
+} foo;
+
+void
+funk (foo *begin, foo *end)
+{
+  foo *p;
+#pragma omp parallel for ordered(1)
+  for (p=end; p > begin; p--)
+    {
+#pragma omp ordered depend(sink:p+2) depend(sink:p+4)
+      void bar ();
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+800B\\)" 1 "omplower" } } */
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index d8f7b46..c20db61 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -514,6 +514,7 @@ namespace wi
   BINARY_FUNCTION div_round (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION divmod_trunc (const T1 &, const T2 &, signop,
 				WI_BINARY_RESULT (T1, T2) *);
+  BINARY_FUNCTION gcd (const T1 &, const T2 &, signop = UNSIGNED);
   BINARY_FUNCTION mod_trunc (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION smod_trunc (const T1 &, const T2 &);
   BINARY_FUNCTION umod_trunc (const T1 &, const T2 &);
@@ -2653,6 +2654,27 @@ wi::divmod_trunc (const T1 &x, const T2 &y, signop sgn,
   return quotient;
 }
 
+/* Compute the greatest common divisor of two numbers A and B using
+   Euclid's algorithm.  */
+template <typename T1, typename T2>
+inline WI_BINARY_RESULT (T1, T2)
+wi::gcd (const T1 &a, const T2 &b, signop sgn)
+{
+  T1 x, y, z;
+
+  x = wi::abs (a);
+  y = wi::abs (b);
+
+  while (gt_p (x, 0, sgn))
+    {
+      z = mod_trunc (y, x, sgn);
+      y = x;
+      x = z;
+    }
+
+  return y;
+}
+
 /* Compute X / Y, rouding towards 0, and return the remainder.
    Treat X and Y as having the signedness given by SGN.  Indicate
    in *OVERFLOW if the division overflows.  */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [gomp4.1] fold ordered depend(sink) clauses
  2015-07-31  2:56   ` Aldy Hernandez
@ 2015-07-31 18:18     ` Jakub Jelinek
  2015-07-31 19:28       ` Aldy Hernandez
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2015-07-31 18:18 UTC (permalink / raw)
  To: Aldy Hernandez; +Cc: gcc-patches

On Thu, Jul 30, 2015 at 07:27:51PM -0700, Aldy Hernandez wrote:
> +static gimple
> +gimplify_omp_ordered (tree expr, gimple_seq body)
> +{
> +  tree c, decls;
> +  int failures = 0;
> +  unsigned int i;
> +
> +  if (gimplify_omp_ctxp)
> +    for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
> +      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
> +	  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
> +	{
> +	  bool fail = false;
> +	  for (decls = OMP_CLAUSE_DECL (c), i = 0;
> +	       decls && TREE_CODE (decls) == TREE_LIST;
> +	       decls = TREE_CHAIN (decls), ++i)
> +	    if (i < gimplify_omp_ctxp->loop_iter_var.length ()
> +		&& TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
> +	      {
> +		error_at (OMP_CLAUSE_LOCATION (c),
> +			  "variable %qE is not an iteration "
> +			  "of outermost loop %d, expected %qE",
> +			  TREE_VALUE (decls), i + 1,
> +			  gimplify_omp_ctxp->loop_iter_var[i]);
> +		fail = true;
> +		failures++;
> +	      }
> +	  /* Avoid being too redundant.  */
> +	  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
> +	    {
> +	      error_at (OMP_CLAUSE_LOCATION (c),
> +			"number of variables in depend(sink) "
> +			"clause does not match number of "
> +			"iteration variables");
> +	      failures++;
> +	    }

failures seems to be a write only variable.
Perhaps if fail is true (set it to true after this error too),
don't create the ordered at all?  Or drop the bogus clauses.

> +
> +  /* ?? This is stupid.  We need to call extract_omp_for_data just
> +     to get the number of ordered loops... */
> +  extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, NULL);
> +  if (!fd.ordered)
> +    return;
> +  struct omp_for_data_loop *loops
> +    = (struct omp_for_data_loop *)
> +    alloca (fd.ordered * sizeof (struct omp_for_data_loop));

You can do just what expand_omp_for does:
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (ctx->outer->stmt)
              * sizeof (struct omp_for_data_loop));
  extract_omp_for_data (as_a <gomp_for *> (ctx->outer_stmt), &fd, loops);

> +     For example:
> +
> +	     for (i=0; i < N; ++i)
> +		depend(sink:i-8,j-1)
> +		depend(sink:i,j-2)	// Completely ignored because i+0.
> +		depend(sink:i-4,j+3)
> +		depend(sink:i-6,j+2)

Even when writing comments, it is better to make it valid:
	#pragma omp for ordered(2)
	for (i=0; i < N; ++i)
	  for (j=0; j < M; ++j)
	    {
	      #pragma omp ordered \
		depend(sink:i-8,j-1) \
		depend(sink:i,j-2) \	// Completely ignored because i+0.
		depend(sink:i-4,j+3) \
		depend(sink:i-6,j+2)
	      #pragma omp ordered depend(source)
	    }

> +
> +     Folded clause is:
> +
> +        depend(sink:-gcd(8,4,6),min(-1,3,2))

Spaces here instead of desirable tab.

> +	  -or-
> +	depend(sink:-2,-1)
> +  */
> +
> +  /* FIXME: Computing GCD's where the first element is zero is
> +     non-trivial in the presence of collapsed loops.  Do this later.  */
> +  if (fd.collapse > 1)
> +    return;

Better ad an gcc_assert for now.

> +	  enum {
> +	    DIR_UNKNOWN,
> +	    DIR_FORWARD,
> +	    DIR_BACKWARD
> +	  } loop_dir;
> +	  switch (fd.loops[i].cond_code)
> +	    {
> +	    case LT_EXPR:
> +	    case LE_EXPR:
> +	      loop_dir = DIR_FORWARD;
> +	      break;
> +	    case GT_EXPR:
> +	    case GE_EXPR:
> +	      loop_dir = DIR_BACKWARD;
> +	      break;
> +	    default:
> +	      loop_dir = DIR_UNKNOWN;
> +	      gcc_unreachable ();
> +	    }

I think there is no point in doing this, extract_omp_for_data
canonicalizes cond_code already, so it is always
LT_EXPR or GT_EXPR.  So just gcc_assert it is these two and
compare it directly, or add bool forward = fd.loops[i].cond_code == LT_EXPR;
?
> +
> +	  /* While the committee makes up its mind, bail if we have any
> +	     non-constant steps.  */
> +	  if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
> +	    goto gimplify_omp_ordered_ret;

Misnamed label.  lower instead?

> +	  wide_int offset = TREE_PURPOSE (decls);
> +	  if (!iter_vars[i])
> +	    iter_vars[i] = TREE_VALUE (decls);
> +
> +	  /* Ignore invalid offsets that are not multiples of the step.  */
> +	  if (!wi::multiple_of_p
> +	      (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
> +	       UNSIGNED))

What does wi::abs do with very large unsigned integers?
  #pragma omp ordered(2)
  for (unsigned int i = 64; i > 32; i += -1)
    for (unsigned int j = 0; j < 32; j++)
?
step of i is 0xffffffff and cond_code is GT_EXPR.  I suppose for unsigned
steps you don't want to use wi::abs, but just negate the step if
iterating downward?

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [gomp4.1] fold ordered depend(sink) clauses
  2015-07-31 18:18     ` Jakub Jelinek
@ 2015-07-31 19:28       ` Aldy Hernandez
  2015-07-31 21:31         ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Aldy Hernandez @ 2015-07-31 19:28 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 7135 bytes --]

On 07/31/2015 09:38 AM, Jakub Jelinek wrote:
> On Thu, Jul 30, 2015 at 07:27:51PM -0700, Aldy Hernandez wrote:
>> +static gimple
>> +gimplify_omp_ordered (tree expr, gimple_seq body)
>> +{
>> +  tree c, decls;
>> +  int failures = 0;
>> +  unsigned int i;
>> +
>> +  if (gimplify_omp_ctxp)
>> +    for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
>> +      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
>> +	  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
>> +	{
>> +	  bool fail = false;
>> +	  for (decls = OMP_CLAUSE_DECL (c), i = 0;
>> +	       decls && TREE_CODE (decls) == TREE_LIST;
>> +	       decls = TREE_CHAIN (decls), ++i)
>> +	    if (i < gimplify_omp_ctxp->loop_iter_var.length ()
>> +		&& TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
>> +	      {
>> +		error_at (OMP_CLAUSE_LOCATION (c),
>> +			  "variable %qE is not an iteration "
>> +			  "of outermost loop %d, expected %qE",
>> +			  TREE_VALUE (decls), i + 1,
>> +			  gimplify_omp_ctxp->loop_iter_var[i]);
>> +		fail = true;
>> +		failures++;
>> +	      }
>> +	  /* Avoid being too redundant.  */
>> +	  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
>> +	    {
>> +	      error_at (OMP_CLAUSE_LOCATION (c),
>> +			"number of variables in depend(sink) "
>> +			"clause does not match number of "
>> +			"iteration variables");
>> +	      failures++;
>> +	    }
>
> failures seems to be a write only variable.
> Perhaps if fail is true (set it to true after this error too),
> don't create the ordered at all?  Or drop the bogus clauses.

Oops.  Left over from when we were doing everything in the gimplifier.

I think we can just emit a NOP when error here.  There's no sense in 
removing individual clauses and all that since lower_omp_1 is going to 
ignore sources with error anyhow:

   /* If we have issued syntax errors, avoid doing any heavy lifting.
      Just replace the OMP directives with a NOP to avoid
      confusing RTL expansion.  */
   if (seen_error () && is_gimple_omp (stmt))
     {
       gsi_replace (gsi_p, gimple_build_nop (), true);
       return;
     }

>
>> +
>> +  /* ?? This is stupid.  We need to call extract_omp_for_data just
>> +     to get the number of ordered loops... */
>> +  extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, NULL);
>> +  if (!fd.ordered)
>> +    return;
>> +  struct omp_for_data_loop *loops
>> +    = (struct omp_for_data_loop *)
>> +    alloca (fd.ordered * sizeof (struct omp_for_data_loop));
>
> You can do just what expand_omp_for does:
>    struct omp_for_data fd;
>    struct omp_for_data_loop *loops;
>
>    loops
>      = (struct omp_for_data_loop *)
>        alloca (gimple_omp_for_collapse (ctx->outer->stmt)
>                * sizeof (struct omp_for_data_loop));
>    extract_omp_for_data (as_a <gomp_for *> (ctx->outer_stmt), &fd, loops);

Ah!  The collapse of the the outer statement!  Done.

>
>> +     For example:
>> +
>> +	     for (i=0; i < N; ++i)
>> +		depend(sink:i-8,j-1)
>> +		depend(sink:i,j-2)	// Completely ignored because i+0.
>> +		depend(sink:i-4,j+3)
>> +		depend(sink:i-6,j+2)
>
> Even when writing comments, it is better to make it valid:
> 	#pragma omp for ordered(2)
> 	for (i=0; i < N; ++i)
> 	  for (j=0; j < M; ++j)
> 	    {
> 	      #pragma omp ordered \
> 		depend(sink:i-8,j-1) \
> 		depend(sink:i,j-2) \	// Completely ignored because i+0.
> 		depend(sink:i-4,j+3) \
> 		depend(sink:i-6,j+2)
> 	      #pragma omp ordered depend(source)
> 	    }

You have _got_ to be kidding?  Really?  Oh well...done.

>
>> +
>> +     Folded clause is:
>> +
>> +        depend(sink:-gcd(8,4,6),min(-1,3,2))
>
> Spaces here instead of desirable tab.

Done.

>
>> +	  -or-
>> +	depend(sink:-2,-1)
>> +  */
>> +
>> +  /* FIXME: Computing GCD's where the first element is zero is
>> +     non-trivial in the presence of collapsed loops.  Do this later.  */
>> +  if (fd.collapse > 1)
>> +    return;
>
> Better ad an gcc_assert for now.

Done.

>
>> +	  enum {
>> +	    DIR_UNKNOWN,
>> +	    DIR_FORWARD,
>> +	    DIR_BACKWARD
>> +	  } loop_dir;
>> +	  switch (fd.loops[i].cond_code)
>> +	    {
>> +	    case LT_EXPR:
>> +	    case LE_EXPR:
>> +	      loop_dir = DIR_FORWARD;
>> +	      break;
>> +	    case GT_EXPR:
>> +	    case GE_EXPR:
>> +	      loop_dir = DIR_BACKWARD;
>> +	      break;
>> +	    default:
>> +	      loop_dir = DIR_UNKNOWN;
>> +	      gcc_unreachable ();
>> +	    }
>
> I think there is no point in doing this, extract_omp_for_data
> canonicalizes cond_code already, so it is always
> LT_EXPR or GT_EXPR.  So just gcc_assert it is these two and

Well, [LG]E_EXPR can sneak through for i >= fd->collapse in 
extract_omp_for_data:

	case LE_EXPR:
	  if (i >= fd->collapse)
	    break;
	  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
	    loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1);
	  else
	    loop->n2 = fold_build2_loc (loc,
				    PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
				    build_int_cst (TREE_TYPE (loop->n2), 1));
	  loop->cond_code = LT_EXPR;
	  break;

How about we assert that it's LE/LT/GT/GE and set a `bool forward' as 
suggested?

> compare it directly, or add bool forward = fd.loops[i].cond_code == LT_EXPR;
> ?
>> +
>> +	  /* While the committee makes up its mind, bail if we have any
>> +	     non-constant steps.  */
>> +	  if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
>> +	    goto gimplify_omp_ordered_ret;
>
> Misnamed label.  lower instead?

Fixed.

>
>> +	  wide_int offset = TREE_PURPOSE (decls);
>> +	  if (!iter_vars[i])
>> +	    iter_vars[i] = TREE_VALUE (decls);
>> +
>> +	  /* Ignore invalid offsets that are not multiples of the step.  */
>> +	  if (!wi::multiple_of_p
>> +	      (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
>> +	       UNSIGNED))
>
> What does wi::abs do with very large unsigned integers?
>    #pragma omp ordered(2)
>    for (unsigned int i = 64; i > 32; i += -1)
>      for (unsigned int j = 0; j < 32; j++)
> ?
> step of i is 0xffffffff and cond_code is GT_EXPR.  I suppose for unsigned
> steps you don't want to use wi::abs, but just negate the step if
> iterating downward?

Seems to work just fine for:

    #pragma omp parallel for ordered(2)
     for (unsigned int i = N; i > 32; i += -1)
           for (unsigned int j = 0; j < 32; j++)
     {
#pragma omp ordered depend(sink:i+1,j-2) depend(sink:i+2,j-3)
...
...

(gdb) call debug_generic_stmt(fd.loops[i].step)
4294967295

(gdb) p/x 4294967295
$17 = 0xffffffff

The conversion from tree -> wide-int correctly interprets this:

(gdb) print x
$18 = (const generic_wide_int<wide_int_storage> &) @0x7fffffffd5d0: 
{<wide_int_storage> = {val = {-1, 29799175824, 140737488344576}, len = 1,
     precision = 32}, static is_sign_extended = <optimized out>}
(gdb) call x.dump()
[0xffffffffffffffff], precision = 32

..and then negates it correctly.  For the i+1,j-2, we end up with 
(correctly):

(gdb) call x.dump()
[0x2], precision = 32
(gdb) call y.dump()
[0x1], precision = 32

Then the folded dependence is depend(sink:i+1,j+4294967293), which is -3 
and what I would expect.  Unless I'm missing something...

How does the attached look?

Aldy

[-- Attachment #2: curr --]
[-- Type: text/plain, Size: 18645 bytes --]

commit 92e33750f31c7954b8a92763d20630f4e1af9b6b
Author: Aldy Hernandez <aldyh@redhat.com>
Date:   Wed Jul 29 13:39:06 2015 -0700

    	* wide-int.h (wi::gcd): New.
    	* gimplify.c (struct gimplify_omp_ctx): Rename iter_vars to
    	loop_iter_var.
    	(delete_omp_context): Same.
    	(gimplify_expr): Move code handling OMP_ORDERED into...
    	(gimplify_omp_ordered): ...here.  New.
    	* omp-low.c (lower_omp_ordered_clauses): New.
    	(lower_omp_ordered): Call lower_omp_ordered_clauses.
    testsuite/
    	* gcc.dg/gomp/sink-fold-1.c: New.
    	* gcc.dg/gomp/sink-fold-2.c: New.
    	* gcc.dg/gomp/sink-fold-3.c: New.
    	* c-c++-common/gomp/sink-4.c: Look in omplower dump file instead.
    	* g++.dg/gomp/sink-3.C: Have sink offset be in the opposite
    	direction.  Make variables more readable.

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 2331001..250442f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -153,7 +153,7 @@ struct gimplify_omp_ctx
   splay_tree variables;
   hash_set<tree> *privatized_types;
   /* Iteration variables in an OMP_FOR.  */
-  vec<tree> iter_vars;
+  vec<tree> loop_iter_var;
   location_t location;
   enum omp_clause_default_kind default_kind;
   enum omp_region_type region_type;
@@ -392,7 +392,7 @@ delete_omp_context (struct gimplify_omp_ctx *c)
 {
   splay_tree_delete (c->variables);
   delete c->privatized_types;
-  c->iter_vars.release ();
+  c->loop_iter_var.release ();
   XDELETE (c);
 }
 
@@ -7490,8 +7490,15 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
 	      == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
 	      == TREE_VEC_LENGTH (OMP_FOR_INCR (for_stmt)));
-  gimplify_omp_ctxp->iter_vars.create (TREE_VEC_LENGTH
-				       (OMP_FOR_INIT (for_stmt)));
+
+  tree c = find_omp_clause (OMP_FOR_CLAUSES (for_stmt), OMP_CLAUSE_ORDERED);
+  bool is_doacross = false;
+  if (c && OMP_CLAUSE_ORDERED_EXPR (c))
+    {
+      is_doacross = true;
+      gimplify_omp_ctxp->loop_iter_var.create (TREE_VEC_LENGTH
+					       (OMP_FOR_INIT (for_stmt)));
+    }
   for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
     {
       t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
@@ -7500,11 +7507,14 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gcc_assert (DECL_P (decl));
       gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (decl))
 		  || POINTER_TYPE_P (TREE_TYPE (decl)));
-      if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
-	gimplify_omp_ctxp->iter_vars.quick_push
-	  (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
-      else
-	gimplify_omp_ctxp->iter_vars.quick_push (decl);
+      if (is_doacross)
+	{
+	  if (TREE_CODE (for_stmt) == OMP_FOR && OMP_FOR_ORIG_DECLS (for_stmt))
+	    gimplify_omp_ctxp->loop_iter_var.quick_push
+	      (TREE_VEC_ELT (OMP_FOR_ORIG_DECLS (for_stmt), i));
+	  else
+	    gimplify_omp_ctxp->loop_iter_var.quick_push (decl);
+	}
 
       /* Make sure the iteration variable is private.  */
       tree c = NULL_TREE;
@@ -8387,6 +8397,56 @@ gimplify_transaction (tree *expr_p, gimple_seq *pre_p)
   return GS_ALL_DONE;
 }
 
+/* Gimplify an OMP_ORDERED construct.  EXPR is the tree version.  BODY
+   is the OMP_BODY of the original EXPR (which has already been
+   gimplified so it's not present in the EXPR).
+
+   Return the gimplified GIMPLE_OMP_ORDERED tuple.  */
+
+static gimple
+gimplify_omp_ordered (tree expr, gimple_seq body)
+{
+  tree c, decls;
+  int failures = 0;
+  unsigned int i;
+
+  if (gimplify_omp_ctxp)
+    for (c = OMP_ORDERED_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
+	  && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
+	{
+	  bool fail = false;
+	  for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	       decls && TREE_CODE (decls) == TREE_LIST;
+	       decls = TREE_CHAIN (decls), ++i)
+	    if (i < gimplify_omp_ctxp->loop_iter_var.length ()
+		&& TREE_VALUE (decls) != gimplify_omp_ctxp->loop_iter_var[i])
+	      {
+		error_at (OMP_CLAUSE_LOCATION (c),
+			  "variable %qE is not an iteration "
+			  "of outermost loop %d, expected %qE",
+			  TREE_VALUE (decls), i + 1,
+			  gimplify_omp_ctxp->loop_iter_var[i]);
+		fail = true;
+		failures++;
+	      }
+	  /* Avoid being too redundant.  */
+	  if (!fail && i != gimplify_omp_ctxp->loop_iter_var.length ())
+	    {
+	      error_at (OMP_CLAUSE_LOCATION (c),
+			"number of variables in depend(sink) "
+			"clause does not match number of "
+			"iteration variables");
+	      fail = true;
+	      failures++;
+	    }
+	}
+
+  if (failures)
+    return gimple_build_nop ();
+  return gimple_build_omp_ordered (body, OMP_ORDERED_CLAUSES (expr));
+}
+
 /* Convert the GENERIC expression tree *EXPR_P to GIMPLE.  If the
    expression produces a value to be used as an operand inside a GIMPLE
    statement, the value will be stored back in *EXPR_P.  This value will
@@ -9200,38 +9260,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 		}
 		break;
 	      case OMP_ORDERED:
-		if (gimplify_omp_ctxp)
-		  for (tree c = OMP_ORDERED_CLAUSES (*expr_p);
-		       c; c = OMP_CLAUSE_CHAIN (c))
-		    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
-			&& OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
-		      {
-			unsigned int n = 0;
-			bool fail = false;
-			for (tree decls = OMP_CLAUSE_DECL (c);
-			     decls && TREE_CODE (decls) == TREE_LIST;
-			     decls = TREE_CHAIN (decls), ++n)
-			  if (n < gimplify_omp_ctxp->iter_vars.length ()
-			      && TREE_VALUE (decls)
-			      != gimplify_omp_ctxp->iter_vars[n])
-			    {
-			      error_at (OMP_CLAUSE_LOCATION (c),
-					"variable %qE is not an iteration "
-					"of outermost loop %d, expected %qE",
-					TREE_VALUE (decls), n + 1,
-					gimplify_omp_ctxp->iter_vars[n]);
-			      fail = true;
-			    }
-			/* Avoid being too redundant.  */
-			if (!fail
-			    && n != gimplify_omp_ctxp->iter_vars.length ())
-			  error_at (OMP_CLAUSE_LOCATION (c),
-			     "number of variables in depend(sink) clause "
-			     "does not match number of iteration variables");
-		      }
-
-		g = gimple_build_omp_ordered (body,
-					      OMP_ORDERED_CLAUSES (*expr_p));
+		g = gimplify_omp_ordered (*expr_p, body);
 		break;
 	      case OMP_CRITICAL:
 		gimplify_scan_omp_clauses (&OMP_CRITICAL_CLAUSES (*expr_p),
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e7d21ea..489e939 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11969,6 +11969,219 @@ lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 }
 
 
+/* Fold the OMP_ORDERED_CLAUSES for the OMP_ORDERED in STMT if possible.  */
+
+static void
+lower_omp_ordered_clauses (gomp_ordered *ord_stmt, omp_context *ctx)
+{
+  struct omp_for_data fd;
+  if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
+    return;
+
+  unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt);
+  if (!len)
+    return;
+  struct omp_for_data_loop *loops
+    = (struct omp_for_data_loop *)
+    alloca (len * sizeof (struct omp_for_data_loop));
+  extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
+  if (!fd.ordered)
+    return;
+
+  /* Canonicalize sink dependence clauses into one folded clause if
+     possible.
+
+     The basic algorithm is to create a sink vector whose first
+     element is the GCD of all the first elements, and whose remaining
+     elements are the minimum of the subsequent columns.
+
+     We ignore dependence vectors whose first element is zero because
+     such dependencies are known to be executed by the same thread.
+
+     We take into account the direction of the loop, so a minimum
+     becomes a maximum if the loop is iterating backwards.  We also
+     ignore sink clauses where the loop direction is unknown, or where
+     the offsets are clearly invalid because they are not a multiple
+     of the loop increment.
+
+     For example:
+
+	#pragma omp for ordered(2)
+	for (i=0; i < N; ++i)
+	  for (j=0; j < M; ++j)
+	    {
+	      #pragma omp ordered \
+		depend(sink:i-8,j-1) \
+		depend(sink:i,j-2) \	// Completely ignored because i+0.
+		depend(sink:i-4,j+3) \
+		depend(sink:i-6,j+2)
+	      #pragma omp ordered depend(source)
+	    }
+
+     Folded clause is:
+
+	depend(sink:-gcd(8,4,6),min(-1,3,2))
+	  -or-
+	depend(sink:-2,-1)
+  */
+
+  /* FIXME: Computing GCD's where the first element is zero is
+     non-trivial in the presence of collapsed loops.  Do this later.  */
+  gcc_assert (fd.collapse <= 1);
+
+  vec<wide_int> folded_deps;
+  folded_deps.create (len);
+  folded_deps.quick_grow_cleared (len);
+  /* Bitmap representing dimensions in the final dependency vector that
+     have been set.  */
+  sbitmap folded_deps_used = sbitmap_alloc (len);
+  bitmap_clear (folded_deps_used);
+  /* TRUE if the first dimension's offset is negative.  */
+  bool neg_offset_p = false;
+
+  /* ?? We need to save the original iteration variables stored in the
+     depend clauses, because those in fd.loops[].v have already been
+     gimplified.  Perhaps we should use the gimplified versions. ??  */
+  tree *iter_vars = (tree *) alloca (sizeof (tree) * len);
+  memset (iter_vars, 0, sizeof (tree) * len);
+
+  tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
+  tree c;
+  unsigned int i;
+  while ((c = *list_p) != NULL)
+    {
+      bool remove = false;
+
+      if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
+	  || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
+	goto next_ordered_clause;
+
+      tree decls;
+      for (decls = OMP_CLAUSE_DECL (c), i = 0;
+	   decls && TREE_CODE (decls) == TREE_LIST;
+	   decls = TREE_CHAIN (decls), ++i)
+	{
+	  gcc_assert (i < len);
+
+	  /* extract_omp_for_data has canonicalized the condition.  */
+	  gcc_assert (fd.loops[i].cond_code == LT_EXPR
+		      || fd.loops[i].cond_code == LE_EXPR
+		      || fd.loops[i].cond_code == GT_EXPR
+		      || fd.loops[i].cond_code == GE_EXPR);
+	  bool forward = fd.loops[i].cond_code == LT_EXPR
+	    || fd.loops[i].cond_code == LE_EXPR;
+
+	  /* While the committee makes up its mind, bail if we have any
+	     non-constant steps.  */
+	  if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
+	    goto lower_omp_ordered_ret;
+
+	  wide_int offset = TREE_PURPOSE (decls);
+	  if (!iter_vars[i])
+	    iter_vars[i] = TREE_VALUE (decls);
+
+	  /* Ignore invalid offsets that are not multiples of the step.  */
+	  if (!wi::multiple_of_p
+	      (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
+	       UNSIGNED))
+	    {
+	      warning_at (OMP_CLAUSE_LOCATION (c), 0,
+			  "ignoring sink clause with offset that is not "
+			  "a multiple of the loop step");
+	      remove = true;
+	      goto next_ordered_clause;
+	    }
+
+	  /* Calculate the first dimension.  The first dimension of
+	     the folded dependency vector is the GCD of the first
+	     elements, while ignoring any first elements whose offset
+	     is 0.  */
+	  if (i == 0)
+	    {
+	      /* Ignore dependence vectors whose first dimension is 0.  */
+	      if (offset == 0)
+		{
+		  remove = true;
+		  goto next_ordered_clause;
+		}
+	      else
+		{
+		  neg_offset_p =
+		    wi::neg_p (offset,
+			       TYPE_SIGN (TREE_TYPE (TREE_PURPOSE (decls))));
+		  if ((forward && !neg_offset_p)
+		      || (!forward && neg_offset_p))
+		    {
+		      error_at (OMP_CLAUSE_LOCATION (c),
+				"first offset must be in opposite direction "
+				"of loop iterations");
+		      goto lower_omp_ordered_ret;
+		    }
+		  /* Initialize the first time around.  */
+		  if (!bitmap_bit_p (folded_deps_used, 0))
+		    {
+		      bitmap_set_bit (folded_deps_used, 0);
+		      folded_deps[0] = wi::abs (offset);
+		    }
+		  else
+		    folded_deps[i] = wi::gcd (folded_deps[0], offset, UNSIGNED);
+		}
+	    }
+	  /* Calculate minimum for the remaining dimensions.  */
+	  else
+	    {
+	      if (!bitmap_bit_p (folded_deps_used, i))
+		{
+		  bitmap_set_bit (folded_deps_used, i);
+		  folded_deps[i] = offset;
+		}
+	      else if ((forward && wi::lts_p (offset, folded_deps[i]))
+		       || (!forward && wi::gts_p (offset, folded_deps[i])))
+		folded_deps[i] = offset;
+	    }
+	}
+
+      remove = true;
+
+    next_ordered_clause:
+      if (remove)
+	*list_p = OMP_CLAUSE_CHAIN (c);
+      else
+	list_p = &OMP_CLAUSE_CHAIN (c);
+    }
+
+  for (i = 0; i < len; ++i)
+    if (!bitmap_bit_p (folded_deps_used, i))
+      break;
+  if (i == len)
+    {
+      if (neg_offset_p)
+	folded_deps[0] = -folded_deps[0];
+
+      tree vec = NULL;
+      i = len;
+      do
+	{
+	  i--;
+	  vec = tree_cons (wide_int_to_tree (TREE_TYPE (fd.loops[i].v),
+					     folded_deps[i]),
+			   iter_vars[i], vec);
+	}
+      while (i);
+
+      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
+      OMP_CLAUSE_DEPEND_KIND (c) = OMP_CLAUSE_DEPEND_SINK;
+      OMP_CLAUSE_DECL (c) = vec;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_ordered_clauses (ord_stmt);
+      *gimple_omp_ordered_clauses_ptr (ord_stmt) = c;
+    }
+
+ lower_omp_ordered_ret:
+  sbitmap_free (folded_deps_used);
+  folded_deps.release ();
+}
+
+
 /* Expand code for an OpenMP ordered directive.  */
 
 static void
@@ -11979,6 +12192,8 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gcall *x;
   gbind *bind;
 
+  lower_omp_ordered_clauses (as_a <gomp_ordered *> (stmt), ctx);
+
   push_gimplify_context ();
 
   block = make_node (BLOCK);
diff --git a/gcc/testsuite/c-c++-common/gomp/sink-4.c b/gcc/testsuite/c-c++-common/gomp/sink-4.c
index 7934de2..111178b 100644
--- a/gcc/testsuite/c-c++-common/gomp/sink-4.c
+++ b/gcc/testsuite/c-c++-common/gomp/sink-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
 
 /* Test that we adjust pointer offsets for sink variables
    correctly.  */
@@ -22,4 +22,4 @@ funk (foo *begin, foo *end)
     }
 }
 
-/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400\\)" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+400.\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/g++.dg/gomp/sink-1.C b/gcc/testsuite/g++.dg/gomp/sink-1.C
index a83adbf..982d36c 100644
--- a/gcc/testsuite/g++.dg/gomp/sink-1.C
+++ b/gcc/testsuite/g++.dg/gomp/sink-1.C
@@ -12,6 +12,7 @@ void baz ()
       {
 #pragma omp ordered depend(sink:i-3,j)
 	bar (i, j, 0);
+#pragma omp ordered depend(source)
       }
 }
 
diff --git a/gcc/testsuite/g++.dg/gomp/sink-3.C b/gcc/testsuite/g++.dg/gomp/sink-3.C
index 83a742e..4271d66 100644
--- a/gcc/testsuite/g++.dg/gomp/sink-3.C
+++ b/gcc/testsuite/g++.dg/gomp/sink-3.C
@@ -8,7 +8,7 @@ typedef struct {
     char stuff[400];
 } foo;
 
-foo *p, *q, *r;
+foo *end, *begin, *p;
 
 template<int N>
 void
@@ -16,7 +16,7 @@ funk ()
 {
   int i,j;
 #pragma omp parallel for ordered(1)
-  for (p=q; p < q; p--)
+  for (p=end; p > begin; p--)
     {
 #pragma omp ordered depend(sink:p+1)
       void bar ();
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
new file mode 100644
index 0000000..f2961b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test depend(sink) clause folding.  */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i++)
+    for (j=0; j < N; ++j)
+    {
+/* We remove the (sink:i,j-2) by virtue of it the i+0.  The remaining
+   clauses get folded with a GCD of -2 for `i' and a minimum of -1 for
+   'j'.  */
+#pragma omp ordered \
+  depend(sink:i-8,j-1) \
+  depend(sink:i, j-2) \
+  depend(sink:i-4,j+3) \
+  depend(sink:i-6,j+2)
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "omp ordered depend\\(sink:i-2,j-1\\)" 1 "omplower" } } */
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
new file mode 100644
index 0000000..b3b4ac7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+int i,j, N;
+
+extern void bar();
+
+void
+funk ()
+{
+#pragma omp parallel for ordered(2)
+  for (i=0; i < N; i += 3)
+    for (j=0; j < N; ++j)
+    {
+#pragma omp ordered depend(sink:i-8,j-1) /* { dg-warning "ignoring sink clause with offset that is not a multiple" } */
+#pragma omp ordered depend(sink:i+3,j-1) /* { dg-error "first offset must be in opposite direction" } */
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
new file mode 100644
index 0000000..4d6293c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/sink-fold-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-omplower" } */
+
+/* Test that we fold sink offsets correctly while taking into account
+   pointer sizes.  */
+
+typedef struct {
+    char stuff[400];
+} foo;
+
+void
+funk (foo *begin, foo *end)
+{
+  foo *p;
+#pragma omp parallel for ordered(1)
+  for (p=end; p > begin; p--)
+    {
+#pragma omp ordered depend(sink:p+2) depend(sink:p+4)
+      void bar ();
+        bar();
+#pragma omp ordered depend(source)
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "depend\\(sink:p\\+800B\\)" 1 "omplower" } } */
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index d8f7b46..c20db61 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -514,6 +514,7 @@ namespace wi
   BINARY_FUNCTION div_round (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION divmod_trunc (const T1 &, const T2 &, signop,
 				WI_BINARY_RESULT (T1, T2) *);
+  BINARY_FUNCTION gcd (const T1 &, const T2 &, signop = UNSIGNED);
   BINARY_FUNCTION mod_trunc (const T1 &, const T2 &, signop, bool * = 0);
   BINARY_FUNCTION smod_trunc (const T1 &, const T2 &);
   BINARY_FUNCTION umod_trunc (const T1 &, const T2 &);
@@ -2653,6 +2654,27 @@ wi::divmod_trunc (const T1 &x, const T2 &y, signop sgn,
   return quotient;
 }
 
+/* Compute the greatest common divisor of two numbers A and B using
+   Euclid's algorithm.  */
+template <typename T1, typename T2>
+inline WI_BINARY_RESULT (T1, T2)
+wi::gcd (const T1 &a, const T2 &b, signop sgn)
+{
+  T1 x, y, z;
+
+  x = wi::abs (a);
+  y = wi::abs (b);
+
+  while (gt_p (x, 0, sgn))
+    {
+      z = mod_trunc (y, x, sgn);
+      y = x;
+      x = z;
+    }
+
+  return y;
+}
+
 /* Compute X / Y, rouding towards 0, and return the remainder.
    Treat X and Y as having the signedness given by SGN.  Indicate
    in *OVERFLOW if the division overflows.  */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [gomp4.1] fold ordered depend(sink) clauses
  2015-07-31 19:28       ` Aldy Hernandez
@ 2015-07-31 21:31         ` Jakub Jelinek
  0 siblings, 0 replies; 6+ messages in thread
From: Jakub Jelinek @ 2015-07-31 21:31 UTC (permalink / raw)
  To: Aldy Hernandez; +Cc: gcc-patches

On Fri, Jul 31, 2015 at 11:48:01AM -0700, Aldy Hernandez wrote:
> commit 92e33750f31c7954b8a92763d20630f4e1af9b6b
> Author: Aldy Hernandez <aldyh@redhat.com>
> Date:   Wed Jul 29 13:39:06 2015 -0700
> 
>     	* wide-int.h (wi::gcd): New.
>     	* gimplify.c (struct gimplify_omp_ctx): Rename iter_vars to
>     	loop_iter_var.
>     	(delete_omp_context): Same.
>     	(gimplify_expr): Move code handling OMP_ORDERED into...
>     	(gimplify_omp_ordered): ...here.  New.
>     	* omp-low.c (lower_omp_ordered_clauses): New.
>     	(lower_omp_ordered): Call lower_omp_ordered_clauses.
>     testsuite/
>     	* gcc.dg/gomp/sink-fold-1.c: New.
>     	* gcc.dg/gomp/sink-fold-2.c: New.
>     	* gcc.dg/gomp/sink-fold-3.c: New.
>     	* c-c++-common/gomp/sink-4.c: Look in omplower dump file instead.
>     	* g++.dg/gomp/sink-3.C: Have sink offset be in the opposite
>     	direction.  Make variables more readable.

Ok for branch, thanks.

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-07-31 19:28 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-30  4:21 [gomp4.1] fold ordered depend(sink) clauses Aldy Hernandez
2015-07-30 10:30 ` Jakub Jelinek
2015-07-31  2:56   ` Aldy Hernandez
2015-07-31 18:18     ` Jakub Jelinek
2015-07-31 19:28       ` Aldy Hernandez
2015-07-31 21:31         ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).