public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][gimple-interchange] Add reduction validity check
@ 2017-12-04 13:11 Richard Biener
  2017-12-04 14:28 ` Bin.Cheng
  2017-12-04 15:13 ` Bin.Cheng
  0 siblings, 2 replies; 5+ messages in thread
From: Richard Biener @ 2017-12-04 13:11 UTC (permalink / raw)
  To: amker.cheng; +Cc: gcc-patches


I've noticed we perform FP reduction association without the required
checks for associative math.  I've added 
gcc.dg/tree-ssa/loop-interchange-1b.c to cover this.

I also noticed we happily interchange a loop with a reduction like

 sum = a[i] - sum;

where a change in order of elements isn't ok.  Unfortunately bwaves
is exactly a case where single_use != next_def (tried to simply remove
that case for now), because reassoc didn't have a chance to fix the
operand order.  Thus this patch exports the relevant handling from
the vectorizer (for stage1 having a separate infrastructure gathering /
analyzing of reduction/induction infrastructure would be nice...)
and uses it from interchange.  We then don't handle
gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer 
missed-opt is PR65930).  I didn't bother to split up the vectorizer
code further to implement relaxed validity checking but simply XFAILed
this testcase.

Earlier I simplified allocation stuff in the main loop which is why
this part is included in this patch.

Bootstrap running on x86_64-unknown-linux-gnu.

I'll see to craft a testcase with the sum = a[i] - sum; mis-handling.

Ok?

Thanks,
Richard.

2017-12-04  Richard Biener  <rguenther@suse.de>

	* tree-vectorizer.h (check_reduction_path): Declare.
	* tree-vect-loop.c (check_reduction_path): New function, split out
	from ...
	(vect_is_simple_reduction): ... here.
	* gimple-loop-interchange.cc: Include tree-vectorizer.h.
	(loop_cand::analyze_iloop_reduction_var): Use single_imm_use.
	Properly check for a supported reduction operation and a
	valid expression if the reduction covers multiple stmts.
	(prepare_perfect_loop_nest): Simpify allocation.
	(pass_linterchange::execute): Likewise.

	* gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags.
	* gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant.
	* gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL.


Index: gcc/gimple-loop-interchange.cc
===================================================================
--- gcc/gimple-loop-interchange.cc	(revision 255375)
+++ gcc/gimple-loop-interchange.cc	(working copy)
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
 #include "tree-ssa-loop-ivopts.h"
 #include "tree-ssa-dce.h"
 #include "tree-data-ref.h"
+#include "tree-vectorizer.h"
 
 /* This pass performs loop interchange: for example, the loop nest
 
@@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var (
      in a way that reduction operation is seen as black box.  In general,
      we can ignore reassociation of reduction operator; we can handle fake
      reductions in which VAR is not even used to compute NEXT.  */
-  FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
-    {
-      stmt = USE_STMT (use_p);
-      if (is_gimple_debug (stmt))
-	continue;
-
-      if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
-	return false;
-
-      if (single_use != NULL)
-	return false;
+  if (! single_imm_use (var, &use_p, &single_use)
+      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
+    return false;
 
-      single_use = stmt;
-    }
+  /* Check the reduction operation.  We require a commutative or
+     left-associative operation.  For FP math we also need to be allowed
+     to associate operations.  */
+  if (! is_gimple_assign (single_use)
+      || ! (commutative_tree_code (gimple_assign_rhs_code (single_use))
+	    || (commutative_ternary_tree_code
+		  (gimple_assign_rhs_code (single_use))
+		&& (use_p->use == gimple_assign_rhs1_ptr (single_use)
+		    || use_p->use == gimple_assign_rhs2_ptr (single_use)))
+	    || (gimple_assign_rhs_code (single_use) == MINUS_EXPR
+		&& use_p->use == gimple_assign_rhs1_ptr (single_use)))
+      || (FLOAT_TYPE_P (TREE_TYPE (var))
+	  && ! flag_associative_math))
+    return false;
 
+  /* Handle and verify a series of stmts feeding the reduction op.  */
   if (single_use != next_def
-      && !stmt_dominates_stmt_p (single_use, next_def))
+      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
+				gimple_assign_rhs_code (single_use)))
     return false;
 
   /* Only support cases in which INIT is used in inner loop.  */
@@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop *
 			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
 {
   struct loop *start_loop = NULL, *innermost = loop;
-  struct loop *outermost = superloop_at_depth (loop, 0);
+  struct loop *outermost = loops_for_fn (cfun)->tree_root;
 
   /* Find loop nest from the innermost loop.  The outermost is the innermost
      outer*/
@@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop *
   if (!start_loop || !start_loop->inner)
     return false;
 
+  /* Prepare the data reference vector for the loop nest, pruning outer
+     loops we cannot handle.  */
   datarefs->create (20);
-  if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL
+  start_loop = prepare_data_references (start_loop, datarefs);
+  if (!start_loop
       /* Check if there is no data reference.  */
       || datarefs->is_empty ()
       /* Check if there are too many of data references.  */
-      || (int) datarefs->length () > MAX_DATAREFS
-      /* Compute access strides for all data references.  */
-      || ((start_loop = compute_access_strides (start_loop, innermost,
-						*datarefs)) == NULL)
-      /* Check if loop nest should be interchanged.  */
-      || !should_interchange_loop_nest (start_loop, innermost, *datarefs))
-    {
-      free_data_refs_with_aux (*datarefs);
-      return false;
-    }
+      || (int) datarefs->length () > MAX_DATAREFS)
+    return false;
+
+  /* Compute access strides for all data references, pruning outer
+     loops we cannot analyze refs in.  */
+  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
+  if (!start_loop)
+    return false;
+
+  /* Check if any interchange is profitable in the loop nest.  */
+  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
+    return false;
 
   /* Check if data dependences can be computed for loop nest starting from
      start_loop.  */
@@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop *
 	return true;
       }
 
+    /* ???  We should be able to adjust DDRs we already computed by
+       truncating distance vectors.  */
     free_dependence_relations (*ddrs);
+    *ddrs = vNULL;
     /* Try to compute data dependences with the outermost loop stripped.  */
     loop = start_loop;
     start_loop = start_loop->inner;
   } while (start_loop && start_loop->inner);
 
-  loop_nest->release ();
-  free_data_refs_with_aux (*datarefs);
   return false;
 }
 
@@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fu
 
   bool changed_p = false;
   struct loop *loop;
-  vec<loop_p> loop_nest;
-  vec<data_reference_p> datarefs;
-  vec<ddr_p> ddrs;
-
   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
-    if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
-      {
-	tree_loop_interchange loop_interchange (loop_nest);
-	changed_p |= loop_interchange.interchange (datarefs, ddrs);
-	free_dependence_relations (ddrs);
-	free_data_refs_with_aux (datarefs);
-	loop_nest.release ();
-      }
+    {
+      vec<loop_p> loop_nest = vNULL;
+      vec<data_reference_p> datarefs = vNULL;
+      vec<ddr_p> ddrs = vNULL;
+      if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
+	{
+	  tree_loop_interchange loop_interchange (loop_nest);
+	  changed_p |= loop_interchange.interchange (datarefs, ddrs);
+	}
+      free_dependence_relations (ddrs);
+      free_data_refs_with_aux (datarefs);
+      loop_nest.release ();
+    }
 
   if (changed_p)
     scev_reset_htab ();
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c	(revision 255375)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros -fno-trapping-math -fdump-tree-linterchange-details" } */
 
 /* Copied from graphite/interchange-4.c */
 
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c	(working copy)
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+
+/* Copied from graphite/interchange-4.c */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+double u[1782225];
+
+static void __attribute__((noinline))
+foo (int N, double *res)
+{
+  int i, j;
+  double sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum = sum + u[i + 1335 * j];
+
+  *res = sum;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j;
+  double res;
+
+  for (i = 0; i < 1782225; i++)
+    u[i] = 0;
+  u[0] = __DBL_MAX__;
+  u[1335] = -__DBL_MAX__;
+  u[1] = __DBL_MAX__;
+  u[1336] = -__DBL_MAX__;
+
+  foo (1335, &res);
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 0.0)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c	(revision 255375)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c	(working copy)
@@ -47,4 +47,4 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange"} } */
+/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange" { xfail *-*-* } } } */
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 255375)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_ve
 /* FORNOW: Used in tree-parloops.c.  */
 extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *,
 					    bool *, bool);
+/* Used in gimple-loop-interchange.c.  */
+extern bool check_reduction_path (location_t, loop_p, gphi *, tree,
+				  enum tree_code);
 /* Drive for loop analysis stage.  */
 extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 255375)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loo
 }
 
 
+/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
+   reduction operation CODE has a handled computation expression.  */
+
+bool
+check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg,
+		      enum tree_code code)
+{
+  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
+  auto_bitmap visited;
+  tree lookfor = PHI_RESULT (phi);
+  ssa_op_iter curri;
+  use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
+  while (USE_FROM_PTR (curr) != loop_arg)
+    curr = op_iter_next_use (&curri);
+  curri.i = curri.numops;
+  do
+    {
+      path.safe_push (std::make_pair (curri, curr));
+      tree use = USE_FROM_PTR (curr);
+      if (use == lookfor)
+	break;
+      gimple *def = SSA_NAME_DEF_STMT (use);
+      if (gimple_nop_p (def)
+	  || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
+	{
+pop:
+	  do
+	    {
+	      std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
+	      curri = x.first;
+	      curr = x.second;
+	      do
+		curr = op_iter_next_use (&curri);
+	      /* Skip already visited or non-SSA operands (from iterating
+	         over PHI args).  */
+	      while (curr != NULL_USE_OPERAND_P
+		     && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
+			 || ! bitmap_set_bit (visited,
+					      SSA_NAME_VERSION
+					        (USE_FROM_PTR (curr)))));
+	    }
+	  while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
+	  if (curr == NULL_USE_OPERAND_P)
+	    break;
+	}
+      else
+	{
+	  if (gimple_code (def) == GIMPLE_PHI)
+	    curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
+	  else
+	    curr = op_iter_init_use (&curri, def, SSA_OP_USE);
+	  while (curr != NULL_USE_OPERAND_P
+		 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
+		     || ! bitmap_set_bit (visited,
+					  SSA_NAME_VERSION
+					    (USE_FROM_PTR (curr)))))
+	    curr = op_iter_next_use (&curri);
+	  if (curr == NULL_USE_OPERAND_P)
+	    goto pop;
+	}
+    }
+  while (1);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
+      unsigned i;
+      std::pair<ssa_op_iter, use_operand_p> *x;
+      FOR_EACH_VEC_ELT (path, i, x)
+	{
+	  dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
+	  dump_printf (MSG_NOTE, " ");
+	}
+      dump_printf (MSG_NOTE, "\n");
+    }
+
+  /* Check whether the reduction path detected is valid.  */
+  bool fail = path.length () == 0;
+  bool neg = false;
+  for (unsigned i = 1; i < path.length (); ++i)
+    {
+      gimple *use_stmt = USE_STMT (path[i].second);
+      tree op = USE_FROM_PTR (path[i].second);
+      if (! has_single_use (op)
+	  || ! is_gimple_assign (use_stmt))
+	{
+	  fail = true;
+	  break;
+	}
+      if (gimple_assign_rhs_code (use_stmt) != code)
+	{
+	  if (code == PLUS_EXPR
+	      && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+	    {
+	      /* Track whether we negate the reduction value each iteration.  */
+	      if (gimple_assign_rhs2 (use_stmt) == op)
+		neg = ! neg;
+	    }
+	  else
+	    {
+	      fail = true;
+	      break;
+	    }
+	}
+    }
+  return ! fail && ! neg;
+}
+
+
 /* Function vect_is_simple_reduction
 
    (1) Detect a cross-iteration def-use cycle that represents a simple
@@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   /* Look for the expression computing loop_arg from loop PHI result.  */
-  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
-  auto_bitmap visited;
-  tree lookfor = PHI_RESULT (phi);
-  ssa_op_iter curri;
-  use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi),
-					    SSA_OP_USE);
-  while (USE_FROM_PTR (curr) != loop_arg)
-    curr = op_iter_next_use (&curri);
-  curri.i = curri.numops;
-  do
-    {
-      path.safe_push (std::make_pair (curri, curr));
-      tree use = USE_FROM_PTR (curr);
-      if (use == lookfor)
-	break;
-      gimple *def = SSA_NAME_DEF_STMT (use);
-      if (gimple_nop_p (def)
-	  || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
-	{
-pop:
-	  do
-	    {
-	      std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
-	      curri = x.first;
-	      curr = x.second;
-	      do
-		curr = op_iter_next_use (&curri);
-	      /* Skip already visited or non-SSA operands (from iterating
-	         over PHI args).  */
-	      while (curr != NULL_USE_OPERAND_P
-		     && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
-			 || ! bitmap_set_bit (visited,
-					      SSA_NAME_VERSION
-					        (USE_FROM_PTR (curr)))));
-	    }
-	  while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
-	  if (curr == NULL_USE_OPERAND_P)
-	    break;
-	}
-      else
-	{
-	  if (gimple_code (def) == GIMPLE_PHI)
-	    curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
-	  else
-	    curr = op_iter_init_use (&curri, def, SSA_OP_USE);
-	  while (curr != NULL_USE_OPERAND_P
-		 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
-		     || ! bitmap_set_bit (visited,
-					  SSA_NAME_VERSION
-					    (USE_FROM_PTR (curr)))))
-	    curr = op_iter_next_use (&curri);
-	  if (curr == NULL_USE_OPERAND_P)
-	    goto pop;
-	}
-    }
-  while (1);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    {
-      dump_printf_loc (MSG_NOTE, vect_location,
-		       "reduction path: ");
-      unsigned i;
-      std::pair<ssa_op_iter, use_operand_p> *x;
-      FOR_EACH_VEC_ELT (path, i, x)
-	{
-	  dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
-	  dump_printf (MSG_NOTE, " ");
-	}
-      dump_printf (MSG_NOTE, "\n");
-    }
-
-  /* Check whether the reduction path detected is valid.  */
-  bool fail = path.length () == 0;
-  bool neg = false;
-  for (unsigned i = 1; i < path.length (); ++i)
-    {
-      gimple *use_stmt = USE_STMT (path[i].second);
-      tree op = USE_FROM_PTR (path[i].second);
-      if (! has_single_use (op)
-	  || ! is_gimple_assign (use_stmt))
-	{
-	  fail = true;
-	  break;
-	}
-      if (gimple_assign_rhs_code (use_stmt) != code)
-	{
-	  if (code == PLUS_EXPR
-	      && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
-	    {
-	      /* Track whether we negate the reduction value each iteration.  */
-	      if (gimple_assign_rhs2 (use_stmt) == op)
-		neg = ! neg;
-	    }
-	  else
-	    {
-	      fail = true;
-	      break;
-	    }
-	}
-    }
-  if (! fail && ! neg)
+  if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), loop_arg,
+			    code))
     return def_stmt;
 
   if (dump_enabled_p ())

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][gimple-interchange] Add reduction validity check
  2017-12-04 13:11 [PATCH][gimple-interchange] Add reduction validity check Richard Biener
@ 2017-12-04 14:28 ` Bin.Cheng
  2017-12-04 14:43   ` Richard Biener
  2017-12-04 15:13 ` Bin.Cheng
  1 sibling, 1 reply; 5+ messages in thread
From: Bin.Cheng @ 2017-12-04 14:28 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches List

On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote:
>
> I've noticed we perform FP reduction association without the required
> checks for associative math.  I've added
> gcc.dg/tree-ssa/loop-interchange-1b.c to cover this.
>
> I also noticed we happily interchange a loop with a reduction like
>
>  sum = a[i] - sum;
>
> where a change in order of elements isn't ok.  Unfortunately bwaves
> is exactly a case where single_use != next_def (tried to simply remove
> that case for now), because reassoc didn't have a chance to fix the
> operand order.  Thus this patch exports the relevant handling from
> the vectorizer (for stage1 having a separate infrastructure gathering /
> analyzing of reduction/induction infrastructure would be nice...)
> and uses it from interchange.  We then don't handle
> gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer
> missed-opt is PR65930).  I didn't bother to split up the vectorizer
> code further to implement relaxed validity checking but simply XFAILed
> this testcase.
>
> Earlier I simplified allocation stuff in the main loop which is why
> this part is included in this patch.
>
> Bootstrap running on x86_64-unknown-linux-gnu.
>
> I'll see to craft a testcase with the sum = a[i] - sum; mis-handling.
>
> Ok?
Sure.
Just for the record.  There is also similar associative check in
predcom.  As you suggested, a path extraction/checking interface for
associative checking would be great, given we have multiple users now.

Thanks,
bin
>
> Thanks,
> Richard.
>
> 2017-12-04  Richard Biener  <rguenther@suse.de>
>
>         * tree-vectorizer.h (check_reduction_path): Declare.
>         * tree-vect-loop.c (check_reduction_path): New function, split out
>         from ...
>         (vect_is_simple_reduction): ... here.
>         * gimple-loop-interchange.cc: Include tree-vectorizer.h.
>         (loop_cand::analyze_iloop_reduction_var): Use single_imm_use.
>         Properly check for a supported reduction operation and a
>         valid expression if the reduction covers multiple stmts.
>         (prepare_perfect_loop_nest): Simpify allocation.
>         (pass_linterchange::execute): Likewise.
>
>         * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags.
>         * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant.
>         * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL.
>
>
> Index: gcc/gimple-loop-interchange.cc
> ===================================================================
> --- gcc/gimple-loop-interchange.cc      (revision 255375)
> +++ gcc/gimple-loop-interchange.cc      (working copy)
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
>  #include "tree-ssa-loop-ivopts.h"
>  #include "tree-ssa-dce.h"
>  #include "tree-data-ref.h"
> +#include "tree-vectorizer.h"
>
>  /* This pass performs loop interchange: for example, the loop nest
>
> @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var (
>       in a way that reduction operation is seen as black box.  In general,
>       we can ignore reassociation of reduction operator; we can handle fake
>       reductions in which VAR is not even used to compute NEXT.  */
> -  FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
> -    {
> -      stmt = USE_STMT (use_p);
> -      if (is_gimple_debug (stmt))
> -       continue;
> -
> -      if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
> -       return false;
> -
> -      if (single_use != NULL)
> -       return false;
> +  if (! single_imm_use (var, &use_p, &single_use)
> +      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
> +    return false;
>
> -      single_use = stmt;
> -    }
> +  /* Check the reduction operation.  We require a commutative or
> +     left-associative operation.  For FP math we also need to be allowed
> +     to associate operations.  */
> +  if (! is_gimple_assign (single_use)
> +      || ! (commutative_tree_code (gimple_assign_rhs_code (single_use))
> +           || (commutative_ternary_tree_code
> +                 (gimple_assign_rhs_code (single_use))
> +               && (use_p->use == gimple_assign_rhs1_ptr (single_use)
> +                   || use_p->use == gimple_assign_rhs2_ptr (single_use)))
> +           || (gimple_assign_rhs_code (single_use) == MINUS_EXPR
> +               && use_p->use == gimple_assign_rhs1_ptr (single_use)))
> +      || (FLOAT_TYPE_P (TREE_TYPE (var))
> +         && ! flag_associative_math))
> +    return false;
>
> +  /* Handle and verify a series of stmts feeding the reduction op.  */
>    if (single_use != next_def
> -      && !stmt_dominates_stmt_p (single_use, next_def))
> +      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
> +                               gimple_assign_rhs_code (single_use)))
>      return false;
>
>    /* Only support cases in which INIT is used in inner loop.  */
> @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop *
>                            vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
>  {
>    struct loop *start_loop = NULL, *innermost = loop;
> -  struct loop *outermost = superloop_at_depth (loop, 0);
> +  struct loop *outermost = loops_for_fn (cfun)->tree_root;
>
>    /* Find loop nest from the innermost loop.  The outermost is the innermost
>       outer*/
> @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop *
>    if (!start_loop || !start_loop->inner)
>      return false;
>
> +  /* Prepare the data reference vector for the loop nest, pruning outer
> +     loops we cannot handle.  */
>    datarefs->create (20);
> -  if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL
> +  start_loop = prepare_data_references (start_loop, datarefs);
> +  if (!start_loop
>        /* Check if there is no data reference.  */
>        || datarefs->is_empty ()
>        /* Check if there are too many of data references.  */
> -      || (int) datarefs->length () > MAX_DATAREFS
> -      /* Compute access strides for all data references.  */
> -      || ((start_loop = compute_access_strides (start_loop, innermost,
> -                                               *datarefs)) == NULL)
> -      /* Check if loop nest should be interchanged.  */
> -      || !should_interchange_loop_nest (start_loop, innermost, *datarefs))
> -    {
> -      free_data_refs_with_aux (*datarefs);
> -      return false;
> -    }
> +      || (int) datarefs->length () > MAX_DATAREFS)
> +    return false;
> +
> +  /* Compute access strides for all data references, pruning outer
> +     loops we cannot analyze refs in.  */
> +  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
> +  if (!start_loop)
> +    return false;
> +
> +  /* Check if any interchange is profitable in the loop nest.  */
> +  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
> +    return false;
>
>    /* Check if data dependences can be computed for loop nest starting from
>       start_loop.  */
> @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop *
>         return true;
>        }
>
> +    /* ???  We should be able to adjust DDRs we already computed by
> +       truncating distance vectors.  */
>      free_dependence_relations (*ddrs);
> +    *ddrs = vNULL;
>      /* Try to compute data dependences with the outermost loop stripped.  */
>      loop = start_loop;
>      start_loop = start_loop->inner;
>    } while (start_loop && start_loop->inner);
>
> -  loop_nest->release ();
> -  free_data_refs_with_aux (*datarefs);
>    return false;
>  }
>
> @@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fu
>
>    bool changed_p = false;
>    struct loop *loop;
> -  vec<loop_p> loop_nest;
> -  vec<data_reference_p> datarefs;
> -  vec<ddr_p> ddrs;
> -
>    FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
> -    if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
> -      {
> -       tree_loop_interchange loop_interchange (loop_nest);
> -       changed_p |= loop_interchange.interchange (datarefs, ddrs);
> -       free_dependence_relations (ddrs);
> -       free_data_refs_with_aux (datarefs);
> -       loop_nest.release ();
> -      }
> +    {
> +      vec<loop_p> loop_nest = vNULL;
> +      vec<data_reference_p> datarefs = vNULL;
> +      vec<ddr_p> ddrs = vNULL;
> +      if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
> +       {
> +         tree_loop_interchange loop_interchange (loop_nest);
> +         changed_p |= loop_interchange.interchange (datarefs, ddrs);
> +       }
> +      free_dependence_relations (ddrs);
> +      free_data_refs_with_aux (datarefs);
> +      loop_nest.release ();
> +    }
>
>    if (changed_p)
>      scev_reset_htab ();
> Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c  (revision 255375)
> +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c  (working copy)
> @@ -1,5 +1,5 @@
>  /* { dg-do run } */
> -/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
> +/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros -fno-trapping-math -fdump-tree-linterchange-details" } */
>
>  /* Copied from graphite/interchange-4.c */
>
> Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (nonexistent)
> +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (working copy)
> @@ -0,0 +1,52 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
> +
> +/* Copied from graphite/interchange-4.c */
> +
> +#define DEBUG 0
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +double u[1782225];
> +
> +static void __attribute__((noinline))
> +foo (int N, double *res)
> +{
> +  int i, j;
> +  double sum = 0;
> +  for (i = 0; i < N; i++)
> +    for (j = 0; j < N; j++)
> +      sum = sum + u[i + 1335 * j];
> +
> +  *res = sum;
> +}
> +
> +extern void abort ();
> +
> +int
> +main (void)
> +{
> +  int i, j;
> +  double res;
> +
> +  for (i = 0; i < 1782225; i++)
> +    u[i] = 0;
> +  u[0] = __DBL_MAX__;
> +  u[1335] = -__DBL_MAX__;
> +  u[1] = __DBL_MAX__;
> +  u[1336] = -__DBL_MAX__;
> +
> +  foo (1335, &res);
> +
> +#if DEBUG
> +  fprintf (stderr, "res = %d \n", res);
> +#endif
> +
> +  if (res != 0.0)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c  (revision 255375)
> +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c  (working copy)
> @@ -47,4 +47,4 @@ main (void)
>    return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange"} } */
> +/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange" { xfail *-*-* } } } */
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       (revision 255375)
> +++ gcc/tree-vectorizer.h       (working copy)
> @@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_ve
>  /* FORNOW: Used in tree-parloops.c.  */
>  extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *,
>                                             bool *, bool);
> +/* Used in gimple-loop-interchange.c.  */
> +extern bool check_reduction_path (location_t, loop_p, gphi *, tree,
> +                                 enum tree_code);
>  /* Drive for loop analysis stage.  */
>  extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
>  extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        (revision 255375)
> +++ gcc/tree-vect-loop.c        (working copy)
> @@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loo
>  }
>
>
> +/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
> +   reduction operation CODE has a handled computation expression.  */
> +
> +bool
> +check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg,
> +                     enum tree_code code)
> +{
> +  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
> +  auto_bitmap visited;
> +  tree lookfor = PHI_RESULT (phi);
> +  ssa_op_iter curri;
> +  use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
> +  while (USE_FROM_PTR (curr) != loop_arg)
> +    curr = op_iter_next_use (&curri);
> +  curri.i = curri.numops;
> +  do
> +    {
> +      path.safe_push (std::make_pair (curri, curr));
> +      tree use = USE_FROM_PTR (curr);
> +      if (use == lookfor)
> +       break;
> +      gimple *def = SSA_NAME_DEF_STMT (use);
> +      if (gimple_nop_p (def)
> +         || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
> +       {
> +pop:
> +         do
> +           {
> +             std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
> +             curri = x.first;
> +             curr = x.second;
> +             do
> +               curr = op_iter_next_use (&curri);
> +             /* Skip already visited or non-SSA operands (from iterating
> +                over PHI args).  */
> +             while (curr != NULL_USE_OPERAND_P
> +                    && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> +                        || ! bitmap_set_bit (visited,
> +                                             SSA_NAME_VERSION
> +                                               (USE_FROM_PTR (curr)))));
> +           }
> +         while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
> +         if (curr == NULL_USE_OPERAND_P)
> +           break;
> +       }
> +      else
> +       {
> +         if (gimple_code (def) == GIMPLE_PHI)
> +           curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
> +         else
> +           curr = op_iter_init_use (&curri, def, SSA_OP_USE);
> +         while (curr != NULL_USE_OPERAND_P
> +                && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> +                    || ! bitmap_set_bit (visited,
> +                                         SSA_NAME_VERSION
> +                                           (USE_FROM_PTR (curr)))))
> +           curr = op_iter_next_use (&curri);
> +         if (curr == NULL_USE_OPERAND_P)
> +           goto pop;
> +       }
> +    }
> +  while (1);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
> +      unsigned i;
> +      std::pair<ssa_op_iter, use_operand_p> *x;
> +      FOR_EACH_VEC_ELT (path, i, x)
> +       {
> +         dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
> +         dump_printf (MSG_NOTE, " ");
> +       }
> +      dump_printf (MSG_NOTE, "\n");
> +    }
> +
> +  /* Check whether the reduction path detected is valid.  */
> +  bool fail = path.length () == 0;
> +  bool neg = false;
> +  for (unsigned i = 1; i < path.length (); ++i)
> +    {
> +      gimple *use_stmt = USE_STMT (path[i].second);
> +      tree op = USE_FROM_PTR (path[i].second);
> +      if (! has_single_use (op)
> +         || ! is_gimple_assign (use_stmt))
> +       {
> +         fail = true;
> +         break;
> +       }
> +      if (gimple_assign_rhs_code (use_stmt) != code)
> +       {
> +         if (code == PLUS_EXPR
> +             && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
> +           {
> +             /* Track whether we negate the reduction value each iteration.  */
> +             if (gimple_assign_rhs2 (use_stmt) == op)
> +               neg = ! neg;
> +           }
> +         else
> +           {
> +             fail = true;
> +             break;
> +           }
> +       }
> +    }
> +  return ! fail && ! neg;
> +}
> +
> +
>  /* Function vect_is_simple_reduction
>
>     (1) Detect a cross-iteration def-use cycle that represents a simple
> @@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    /* Look for the expression computing loop_arg from loop PHI result.  */
> -  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
> -  auto_bitmap visited;
> -  tree lookfor = PHI_RESULT (phi);
> -  ssa_op_iter curri;
> -  use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi),
> -                                           SSA_OP_USE);
> -  while (USE_FROM_PTR (curr) != loop_arg)
> -    curr = op_iter_next_use (&curri);
> -  curri.i = curri.numops;
> -  do
> -    {
> -      path.safe_push (std::make_pair (curri, curr));
> -      tree use = USE_FROM_PTR (curr);
> -      if (use == lookfor)
> -       break;
> -      gimple *def = SSA_NAME_DEF_STMT (use);
> -      if (gimple_nop_p (def)
> -         || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
> -       {
> -pop:
> -         do
> -           {
> -             std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
> -             curri = x.first;
> -             curr = x.second;
> -             do
> -               curr = op_iter_next_use (&curri);
> -             /* Skip already visited or non-SSA operands (from iterating
> -                over PHI args).  */
> -             while (curr != NULL_USE_OPERAND_P
> -                    && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> -                        || ! bitmap_set_bit (visited,
> -                                             SSA_NAME_VERSION
> -                                               (USE_FROM_PTR (curr)))));
> -           }
> -         while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
> -         if (curr == NULL_USE_OPERAND_P)
> -           break;
> -       }
> -      else
> -       {
> -         if (gimple_code (def) == GIMPLE_PHI)
> -           curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
> -         else
> -           curr = op_iter_init_use (&curri, def, SSA_OP_USE);
> -         while (curr != NULL_USE_OPERAND_P
> -                && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
> -                    || ! bitmap_set_bit (visited,
> -                                         SSA_NAME_VERSION
> -                                           (USE_FROM_PTR (curr)))))
> -           curr = op_iter_next_use (&curri);
> -         if (curr == NULL_USE_OPERAND_P)
> -           goto pop;
> -       }
> -    }
> -  while (1);
> -  if (dump_file && (dump_flags & TDF_DETAILS))
> -    {
> -      dump_printf_loc (MSG_NOTE, vect_location,
> -                      "reduction path: ");
> -      unsigned i;
> -      std::pair<ssa_op_iter, use_operand_p> *x;
> -      FOR_EACH_VEC_ELT (path, i, x)
> -       {
> -         dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
> -         dump_printf (MSG_NOTE, " ");
> -       }
> -      dump_printf (MSG_NOTE, "\n");
> -    }
> -
> -  /* Check whether the reduction path detected is valid.  */
> -  bool fail = path.length () == 0;
> -  bool neg = false;
> -  for (unsigned i = 1; i < path.length (); ++i)
> -    {
> -      gimple *use_stmt = USE_STMT (path[i].second);
> -      tree op = USE_FROM_PTR (path[i].second);
> -      if (! has_single_use (op)
> -         || ! is_gimple_assign (use_stmt))
> -       {
> -         fail = true;
> -         break;
> -       }
> -      if (gimple_assign_rhs_code (use_stmt) != code)
> -       {
> -         if (code == PLUS_EXPR
> -             && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
> -           {
> -             /* Track whether we negate the reduction value each iteration.  */
> -             if (gimple_assign_rhs2 (use_stmt) == op)
> -               neg = ! neg;
> -           }
> -         else
> -           {
> -             fail = true;
> -             break;
> -           }
> -       }
> -    }
> -  if (! fail && ! neg)
> +  if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), loop_arg,
> +                           code))
>      return def_stmt;
>
>    if (dump_enabled_p ())

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][gimple-interchange] Add reduction validity check
  2017-12-04 14:28 ` Bin.Cheng
@ 2017-12-04 14:43   ` Richard Biener
  2017-12-05  9:27     ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2017-12-04 14:43 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: gcc-patches List

On Mon, 4 Dec 2017, Bin.Cheng wrote:

> On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote:
> >
> > I've noticed we perform FP reduction association without the required
> > checks for associative math.  I've added
> > gcc.dg/tree-ssa/loop-interchange-1b.c to cover this.
> >
> > I also noticed we happily interchange a loop with a reduction like
> >
> >  sum = a[i] - sum;
> >
> > where a change in order of elements isn't ok.  Unfortunately bwaves
> > is exactly a case where single_use != next_def (tried to simply remove
> > that case for now), because reassoc didn't have a chance to fix the
> > operand order.  Thus this patch exports the relevant handling from
> > the vectorizer (for stage1 having a separate infrastructure gathering /
> > analyzing of reduction/induction infrastructure would be nice...)
> > and uses it from interchange.  We then don't handle
> > gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer
> > missed-opt is PR65930).  I didn't bother to split up the vectorizer
> > code further to implement relaxed validity checking but simply XFAILed
> > this testcase.
> >
> > Earlier I simplified allocation stuff in the main loop which is why
> > this part is included in this patch.
> >
> > Bootstrap running on x86_64-unknown-linux-gnu.
> >
> > I'll see to craft a testcase with the sum = a[i] - sum; mis-handling.
> >
> > Ok?
> Sure.
> Just for the record.  There is also similar associative check in
> predcom.  As you suggested, a path extraction/checking interface for
> associative checking would be great, given we have multiple users now.

Yeah.  Note for interchange we don't need associativeness in the sense
as implemented by associative_tree_code, we need left-associativeness
which also MINUS_EXPR provides.  So my check isn't perfect.  I guess
we instead need

 associative_tree_code ()
 || (code == MINUS_EXPR
     && use_p->use == gimple_assign_rhs1_ptr (single_use))

where we could also allow RDIV_EXPR and other left-associative
tree codes (but check_reduction_path would do the wrong thing
with those at the moment but it has MINUS_EXPR handling that
would support sum = x - (y - sum) which the above rejects).

So I'm retesting with

  /* Check the reduction operation.  We require a left-associative 
operation.  
     For FP math we also need to be allowed to associate operations.  */
  enum tree_code code = gimple_assign_rhs_code (single_use);
  if (gassign *ass = dyn_cast <gassign *> (single_use))
    {
      enum tree_code code = gimple_assign_rhs_code (ass);
      if (! (associative_tree_code (code)
             || (code == MINUS_EXPR
                 && use_p->use == gimple_assign_rhs1_ptr (ass)))
          || (FLOAT_TYPE_P (TREE_TYPE (var))
              && ! flag_associative_math))
        return false;
    }
  else
    return false;

which is more restrictive than before.

Richard.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][gimple-interchange] Add reduction validity check
  2017-12-04 13:11 [PATCH][gimple-interchange] Add reduction validity check Richard Biener
  2017-12-04 14:28 ` Bin.Cheng
@ 2017-12-04 15:13 ` Bin.Cheng
  1 sibling, 0 replies; 5+ messages in thread
From: Bin.Cheng @ 2017-12-04 15:13 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches List

On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote:
>
> I've noticed we perform FP reduction association without the required
> checks for associative math.  I've added
> gcc.dg/tree-ssa/loop-interchange-1b.c to cover this.
>
> I also noticed we happily interchange a loop with a reduction like
>
>  sum = a[i] - sum;
>
> where a change in order of elements isn't ok.  Unfortunately bwaves
> is exactly a case where single_use != next_def (tried to simply remove
> that case for now), because reassoc didn't have a chance to fix the
> operand order.  Thus this patch exports the relevant handling from
> the vectorizer (for stage1 having a separate infrastructure gathering /
> analyzing of reduction/induction infrastructure would be nice...)
> and uses it from interchange.  We then don't handle
> gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer
> missed-opt is PR65930).  I didn't bother to split up the vectorizer
> code further to implement relaxed validity checking but simply XFAILed
> this testcase.
>
> Earlier I simplified allocation stuff in the main loop which is why
> this part is included in this patch.
>
> Bootstrap running on x86_64-unknown-linux-gnu.
>
> I'll see to craft a testcase with the sum = a[i] - sum; mis-handling.
>
> Ok?
>
> Thanks,
> Richard.
>
> 2017-12-04  Richard Biener  <rguenther@suse.de>
>
>         * tree-vectorizer.h (check_reduction_path): Declare.
>         * tree-vect-loop.c (check_reduction_path): New function, split out
>         from ...
>         (vect_is_simple_reduction): ... here.
>         * gimple-loop-interchange.cc: Include tree-vectorizer.h.
>         (loop_cand::analyze_iloop_reduction_var): Use single_imm_use.
>         Properly check for a supported reduction operation and a
>         valid expression if the reduction covers multiple stmts.
>         (prepare_perfect_loop_nest): Simpify allocation.
>         (pass_linterchange::execute): Likewise.
>
>         * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags.
>         * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant.
>         * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL.
>
>
> Index: gcc/gimple-loop-interchange.cc
> ===================================================================
> --- gcc/gimple-loop-interchange.cc      (revision 255375)
> +++ gcc/gimple-loop-interchange.cc      (working copy)
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
>  #include "tree-ssa-loop-ivopts.h"
>  #include "tree-ssa-dce.h"
>  #include "tree-data-ref.h"
> +#include "tree-vectorizer.h"
>
>  /* This pass performs loop interchange: for example, the loop nest
>
> @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var (
>       in a way that reduction operation is seen as black box.  In general,
>       we can ignore reassociation of reduction operator; we can handle fake
>       reductions in which VAR is not even used to compute NEXT.  */
> -  FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
> -    {
> -      stmt = USE_STMT (use_p);
> -      if (is_gimple_debug (stmt))
> -       continue;
> -
> -      if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
> -       return false;
> -
> -      if (single_use != NULL)
> -       return false;
> +  if (! single_imm_use (var, &use_p, &single_use)
> +      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
> +    return false;
>
> -      single_use = stmt;
> -    }
> +  /* Check the reduction operation.  We require a commutative or
> +     left-associative operation.  For FP math we also need to be allowed
> +     to associate operations.  */
> +  if (! is_gimple_assign (single_use)
> +      || ! (commutative_tree_code (gimple_assign_rhs_code (single_use))
> +           || (commutative_ternary_tree_code
> +                 (gimple_assign_rhs_code (single_use))
> +               && (use_p->use == gimple_assign_rhs1_ptr (single_use)
> +                   || use_p->use == gimple_assign_rhs2_ptr (single_use)))
> +           || (gimple_assign_rhs_code (single_use) == MINUS_EXPR
> +               && use_p->use == gimple_assign_rhs1_ptr (single_use)))
> +      || (FLOAT_TYPE_P (TREE_TYPE (var))
> +         && ! flag_associative_math))
> +    return false;
>
> +  /* Handle and verify a series of stmts feeding the reduction op.  */
>    if (single_use != next_def
> -      && !stmt_dominates_stmt_p (single_use, next_def))
> +      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
> +                               gimple_assign_rhs_code (single_use)))
>      return false;
>
>    /* Only support cases in which INIT is used in inner loop.  */
> @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop *
>                            vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
>  {
>    struct loop *start_loop = NULL, *innermost = loop;
> -  struct loop *outermost = superloop_at_depth (loop, 0);
> +  struct loop *outermost = loops_for_fn (cfun)->tree_root;
>
>    /* Find loop nest from the innermost loop.  The outermost is the innermost
>       outer*/
> @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop *
>    if (!start_loop || !start_loop->inner)
>      return false;
>
> +  /* Prepare the data reference vector for the loop nest, pruning outer
> +     loops we cannot handle.  */
>    datarefs->create (20);
> -  if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL
> +  start_loop = prepare_data_references (start_loop, datarefs);
> +  if (!start_loop
>        /* Check if there is no data reference.  */
>        || datarefs->is_empty ()
>        /* Check if there are too many of data references.  */
> -      || (int) datarefs->length () > MAX_DATAREFS
> -      /* Compute access strides for all data references.  */
> -      || ((start_loop = compute_access_strides (start_loop, innermost,
> -                                               *datarefs)) == NULL)
> -      /* Check if loop nest should be interchanged.  */
> -      || !should_interchange_loop_nest (start_loop, innermost, *datarefs))
> -    {
> -      free_data_refs_with_aux (*datarefs);
> -      return false;
> -    }
> +      || (int) datarefs->length () > MAX_DATAREFS)
> +    return false;
> +
> +  /* Compute access strides for all data references, pruning outer
> +     loops we cannot analyze refs in.  */
> +  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
> +  if (!start_loop)
> +    return false;
> +
> +  /* Check if any interchange is profitable in the loop nest.  */
> +  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
> +    return false;
>
>    /* Check if data dependences can be computed for loop nest starting from
>       start_loop.  */
> @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop *
>         return true;
>        }
>
> +    /* ???  We should be able to adjust DDRs we already computed by
> +       truncating distance vectors.  */
Note it could be hard enough to adjust DDRs for stripped outer loop.
Dependence can become non-dep, even worse, we may result in wrong
"invalid dep" in case of:
   DDR<a,b>:
     (<, <, =)
     (<, >, =)
because after simply stripping, it becomes:
   DDR<a,b>:
     (<, =)
     (>, =)
While the correct result is "no-dep".

Thanks,
bin

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][gimple-interchange] Add reduction validity check
  2017-12-04 14:43   ` Richard Biener
@ 2017-12-05  9:27     ` Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2017-12-05  9:27 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: gcc-patches List

On Mon, 4 Dec 2017, Richard Biener wrote:

> On Mon, 4 Dec 2017, Bin.Cheng wrote:
> 
> > On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote:
> > >
> > > I've noticed we perform FP reduction association without the required
> > > checks for associative math.  I've added
> > > gcc.dg/tree-ssa/loop-interchange-1b.c to cover this.
> > >
> > > I also noticed we happily interchange a loop with a reduction like
> > >
> > >  sum = a[i] - sum;
> > >
> > > where a change in order of elements isn't ok.  Unfortunately bwaves
> > > is exactly a case where single_use != next_def (tried to simply remove
> > > that case for now), because reassoc didn't have a chance to fix the
> > > operand order.  Thus this patch exports the relevant handling from
> > > the vectorizer (for stage1 having a separate infrastructure gathering /
> > > analyzing of reduction/induction infrastructure would be nice...)
> > > and uses it from interchange.  We then don't handle
> > > gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer
> > > missed-opt is PR65930).  I didn't bother to split up the vectorizer
> > > code further to implement relaxed validity checking but simply XFAILed
> > > this testcase.
> > >
> > > Earlier I simplified allocation stuff in the main loop which is why
> > > this part is included in this patch.
> > >
> > > Bootstrap running on x86_64-unknown-linux-gnu.
> > >
> > > I'll see to craft a testcase with the sum = a[i] - sum; mis-handling.
> > >
> > > Ok?
> > Sure.
> > Just for the record.  There is also similar associative check in
> > predcom.  As you suggested, a path extraction/checking interface for
> > associative checking would be great, given we have multiple users now.
> 
> Yeah.  Note for interchange we don't need associativeness in the sense
> as implemented by associative_tree_code, we need left-associativeness
> which also MINUS_EXPR provides.  So my check isn't perfect.  I guess
> we instead need
> 
>  associative_tree_code ()
>  || (code == MINUS_EXPR
>      && use_p->use == gimple_assign_rhs1_ptr (single_use))
> 
> where we could also allow RDIV_EXPR and other left-associative
> tree codes (but check_reduction_path would do the wrong thing
> with those at the moment but it has MINUS_EXPR handling that
> would support sum = x - (y - sum) which the above rejects).
> 
> So I'm retesting with
> 
>   /* Check the reduction operation.  We require a left-associative 
> operation.  
>      For FP math we also need to be allowed to associate operations.  */
>   enum tree_code code = gimple_assign_rhs_code (single_use);
>   if (gassign *ass = dyn_cast <gassign *> (single_use))
>     {
>       enum tree_code code = gimple_assign_rhs_code (ass);
>       if (! (associative_tree_code (code)
>              || (code == MINUS_EXPR
>                  && use_p->use == gimple_assign_rhs1_ptr (ass)))
>           || (FLOAT_TYPE_P (TREE_TYPE (var))
>               && ! flag_associative_math))
>         return false;
>     }
>   else
>     return false;
> 
> which is more restrictive than before.

And here's two testcases, one for sum = x - sum and one for division
by sum which shows wrong-code without this patch.

Richard.

2017-12-05  Richard Biener  <rguenther@suse.de>

	* gcc.dg/tree-ssa/loop-interchange-12.c: New testcase.
	* gcc.dg/tree-ssa/loop-interchange-13.c: Likewise.

Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-12.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-12.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-12.c	(working copy)
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+
+/* Copied from graphite/interchange-4.c */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+unsigned u[1024];
+
+static void __attribute__((noinline,noclone,noipa))
+foo (int N, unsigned *res)
+{
+  int i, j;
+  unsigned sum = 1;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum = u[i + 2 * j] / sum;
+
+  *res = sum;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j;
+  unsigned res;
+
+  u[0] = 10;
+  u[1] = 200;
+  u[2] = 10;
+  u[3] = 10;
+
+  foo (2, &res);
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 0)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-13.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-13.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-13.c	(working copy)
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */
+
+/* Copied from graphite/interchange-4.c */
+
+#define DEBUG 0
+#if DEBUG
+#include <stdio.h>
+#endif
+
+unsigned u[1024];
+
+static void __attribute__((noinline,noclone,noipa))
+foo (int N, int M, unsigned *res)
+{
+  int i, j;
+  unsigned sum = 0;
+  if (N > 0)
+    for (i = 0; i < M; i++)
+      for (j = 0; j < N; j++)
+	sum = u[i + 3 * j] - sum;
+
+  *res = sum;
+}
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i, j;
+  unsigned res;
+
+  u[0] = 1;
+  u[1] = 2;
+  u[2] = 4;
+  u[3] = 5;
+  u[4] = 7;
+  u[5] = 8;
+
+  foo (2, 3, &res);
+
+#if DEBUG
+  fprintf (stderr, "res = %d \n", res);
+#endif
+
+  if (res != 13)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-12-05  9:27 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-04 13:11 [PATCH][gimple-interchange] Add reduction validity check Richard Biener
2017-12-04 14:28 ` Bin.Cheng
2017-12-04 14:43   ` Richard Biener
2017-12-05  9:27     ` Richard Biener
2017-12-04 15:13 ` Bin.Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).