public inbox for gcc-patches@gcc.gnu.org
* [PATCH] Fix PR66623
@ 2017-06-08  9:18 Richard Biener
  2017-06-08 13:49 ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2017-06-08  9:18 UTC (permalink / raw)
  To: gcc-patches


The following fixes unsafe vectorization of reductions in outer loop
vectorization.  It combines the fix with a bit of cleanup in the affected
function.
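
To make the failure mode concrete, here is a minimal sketch of the
pattern (illustrative only -- essentially the new testcase below):
the inner loop is a float reduction, and vectorizing the outer loop
reorders the accumulation into l, which is only valid when
reassociation is permitted.

  float
  outer_reduction (float *__restrict__ x)
  {
    float l = 1.0f;
    for (int a = 0; a < 4; a++)      /* outer loop, the one vectorized */
      for (int b = 0; b < 4; b++)    /* inner reduction loop */
        l = l * x[b];                /* reduction carried across both loops */
    return l;
  }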

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2017-06-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/66623
	* tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
	refactor check_reduction into two parts, properly computing
	whether we have to check reduction validity for outer loop
	vectorization.

	* gcc.dg/vect/pr66623.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 249003)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
-  edge latch_e = loop_latch_edge (loop);
-  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
   enum tree_code orig_code, code;
   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
@@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
   *double_reduc = false;
   *v_reduc_type = TREE_CODE_REDUCTION;
 
-  /* Check validity of the reduction only for the innermost loop.  */
-  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
-  gcc_assert ((check_reduction && loop == vect_loop)
-              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
-
   name = PHI_RESULT (phi);
   /* ???  If there are no uses of the PHI result the inner loop reduction
      won't be detected as possibly double-reduction by vectorizable_reduction
@@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
         {
           if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "reduction used in loop.\n");
+			     "reduction value used in loop.\n");
           return NULL;
         }
 
       phi_use_stmt = use_stmt;
     }
 
+  edge latch_e = loop_latch_edge (loop);
+  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   if (TREE_CODE (loop_arg) != SSA_NAME)
     {
       if (dump_enabled_p ())
@@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
-  if (!def_stmt)
+  if (gimple_nop_p (def_stmt))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "reduction: no def_stmt.\n");
+			 "reduction: no def_stmt\n");
       return NULL;
     }
 
   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
     {
       if (dump_enabled_p ())
-	dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+	{
+	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			   "reduction: unhandled reduction operation: ");
+	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
+	}
       return NULL;
     }
 
@@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   nloop_uses = 0;
+  gphi *lcphi = NULL;
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
     {
       gimple *use_stmt = USE_STMT (use_p);
@@ -2829,6 +2829,11 @@ vect_is_simple_reduction (loop_vec_info
 	continue;
       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
 	nloop_uses++;
+      else
+	{
+	  gcc_assert (!lcphi);
+	  lcphi = as_a <gphi *> (use_stmt);
+	}
       if (nloop_uses > 1)
 	{
 	  if (dump_enabled_p ())
@@ -2873,6 +2878,24 @@ vect_is_simple_reduction (loop_vec_info
       return NULL;
     }
 
+  /* If we are vectorizing an inner reduction we are executing that
+     in the original order only in case we are not dealing with a
+     double reduction.  */
+  bool check_reduction = true;
+  if (flow_loop_nested_p (vect_loop, loop))
+    {
+      check_reduction = false;
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (is_gimple_debug (use_stmt))
+	    continue;
+          if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
+	    check_reduction = true;
+	}
+    }
+
+  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
   code = orig_code = gimple_assign_rhs_code (def_stmt);
 
   /* We can handle "res -= x[i]", which is non-associative by
@@ -2887,27 +2910,8 @@ vect_is_simple_reduction (loop_vec_info
 
   if (code == COND_EXPR)
     {
-      if (check_reduction)
+      if (! nested_in_vect_loop)
 	*v_reduc_type = COND_REDUCTION;
-    }
-  else if (!commutative_tree_code (code) || !associative_tree_code (code))
-    {
-      if (dump_enabled_p ())
-	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: not commutative/associative: ");
-      return NULL;
-    }
-
-  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
-    {
-      if (code != COND_EXPR)
-        {
-	  if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: not binary operation: ");
-
-          return NULL;
-        }
 
       op3 = gimple_assign_rhs1 (def_stmt);
       if (COMPARISON_CLASS_P (op3))
@@ -2918,30 +2922,35 @@ vect_is_simple_reduction (loop_vec_info
 
       op1 = gimple_assign_rhs2 (def_stmt);
       op2 = gimple_assign_rhs3 (def_stmt);
-
-      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
-        {
-          if (dump_enabled_p ())
-            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: uses not ssa_names: ");
-
-          return NULL;
-        }
     }
-  else
+  else if (!commutative_tree_code (code) || !associative_tree_code (code))
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: not commutative/associative: ");
+      return NULL;
+    }
+  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
     {
       op1 = gimple_assign_rhs1 (def_stmt);
       op2 = gimple_assign_rhs2 (def_stmt);
+    }
+  else
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: not handled operation: ");
+      return NULL;
+    }
 
-      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
-        {
-          if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: uses not ssa_names: ");
+  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: both uses not ssa_names: ");
 
-          return NULL;
-        }
-   }
+      return NULL;
+    }
 
   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
   if ((TREE_CODE (op1) == SSA_NAME
@@ -3091,7 +3100,7 @@ vect_is_simple_reduction (loop_vec_info
 			   == vect_internal_def
 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
     {
-      if (check_reduction && orig_code != MINUS_EXPR)
+      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
 	{
 	  /* Check if we can swap operands (just for simplicity - so that
 	     the rest of the code can assume that the reduction variable
@@ -3145,7 +3154,8 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   /* Try to find SLP reduction chain.  */
-  if (check_reduction && code != COND_EXPR
+  if (! nested_in_vect_loop
+      && code != COND_EXPR
       && vect_is_slp_reduction (loop_info, phi, def_stmt))
     {
       if (dump_enabled_p ())
Index: gcc/testsuite/gcc.dg/vect/pr66623.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr66623.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr66623.c	(working copy)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_float } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+#define OP *
+#define INIT 1.0
+
+float __attribute__((noinline,noclone))
+foo (float *__restrict__ i)
+{
+  float l = INIT;
+  int a;
+  int b;
+
+  for (a = 0; a < 4; a++)
+    for (b = 0; b < 4; b++)
+      l = l OP i[b];
+
+  return l;
+}
+
+float __attribute__((noinline,noclone))
+foo_ref (float *__restrict__ i)
+{
+  float l = INIT;
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  return l;
+}
+
+union u
+{
+  float f;
+  unsigned int u;
+};
+
+int
+main (void)
+{
+  union u res, res2;
+  float a[4];
+
+  if (sizeof (float) != sizeof (unsigned int))
+    return 0;
+
+  check_vect ();
+
+  a[0] = 0.01;
+  a[1] = 0.01;
+  a[2] = 0.01;
+  a[3] = 1.0;
+
+  res.f = foo_ref (a);
+
+  res2.f = foo (a);
+
+  if (res.u != res2.u)
+    abort ();
+
+  return 0;
+}
+
+/* need -ffast-math to vectorize this loop.  */
+/* ARM NEON passes -ffast-math to these tests, so expect this to fail.  */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */


* Re: [PATCH] Fix PR66623
  2017-06-08  9:18 [PATCH] Fix PR66623 Richard Biener
@ 2017-06-08 13:49 ` Richard Biener
  2017-06-09 15:32   ` Christophe Lyon
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2017-06-08 13:49 UTC (permalink / raw)
  To: gcc-patches

On Thu, 8 Jun 2017, Richard Biener wrote:

> 
> The following fixes unsafe vectorization of reductions in outer loop
> vectorization.  It combines it with a bit of cleanup in the affected
> function.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Re-testing the following variant -- multiple loop-closed PHI nodes
for the same SSA name happen.

Richard.

2017-06-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/66623
	* tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
	refactor check_reduction into two parts, properly computing
	whether we have to check reduction validity for outer loop
	vectorization.

	* gcc.dg/vect/pr66623.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 249007)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
-  edge latch_e = loop_latch_edge (loop);
-  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
   enum tree_code orig_code, code;
   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
@@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
   *double_reduc = false;
   *v_reduc_type = TREE_CODE_REDUCTION;
 
-  /* Check validity of the reduction only for the innermost loop.  */
-  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
-  gcc_assert ((check_reduction && loop == vect_loop)
-              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
-
   name = PHI_RESULT (phi);
   /* ???  If there are no uses of the PHI result the inner loop reduction
      won't be detected as possibly double-reduction by vectorizable_reduction
@@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
         {
           if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "reduction used in loop.\n");
+			     "reduction value used in loop.\n");
           return NULL;
         }
 
       phi_use_stmt = use_stmt;
     }
 
+  edge latch_e = loop_latch_edge (loop);
+  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   if (TREE_CODE (loop_arg) != SSA_NAME)
     {
       if (dump_enabled_p ())
@@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
-  if (!def_stmt)
+  if (gimple_nop_p (def_stmt))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "reduction: no def_stmt.\n");
+			 "reduction: no def_stmt\n");
       return NULL;
     }
 
   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
     {
       if (dump_enabled_p ())
-	dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+	{
+	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			   "reduction: unhandled reduction operation: ");
+	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
+	}
       return NULL;
     }
 
@@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   nloop_uses = 0;
+  auto_vec<gphi *, 3> lcphis;
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
     {
       gimple *use_stmt = USE_STMT (use_p);
@@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
 	continue;
       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
 	nloop_uses++;
+      else
+	/* We can have more than one loop-closed PHI.  */
+	lcphis.safe_push (as_a <gphi *> (use_stmt));
       if (nloop_uses > 1)
 	{
 	  if (dump_enabled_p ())
@@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
       return NULL;
     }
 
+  /* If we are vectorizing an inner reduction we are executing that
+     in the original order only in case we are not dealing with a
+     double reduction.  */
+  bool check_reduction = true;
+  if (flow_loop_nested_p (vect_loop, loop))
+    {
+      gphi *lcphi;
+      unsigned i;
+      check_reduction = false;
+      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
+	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
+	  {
+	    gimple *use_stmt = USE_STMT (use_p);
+	    if (is_gimple_debug (use_stmt))
+	      continue;
+	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
+	      check_reduction = true;
+	  }
+    }
+
+  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
   code = orig_code = gimple_assign_rhs_code (def_stmt);
 
   /* We can handle "res -= x[i]", which is non-associative by
@@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
 
   if (code == COND_EXPR)
     {
-      if (check_reduction)
+      if (! nested_in_vect_loop)
 	*v_reduc_type = COND_REDUCTION;
-    }
-  else if (!commutative_tree_code (code) || !associative_tree_code (code))
-    {
-      if (dump_enabled_p ())
-	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: not commutative/associative: ");
-      return NULL;
-    }
-
-  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
-    {
-      if (code != COND_EXPR)
-        {
-	  if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: not binary operation: ");
-
-          return NULL;
-        }
 
       op3 = gimple_assign_rhs1 (def_stmt);
       if (COMPARISON_CLASS_P (op3))
@@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
 
       op1 = gimple_assign_rhs2 (def_stmt);
       op2 = gimple_assign_rhs3 (def_stmt);
-
-      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
-        {
-          if (dump_enabled_p ())
-            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: uses not ssa_names: ");
-
-          return NULL;
-        }
     }
-  else
+  else if (!commutative_tree_code (code) || !associative_tree_code (code))
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: not commutative/associative: ");
+      return NULL;
+    }
+  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
     {
       op1 = gimple_assign_rhs1 (def_stmt);
       op2 = gimple_assign_rhs2 (def_stmt);
+    }
+  else
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: not handled operation: ");
+      return NULL;
+    }
 
-      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
-        {
-          if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: uses not ssa_names: ");
+  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: both uses not ssa_names: ");
 
-          return NULL;
-        }
-   }
+      return NULL;
+    }
 
   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
   if ((TREE_CODE (op1) == SSA_NAME
@@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
 			   == vect_internal_def
 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
     {
-      if (check_reduction && orig_code != MINUS_EXPR)
+      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
 	{
 	  /* Check if we can swap operands (just for simplicity - so that
 	     the rest of the code can assume that the reduction variable
@@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
     }
 
   /* Try to find SLP reduction chain.  */
-  if (check_reduction && code != COND_EXPR
+  if (! nested_in_vect_loop
+      && code != COND_EXPR
       && vect_is_slp_reduction (loop_info, phi, def_stmt))
     {
       if (dump_enabled_p ())
Index: gcc/testsuite/gcc.dg/vect/pr66623.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr66623.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr66623.c	(working copy)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_float } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+#define OP *
+#define INIT 1.0
+
+float __attribute__((noinline,noclone))
+foo (float *__restrict__ i)
+{
+  float l = INIT;
+  int a;
+  int b;
+
+  for (a = 0; a < 4; a++)
+    for (b = 0; b < 4; b++)
+      l = l OP i[b];
+
+  return l;
+}
+
+float __attribute__((noinline,noclone))
+foo_ref (float *__restrict__ i)
+{
+  float l = INIT;
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  return l;
+}
+
+union u
+{
+  float f;
+  unsigned int u;
+};
+
+int
+main (void)
+{
+  union u res, res2;
+  float a[4];
+
+  if (sizeof (float) != sizeof (unsigned int))
+    return 0;
+
+  check_vect ();
+
+  a[0] = 0.01;
+  a[1] = 0.01;
+  a[2] = 0.01;
+  a[3] = 1.0;
+
+  res.f = foo_ref (a);
+
+  res2.f = foo (a);
+
+  if (res.u != res2.u)
+    abort ();
+
+  return 0;
+}
+
+/* need -ffast-math to vectorize this loop.  */
+/* ARM NEON passes -ffast-math to these tests, so expect this to fail.  */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */


* Re: [PATCH] Fix PR66623
  2017-06-08 13:49 ` Richard Biener
@ 2017-06-09 15:32   ` Christophe Lyon
  2017-06-09 15:48     ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: Christophe Lyon @ 2017-06-09 15:32 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
> On Thu, 8 Jun 2017, Richard Biener wrote:
>
>>
>> The following fixes unsafe vectorization of reductions in outer loop
>> vectorization.  It combines it with a bit of cleanup in the affected
>> function.
>>
>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Re-testing the following variant -- multiple loop-closed PHI nodes
> for the same SSA name happen.
>
> Richard.
>
> 2017-06-08  Richard Biener  <rguenther@suse.de>
>
>         PR tree-optimization/66623
>         * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
>         refactor check_reduction into two parts, properly computing
>         whether we have to check reduction validity for outer loop
>         vectorization.
>
>         * gcc.dg/vect/pr66623.c: New testcase.
>

Hi Richard,

The new testcase fails on armeb* targets:
FAIL:    gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
FAIL:    gcc.dg/vect/pr66623.c execution test

Christophe


> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        (revision 249007)
> +++ gcc/tree-vect-loop.c        (working copy)
> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
>  {
>    struct loop *loop = (gimple_bb (phi))->loop_father;
>    struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> -  edge latch_e = loop_latch_edge (loop);
> -  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>    gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
>    enum tree_code orig_code, code;
>    tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
>    *double_reduc = false;
>    *v_reduc_type = TREE_CODE_REDUCTION;
>
> -  /* Check validity of the reduction only for the innermost loop.  */
> -  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
> -  gcc_assert ((check_reduction && loop == vect_loop)
> -              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
> -
>    name = PHI_RESULT (phi);
>    /* ???  If there are no uses of the PHI result the inner loop reduction
>       won't be detected as possibly double-reduction by vectorizable_reduction
> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
>          {
>            if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                            "reduction used in loop.\n");
> +                            "reduction value used in loop.\n");
>            return NULL;
>          }
>
>        phi_use_stmt = use_stmt;
>      }
>
> +  edge latch_e = loop_latch_edge (loop);
> +  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>    if (TREE_CODE (loop_arg) != SSA_NAME)
>      {
>        if (dump_enabled_p ())
> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    def_stmt = SSA_NAME_DEF_STMT (loop_arg);
> -  if (!def_stmt)
> +  if (gimple_nop_p (def_stmt))
>      {
>        if (dump_enabled_p ())
>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                        "reduction: no def_stmt.\n");
> +                        "reduction: no def_stmt\n");
>        return NULL;
>      }
>
>    if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
>      {
>        if (dump_enabled_p ())
> -       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
> +       {
> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                          "reduction: unhandled reduction operation: ");
> +         dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
> +       }
>        return NULL;
>      }
>
> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    nloop_uses = 0;
> +  auto_vec<gphi *, 3> lcphis;
>    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
>      {
>        gimple *use_stmt = USE_STMT (use_p);
> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
>         continue;
>        if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
>         nloop_uses++;
> +      else
> +       /* We can have more than one loop-closed PHI.  */
> +       lcphis.safe_push (as_a <gphi *> (use_stmt));
>        if (nloop_uses > 1)
>         {
>           if (dump_enabled_p ())
> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
>        return NULL;
>      }
>
> +  /* If we are vectorizing an inner reduction we are executing that
> +     in the original order only in case we are not dealing with a
> +     double reduction.  */
> +  bool check_reduction = true;
> +  if (flow_loop_nested_p (vect_loop, loop))
> +    {
> +      gphi *lcphi;
> +      unsigned i;
> +      check_reduction = false;
> +      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
> +       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
> +         {
> +           gimple *use_stmt = USE_STMT (use_p);
> +           if (is_gimple_debug (use_stmt))
> +             continue;
> +           if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
> +             check_reduction = true;
> +         }
> +    }
> +
> +  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
>    code = orig_code = gimple_assign_rhs_code (def_stmt);
>
>    /* We can handle "res -= x[i]", which is non-associative by
> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>
>    if (code == COND_EXPR)
>      {
> -      if (check_reduction)
> +      if (! nested_in_vect_loop)
>         *v_reduc_type = COND_REDUCTION;
> -    }
> -  else if (!commutative_tree_code (code) || !associative_tree_code (code))
> -    {
> -      if (dump_enabled_p ())
> -       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                       "reduction: not commutative/associative: ");
> -      return NULL;
> -    }
> -
> -  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
> -    {
> -      if (code != COND_EXPR)
> -        {
> -         if (dump_enabled_p ())
> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                           "reduction: not binary operation: ");
> -
> -          return NULL;
> -        }
>
>        op3 = gimple_assign_rhs1 (def_stmt);
>        if (COMPARISON_CLASS_P (op3))
> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>
>        op1 = gimple_assign_rhs2 (def_stmt);
>        op2 = gimple_assign_rhs3 (def_stmt);
> -
> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> -        {
> -          if (dump_enabled_p ())
> -            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                           "reduction: uses not ssa_names: ");
> -
> -          return NULL;
> -        }
>      }
> -  else
> +  else if (!commutative_tree_code (code) || !associative_tree_code (code))
> +    {
> +      if (dump_enabled_p ())
> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> +                       "reduction: not commutative/associative: ");
> +      return NULL;
> +    }
> +  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
>      {
>        op1 = gimple_assign_rhs1 (def_stmt);
>        op2 = gimple_assign_rhs2 (def_stmt);
> +    }
> +  else
> +    {
> +      if (dump_enabled_p ())
> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> +                       "reduction: not handled operation: ");
> +      return NULL;
> +    }
>
> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> -        {
> -          if (dump_enabled_p ())
> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> -                           "reduction: uses not ssa_names: ");
> +  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> +    {
> +      if (dump_enabled_p ())
> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> +                       "reduction: both uses not ssa_names: ");
>
> -          return NULL;
> -        }
> -   }
> +      return NULL;
> +    }
>
>    type = TREE_TYPE (gimple_assign_lhs (def_stmt));
>    if ((TREE_CODE (op1) == SSA_NAME
> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
>                            == vect_internal_def
>                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
>      {
> -      if (check_reduction && orig_code != MINUS_EXPR)
> +      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
>         {
>           /* Check if we can swap operands (just for simplicity - so that
>              the rest of the code can assume that the reduction variable
> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
>      }
>
>    /* Try to find SLP reduction chain.  */
> -  if (check_reduction && code != COND_EXPR
> +  if (! nested_in_vect_loop
> +      && code != COND_EXPR
>        && vect_is_slp_reduction (loop_info, phi, def_stmt))
>      {
>        if (dump_enabled_p ())
> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
> @@ -0,0 +1,86 @@
> +/* { dg-require-effective-target vect_float } */
> +
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +
> +#define OP *
> +#define INIT 1.0
> +
> +float __attribute__((noinline,noclone))
> +foo (float *__restrict__ i)
> +{
> +  float l = INIT;
> +  int a;
> +  int b;
> +
> +  for (a = 0; a < 4; a++)
> +    for (b = 0; b < 4; b++)
> +      l = l OP i[b];
> +
> +  return l;
> +}
> +
> +float __attribute__((noinline,noclone))
> +foo_ref (float *__restrict__ i)
> +{
> +  float l = INIT;
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  l = l OP i[0];
> +  l = l OP i[1];
> +  l = l OP i[2];
> +  l = l OP i[3];
> +
> +  return l;
> +}
> +
> +union u
> +{
> +  float f;
> +  unsigned int u;
> +};
> +
> +int
> +main (void)
> +{
> +  union u res, res2;
> +  float a[4];
> +
> +  if (sizeof (float) != sizeof (unsigned int))
> +    return 0;
> +
> +  check_vect ();
> +
> +  a[0] = 0.01;
> +  a[1] = 0.01;
> +  a[2] = 0.01;
> +  a[3] = 1.0;
> +
> +  res.f = foo_ref (a);
> +
> +  res2.f = foo (a);
> +
> +  if (res.u != res2.u)
> +    abort ();
> +
> +  return 0;
> +}
> +
> +/* need -ffast-math to vectorize this loop.  */
> +/* ARM NEON passes -ffast-math to these tests, so expect this to fail.  */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */


* Re: [PATCH] Fix PR66623
  2017-06-09 15:32   ` Christophe Lyon
@ 2017-06-09 15:48     ` Richard Biener
  2017-06-12  7:37       ` Christophe Lyon
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2017-06-09 15:48 UTC (permalink / raw)
  To: gcc-patches, Christophe Lyon, Richard Biener; +Cc: gcc-patches

On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
>> On Thu, 8 Jun 2017, Richard Biener wrote:
>>
>>>
>>> The following fixes unsafe vectorization of reductions in outer loop
>>> vectorization.  It combines it with a bit of cleanup in the affected
>>> function.
>>>
>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>>
>> Re-testing the following variant -- multiple loop-closed PHI nodes
>> for the same SSA name happen.
>>
>> Richard.
>>
>> 2017-06-08  Richard Biener  <rguenther@suse.de>
>>
>>         PR tree-optimization/66623
>>         * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
>>         refactor check_reduction into two parts, properly computing
>>         whether we have to check reduction validity for outer loop
>>         vectorization.
>>
>>         * gcc.dg/vect/pr66623.c: New testcase.
>>
>
>Hi Richard,
>
>The new testcase fails on armeb* targets:
>FAIL:    gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
>FAIL:    gcc.dg/vect/pr66623.c execution test

Ah, the xfail probably isn't enough.  Can you try a dg-skip or so?
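
For reference, a hedged sketch of what such a dg-skip-if directive
might look like in the testcase -- the directive form is standard,
but the big-endian ARM selector here is a guess, not something taken
from this thread:

  /* Hypothetical directive; selector to be verified on the failing targets.  */
  /* { dg-skip-if "NEON testing adds -ffast-math" { armeb*-*-* } } */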

>Christophe
>
>
>> Index: gcc/tree-vect-loop.c
>> ===================================================================
>> --- gcc/tree-vect-loop.c        (revision 249007)
>> +++ gcc/tree-vect-loop.c        (working copy)
>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
>>  {
>>    struct loop *loop = (gimple_bb (phi))->loop_father;
>>    struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
>> -  edge latch_e = loop_latch_edge (loop);
>> -  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>>    gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt =
>NULL;
>>    enum tree_code orig_code, code;
>>    tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
>>    *double_reduc = false;
>>    *v_reduc_type = TREE_CODE_REDUCTION;
>>
>> -  /* Check validity of the reduction only for the innermost loop. 
>*/
>> -  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
>> -  gcc_assert ((check_reduction && loop == vect_loop)
>> -              || (!check_reduction && flow_loop_nested_p (vect_loop,
>loop)));
>> -
>>    name = PHI_RESULT (phi);
>>    /* ???  If there are no uses of the PHI result the inner loop
>reduction
>>       won't be detected as possibly double-reduction by
>vectorizable_reduction
>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
>>          {
>>            if (dump_enabled_p ())
>>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> -                            "reduction used in loop.\n");
>> +                            "reduction value used in loop.\n");
>>            return NULL;
>>          }
>>
>>        phi_use_stmt = use_stmt;
>>      }
>>
>> +  edge latch_e = loop_latch_edge (loop);
>> +  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>>    if (TREE_CODE (loop_arg) != SSA_NAME)
>>      {
>>        if (dump_enabled_p ())
>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
>>      }
>>
>>    def_stmt = SSA_NAME_DEF_STMT (loop_arg);
>> -  if (!def_stmt)
>> +  if (gimple_nop_p (def_stmt))
>>      {
>>        if (dump_enabled_p ())
>>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> -                        "reduction: no def_stmt.\n");
>> +                        "reduction: no def_stmt\n");
>>        return NULL;
>>      }
>>
>>    if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) !=
>GIMPLE_PHI)
>>      {
>>        if (dump_enabled_p ())
>> -       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
>> +       {
>> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> +                          "reduction: unhandled reduction operation:
>");
>> +         dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
>def_stmt, 0);
>> +       }
>>        return NULL;
>>      }
>>
>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
>>      }
>>
>>    nloop_uses = 0;
>> +  auto_vec<gphi *, 3> lcphis;
>>    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
>>      {
>>        gimple *use_stmt = USE_STMT (use_p);
>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
>>         continue;
>>        if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
>>         nloop_uses++;
>> +      else
>> +       /* We can have more than one loop-closed PHI.  */
>> +       lcphis.safe_push (as_a <gphi *> (use_stmt));
>>        if (nloop_uses > 1)
>>         {
>>           if (dump_enabled_p ())
>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
>>        return NULL;
>>      }
>>
>> +  /* If we are vectorizing an inner reduction we are executing that
>> +     in the original order only in case we are not dealing with a
>> +     double reduction.  */
>> +  bool check_reduction = true;
>> +  if (flow_loop_nested_p (vect_loop, loop))
>> +    {
>> +      gphi *lcphi;
>> +      unsigned i;
>> +      check_reduction = false;
>> +      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
>> +       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result
>(lcphi))
>> +         {
>> +           gimple *use_stmt = USE_STMT (use_p);
>> +           if (is_gimple_debug (use_stmt))
>> +             continue;
>> +           if (! flow_bb_inside_loop_p (vect_loop, gimple_bb
>(use_stmt)))
>> +             check_reduction = true;
>> +         }
>> +    }
>> +
>> +  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
>>    code = orig_code = gimple_assign_rhs_code (def_stmt);
>>
>>    /* We can handle "res -= x[i]", which is non-associative by
>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>>
>>    if (code == COND_EXPR)
>>      {
>> -      if (check_reduction)
>> +      if (! nested_in_vect_loop)
>>         *v_reduc_type = COND_REDUCTION;
>> -    }
>> -  else if (!commutative_tree_code (code) || !associative_tree_code
>(code))
>> -    {
>> -      if (dump_enabled_p ())
>> -       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> -                       "reduction: not commutative/associative: ");
>> -      return NULL;
>> -    }
>> -
>> -  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
>> -    {
>> -      if (code != COND_EXPR)
>> -        {
>> -         if (dump_enabled_p ())
>> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> -                           "reduction: not binary operation: ");
>> -
>> -          return NULL;
>> -        }
>>
>>        op3 = gimple_assign_rhs1 (def_stmt);
>>        if (COMPARISON_CLASS_P (op3))
>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>>
>>        op1 = gimple_assign_rhs2 (def_stmt);
>>        op2 = gimple_assign_rhs3 (def_stmt);
>> -
>> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>SSA_NAME)
>> -        {
>> -          if (dump_enabled_p ())
>> -            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> -                           "reduction: uses not ssa_names: ");
>> -
>> -          return NULL;
>> -        }
>>      }
>> -  else
>> +  else if (!commutative_tree_code (code) || !associative_tree_code
>(code))
>> +    {
>> +      if (dump_enabled_p ())
>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> +                       "reduction: not commutative/associative: ");
>> +      return NULL;
>> +    }
>> +  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
>>      {
>>        op1 = gimple_assign_rhs1 (def_stmt);
>>        op2 = gimple_assign_rhs2 (def_stmt);
>> +    }
>> +  else
>> +    {
>> +      if (dump_enabled_p ())
>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> +                       "reduction: not handled operation: ");
>> +      return NULL;
>> +    }
>>
>> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>SSA_NAME)
>> -        {
>> -          if (dump_enabled_p ())
>> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> -                           "reduction: uses not ssa_names: ");
>> +  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
>> +    {
>> +      if (dump_enabled_p ())
>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>> +                       "reduction: both uses not ssa_names: ");
>>
>> -          return NULL;
>> -        }
>> -   }
>> +      return NULL;
>> +    }
>>
>>    type = TREE_TYPE (gimple_assign_lhs (def_stmt));
>>    if ((TREE_CODE (op1) == SSA_NAME
>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
>>                            == vect_internal_def
>>                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
>>      {
>> -      if (check_reduction && orig_code != MINUS_EXPR)
>> +      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
>>         {
>>           /* Check if we can swap operands (just for simplicity - so
>that
>>              the rest of the code can assume that the reduction
>variable
>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
>>      }
>>
>>    /* Try to find SLP reduction chain.  */
>> -  if (check_reduction && code != COND_EXPR
>> +  if (! nested_in_vect_loop
>> +      && code != COND_EXPR
>>        && vect_is_slp_reduction (loop_info, phi, def_stmt))
>>      {
>>        if (dump_enabled_p ())
>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
>> ===================================================================
>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
>> @@ -0,0 +1,86 @@
>> +/* { dg-require-effective-target vect_float } */
>> +
>> +#include "tree-vect.h"
>> +
>> +extern void abort (void);
>> +
>> +#define OP *
>> +#define INIT 1.0
>> +
>> +float __attribute__((noinline,noclone))
>> +foo (float *__restrict__ i)
>> +{
>> +  float l = INIT;
>> +  int a;
>> +  int b;
>> +
>> +  for (a = 0; a < 4; a++)
>> +    for (b = 0; b < 4; b++)
>> +      l = l OP i[b];
>> +
>> +  return l;
>> +}
>> +
>> +float __attribute__((noinline,noclone))
>> +foo_ref (float *__restrict__ i)
>> +{
>> +  float l = INIT;
>> +
>> +  l = l OP i[0];
>> +  l = l OP i[1];
>> +  l = l OP i[2];
>> +  l = l OP i[3];
>> +
>> +  l = l OP i[0];
>> +  l = l OP i[1];
>> +  l = l OP i[2];
>> +  l = l OP i[3];
>> +
>> +  l = l OP i[0];
>> +  l = l OP i[1];
>> +  l = l OP i[2];
>> +  l = l OP i[3];
>> +
>> +  l = l OP i[0];
>> +  l = l OP i[1];
>> +  l = l OP i[2];
>> +  l = l OP i[3];
>> +
>> +  return l;
>> +}
>> +
>> +union u
>> +{
>> +  float f;
>> +  unsigned int u;
>> +};
>> +
>> +int
>> +main (void)
>> +{
>> +  union u res, res2;
>> +  float a[4];
>> +
>> +  if (sizeof (float) != sizeof (unsigned int))
>> +    return 0;
>> +
>> +  check_vect ();
>> +
>> +  a[0] = 0.01;
>> +  a[1] = 0.01;
>> +  a[2] = 0.01;
>> +  a[3] = 1.0;
>> +
>> +  res.f = foo_ref (a);
>> +
>> +  res2.f = foo (a);
>> +
>> +  if (res.u != res2.u)
>> +    abort ();
>> +
>> +  return 0;
>> +}
>> +
>> +/* need -ffast-math to vectorize this loop.  */
>> +/* ARM NEON passes -ffast-math to these tests, so expect this to
>fail.  */
>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {
>xfail arm_neon_ok } } } */


* Re: [PATCH] Fix PR66623
  2017-06-09 15:48     ` Richard Biener
@ 2017-06-12  7:37       ` Christophe Lyon
  2017-06-12  7:45         ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: Christophe Lyon @ 2017-06-12  7:37 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Richard Biener

On 9 June 2017 at 17:48, Richard Biener <richard.guenther@gmail.com> wrote:
> On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
>>> On Thu, 8 Jun 2017, Richard Biener wrote:
>>>
>>>>
>>>> The following fixes unsafe vectorization of reductions in outer loop
>>>> vectorization.  It combines it with a bit of cleanup in the affected
>>>> function.
>>>>
>>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>>>
>>> Re-testing the following variant -- multiple loop-closed PHI nodes
>>> for the same SSA name happen.
>>>
>>> Richard.
>>>
>>> 2017-06-08  Richard Biener  <rguenther@suse.de>
>>>
>>>         PR tree-optimization/66623
>>>         * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
>>>         refactor check_reduction into two parts, properly computing
>>>         whether we have to check reduction validity for outer loop
>>>         vectorization.
>>>
>>>         * gcc.dg/vect/pr66623.c: New testcase.
>>>
>>
>>Hi Richard,
>>
>>The new testcase fails on armeb* targets:
>>FAIL:    gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
>>FAIL:    gcc.dg/vect/pr66623.c execution test
>
> Ah, the xfail probably isn't enough.  Can you try a dg-skip or so?
>

I can try, but do we really want to ignore/hide the fact that the generated
code is non-functional on arm big-endian? (execution is OK on little-endian)


>>Christophe
>>
>>
>>> Index: gcc/tree-vect-loop.c
>>> ===================================================================
>>> --- gcc/tree-vect-loop.c        (revision 249007)
>>> +++ gcc/tree-vect-loop.c        (working copy)
>>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
>>>  {
>>>    struct loop *loop = (gimple_bb (phi))->loop_father;
>>>    struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
>>> -  edge latch_e = loop_latch_edge (loop);
>>> -  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>>>    gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt =
>>NULL;
>>>    enum tree_code orig_code, code;
>>>    tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
>>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
>>>    *double_reduc = false;
>>>    *v_reduc_type = TREE_CODE_REDUCTION;
>>>
>>> -  /* Check validity of the reduction only for the innermost loop.
>>*/
>>> -  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
>>> -  gcc_assert ((check_reduction && loop == vect_loop)
>>> -              || (!check_reduction && flow_loop_nested_p (vect_loop,
>>loop)));
>>> -
>>>    name = PHI_RESULT (phi);
>>>    /* ???  If there are no uses of the PHI result the inner loop
>>reduction
>>>       won't be detected as possibly double-reduction by
>>vectorizable_reduction
>>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
>>>          {
>>>            if (dump_enabled_p ())
>>>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> -                            "reduction used in loop.\n");
>>> +                            "reduction value used in loop.\n");
>>>            return NULL;
>>>          }
>>>
>>>        phi_use_stmt = use_stmt;
>>>      }
>>>
>>> +  edge latch_e = loop_latch_edge (loop);
>>> +  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
>>>    if (TREE_CODE (loop_arg) != SSA_NAME)
>>>      {
>>>        if (dump_enabled_p ())
>>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
>>>      }
>>>
>>>    def_stmt = SSA_NAME_DEF_STMT (loop_arg);
>>> -  if (!def_stmt)
>>> +  if (gimple_nop_p (def_stmt))
>>>      {
>>>        if (dump_enabled_p ())
>>>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> -                        "reduction: no def_stmt.\n");
>>> +                        "reduction: no def_stmt\n");
>>>        return NULL;
>>>      }
>>>
>>>    if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) !=
>>GIMPLE_PHI)
>>>      {
>>>        if (dump_enabled_p ())
>>> -       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
>>> +       {
>>> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> +                          "reduction: unhandled reduction operation:
>>");
>>> +         dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
>>def_stmt, 0);
>>> +       }
>>>        return NULL;
>>>      }
>>>
>>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
>>>      }
>>>
>>>    nloop_uses = 0;
>>> +  auto_vec<gphi *, 3> lcphis;
>>>    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
>>>      {
>>>        gimple *use_stmt = USE_STMT (use_p);
>>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
>>>         continue;
>>>        if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
>>>         nloop_uses++;
>>> +      else
>>> +       /* We can have more than one loop-closed PHI.  */
>>> +       lcphis.safe_push (as_a <gphi *> (use_stmt));
>>>        if (nloop_uses > 1)
>>>         {
>>>           if (dump_enabled_p ())
>>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
>>>        return NULL;
>>>      }
>>>
>>> +  /* If we are vectorizing an inner reduction we are executing that
>>> +     in the original order only in case we are not dealing with a
>>> +     double reduction.  */
>>> +  bool check_reduction = true;
>>> +  if (flow_loop_nested_p (vect_loop, loop))
>>> +    {
>>> +      gphi *lcphi;
>>> +      unsigned i;
>>> +      check_reduction = false;
>>> +      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
>>> +       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result
>>(lcphi))
>>> +         {
>>> +           gimple *use_stmt = USE_STMT (use_p);
>>> +           if (is_gimple_debug (use_stmt))
>>> +             continue;
>>> +           if (! flow_bb_inside_loop_p (vect_loop, gimple_bb
>>(use_stmt)))
>>> +             check_reduction = true;
>>> +         }
>>> +    }
>>> +
>>> +  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
>>>    code = orig_code = gimple_assign_rhs_code (def_stmt);
>>>
>>>    /* We can handle "res -= x[i]", which is non-associative by
>>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
>>>
>>>    if (code == COND_EXPR)
>>>      {
>>> -      if (check_reduction)
>>> +      if (! nested_in_vect_loop)
>>>         *v_reduc_type = COND_REDUCTION;
>>> -    }
>>> -  else if (!commutative_tree_code (code) || !associative_tree_code
>>(code))
>>> -    {
>>> -      if (dump_enabled_p ())
>>> -       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> -                       "reduction: not commutative/associative: ");
>>> -      return NULL;
>>> -    }
>>> -
>>> -  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
>>> -    {
>>> -      if (code != COND_EXPR)
>>> -        {
>>> -         if (dump_enabled_p ())
>>> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> -                           "reduction: not binary operation: ");
>>> -
>>> -          return NULL;
>>> -        }
>>>
>>>        op3 = gimple_assign_rhs1 (def_stmt);
>>>        if (COMPARISON_CLASS_P (op3))
>>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
>>>
>>>        op1 = gimple_assign_rhs2 (def_stmt);
>>>        op2 = gimple_assign_rhs3 (def_stmt);
>>> -
>>> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>>SSA_NAME)
>>> -        {
>>> -          if (dump_enabled_p ())
>>> -            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> -                           "reduction: uses not ssa_names: ");
>>> -
>>> -          return NULL;
>>> -        }
>>>      }
>>> -  else
>>> +  else if (!commutative_tree_code (code) || !associative_tree_code
>>(code))
>>> +    {
>>> +      if (dump_enabled_p ())
>>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> +                       "reduction: not commutative/associative: ");
>>> +      return NULL;
>>> +    }
>>> +  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
>>>      {
>>>        op1 = gimple_assign_rhs1 (def_stmt);
>>>        op2 = gimple_assign_rhs2 (def_stmt);
>>> +    }
>>> +  else
>>> +    {
>>> +      if (dump_enabled_p ())
>>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> +                       "reduction: not handled operation: ");
>>> +      return NULL;
>>> +    }
>>>
>>> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) !=
>>SSA_NAME)
>>> -        {
>>> -          if (dump_enabled_p ())
>>> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> -                           "reduction: uses not ssa_names: ");
>>> +  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
>>> +    {
>>> +      if (dump_enabled_p ())
>>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
>>> +                       "reduction: both uses not ssa_names: ");
>>>
>>> -          return NULL;
>>> -        }
>>> -   }
>>> +      return NULL;
>>> +    }
>>>
>>>    type = TREE_TYPE (gimple_assign_lhs (def_stmt));
>>>    if ((TREE_CODE (op1) == SSA_NAME
>>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
>>>                            == vect_internal_def
>>>                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
>>>      {
>>> -      if (check_reduction && orig_code != MINUS_EXPR)
>>> +      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
>>>         {
>>>           /* Check if we can swap operands (just for simplicity - so
>>that
>>>              the rest of the code can assume that the reduction
>>variable
>>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
>>>      }
>>>
>>>    /* Try to find SLP reduction chain.  */
>>> -  if (check_reduction && code != COND_EXPR
>>> +  if (! nested_in_vect_loop
>>> +      && code != COND_EXPR
>>>        && vect_is_slp_reduction (loop_info, phi, def_stmt))
>>>      {
>>>        if (dump_enabled_p ())
>>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
>>> ===================================================================
>>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
>>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
>>> @@ -0,0 +1,86 @@
>>> +/* { dg-require-effective-target vect_float } */
>>> +
>>> +#include "tree-vect.h"
>>> +
>>> +extern void abort (void);
>>> +
>>> +#define OP *
>>> +#define INIT 1.0
>>> +
>>> +float __attribute__((noinline,noclone))
>>> +foo (float *__restrict__ i)
>>> +{
>>> +  float l = INIT;
>>> +  int a;
>>> +  int b;
>>> +
>>> +  for (a = 0; a < 4; a++)
>>> +    for (b = 0; b < 4; b++)
>>> +      l = l OP i[b];
>>> +
>>> +  return l;
>>> +}
>>> +
>>> +float __attribute__((noinline,noclone))
>>> +foo_ref (float *__restrict__ i)
>>> +{
>>> +  float l = INIT;
>>> +
>>> +  l = l OP i[0];
>>> +  l = l OP i[1];
>>> +  l = l OP i[2];
>>> +  l = l OP i[3];
>>> +
>>> +  l = l OP i[0];
>>> +  l = l OP i[1];
>>> +  l = l OP i[2];
>>> +  l = l OP i[3];
>>> +
>>> +  l = l OP i[0];
>>> +  l = l OP i[1];
>>> +  l = l OP i[2];
>>> +  l = l OP i[3];
>>> +
>>> +  l = l OP i[0];
>>> +  l = l OP i[1];
>>> +  l = l OP i[2];
>>> +  l = l OP i[3];
>>> +
>>> +  return l;
>>> +}
>>> +
>>> +union u
>>> +{
>>> +  float f;
>>> +  unsigned int u;
>>> +};
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> +  union u res, res2;
>>> +  float a[4];
>>> +
>>> +  if (sizeof (float) != sizeof (unsigned int))
>>> +    return 0;
>>> +
>>> +  check_vect ();
>>> +
>>> +  a[0] = 0.01;
>>> +  a[1] = 0.01;
>>> +  a[2] = 0.01;
>>> +  a[3] = 1.0;
>>> +
>>> +  res.f = foo_ref (a);
>>> +
>>> +  res2.f = foo (a);
>>> +
>>> +  if (res.u != res2.u)
>>> +    abort ();
>>> +
>>> +  return 0;
>>> +}
>>> +
>>> +/* need -ffast-math to vectorize this loop.  */
>>> +/* ARM NEON passes -ffast-math to these tests, so expect this to
>>fail.  */
>>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {
>>xfail arm_neon_ok } } } */
>


* Re: [PATCH] Fix PR66623
  2017-06-12  7:37       ` Christophe Lyon
@ 2017-06-12  7:45         ` Richard Biener
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Biener @ 2017-06-12  7:45 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Richard Biener, gcc-patches

On Mon, 12 Jun 2017, Christophe Lyon wrote:

> On 9 June 2017 at 17:48, Richard Biener <richard.guenther@gmail.com> wrote:
> > On June 9, 2017 5:32:10 PM GMT+02:00, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> >>On 8 June 2017 at 15:49, Richard Biener <rguenther@suse.de> wrote:
> >>> On Thu, 8 Jun 2017, Richard Biener wrote:
> >>>
> >>>>
> >>>> The following fixes unsafe vectorization of reductions in outer loop
> >>>> vectorization.  It combines it with a bit of cleanup in the affected
> >>>> function.
> >>>>
> >>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >>>
> >>> Re-testing the following variant -- multiple loop-closed PHI nodes
> >>> for the same SSA name happen.
> >>>
> >>> Richard.
> >>>
> >>> 2017-06-08  Richard Biener  <rguenther@suse.de>
> >>>
> >>>         PR tree-optimization/66623
> >>>         * tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
> >>>         refactor check_reduction into two parts, properly computing
> >>>         whether we have to check reduction validity for outer loop
> >>>         vectorization.
> >>>
> >>>         * gcc.dg/vect/pr66623.c: New testcase.
> >>>
> >>
> >>Hi Richard,
> >>
> >>The new testcase fails on armeb* targets:
> >>FAIL:    gcc.dg/vect/pr66623.c -flto -ffat-lto-objects execution test
> >>FAIL:    gcc.dg/vect/pr66623.c execution test
> >
> > Ah, the xfail probably isn't enough.  Can you try a dg-skip or so?
> >
> 
> I can try, but do we really want to ignore/hide the fact that the generated
> code is non-functional on arm big-endian? (execution is OK on little-endian)

The code is expected to "miscompile" with -ffast-math, and from other
testcases I see that (at least in the vectorizer testsuite) NEON
always enables that.  So - can you check whether adding

{ dg-additional-options "-fno-fast-math" }

makes the testcase pass?  Or investigate a bit what the actual
issue is?  That is, remove the XFAIL (because with dg-do run we
really must not vectorize this)?  I also recall that vectorization
on big-endian arm is "broken", but that might have changed.
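
A sketch of how that suggestion could slot into the testcase header
(placement and comment wording are a suggestion, not part of the
patch above):

  /* { dg-require-effective-target vect_float } */
  /* Keep the FP reduction in source order even where the testsuite
     adds -ffast-math (e.g. for NEON), so the run-time compare with
     foo_ref stays meaningful.  */
  /* { dg-additional-options "-fno-fast-math" } */

With reassociation disabled the outer-loop reduction should no longer
be vectorized, so the "vectorized 0 loops" scan could presumably lose
its xfail.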

Richard.

> 
> >>Christophe
> >>
> >>
> >>> Index: gcc/tree-vect-loop.c
> >>> ===================================================================
> >>> --- gcc/tree-vect-loop.c        (revision 249007)
> >>> +++ gcc/tree-vect-loop.c        (working copy)
> >>> @@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
> >>>  {
> >>>    struct loop *loop = (gimple_bb (phi))->loop_father;
> >>>    struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
> >>> -  edge latch_e = loop_latch_edge (loop);
> >>> -  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> >>>    gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
> >>>    enum tree_code orig_code, code;
> >>>    tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
> >>> @@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
> >>>    *double_reduc = false;
> >>>    *v_reduc_type = TREE_CODE_REDUCTION;
> >>>
> >>> -  /* Check validity of the reduction only for the innermost loop.  */
> >>> -  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
> >>> -  gcc_assert ((check_reduction && loop == vect_loop)
> >>> -              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
> >>> -
> >>>    name = PHI_RESULT (phi);
> >>>    /* ???  If there are no uses of the PHI result the inner loop reduction
> >>>       won't be detected as possibly double-reduction by vectorizable_reduction
> >>> @@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
> >>>          {
> >>>            if (dump_enabled_p ())
> >>>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >>> -                            "reduction used in loop.\n");
> >>> +                            "reduction value used in loop.\n");
> >>>            return NULL;
> >>>          }
> >>>
> >>>        phi_use_stmt = use_stmt;
> >>>      }
> >>>
> >>> +  edge latch_e = loop_latch_edge (loop);
> >>> +  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
> >>>    if (TREE_CODE (loop_arg) != SSA_NAME)
> >>>      {
> >>>        if (dump_enabled_p ())
> >>> @@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
> >>>      }
> >>>
> >>>    def_stmt = SSA_NAME_DEF_STMT (loop_arg);
> >>> -  if (!def_stmt)
> >>> +  if (gimple_nop_p (def_stmt))
> >>>      {
> >>>        if (dump_enabled_p ())
> >>>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >>> -                        "reduction: no def_stmt.\n");
> >>> +                        "reduction: no def_stmt\n");
> >>>        return NULL;
> >>>      }
> >>>
> >>>    if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
> >>>      {
> >>>        if (dump_enabled_p ())
> >>> -       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
> >>> +       {
> >>> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >>> +                          "reduction: unhandled reduction operation:
> >>");
> >>> +         dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
> >>def_stmt, 0);
> >>> +       }
> >>>        return NULL;
> >>>      }
> >>>
> >>> @@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
> >>>      }
> >>>
> >>>    nloop_uses = 0;
> >>> +  auto_vec<gphi *, 3> lcphis;
> >>>    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
> >>>      {
> >>>        gimple *use_stmt = USE_STMT (use_p);
> >>> @@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
> >>>         continue;
> >>>        if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
> >>>         nloop_uses++;
> >>> +      else
> >>> +       /* We can have more than one loop-closed PHI.  */
> >>> +       lcphis.safe_push (as_a <gphi *> (use_stmt));
> >>>        if (nloop_uses > 1)
> >>>         {
> >>>           if (dump_enabled_p ())
> >>> @@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
> >>>        return NULL;
> >>>      }
> >>>
> >>> +  /* If we are vectorizing an inner reduction we are executing that
> >>> +     in the original order only in case we are not dealing with a
> >>> +     double reduction.  */
> >>> +  bool check_reduction = true;
> >>> +  if (flow_loop_nested_p (vect_loop, loop))
> >>> +    {
> >>> +      gphi *lcphi;
> >>> +      unsigned i;
> >>> +      check_reduction = false;
> >>> +      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
> >>> +       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
> >>> +         {
> >>> +           gimple *use_stmt = USE_STMT (use_p);
> >>> +           if (is_gimple_debug (use_stmt))
> >>> +             continue;
> >>> +           if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
> >>> +             check_reduction = true;
> >>> +         }
> >>> +    }
> >>> +
> >>> +  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
> >>>    code = orig_code = gimple_assign_rhs_code (def_stmt);
> >>>
> >>>    /* We can handle "res -= x[i]", which is non-associative by
> >>> @@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info
> >>>
> >>>    if (code == COND_EXPR)
> >>>      {
> >>> -      if (check_reduction)
> >>> +      if (! nested_in_vect_loop)
> >>>         *v_reduc_type = COND_REDUCTION;
> >>> -    }
> >>> -  else if (!commutative_tree_code (code) || !associative_tree_code (code))
> >>> -    {
> >>> -      if (dump_enabled_p ())
> >>> -       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> -                       "reduction: not commutative/associative: ");
> >>> -      return NULL;
> >>> -    }
> >>> -
> >>> -  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
> >>> -    {
> >>> -      if (code != COND_EXPR)
> >>> -        {
> >>> -         if (dump_enabled_p ())
> >>> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> -                           "reduction: not binary operation: ");
> >>> -
> >>> -          return NULL;
> >>> -        }
> >>>
> >>>        op3 = gimple_assign_rhs1 (def_stmt);
> >>>        if (COMPARISON_CLASS_P (op3))
> >>> @@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info
> >>>
> >>>        op1 = gimple_assign_rhs2 (def_stmt);
> >>>        op2 = gimple_assign_rhs3 (def_stmt);
> >>> -
> >>> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> >>> -        {
> >>> -          if (dump_enabled_p ())
> >>> -            report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> -                           "reduction: uses not ssa_names: ");
> >>> -
> >>> -          return NULL;
> >>> -        }
> >>>      }
> >>> -  else
> >>> +  else if (!commutative_tree_code (code) || !associative_tree_code (code))
> >>> +    {
> >>> +      if (dump_enabled_p ())
> >>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> +                       "reduction: not commutative/associative: ");
> >>> +      return NULL;
> >>> +    }
> >>> +  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
> >>>      {
> >>>        op1 = gimple_assign_rhs1 (def_stmt);
> >>>        op2 = gimple_assign_rhs2 (def_stmt);
> >>> +    }
> >>> +  else
> >>> +    {
> >>> +      if (dump_enabled_p ())
> >>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> +                       "reduction: not handled operation: ");
> >>> +      return NULL;
> >>> +    }
> >>>
> >>> -      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> >>> -        {
> >>> -          if (dump_enabled_p ())
> >>> -           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> -                           "reduction: uses not ssa_names: ");
> >>> +  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
> >>> +    {
> >>> +      if (dump_enabled_p ())
> >>> +       report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
> >>> +                       "reduction: both uses not ssa_names: ");
> >>>
> >>> -          return NULL;
> >>> -        }
> >>> -   }
> >>> +      return NULL;
> >>> +    }
> >>>
> >>>    type = TREE_TYPE (gimple_assign_lhs (def_stmt));
> >>>    if ((TREE_CODE (op1) == SSA_NAME
> >>> @@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
> >>>                            == vect_internal_def
> >>>                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
> >>>      {
> >>> -      if (check_reduction && orig_code != MINUS_EXPR)
> >>> +      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
> >>>         {
> >>>           /* Check if we can swap operands (just for simplicity - so
> >>that
> >>>              the rest of the code can assume that the reduction
> >>variable
> >>> @@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
> >>>      }
> >>>
> >>>    /* Try to find SLP reduction chain.  */
> >>> -  if (check_reduction && code != COND_EXPR
> >>> +  if (! nested_in_vect_loop
> >>> +      && code != COND_EXPR
> >>>        && vect_is_slp_reduction (loop_info, phi, def_stmt))
> >>>      {
> >>>        if (dump_enabled_p ())
> >>> Index: gcc/testsuite/gcc.dg/vect/pr66623.c
> >>> ===================================================================
> >>> --- gcc/testsuite/gcc.dg/vect/pr66623.c (revision 0)
> >>> +++ gcc/testsuite/gcc.dg/vect/pr66623.c (working copy)
> >>> @@ -0,0 +1,86 @@
> >>> +/* { dg-require-effective-target vect_float } */
> >>> +
> >>> +#include "tree-vect.h"
> >>> +
> >>> +extern void abort (void);
> >>> +
> >>> +#define OP *
> >>> +#define INIT 1.0
> >>> +
> >>> +float __attribute__((noinline,noclone))
> >>> +foo (float *__restrict__ i)
> >>> +{
> >>> +  float l = INIT;
> >>> +  int a;
> >>> +  int b;
> >>> +
> >>> +  for (a = 0; a < 4; a++)
> >>> +    for (b = 0; b < 4; b++)
> >>> +      l = l OP i[b];
> >>> +
> >>> +  return l;
> >>> +}
> >>> +
> >>> +float __attribute__((noinline,noclone))
> >>> +foo_ref (float *__restrict__ i)
> >>> +{
> >>> +  float l = INIT;
> >>> +
> >>> +  l = l OP i[0];
> >>> +  l = l OP i[1];
> >>> +  l = l OP i[2];
> >>> +  l = l OP i[3];
> >>> +
> >>> +  l = l OP i[0];
> >>> +  l = l OP i[1];
> >>> +  l = l OP i[2];
> >>> +  l = l OP i[3];
> >>> +
> >>> +  l = l OP i[0];
> >>> +  l = l OP i[1];
> >>> +  l = l OP i[2];
> >>> +  l = l OP i[3];
> >>> +
> >>> +  l = l OP i[0];
> >>> +  l = l OP i[1];
> >>> +  l = l OP i[2];
> >>> +  l = l OP i[3];
> >>> +
> >>> +  return l;
> >>> +}
> >>> +
> >>> +union u
> >>> +{
> >>> +  float f;
> >>> +  unsigned int u;
> >>> +};
> >>> +
> >>> +int
> >>> +main (void)
> >>> +{
> >>> +  union u res, res2;
> >>> +  float a[4];
> >>> +
> >>> +  if (sizeof (float) != sizeof (unsigned int))
> >>> +    return 0;
> >>> +
> >>> +  check_vect ();
> >>> +
> >>> +  a[0] = 0.01;
> >>> +  a[1] = 0.01;
> >>> +  a[2] = 0.01;
> >>> +  a[3] = 1.0;
> >>> +
> >>> +  res.f = foo_ref (a);
> >>> +
> >>> +  res2.f = foo (a);
> >>> +
> >>> +  if (res.u != res2.u)
> >>> +    abort ();
> >>> +
> >>> +  return 0;
> >>> +}
> >>> +
> >>> +/* need -ffast-math to vectorize this loop.  */
> >>> +/* ARM NEON passes -ffast-math to these tests, so expect this to fail.  */
> >>> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */
> >
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)



Thread overview: 6+ messages
2017-06-08  9:18 [PATCH] Fix PR66623 Richard Biener
2017-06-08 13:49 ` Richard Biener
2017-06-09 15:32   ` Christophe Lyon
2017-06-09 15:48     ` Richard Biener
2017-06-12  7:37       ` Christophe Lyon
2017-06-12  7:45         ` Richard Biener
