public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch] Optimize condition reductions where the result is an integer induction variable
@ 2015-11-11 12:22 Alan Hayward
  2015-11-11 13:25 ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Alan Hayward @ 2015-11-11 12:22 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]

Hi,
I hoped to post this in time for Monday’s cut off date, but circumstances
delayed me until today. Hoping if possible this patch will still be able
to go in.


This patch builds upon the change for PR65947, and reduces the amount of
code produced in a vectorized condition reduction where operand 2 of the
COND_EXPR is an assignment of a increasing integer induction variable that
won't wrap.
 

For example (assuming all types are ints), this is a match:

last = 5;
for (i = 0; i < N; i++)
  if (a[i] < min_v)
    last = i;

Whereas, this is not because the result is based off a memory access:
last = 5;
for (i = 0; i < N; i++)
  if (a[i] < min_v)
    last = a[i];

In the integer induction variable case we can just use a MAX reduction and
skip all the code I added in my vectorized condition reduction patch - the
additional induction variables in vectorizable_reduction () and the
additional checks in vect_create_epilog_for_reduction (). From the patch
diff only, it's not immediately obvious that those parts will be skipped
as there is no code changes in those areas.

The initial value of the induction variable is force set to zero, as any
other value could effect the result of the induction. At the end of the
loop, if the result is zero, then we restore the original initial value.




Cheers,
Alan.


[-- Attachment #2: optimizeConditionReductions.patch --]
[-- Type: application/octet-stream, Size: 16071 bytes --]

diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
index 7933f5c861201a7720ca77d84671898586516914..1e7a05dc8cc6f00fee265b9b8ad65beaa520f1e8 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -9,7 +9,7 @@ extern void abort (void) __attribute__ ((noreturn));
 int
 condition_reduction (int *a, int min_v)
 {
-  int last = -1;
+  int last = 66; /* High start value.  */
 
   for (int i = 0; i < N; i++)
     if (a[i] < min_v)
@@ -28,12 +28,13 @@ main (void)
   31, 32
   };
 
-  int ret = condition_reduction (a, 16);
+  int ret = condition_reduction (a, 1);
 
-  if (ret != 19)
+  if (ret != 17)
     abort ();
 
   return 0;
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
index 9a43a6059fabc1ca527dd6305bf6482f2103fab4..b4c6659b77ccbbafd77c62976e408dda37a2f4c4 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -37,4 +37,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
new file mode 100644
index 0000000000000000000000000000000000000000..fb5ffd48c7bb87f6e2f9e055e90e4f121a202d5e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction where the result is a negative of the induction.
+   Will fail to vectorize to a simple case.  */
+
+signed int
+condition_reduction (signed int *a, signed int min_v)
+{
+  signed int last = -1;
+
+  for (signed int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = -i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  signed int a[N] = {
+  11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  signed int ret = condition_reduction (a, 16);
+
+  if (ret != -19)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c6faddd189dabf9b96b4e665c59cf268cf9a3aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction with a reversed loop.
+   Will fail to vectorize to a simple case.  */
+
+int
+condition_reduction (int *a, int min_v)
+{
+  int last = -1;
+
+  for (int i = N-1; i >=0; i--)
+    if (a[i] < min_v)
+      last = i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  int a[N] = {
+  17, 28, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  int ret = condition_reduction (a, 16);
+
+  if (ret != 2)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
index 9c627d9d7d5c1c42f285a29a1a601b3bc60b8a5a..9e9ff53828609b19e6fe97ae2d640294cb980c14 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -38,3 +38,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index e115de2a28259307e4eaa9cbbe2fa93663c22873..4b6aa9216b0c200c43a227bf8b4954ecbe4f0090 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -48,3 +48,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
index 76a0567aa5450e00de2d700123ffc930b4028486..f4e7fdc97c89866b19838593cb84fc1754f87494 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -37,4 +37,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
index 360e3b51ee15ecea3b5d590c93d524e143f0ffca..21be8d0b749a4c1f2f29b6aed17533ec26b7aede 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -39,3 +39,4 @@ main (void)
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */
 /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
index 4997ef79cae83585758436874ac109ca368a1fc1..e1432403b2dd227a7e13db6e228d819780b89ad6 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -37,3 +37,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 55e53093caaf663d592dccbf2c913f334f79b3d9..1e251c0cef3f31fd9494edbc0ac23c32c595a3c0 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4184,7 +4184,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
   tree bitsize;
   tree adjustment_def = NULL;
   tree vec_initial_def = NULL;
-  tree reduction_op, expr, def;
+  tree reduction_op, expr, def, initial_def = NULL;
   tree orig_name, scalar_result;
   imm_use_iterator imm_iter, phi_imm_iter;
   use_operand_p use_p, phi_use_p;
@@ -4245,9 +4245,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       /* Get at the scalar def before the loop, that defines the initial value
 	 of the reduction variable.  */
       gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
-      tree op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+      initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+					   loop_preheader_edge (loop));
       vec_initial_defs.create (1);
-      vec_initial_def = get_initial_def_for_reduction (stmt, op,
+      vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
 						       &adjustment_def);
       vec_initial_defs.quick_push (vec_initial_def);
     }
@@ -4263,9 +4264,28 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       def = vect_defs[i];
       for (j = 0; j < ncopies; j++)
         {
-          /* Set the loop-entry arg of the reduction-phi.  */
-          add_phi_arg (as_a <gphi *> (phi), vec_init_def,
+	  /* Set the loop-entry arg of the reduction-phi.  */
+
+	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
+	    {
+	      /* Initialise the reduction phi to zero.  This prevents initial
+		 values of non-zero interferring with the reduction op.  */
+	      gcc_assert (ncopies == 1);
+	      gcc_assert (i == 0);
+
+	      tree vec_init_def_type = TREE_TYPE (vec_init_def);
+	      tree zero = build_int_cst ( TREE_TYPE (vec_init_def_type), 0);
+	      tree zero_vec = build_vector_from_val (vec_init_def_type, zero);
+
+	      add_phi_arg (as_a <gphi *> (phi), zero_vec,
+			   loop_preheader_edge (loop), UNKNOWN_LOCATION);
+	    }
+	  else
+	    {
+	      add_phi_arg (as_a <gphi *> (phi), vec_init_def,
 		       loop_preheader_edge (loop), UNKNOWN_LOCATION);
+	    }
 
           /* Set the loop-latch arg for the reduction-phi.  */
           if (j > 0)
@@ -4607,10 +4627,29 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
 	}
       else
 	tmp = build1 (reduc_code, scalar_type, new_phi_result);
+
       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
       gimple_assign_set_lhs (epilog_stmt, new_temp);
       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	  == INTEGER_INDUC_COND_REDUCTION)
+	{
+	  /* Earlier we set the initial value to be zero.  Check the result
+	     and if it is zero then replace with the original initial
+	     value.  */
+	  tree zero = build_zero_cst (scalar_type);
+	  tree comparez = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+	  tmp = build3 (COND_EXPR, scalar_type, comparez, initial_def,
+			new_temp);
+
+	  epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
+	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+	  gimple_assign_set_lhs (epilog_stmt, new_temp);
+	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+	}
+
       scalar_results.safe_push (new_temp);
     }
   else
@@ -5087,6 +5126,45 @@ vect_finalize_reduction:
 }
 
 
+/* Function is_integer_induction.
+
+   Check if STMT (which is part of loop LOOP) both increments and
+   does not cause overflow.  */
+
+static bool
+is_integer_induction (gimple *stmt, struct loop *loop)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+  tree base = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
+  tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
+  tree lhs_max = TYPE_MAX_VALUE (TREE_TYPE (gimple_phi_result (stmt)));
+  widest_int ni, max_loop_value;
+
+  /* Make sure the loop is integer based.  */
+  if (TREE_CODE (base) != INTEGER_CST
+      || TREE_CODE (step) != INTEGER_CST)
+    return false;
+
+  /* Check that the induction increments.  */
+  if (tree_int_cst_compare (step, size_zero_node) <= 0)
+    return false;
+
+  /* Check that the max size of the loop will not wrap.  */
+
+  if (! max_loop_iterations (loop, &ni))
+    return false;
+  /* Convert backedges to iterations.  */
+  ni += 1;
+
+  max_loop_value = wi::add (wi::to_widest (base),
+			    wi::mul (wi::to_widest (step), ni));
+
+  if (wi::gtu_p (max_loop_value, wi::to_widest (lhs_max)))
+    return false;
+
+  return true;
+}
+
 /* Function vectorizable_reduction.
 
    Check if STMT performs a reduction operation that can be vectorized.
@@ -5185,6 +5263,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   tree def0, def1, tem, op0, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
+  bool cond_expr_is_integer_induction = false;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5328,6 +5407,16 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
           reduc_def_stmt = def_stmt;
           reduc_index = i;
         }
+
+      if (i == 1 && code == COND_EXPR && dt == vect_induction_def
+	  && is_integer_induction (def_stmt, loop))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "condition expression based on integer "
+			     "induction.\n");
+	  cond_expr_is_integer_induction = true;
+	}
     }
 
   is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &dt, &tem);
@@ -5357,6 +5446,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 		  (loop_vinfo, reduc_def_stmt,
 		  !nested_cycle, &dummy, false,
 		  &STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info));
+
+  if (cond_expr_is_integer_induction
+      && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+    STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
+
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
@@ -5484,6 +5578,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     {
       /* This is a reduction pattern: get the vectype from the type of the
          reduction variable, and get the tree-code from orig_stmt.  */
+      gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		  == TREE_CODE_REDUCTION);
       orig_code = gimple_assign_rhs_code (orig_stmt);
       gcc_assert (vectype_out);
       vec_mode = TYPE_MODE (vectype_out);
@@ -5493,6 +5589,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
       /* Regular reduction: use the same vectype and tree-code as used for
          the vector code inside the loop can be used for the epilog code. */
       orig_code = code;
+
+      /* For simple condition reductions, replace with the actual expression
+	 we want to base our reduction around.  */
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	  == INTEGER_INDUC_COND_REDUCTION)
+	orig_code = MAX_EXPR;
     }
 
   if (nested_cycle)
@@ -5513,7 +5615,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 
   epilog_reduc_code = ERROR_MARK;
 
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
+      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
     {
       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
 	{
@@ -5539,6 +5643,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 		  epilog_reduc_code = ERROR_MARK;
 		}
 	    }
+
+	  /* When epilog_reduc_code is ERROR_MARK then a reduction will be
+	     generated in the epilog using multiple expressions.  This does not
+	     work for condition reductions.  */
+	  if (epilog_reduc_code == ERROR_MARK
+	      && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+			== INTEGER_INDUC_COND_REDUCTION)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "no reduc code for scalar code.\n");
+	      return false;
+	    }
 	}
       else
 	{
@@ -5573,7 +5690,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     }
 
   if ((double_reduc
-       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
       && ncopies > 1)
     {
       if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6ad0cc452f1168ce6ee48cc03ae140fc947f2a8d..b03eb964da1b6ed6021734b2b7ecc01beb75a525 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -64,7 +64,8 @@ enum vect_def_type {
 /* Define type of reduction.  */
 enum vect_reduction_type {
   TREE_CODE_REDUCTION,
-  COND_REDUCTION
+  COND_REDUCTION,
+  INTEGER_INDUC_COND_REDUCTION
 };
 
 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Patch] Optimize condition reductions where the result is an integer induction variable
  2015-11-11 12:22 [Patch] Optimize condition reductions where the result is an integer induction variable Alan Hayward
@ 2015-11-11 13:25 ` Richard Biener
  2015-11-11 18:54   ` Alan Hayward
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2015-11-11 13:25 UTC (permalink / raw)
  To: Alan Hayward; +Cc: gcc-patches

On Wed, Nov 11, 2015 at 1:22 PM, Alan Hayward <alan.hayward@arm.com> wrote:
> Hi,
> I hoped to post this in time for Monday’s cut off date, but circumstances
> delayed me until today. Hoping if possible this patch will still be able
> to go in.
>
>
> This patch builds upon the change for PR65947, and reduces the amount of
> code produced in a vectorized condition reduction where operand 2 of the
> COND_EXPR is an assignment of a increasing integer induction variable that
> won't wrap.
>
>
> For example (assuming all types are ints), this is a match:
>
> last = 5;
> for (i = 0; i < N; i++)
>   if (a[i] < min_v)
>     last = i;
>
> Whereas, this is not because the result is based off a memory access:
> last = 5;
> for (i = 0; i < N; i++)
>   if (a[i] < min_v)
>     last = a[i];
>
> In the integer induction variable case we can just use a MAX reduction and
> skip all the code I added in my vectorized condition reduction patch - the
> additional induction variables in vectorizable_reduction () and the
> additional checks in vect_create_epilog_for_reduction (). From the patch
> diff only, it's not immediately obvious that those parts will be skipped
> as there is no code changes in those areas.
>
> The initial value of the induction variable is force set to zero, as any
> other value could effect the result of the induction. At the end of the
> loop, if the result is zero, then we restore the original initial value.

+static bool
+is_integer_induction (gimple *stmt, struct loop *loop)

is_nonwrapping_integer_induction?

+  tree lhs_max = TYPE_MAX_VALUE (TREE_TYPE (gimple_phi_result (stmt)));

don't use TYPE_MAX_VALUE.

+  /* Check that the induction increments.  */
+  if (tree_int_cst_compare (step, size_zero_node) <= 0)
+    return false;

tree_int_cst_sgn (step) == -1

+  /* Check that the max size of the loop will not wrap.  */
+
+  if (! max_loop_iterations (loop, &ni))
+    return false;
+  /* Convert backedges to iterations.  */
+  ni += 1;

just use max_stmt_executions (loop, &ni) which properly checks for overflow
of the +1.

+  max_loop_value = wi::add (wi::to_widest (base),
+                           wi::mul (wi::to_widest (step), ni));
+
+  if (wi::gtu_p (max_loop_value, wi::to_widest (lhs_max)))
+    return false;

you miss a check for the wi::add / wi::mul to overflow.  You can use
extra args to determine this.

Instead of TYPE_MAX_VALUE use wi::max_value (precision, sign).

I wonder if you want to skip all the overflow checks for TYPE_OVERFLOW_UNDEFINED
IV types?

Thanks,
Richard.

>
>
>
> Cheers,
> Alan.
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Patch] Optimize condition reductions where the result is an integer induction variable
  2015-11-11 13:25 ` Richard Biener
@ 2015-11-11 18:54   ` Alan Hayward
  2015-11-12 10:54     ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Alan Hayward @ 2015-11-11 18:54 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3029 bytes --]



On 11/11/2015 13:25, "Richard Biener" <richard.guenther@gmail.com> wrote:

>On Wed, Nov 11, 2015 at 1:22 PM, Alan Hayward <alan.hayward@arm.com>
>wrote:
>> Hi,
>> I hoped to post this in time for Monday’s cut off date, but
>>circumstances
>> delayed me until today. Hoping if possible this patch will still be able
>> to go in.
>>
>>
>> This patch builds upon the change for PR65947, and reduces the amount of
>> code produced in a vectorized condition reduction where operand 2 of the
>> COND_EXPR is an assignment of a increasing integer induction variable
>>that
>> won't wrap.
>>
>>
>> For example (assuming all types are ints), this is a match:
>>
>> last = 5;
>> for (i = 0; i < N; i++)
>>   if (a[i] < min_v)
>>     last = i;
>>
>> Whereas, this is not because the result is based off a memory access:
>> last = 5;
>> for (i = 0; i < N; i++)
>>   if (a[i] < min_v)
>>     last = a[i];
>>
>> In the integer induction variable case we can just use a MAX reduction
>>and
>> skip all the code I added in my vectorized condition reduction patch -
>>the
>> additional induction variables in vectorizable_reduction () and the
>> additional checks in vect_create_epilog_for_reduction (). From the patch
>> diff only, it's not immediately obvious that those parts will be skipped
>> as there is no code changes in those areas.
>>
>> The initial value of the induction variable is force set to zero, as any
>> other value could effect the result of the induction. At the end of the
>> loop, if the result is zero, then we restore the original initial value.
>
>+static bool
>+is_integer_induction (gimple *stmt, struct loop *loop)
>
>is_nonwrapping_integer_induction?
>
>+  tree lhs_max = TYPE_MAX_VALUE (TREE_TYPE (gimple_phi_result (stmt)));
>
>don't use TYPE_MAX_VALUE.
>
>+  /* Check that the induction increments.  */
>+  if (tree_int_cst_compare (step, size_zero_node) <= 0)
>+    return false;
>
>tree_int_cst_sgn (step) == -1
>
>+  /* Check that the max size of the loop will not wrap.  */
>+
>+  if (! max_loop_iterations (loop, &ni))
>+    return false;
>+  /* Convert backedges to iterations.  */
>+  ni += 1;
>
>just use max_stmt_executions (loop, &ni) which properly checks for
>overflow
>of the +1.
>
>+  max_loop_value = wi::add (wi::to_widest (base),
>+                           wi::mul (wi::to_widest (step), ni));
>+
>+  if (wi::gtu_p (max_loop_value, wi::to_widest (lhs_max)))
>+    return false;
>
>you miss a check for the wi::add / wi::mul to overflow.  You can use
>extra args to determine this.
>
>Instead of TYPE_MAX_VALUE use wi::max_value (precision, sign).
>
>I wonder if you want to skip all the overflow checks for
>TYPE_OVERFLOW_UNDEFINED
>IV types?
>

Ok with all the above.

Tried using max_value () but this gave me a wide_int instead of a
widest_int.
Instead I've replaced with min_precision and GET_MODE_BITSIZE.

Added an extra check for when the type is TYPE_OVERFLOW_UNDEFINED.



Alan.


[-- Attachment #2: optimizeConditionReductions2.patch --]
[-- Type: application/octet-stream, Size: 16467 bytes --]

diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
index 7933f5c861201a7720ca77d84671898586516914..1e7a05dc8cc6f00fee265b9b8ad65beaa520f1e8 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -9,7 +9,7 @@ extern void abort (void) __attribute__ ((noreturn));
 int
 condition_reduction (int *a, int min_v)
 {
-  int last = -1;
+  int last = 66; /* High start value.  */
 
   for (int i = 0; i < N; i++)
     if (a[i] < min_v)
@@ -28,12 +28,13 @@ main (void)
   31, 32
   };
 
-  int ret = condition_reduction (a, 16);
+  int ret = condition_reduction (a, 1);
 
-  if (ret != 19)
+  if (ret != 17)
     abort ();
 
   return 0;
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
index 9a43a6059fabc1ca527dd6305bf6482f2103fab4..b4c6659b77ccbbafd77c62976e408dda37a2f4c4 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -37,4 +37,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
new file mode 100644
index 0000000000000000000000000000000000000000..fb5ffd48c7bb87f6e2f9e055e90e4f121a202d5e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction where the result is a negative of the induction.
+   Will fail to vectorize to a simple case.  */
+
+signed int
+condition_reduction (signed int *a, signed int min_v)
+{
+  signed int last = -1;
+
+  for (signed int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = -i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  signed int a[N] = {
+  11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  signed int ret = condition_reduction (a, 16);
+
+  if (ret != -19)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c6faddd189dabf9b96b4e665c59cf268cf9a3aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction with a reversed loop.
+   Will fail to vectorize to a simple case.  */
+
+int
+condition_reduction (int *a, int min_v)
+{
+  int last = -1;
+
+  for (int i = N-1; i >=0; i--)
+    if (a[i] < min_v)
+      last = i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  int a[N] = {
+  17, 28, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  int ret = condition_reduction (a, 16);
+
+  if (ret != 2)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
index 9c627d9d7d5c1c42f285a29a1a601b3bc60b8a5a..9e9ff53828609b19e6fe97ae2d640294cb980c14 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -38,3 +38,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index e115de2a28259307e4eaa9cbbe2fa93663c22873..4b6aa9216b0c200c43a227bf8b4954ecbe4f0090 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -48,3 +48,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
index 76a0567aa5450e00de2d700123ffc930b4028486..f4e7fdc97c89866b19838593cb84fc1754f87494 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -37,4 +37,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
index 360e3b51ee15ecea3b5d590c93d524e143f0ffca..21be8d0b749a4c1f2f29b6aed17533ec26b7aede 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -39,3 +39,4 @@ main (void)
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */
 /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
index 4997ef79cae83585758436874ac109ca368a1fc1..e1432403b2dd227a7e13db6e228d819780b89ad6 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -37,3 +37,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 43ada18ac7cfebbff06f43b68639a69873ea5c65..76c8397513aa59a4c9dc26caceaa8740bc54b721 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4052,7 +4052,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
   tree bitsize;
   tree adjustment_def = NULL;
   tree vec_initial_def = NULL;
-  tree reduction_op, expr, def;
+  tree reduction_op, expr, def, initial_def = NULL;
   tree orig_name, scalar_result;
   imm_use_iterator imm_iter, phi_imm_iter;
   use_operand_p use_p, phi_use_p;
@@ -4113,9 +4113,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       /* Get at the scalar def before the loop, that defines the initial value
 	 of the reduction variable.  */
       gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
-      tree op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+      initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+					   loop_preheader_edge (loop));
       vec_initial_defs.create (1);
-      vec_initial_def = get_initial_def_for_reduction (stmt, op,
+      vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
 						       &adjustment_def);
       vec_initial_defs.quick_push (vec_initial_def);
     }
@@ -4131,9 +4132,28 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       def = vect_defs[i];
       for (j = 0; j < ncopies; j++)
         {
-          /* Set the loop-entry arg of the reduction-phi.  */
-          add_phi_arg (as_a <gphi *> (phi), vec_init_def,
+	  /* Set the loop-entry arg of the reduction-phi.  */
+
+	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
+	    {
+	      /* Initialise the reduction phi to zero.  This prevents initial
+		 values of non-zero interferring with the reduction op.  */
+	      gcc_assert (ncopies == 1);
+	      gcc_assert (i == 0);
+
+	      tree vec_init_def_type = TREE_TYPE (vec_init_def);
+	      tree zero = build_int_cst ( TREE_TYPE (vec_init_def_type), 0);
+	      tree zero_vec = build_vector_from_val (vec_init_def_type, zero);
+
+	      add_phi_arg (as_a <gphi *> (phi), zero_vec,
+			   loop_preheader_edge (loop), UNKNOWN_LOCATION);
+	    }
+	  else
+	    {
+	      add_phi_arg (as_a <gphi *> (phi), vec_init_def,
 		       loop_preheader_edge (loop), UNKNOWN_LOCATION);
+	    }
 
           /* Set the loop-latch arg for the reduction-phi.  */
           if (j > 0)
@@ -4475,10 +4495,29 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
 	}
       else
 	tmp = build1 (reduc_code, scalar_type, new_phi_result);
+
       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
       gimple_assign_set_lhs (epilog_stmt, new_temp);
       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	  == INTEGER_INDUC_COND_REDUCTION)
+	{
+	  /* Earlier we set the initial value to be zero.  Check the result
+	     and if it is zero then replace with the original initial
+	     value.  */
+	  tree zero = build_zero_cst (scalar_type);
+	  tree comparez = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+	  tmp = build3 (COND_EXPR, scalar_type, comparez, initial_def,
+			new_temp);
+
+	  epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
+	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+	  gimple_assign_set_lhs (epilog_stmt, new_temp);
+	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+	}
+
       scalar_results.safe_push (new_temp);
     }
   else
@@ -4955,6 +4994,55 @@ vect_finalize_reduction:
 }
 
 
+/* Function is_nonwrapping_integer_induction.
+
+   Check if STMT (which is part of loop LOOP) both increments and
+   does not cause overflow.  */
+
+static bool
+is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+  tree base = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
+  tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
+  tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
+  widest_int ni, max_loop_value, lhs_max;
+  bool overflow = false;
+
+  /* Make sure the loop is integer based.  */
+  if (TREE_CODE (base) != INTEGER_CST
+      || TREE_CODE (step) != INTEGER_CST)
+    return false;
+
+  /* Check that the induction increments.  */
+  if (tree_int_cst_sgn (step) == -1)
+    return false;
+
+  /* Check that the max size of the loop will not wrap.  */
+
+  if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
+    {
+      return (GET_MODE_BITSIZE (TYPE_MODE (lhs_type))
+	      >= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (base))));
+    }
+
+  if (! max_stmt_executions (loop, &ni))
+    return false;
+
+  max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
+			    &overflow);
+  if (overflow)
+    return false;
+
+  max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
+			    TYPE_SIGN (lhs_type), &overflow);
+  if (overflow)
+    return false;
+
+  return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
+	  <= GET_MODE_BITSIZE (TYPE_MODE (lhs_type)));
+}
+
 /* Function vectorizable_reduction.
 
    Check if STMT performs a reduction operation that can be vectorized.
@@ -5053,6 +5141,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   tree def0, def1, tem, op0, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
+  bool cond_expr_is_nonwrapping_integer_induction = false;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5196,6 +5285,16 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
           reduc_def_stmt = def_stmt;
           reduc_index = i;
         }
+
+      if (i == 1 && code == COND_EXPR && dt == vect_induction_def
+	  && is_nonwrapping_integer_induction (def_stmt, loop))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "condition expression based on integer "
+			     "induction.\n");
+	  cond_expr_is_nonwrapping_integer_induction = true;
+	}
     }
 
   is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &dt, &tem);
@@ -5225,6 +5324,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 		  (loop_vinfo, reduc_def_stmt,
 		  !nested_cycle, &dummy, false,
 		  &STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info));
+
+  if (cond_expr_is_nonwrapping_integer_induction
+      && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+    STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
+
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
@@ -5352,6 +5456,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     {
       /* This is a reduction pattern: get the vectype from the type of the
          reduction variable, and get the tree-code from orig_stmt.  */
+      gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		  == TREE_CODE_REDUCTION);
       orig_code = gimple_assign_rhs_code (orig_stmt);
       gcc_assert (vectype_out);
       vec_mode = TYPE_MODE (vectype_out);
@@ -5361,6 +5467,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
       /* Regular reduction: use the same vectype and tree-code as used for
          the vector code inside the loop can be used for the epilog code. */
       orig_code = code;
+
+      /* For simple condition reductions, replace with the actual expression
+	 we want to base our reduction around.  */
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	  == INTEGER_INDUC_COND_REDUCTION)
+	orig_code = MAX_EXPR;
     }
 
   if (nested_cycle)
@@ -5381,7 +5493,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 
   epilog_reduc_code = ERROR_MARK;
 
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
+      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
     {
       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
 	{
@@ -5407,6 +5521,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 		  epilog_reduc_code = ERROR_MARK;
 		}
 	    }
+
+	  /* When epilog_reduc_code is ERROR_MARK then a reduction will be
+	     generated in the epilog using multiple expressions.  This does not
+	     work for condition reductions.  */
+	  if (epilog_reduc_code == ERROR_MARK
+	      && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+			== INTEGER_INDUC_COND_REDUCTION)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "no reduc code for scalar code.\n");
+	      return false;
+	    }
 	}
       else
 	{
@@ -5441,7 +5568,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     }
 
   if ((double_reduc
-       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
       && ncopies > 1)
     {
       if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index fed80c81c3f3bf38923ed6cd6ed4961129dadc40..620dc7116755585be15eaa599ade1eb25c37c07e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -63,7 +63,8 @@ enum vect_def_type {
 /* Define type of reduction.  */
 enum vect_reduction_type {
   TREE_CODE_REDUCTION,
-  COND_REDUCTION
+  COND_REDUCTION,
+  INTEGER_INDUC_COND_REDUCTION
 };
 
 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Patch] Optimize condition reductions where the result is an integer induction variable
  2015-11-11 18:54   ` Alan Hayward
@ 2015-11-12 10:54     ` Richard Biener
  2015-11-13 10:52       ` Alan Hayward
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2015-11-12 10:54 UTC (permalink / raw)
  To: Alan Hayward; +Cc: gcc-patches

On Wed, Nov 11, 2015 at 7:54 PM, Alan Hayward <alan.hayward@arm.com> wrote:
>
>
> On 11/11/2015 13:25, "Richard Biener" <richard.guenther@gmail.com> wrote:
>
>>On Wed, Nov 11, 2015 at 1:22 PM, Alan Hayward <alan.hayward@arm.com>
>>wrote:
>>> Hi,
>>> I hoped to post this in time for Monday’s cut off date, but
>>>circumstances
>>> delayed me until today. Hoping if possible this patch will still be able
>>> to go in.
>>>
>>>
>>> This patch builds upon the change for PR65947, and reduces the amount of
>>> code produced in a vectorized condition reduction where operand 2 of the
>>> COND_EXPR is an assignment of a increasing integer induction variable
>>>that
>>> won't wrap.
>>>
>>>
>>> For example (assuming all types are ints), this is a match:
>>>
>>> last = 5;
>>> for (i = 0; i < N; i++)
>>>   if (a[i] < min_v)
>>>     last = i;
>>>
>>> Whereas, this is not because the result is based off a memory access:
>>> last = 5;
>>> for (i = 0; i < N; i++)
>>>   if (a[i] < min_v)
>>>     last = a[i];
>>>
>>> In the integer induction variable case we can just use a MAX reduction
>>>and
>>> skip all the code I added in my vectorized condition reduction patch -
>>>the
>>> additional induction variables in vectorizable_reduction () and the
>>> additional checks in vect_create_epilog_for_reduction (). From the patch
>>> diff only, it's not immediately obvious that those parts will be skipped
>>> as there is no code changes in those areas.
>>>
>>> The initial value of the induction variable is force set to zero, as any
>>> other value could effect the result of the induction. At the end of the
>>> loop, if the result is zero, then we restore the original initial value.
>>
>>+static bool
>>+is_integer_induction (gimple *stmt, struct loop *loop)
>>
>>is_nonwrapping_integer_induction?
>>
>>+  tree lhs_max = TYPE_MAX_VALUE (TREE_TYPE (gimple_phi_result (stmt)));
>>
>>don't use TYPE_MAX_VALUE.
>>
>>+  /* Check that the induction increments.  */
>>+  if (tree_int_cst_compare (step, size_zero_node) <= 0)
>>+    return false;
>>
>>tree_int_cst_sgn (step) == -1
>>
>>+  /* Check that the max size of the loop will not wrap.  */
>>+
>>+  if (! max_loop_iterations (loop, &ni))
>>+    return false;
>>+  /* Convert backedges to iterations.  */
>>+  ni += 1;
>>
>>just use max_stmt_executions (loop, &ni) which properly checks for
>>overflow
>>of the +1.
>>
>>+  max_loop_value = wi::add (wi::to_widest (base),
>>+                           wi::mul (wi::to_widest (step), ni));
>>+
>>+  if (wi::gtu_p (max_loop_value, wi::to_widest (lhs_max)))
>>+    return false;
>>
>>you miss a check for the wi::add / wi::mul to overflow.  You can use
>>extra args to determine this.
>>
>>Instead of TYPE_MAX_VALUE use wi::max_value (precision, sign).
>>
>>I wonder if you want to skip all the overflow checks for
>>TYPE_OVERFLOW_UNDEFINED
>>IV types?
>>
>
> Ok with all the above.
>
> Tried using max_value () but this gave me a wide_int instead of a
> widest_int.
> Instead I've replaced with min_precision and GET_MODE_BITSIZE.
>
> Added an extra check for when the type is TYPE_OVERFLOW_UNDEFINED.

+         /* Set the loop-entry arg of the reduction-phi.  */
+
+         if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+               == INTEGER_INDUC_COND_REDUCTION)

extra vertical space

+             tree zero = build_int_cst ( TREE_TYPE (vec_init_def_type), 0);
+             tree zero_vec = build_vector_from_val (vec_init_def_type, zero);
+

build_zero_cst (vec_init_def_type);

+         else
+           {
+             add_phi_arg (as_a <gphi *> (phi), vec_init_def,
                       loop_preheader_edge (loop), UNKNOWN_LOCATION);
+           }

no {}s around single stmts

+         tree comparez = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);

please no l33t speech

+         tmp = build3 (COND_EXPR, scalar_type, comparez, initial_def,
+                       new_temp);
+
+         epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
+         new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+         gimple_assign_set_lhs (epilog_stmt, new_temp);

epilog_stmt = gimple_build_assign (make_ssa_name (new_scalar_dest),
                                                    COND_EXPR,
compare, initial_def, new_temp);


+  /* Check that the max size of the loop will not wrap.  */
+
+  if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
+    {
+      return (GET_MODE_BITSIZE (TYPE_MODE (lhs_type))
+             >= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (base))));

this mode check will always be true as lhs_type and base are from the
same PHI node.

+  return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
+         <= GET_MODE_BITSIZE (TYPE_MODE (lhs_type)));

please use TYPE_PRECISION (lhs_type) instead.

Ok with those changes.

Thanks,
Richard.

>
>
> Alan.
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Patch] Optimize condition reductions where the result is an integer induction variable
  2015-11-12 10:54     ` Richard Biener
@ 2015-11-13 10:52       ` Alan Hayward
  0 siblings, 0 replies; 5+ messages in thread
From: Alan Hayward @ 2015-11-13 10:52 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5539 bytes --]



On 12/11/2015 10:53, "Richard Biener" <richard.guenther@gmail.com> wrote:

>On Wed, Nov 11, 2015 at 7:54 PM, Alan Hayward <alan.hayward@arm.com>
>wrote:
>>
>>
>> On 11/11/2015 13:25, "Richard Biener" <richard.guenther@gmail.com>
>>wrote:
>>
>>>On Wed, Nov 11, 2015 at 1:22 PM, Alan Hayward <alan.hayward@arm.com>
>>>wrote:
>>>> Hi,
>>>> I hoped to post this in time for Monday’s cut off date, but
>>>>circumstances
>>>> delayed me until today. Hoping if possible this patch will still be
>>>>able
>>>> to go in.
>>>>
>>>>
>>>> This patch builds upon the change for PR65947, and reduces the amount
>>>>of
>>>> code produced in a vectorized condition reduction where operand 2 of
>>>>the
>>>> COND_EXPR is an assignment of a increasing integer induction variable
>>>>that
>>>> won't wrap.
>>>>
>>>>
>>>> For example (assuming all types are ints), this is a match:
>>>>
>>>> last = 5;
>>>> for (i = 0; i < N; i++)
>>>>   if (a[i] < min_v)
>>>>     last = i;
>>>>
>>>> Whereas, this is not because the result is based off a memory access:
>>>> last = 5;
>>>> for (i = 0; i < N; i++)
>>>>   if (a[i] < min_v)
>>>>     last = a[i];
>>>>
>>>> In the integer induction variable case we can just use a MAX reduction
>>>>and
>>>> skip all the code I added in my vectorized condition reduction patch -
>>>>the
>>>> additional induction variables in vectorizable_reduction () and the
>>>> additional checks in vect_create_epilog_for_reduction (). From the
>>>>patch
>>>> diff only, it's not immediately obvious that those parts will be
>>>>skipped
>>>> as there is no code changes in those areas.
>>>>
>>>> The initial value of the induction variable is force set to zero, as
>>>>any
>>>> other value could effect the result of the induction. At the end of
>>>>the
>>>> loop, if the result is zero, then we restore the original initial
>>>>value.
>>>
>>>+static bool
>>>+is_integer_induction (gimple *stmt, struct loop *loop)
>>>
>>>is_nonwrapping_integer_induction?
>>>
>>>+  tree lhs_max = TYPE_MAX_VALUE (TREE_TYPE (gimple_phi_result (stmt)));
>>>
>>>don't use TYPE_MAX_VALUE.
>>>
>>>+  /* Check that the induction increments.  */
>>>+  if (tree_int_cst_compare (step, size_zero_node) <= 0)
>>>+    return false;
>>>
>>>tree_int_cst_sgn (step) == -1
>>>
>>>+  /* Check that the max size of the loop will not wrap.  */
>>>+
>>>+  if (! max_loop_iterations (loop, &ni))
>>>+    return false;
>>>+  /* Convert backedges to iterations.  */
>>>+  ni += 1;
>>>
>>>just use max_stmt_executions (loop, &ni) which properly checks for
>>>overflow
>>>of the +1.
>>>
>>>+  max_loop_value = wi::add (wi::to_widest (base),
>>>+                           wi::mul (wi::to_widest (step), ni));
>>>+
>>>+  if (wi::gtu_p (max_loop_value, wi::to_widest (lhs_max)))
>>>+    return false;
>>>
>>>you miss a check for the wi::add / wi::mul to overflow.  You can use
>>>extra args to determine this.
>>>
>>>Instead of TYPE_MAX_VALUE use wi::max_value (precision, sign).
>>>
>>>I wonder if you want to skip all the overflow checks for
>>>TYPE_OVERFLOW_UNDEFINED
>>>IV types?
>>>
>>
>> Ok with all the above.
>>
>> Tried using max_value () but this gave me a wide_int instead of a
>> widest_int.
>> Instead I've replaced with min_precision and GET_MODE_BITSIZE.
>>
>> Added an extra check for when the type is TYPE_OVERFLOW_UNDEFINED.
>
>+         /* Set the loop-entry arg of the reduction-phi.  */
>+
>+         if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
>+               == INTEGER_INDUC_COND_REDUCTION)
>
>extra vertical space
>
>+             tree zero = build_int_cst ( TREE_TYPE (vec_init_def_type),
>0);
>+             tree zero_vec = build_vector_from_val (vec_init_def_type,
>zero);
>+
>
>build_zero_cst (vec_init_def_type);
>
>+         else
>+           {
>+             add_phi_arg (as_a <gphi *> (phi), vec_init_def,
>                       loop_preheader_edge (loop), UNKNOWN_LOCATION);
>+           }
>
>no {}s around single stmts
>
>+         tree comparez = build2 (EQ_EXPR, boolean_type_node, new_temp,
>zero);
>
>please no l33t speech


Ha! That wasn't the intention, it was short for "compare zero". Changed to
"zcompare" becuase I didn't want to use "compare".



>
>+         tmp = build3 (COND_EXPR, scalar_type, comparez, initial_def,
>+                       new_temp);
>+
>+         epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
>+         new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
>+         gimple_assign_set_lhs (epilog_stmt, new_temp);
>
>epilog_stmt = gimple_build_assign (make_ssa_name (new_scalar_dest),
>                                                    COND_EXPR,
>compare, initial_def, new_temp);
>

I had to pull the make_ssa_name out to a variable, as it's required for a
scalar_results.safe_push () a few lines down.



>
>+  /* Check that the max size of the loop will not wrap.  */
>+
>+  if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
>+    {
>+      return (GET_MODE_BITSIZE (TYPE_MODE (lhs_type))
>+             >= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (base))));
>
>this mode check will always be true as lhs_type and base are from the
>same PHI node.
>
>+  return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
>+         <= GET_MODE_BITSIZE (TYPE_MODE (lhs_type)));
>
>please use TYPE_PRECISION (lhs_type) instead.
>
>Ok with those changes.
>

Submitted to head with changes as requested.


Alan.


[-- Attachment #2: optimizeConditionReductions3.patch --]
[-- Type: application/octet-stream, Size: 16205 bytes --]

diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
index 7933f5c861201a7720ca77d84671898586516914..1e7a05dc8cc6f00fee265b9b8ad65beaa520f1e8 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -9,7 +9,7 @@ extern void abort (void) __attribute__ ((noreturn));
 int
 condition_reduction (int *a, int min_v)
 {
-  int last = -1;
+  int last = 66; /* High start value.  */
 
   for (int i = 0; i < N; i++)
     if (a[i] < min_v)
@@ -28,12 +28,13 @@ main (void)
   31, 32
   };
 
-  int ret = condition_reduction (a, 16);
+  int ret = condition_reduction (a, 1);
 
-  if (ret != 19)
+  if (ret != 17)
     abort ();
 
   return 0;
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
index 9a43a6059fabc1ca527dd6305bf6482f2103fab4..b4c6659b77ccbbafd77c62976e408dda37a2f4c4 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -37,4 +37,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
new file mode 100644
index 0000000000000000000000000000000000000000..fb5ffd48c7bb87f6e2f9e055e90e4f121a202d5e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction where the result is a negative of the induction.
+   Will fail to vectorize to a simple case.  */
+
+signed int
+condition_reduction (signed int *a, signed int min_v)
+{
+  signed int last = -1;
+
+  for (signed int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = -i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  signed int a[N] = {
+  11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  signed int ret = condition_reduction (a, 16);
+
+  if (ret != -19)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c6faddd189dabf9b96b4e665c59cf268cf9a3aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction with a reversed loop.
+   Will fail to vectorize to a simple case.  */
+
+int
+condition_reduction (int *a, int min_v)
+{
+  int last = -1;
+
+  for (int i = N-1; i >=0; i--)
+    if (a[i] < min_v)
+      last = i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  int a[N] = {
+  17, 28, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  int ret = condition_reduction (a, 16);
+
+  if (ret != 2)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
index 9c627d9d7d5c1c42f285a29a1a601b3bc60b8a5a..9e9ff53828609b19e6fe97ae2d640294cb980c14 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -38,3 +38,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index e115de2a28259307e4eaa9cbbe2fa93663c22873..4b6aa9216b0c200c43a227bf8b4954ecbe4f0090 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -48,3 +48,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
index 76a0567aa5450e00de2d700123ffc930b4028486..f4e7fdc97c89866b19838593cb84fc1754f87494 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -37,4 +37,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
index 360e3b51ee15ecea3b5d590c93d524e143f0ffca..21be8d0b749a4c1f2f29b6aed17533ec26b7aede 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -39,3 +39,4 @@ main (void)
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */
 /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
index 4997ef79cae83585758436874ac109ca368a1fc1..e1432403b2dd227a7e13db6e228d819780b89ad6 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -37,3 +37,4 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 43ada18ac7cfebbff06f43b68639a69873ea5c65..86899209692bcea1431c372a4d9c86a17d326513 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4052,7 +4052,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
   tree bitsize;
   tree adjustment_def = NULL;
   tree vec_initial_def = NULL;
-  tree reduction_op, expr, def;
+  tree reduction_op, expr, def, initial_def = NULL;
   tree orig_name, scalar_result;
   imm_use_iterator imm_iter, phi_imm_iter;
   use_operand_p use_p, phi_use_p;
@@ -4113,9 +4113,10 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       /* Get at the scalar def before the loop, that defines the initial value
 	 of the reduction variable.  */
       gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
-      tree op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+      initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+					   loop_preheader_edge (loop));
       vec_initial_defs.create (1);
-      vec_initial_def = get_initial_def_for_reduction (stmt, op,
+      vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
 						       &adjustment_def);
       vec_initial_defs.quick_push (vec_initial_def);
     }
@@ -4131,9 +4132,25 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
       def = vect_defs[i];
       for (j = 0; j < ncopies; j++)
         {
-          /* Set the loop-entry arg of the reduction-phi.  */
-          add_phi_arg (as_a <gphi *> (phi), vec_init_def,
-		       loop_preheader_edge (loop), UNKNOWN_LOCATION);
+	  /* Set the loop-entry arg of the reduction-phi.  */
+
+	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	      == INTEGER_INDUC_COND_REDUCTION)
+	    {
+	      /* Initialise the reduction phi to zero.  This prevents initial
+		 values of non-zero interferring with the reduction op.  */
+	      gcc_assert (ncopies == 1);
+	      gcc_assert (i == 0);
+
+	      tree vec_init_def_type = TREE_TYPE (vec_init_def);
+	      tree zero_vec = build_zero_cst (vec_init_def_type);
+
+	      add_phi_arg (as_a <gphi *> (phi), zero_vec,
+			   loop_preheader_edge (loop), UNKNOWN_LOCATION);
+	    }
+	  else
+	    add_phi_arg (as_a <gphi *> (phi), vec_init_def,
+			 loop_preheader_edge (loop), UNKNOWN_LOCATION);
 
           /* Set the loop-latch arg for the reduction-phi.  */
           if (j > 0)
@@ -4475,10 +4492,28 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
 	}
       else
 	tmp = build1 (reduc_code, scalar_type, new_phi_result);
+
       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
       gimple_assign_set_lhs (epilog_stmt, new_temp);
       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	  == INTEGER_INDUC_COND_REDUCTION)
+	{
+	  /* Earlier we set the initial value to be zero.  Check the result
+	     and if it is zero then replace with the original initial
+	     value.  */
+	  tree zero = build_zero_cst (scalar_type);
+	  tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
+
+	  tmp = make_ssa_name (new_scalar_dest);
+	  epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
+					     initial_def, new_temp);
+	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+	  new_temp = tmp;
+	}
+
       scalar_results.safe_push (new_temp);
     }
   else
@@ -4955,6 +4990,52 @@ vect_finalize_reduction:
 }
 
 
+/* Function is_nonwrapping_integer_induction.
+
+   Check if STMT (which is part of loop LOOP) both increments and
+   does not cause overflow.  */
+
+static bool
+is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+  tree base = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
+  tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
+  tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
+  widest_int ni, max_loop_value, lhs_max;
+  bool overflow = false;
+
+  /* Make sure the loop is integer based.  */
+  if (TREE_CODE (base) != INTEGER_CST
+      || TREE_CODE (step) != INTEGER_CST)
+    return false;
+
+  /* Check that the induction increments.  */
+  if (tree_int_cst_sgn (step) == -1)
+    return false;
+
+  /* Check that the max size of the loop will not wrap.  */
+
+  if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
+    return true;
+
+  if (! max_stmt_executions (loop, &ni))
+    return false;
+
+  max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
+			    &overflow);
+  if (overflow)
+    return false;
+
+  max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
+			    TYPE_SIGN (lhs_type), &overflow);
+  if (overflow)
+    return false;
+
+  return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
+	  <= TYPE_PRECISION (lhs_type));
+}
+
 /* Function vectorizable_reduction.
 
    Check if STMT performs a reduction operation that can be vectorized.
@@ -5053,6 +5134,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   tree def0, def1, tem, op0, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
+  bool cond_expr_is_nonwrapping_integer_induction = false;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5196,6 +5278,16 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
           reduc_def_stmt = def_stmt;
           reduc_index = i;
         }
+
+      if (i == 1 && code == COND_EXPR && dt == vect_induction_def
+	  && is_nonwrapping_integer_induction (def_stmt, loop))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "condition expression based on integer "
+			     "induction.\n");
+	  cond_expr_is_nonwrapping_integer_induction = true;
+	}
     }
 
   is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &dt, &tem);
@@ -5225,6 +5317,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 		  (loop_vinfo, reduc_def_stmt,
 		  !nested_cycle, &dummy, false,
 		  &STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info));
+
+  if (cond_expr_is_nonwrapping_integer_induction
+      && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+    STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
+
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
@@ -5352,6 +5449,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     {
       /* This is a reduction pattern: get the vectype from the type of the
          reduction variable, and get the tree-code from orig_stmt.  */
+      gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		  == TREE_CODE_REDUCTION);
       orig_code = gimple_assign_rhs_code (orig_stmt);
       gcc_assert (vectype_out);
       vec_mode = TYPE_MODE (vectype_out);
@@ -5361,6 +5460,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
       /* Regular reduction: use the same vectype and tree-code as used for
          the vector code inside the loop can be used for the epilog code. */
       orig_code = code;
+
+      /* For simple condition reductions, replace with the actual expression
+	 we want to base our reduction around.  */
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	  == INTEGER_INDUC_COND_REDUCTION)
+	orig_code = MAX_EXPR;
     }
 
   if (nested_cycle)
@@ -5381,7 +5486,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 
   epilog_reduc_code = ERROR_MARK;
 
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
+      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
     {
       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
 	{
@@ -5407,6 +5514,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 		  epilog_reduc_code = ERROR_MARK;
 		}
 	    }
+
+	  /* When epilog_reduc_code is ERROR_MARK then a reduction will be
+	     generated in the epilog using multiple expressions.  This does not
+	     work for condition reductions.  */
+	  if (epilog_reduc_code == ERROR_MARK
+	      && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+			== INTEGER_INDUC_COND_REDUCTION)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "no reduc code for scalar code.\n");
+	      return false;
+	    }
 	}
       else
 	{
@@ -5441,7 +5561,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     }
 
   if ((double_reduc
-       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		== INTEGER_INDUC_COND_REDUCTION)
       && ncopies > 1)
     {
       if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index fed80c81c3f3bf38923ed6cd6ed4961129dadc40..620dc7116755585be15eaa599ade1eb25c37c07e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -63,7 +63,8 @@ enum vect_def_type {
 /* Define type of reduction.  */
 enum vect_reduction_type {
   TREE_CODE_REDUCTION,
-  COND_REDUCTION
+  COND_REDUCTION,
+  INTEGER_INDUC_COND_REDUCTION
 };
 
 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-11-13 10:52 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-11 12:22 [Patch] Optimize condition reductions where the result is an integer induction variable Alan Hayward
2015-11-11 13:25 ` Richard Biener
2015-11-11 18:54   ` Alan Hayward
2015-11-12 10:54     ` Richard Biener
2015-11-13 10:52       ` Alan Hayward

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).