[PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II
@ 2015-05-26 11:13 Bin Cheng
  2015-06-01 10:46 ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Bin Cheng @ 2015-05-26 11:13 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1951 bytes --]

Hi,
My first part patch improving how we handle overflow in scev is posted at
https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
second part patch.

This patch does below improvements:
  1) Computes and records control iv for each loop's exit edge.  This
provides a way to compute overflow information in loop niter and use it in
different customers.  It think it's useful, especially with option
-funsafe-loop-optimizers.
  2) Improve chrec_convert by adding new interface
loop_exits_before_overflow.  It checks if a converted IV overflows wrto its
type and loop using overflow information of loop's control iv.  This
basically propagates no-overflow information from control iv to ivs
converted from control iv.  Moreover, we can further improve the logic by
using possible VRP information in the future.

With this patch, cases like scev-9.c and scev-10.c in patch can be handled
now.  Cases reported in PR48052 can be vectorized too.
Opinions?

Thanks,
bin


2015-05-26  Bin Cheng  <bin.cheng@arm.com>

	* cfgloop.h (struct control_iv): New.
	(struct loop): New field control_ivs.
	* tree-ssa-loop-niter.c : Include "stor-layout.h".
	(number_of_iterations_lt): Set no_overflow information.
	(number_of_iterations_exit): Init control iv in niter struct.
	(record_control_iv): New.
	(estimate_numbers_of_iterations_loop): Call record_control_iv.
	(loop_exits_before_overflow): New.  Interface factored out of
	scev_probably_wraps_p.
	(scev_probably_wraps_p): Factor loop niter related code into
	loop_exits_before_overflow.
	(free_numbers_of_iterations_estimates_loop): Free control ivs.
	* tree-ssa-loop-niter.h (free_loop_control_ivs): New.

gcc/testsuite/ChangeLog
2015-05-26  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/48052
	* gcc.dg/tree-ssa/scev-8.c: New.
	* gcc.dg/tree-ssa/scev-9.c: New.
	* gcc.dg/tree-ssa/scev-10.c: New.
	* gcc.dg/vect/pr48052.c: New.


[-- Attachment #2: improve-overlow-in-loop_niter-scev-20150526.txt --]
[-- Type: text/plain, Size: 17266 bytes --]

Index: gcc/tree-ssa-loop-niter.c
===================================================================
--- gcc/tree-ssa-loop-niter.c	(revision 222758)
+++ gcc/tree-ssa-loop-niter.c	(working copy)
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "wide-int.h"
 #include "inchash.h"
 #include "tree.h"
+#include "stor-layout.h"
 #include "fold-const.h"
 #include "calls.h"
 #include "hashtab.h"
@@ -1184,6 +1185,7 @@ number_of_iterations_lt (tree type, affine_iv *iv0
       niter->niter = delta;
       niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
 				     TYPE_SIGN (niter_type));
+      niter->control.no_overflow = true;
       return true;
     }
 
@@ -1965,6 +1967,9 @@ number_of_iterations_exit (struct loop *loop, edge
     return false;
 
   niter->assumptions = boolean_false_node;
+  niter->control.base = NULL_TREE;
+  niter->control.step = NULL_TREE;
+  niter->control.no_overflow = false;
   last = last_stmt (exit->src);
   if (!last)
     return false;
@@ -2744,6 +2749,29 @@ record_estimate (struct loop *loop, tree bound, co
   record_niter_bound (loop, new_i_bound, realistic, upper);
 }
 
+/* Records the control iv analyzed in NITER for LOOP if the iv is valid
+   and doesn't overflow.  */
+
+static void
+record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
+{
+  struct control_iv *iv;
+
+  if (!niter->control.base || !niter->control.step)
+    return;
+
+  if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
+    return;
+
+  iv = ggc_alloc<control_iv> ();
+  iv->base = niter->control.base;
+  iv->step = niter->control.step;
+  iv->next = loop->control_ivs;
+  loop->control_ivs = iv;
+
+  return;
+}
+
 /* Record the estimate on number of iterations of LOOP based on the fact that
    the induction variable BASE + STEP * i evaluated in STMT does not wrap and
    its values belong to the range <LOW, HIGH>.  REALISTIC is true if the
@@ -3467,6 +3495,7 @@ estimate_numbers_of_iterations_loop (struct loop *
       record_estimate (loop, niter, niter_desc.max,
 		       last_stmt (ex->src),
 		       true, ex == likely_exit, true);
+      record_control_iv (loop, &niter_desc);
     }
   exits.release ();
 
@@ -3773,6 +3802,188 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Return true if we can prove LOOP is exited before evolution of induction
+   variabled {BASE, STEP} overflows with respect to its type bound.  */
+
+static bool
+loop_exits_before_overflow (tree base, tree step,
+			    gimple at_stmt, struct loop *loop)
+{
+  widest_int niter;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
+  tree e, delta, step_abs, unsigned_base;
+  tree type = TREE_TYPE (step);
+  tree unsigned_type, valid_niter;
+
+  /* Don't issue signed overflow warnings.  */
+  fold_defer_overflow_warnings ();
+
+  /* Compute the number of iterations before we reach the bound of the
+     type, and verify that the loop is exited before this occurs.  */
+  unsigned_type = unsigned_type_for (type);
+  unsigned_base = fold_convert (unsigned_type, base);
+
+  if (tree_int_cst_sign_bit (step))
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   lower_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
+      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
+			      fold_convert (unsigned_type, step));
+    }
+  else
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   upper_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
+      step_abs = fold_convert (unsigned_type, step);
+    }
+
+  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
+
+  estimate_numbers_of_iterations_loop (loop);
+
+  if (max_loop_iterations (loop, &niter)
+      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
+      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
+			   wide_int_to_tree (TREE_TYPE (valid_niter),
+					     niter))) != NULL
+      && integer_nonzerop (e))
+    {
+      fold_undefer_and_ignore_overflow_warnings ();
+      return true;
+    }
+  if (at_stmt)
+    for (bound = loop->bounds; bound; bound = bound->next)
+      {
+	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
+	  {
+	    fold_undefer_and_ignore_overflow_warnings ();
+	    return true;
+	  }
+      }
+  fold_undefer_and_ignore_overflow_warnings ();
+
+  /* Try to prove loop is exited before {base, step} overflows with the
+     help of analyzed loop control IV.  This is done only for IVs with
+     constant step because otherwise we don't have the information.  */
+  if (TREE_CODE (step) == INTEGER_CST)
+    for (civ = loop->control_ivs; civ; civ = civ->next)
+      {
+	enum tree_code code;
+	tree stepped, extreme, civ_type = TREE_TYPE (civ->step);
+
+	/* Have to consider type difference because operand_equal_p ignores
+	   that for constants.  */
+	if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
+	    || element_precision (type) != element_precision (civ_type))
+	  continue;
+
+	/* Only consider control IV with same step.  */
+	if (!operand_equal_p (step, civ->step, 0))
+	  continue;
+
+	/* Done proving if this is a no-overflow control IV.  */
+	if (operand_equal_p (base, civ->base, 0))
+	  return true;
+
+	/* If this is a before stepping control IV, in other words, we have
+
+	     {civ_base, step} = {base + step, step}
+
+	   Because civ {base + step, step} doesn't overflow during loop
+	   iterations, {base, step} will not overflow if we can prove the
+	   operation "base + step" does not overflow.  This is done by proving
+	   below conditions:
+
+	     base <= UPPER_BOUND (type) - step  ;;step > 0
+	     base >= LOWER_BOUND (type) - step  ;;step < 0
+
+	   by using loop's initial condition.  */
+	stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
+	if (operand_equal_p (stepped, civ->base, 0))
+	  {
+	    if (tree_int_cst_sign_bit (step))
+	      {
+		code = LT_EXPR;
+		extreme = lower_bound_in_type (type, type);
+	      }
+	    else
+	      {
+		code = GT_EXPR;
+		extreme = upper_bound_in_type (type, type);
+	      }
+	    extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	    e = fold_build2 (code, boolean_type_node, base, extreme);
+	    e = simplify_using_initial_conditions (loop, e);
+	    if (integer_zerop (e))
+	      return true;
+
+	    continue;
+	  }
+
+	/* Similar to above, only in this case we have:
+
+	     {civ_base, step} = {(signed T)((unsigned T)base + step), step}
+	     && TREE_TYPE (civ_base) = signed T.
+
+	   We prove that below condition is satisfied:
+
+	     (signed T)((unsigned T)base + step)
+	       == (signed T)(unsigned T)base + step
+	       == base + step
+
+	   because of exact the same reason as above.  This also proves
+	   there is no overflow in the operation "base + step", thus the
+	   induction variable {base, step} during loop iterations.
+
+	   This is necessary to handle cases as below:
+
+	     int foo (int *a, signed char s, signed char l)
+	       {
+		 signed char i;
+		 for (i = s; i < l; i++)
+		   a[i] = 0;
+		 return 0;
+	       }
+
+	   The variable I is firstly converted to type unsigned char,
+	   incremented, then converted back to type signed char.  */
+	if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type)
+	  continue;
+	e = TREE_OPERAND (civ->base, 0);
+	if (TREE_CODE (e) != PLUS_EXPR
+	    || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
+	    || !operand_equal_p (step,
+				 fold_convert (type,
+					       TREE_OPERAND (e, 1)), 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	gcc_assert (operand_equal_p (e, base, 0));
+	if (tree_int_cst_sign_bit (step))
+	  {
+	    code = LT_EXPR;
+	    extreme = lower_bound_in_type (type, type);
+	  }
+	else
+	  {
+	    code = GT_EXPR;
+	    extreme = upper_bound_in_type (type, type);
+	  }
+	extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	e = fold_build2 (code, boolean_type_node, base, extreme);
+	e = simplify_using_initial_conditions (loop, e);
+	if (integer_zerop (e))
+	  return true;
+      }
+
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3788,13 +3999,6 @@ scev_probably_wraps_p (tree base, tree step,
 		       gimple at_stmt, struct loop *loop,
 		       bool use_overflow_semantics)
 {
-  tree delta, step_abs;
-  tree unsigned_type, valid_niter;
-  tree type = TREE_TYPE (step);
-  tree e;
-  widest_int niter;
-  struct nb_iter_bound *bound;
-
   /* FIXME: We really need something like
      http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
 
@@ -3828,57 +4032,9 @@ scev_probably_wraps_p (tree base, tree step,
   if (TREE_CODE (step) != INTEGER_CST)
     return true;
 
-  /* Don't issue signed overflow warnings.  */
-  fold_defer_overflow_warnings ();
+  if (loop_exits_before_overflow (base, step, at_stmt, loop))
+    return false;
 
-  /* Otherwise, compute the number of iterations before we reach the
-     bound of the type, and verify that the loop is exited before this
-     occurs.  */
-  unsigned_type = unsigned_type_for (type);
-  base = fold_convert (unsigned_type, base);
-
-  if (tree_int_cst_sign_bit (step))
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   lower_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
-      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
-			      fold_convert (unsigned_type, step));
-    }
-  else
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   upper_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
-      step_abs = fold_convert (unsigned_type, step);
-    }
-
-  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
-
-  estimate_numbers_of_iterations_loop (loop);
-
-  if (max_loop_iterations (loop, &niter)
-      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
-      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
-			   wide_int_to_tree (TREE_TYPE (valid_niter),
-					     niter))) != NULL
-      && integer_nonzerop (e))
-    {
-      fold_undefer_and_ignore_overflow_warnings ();
-      return false;
-    }
-  if (at_stmt)
-    for (bound = loop->bounds; bound; bound = bound->next)
-      {
-	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
-	  {
-	    fold_undefer_and_ignore_overflow_warnings ();
-	    return false;
-	  }
-      }
-
-  fold_undefer_and_ignore_overflow_warnings ();
-
   /* At this point we still don't have a proof that the iv does not
      overflow: give up.  */
   return true;
@@ -3889,17 +4045,26 @@ scev_probably_wraps_p (tree base, tree step,
 void
 free_numbers_of_iterations_estimates_loop (struct loop *loop)
 {
-  struct nb_iter_bound *bound, *next;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
 
   loop->nb_iterations = NULL;
   loop->estimate_state = EST_NOT_COMPUTED;
-  for (bound = loop->bounds; bound; bound = next)
+  for (bound = loop->bounds; bound;)
     {
-      next = bound->next;
+      struct nb_iter_bound *next = bound->next;
       ggc_free (bound);
+      bound = next;
     }
+  loop->bounds = NULL;
 
-  loop->bounds = NULL;
+  for (civ = loop->control_ivs; civ;)
+    {
+      struct control_iv *next = civ->next;
+      ggc_free (civ);
+      civ = next;
+    }
+  loop->control_ivs = NULL;
 }
 
 /* Frees the information on upper bounds on numbers of iterations of loops.  */
Index: gcc/tree-ssa-loop-niter.h
===================================================================
--- gcc/tree-ssa-loop-niter.h	(revision 222758)
+++ gcc/tree-ssa-loop-niter.h	(working copy)
@@ -41,6 +41,7 @@ extern void estimate_numbers_of_iterations (void);
 extern bool stmt_dominates_stmt_p (gimple, gimple);
 extern bool nowrap_type_p (tree);
 extern bool scev_probably_wraps_p (tree, tree, gimple, struct loop *, bool);
+extern void free_loop_control_ivs (struct loop *);
 extern void free_numbers_of_iterations_estimates_loop (struct loop *);
 extern void free_numbers_of_iterations_estimates (void);
 extern void substitute_in_loop_info (struct loop *, tree, tree);
Index: gcc/testsuite/gcc.dg/vect/pr48052.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr48052.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr48052.c	(revision 0)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-8.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-8.c	(revision 0)
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo1 (long long s, long long l)
+{
+  long long i;
+
+  for (i = s; i < l; i++)
+    {
+      a[(short)i] = 0;
+    }
+  return 0;
+}
+
+int
+foo2 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i, step = 1;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[c];
+      c += step;
+    }
+
+  return sum;
+}
+
+int
+foo3 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i += c)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+int
+foo4 (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-not "use \[0-9\]\n  address" "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-10.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-10.c	(revision 0)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (signed char s, signed char l)
+{
+  signed char i;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
+
+
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-9.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-9.c	(revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i < l; i += 1)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
+
Index: gcc/cfgloop.h
===================================================================
--- gcc/cfgloop.h	(revision 222758)
+++ gcc/cfgloop.h	(working copy)
@@ -116,6 +116,14 @@ enum loop_estimation
   EST_LAST
 };
 
+/* The structure describing a non-overflow control induction variable
+   of loop's exit edge.  */
+struct GTY ((chain_next ("%h.next"))) control_iv {
+  tree base;
+  tree step;
+  struct control_iv *next;
+};
+
 /* Structure to hold information for each natural loop.  */
 struct GTY ((chain_next ("%h.next"))) loop {
   /* Index into loops array.  */
@@ -203,6 +211,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
   /* Upper bound on number of iterations of a loop.  */
   struct nb_iter_bound *bounds;
 
+  /* Non-overflow control ivs of a loop.  */
+  struct control_iv *control_ivs;
+
   /* Head of the cyclic list of the exits of the loop.  */
   struct loop_exit *exits;
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II
  2015-05-26 11:13 [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II Bin Cheng
@ 2015-06-01 10:46 ` Richard Biener
  2015-06-02  3:37   ` Bin.Cheng
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2015-06-01 10:46 UTC (permalink / raw)
  To: Bin Cheng; +Cc: GCC Patches

On Tue, May 26, 2015 at 1:04 PM, Bin Cheng <bin.cheng@arm.com> wrote:
> Hi,
> My first part patch improving how we handle overflow in scev is posted at
> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
> second part patch.
>
> This patch does below improvements:
>   1) Computes and records control iv for each loop's exit edge.  This
> provides a way to compute overflow information in loop niter and use it in
> different customers.  It think it's useful, especially with option
> -funsafe-loop-optimizers.
>   2) Improve chrec_convert by adding new interface
> loop_exits_before_overflow.  It checks if a converted IV overflows wrto its
> type and loop using overflow information of loop's control iv.  This
> basically propagates no-overflow information from control iv to ivs
> converted from control iv.  Moreover, we can further improve the logic by
> using possible VRP information in the future.

But 2) you already posted (and I have approved it but you didn't commit yet?).

Can you commit that approved patch and only send the parts I didn't approve
yet?

Thanks,
Richard.

> With this patch, cases like scev-9.c and scev-10.c in patch can be handled
> now.  Cases reported in PR48052 can be vectorized too.
> Opinions?
>
> Thanks,
> bin
>
>
> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>
>         * cfgloop.h (struct control_iv): New.
>         (struct loop): New field control_ivs.
>         * tree-ssa-loop-niter.c : Include "stor-layout.h".
>         (number_of_iterations_lt): Set no_overflow information.
>         (number_of_iterations_exit): Init control iv in niter struct.
>         (record_control_iv): New.
>         (estimate_numbers_of_iterations_loop): Call record_control_iv.
>         (loop_exits_before_overflow): New.  Interface factored out of
>         scev_probably_wraps_p.
>         (scev_probably_wraps_p): Factor loop niter related code into
>         loop_exits_before_overflow.
>         (free_numbers_of_iterations_estimates_loop): Free control ivs.
>         * tree-ssa-loop-niter.h (free_loop_control_ivs): New.
>
> gcc/testsuite/ChangeLog
> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>
>         PR tree-optimization/48052
>         * gcc.dg/tree-ssa/scev-8.c: New.
>         * gcc.dg/tree-ssa/scev-9.c: New.
>         * gcc.dg/tree-ssa/scev-10.c: New.
>         * gcc.dg/vect/pr48052.c: New.
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II
  2015-06-01 10:46 ` Richard Biener
@ 2015-06-02  3:37   ` Bin.Cheng
  2015-06-02  8:53     ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Bin.Cheng @ 2015-06-02  3:37 UTC (permalink / raw)
  To: Richard Biener; +Cc: Bin Cheng, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 5888 bytes --]

On Mon, Jun 1, 2015 at 6:45 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Tue, May 26, 2015 at 1:04 PM, Bin Cheng <bin.cheng@arm.com> wrote:
>> Hi,
>> My first part patch improving how we handle overflow in scev is posted at
>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
>> second part patch.
>>
>> This patch does below improvements:
>>   1) Computes and records control iv for each loop's exit edge.  This
>> provides a way to compute overflow information in loop niter and use it in
>> different customers.  It think it's useful, especially with option
>> -funsafe-loop-optimizers.
>>   2) Improve chrec_convert by adding new interface
>> loop_exits_before_overflow.  It checks if a converted IV overflows wrto its
>> type and loop using overflow information of loop's control iv.  This
>> basically propagates no-overflow information from control iv to ivs
>> converted from control iv.  Moreover, we can further improve the logic by
>> using possible VRP information in the future.
>
> But 2) you already posted (and I have approved it but you didn't commit yet?).
>
> Can you commit that approved patch and only send the parts I didn't approve
> yet?
>
> Thanks,
> Richard.
>
>> With this patch, cases like scev-9.c and scev-10.c in patch can be handled
>> now.  Cases reported in PR48052 can be vectorized too.
>> Opinions?
>>
>> Thanks,
>> bin
>>
>>
>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>
>>         * cfgloop.h (struct control_iv): New.
>>         (struct loop): New field control_ivs.
>>         * tree-ssa-loop-niter.c : Include "stor-layout.h".
>>         (number_of_iterations_lt): Set no_overflow information.
>>         (number_of_iterations_exit): Init control iv in niter struct.
>>         (record_control_iv): New.
>>         (estimate_numbers_of_iterations_loop): Call record_control_iv.
>>         (loop_exits_before_overflow): New.  Interface factored out of
>>         scev_probably_wraps_p.
>>         (scev_probably_wraps_p): Factor loop niter related code into
>>         loop_exits_before_overflow.
>>         (free_numbers_of_iterations_estimates_loop): Free control ivs.
>>         * tree-ssa-loop-niter.h (free_loop_control_ivs): New.
>>
>> gcc/testsuite/ChangeLog
>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>
>>         PR tree-optimization/48052
>>         * gcc.dg/tree-ssa/scev-8.c: New.
>>         * gcc.dg/tree-ssa/scev-9.c: New.
>>         * gcc.dg/tree-ssa/scev-10.c: New.
>>         * gcc.dg/vect/pr48052.c: New.
>>

Hi Richard,
I think you replied the review message of this patch to another
thread.  Sorry for being mis-leading.  S I copied and answered your
review comments in this thread thus we can continue here.

>> +       /* Done proving if this is a no-overflow control IV.  */
>> +       if (operand_equal_p (base, civ->base, 0))
>> +         return true;
>
> so all control IVs are no-overflow?

This patch only records known no-overflow control ivs in loop
structure, so it depends on loop niter analyzer.  For now, this patch
(and the existing code) sets no-overflow flag only for two cases.  One
is the step-1 case, the other one is in assert_no_overflow_lt.
As a matter of fact, we may want to set no_overflow flag for all cases
with -funsafe-loop-optimizations in the future.  In that case, we will
assume all control IVs are no-overflow.

>
>> +            base <= UPPER_BOUND (type) - step  ;;step > 0
>> +            base >= LOWER_BOUND (type) - step  ;;step < 0
>> +
>> +          by using loop's initial condition.  */
>> +       stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
>> +       if (operand_equal_p (stepped, civ->base, 0))
>> +         {
>> +           if (tree_int_cst_sign_bit (step))
>> +             {
>> +               code = LT_EXPR;
>> +               extreme = lower_bound_in_type (type, type);
>> +             }
>> +           else
>> +             {
>> +               code = GT_EXPR;
>> +               extreme = upper_bound_in_type (type, type);
>> +             }
>> +           extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
>> +           e = fold_build2 (code, boolean_type_node, base, extreme);
>
> looks like you are actually computing base + step <= UPPER_BOUND (type)
> so maybe adjust the comment.  But as both step and UPPER_BOUND  (type)
> are constants why not compute it the way the comment specifies it?  Comparison
> codes also don't match the comment and we try to prove the condition is false.
I tried to prove the condition are satisfied by proving the reverse
condition ("base > UPPER_BOUND (type) - step") is false here.  In the
updated patch, I revised comments to reflect that logic.  Is it ok?

>
> This also reminds me of eventually pushing forward my idea of strengthening
> simplify_using_initial_
> conditions by using the VRP machinery (I have a small
> prototype patch for that).
Interesting.  If I understand correctly, VRP info is hold for ssa var
on a global scope basis?  The loop's initial condition may strengthen
var's range information, rather than the contrary.  Actually I tried
to refine vrp info in scope of loop and used the refined vrp
information in loop optimizer (In the thread which you replied this
review message to).  Looking forward to seeing how it's handled in
your patch.

>
>> +/* The structure describing a non-overflow control induction variable
>> +   of loop's exit edge.  */
>> +struct GTY ((chain_next ("%h.next"))) control_iv {
>> +  tree base;
>> +  tree step;
>> +  struct control_iv *next;
>> +};
>
> The comment suggests these don't exist for multiple edges?  Maybe you
> can clarify this.
As the linked list structure suggests, we can support one no-overflow
iv for each loop's exit edge.  I revised the comment a little bit.

>
> Otherwise the patch looks ok.

I attached the updated patch,  does it look fine?

Thanks,
bin
>
> Thanks,
> Richard.

[-- Attachment #2: improve-overlow-in-loop_niter-scev-20150602.txt --]
[-- Type: text/plain, Size: 16771 bytes --]

Index: gcc/tree-ssa-loop-niter.c
===================================================================
--- gcc/tree-ssa-loop-niter.c	(revision 222758)
+++ gcc/tree-ssa-loop-niter.c	(working copy)
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "wide-int.h"
 #include "inchash.h"
 #include "tree.h"
+#include "stor-layout.h"
 #include "fold-const.h"
 #include "calls.h"
 #include "hashtab.h"
@@ -1184,6 +1185,7 @@ number_of_iterations_lt (tree type, affine_iv *iv0
       niter->niter = delta;
       niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
 				     TYPE_SIGN (niter_type));
+      niter->control.no_overflow = true;
       return true;
     }
 
@@ -1965,6 +1967,9 @@ number_of_iterations_exit (struct loop *loop, edge
     return false;
 
   niter->assumptions = boolean_false_node;
+  niter->control.base = NULL_TREE;
+  niter->control.step = NULL_TREE;
+  niter->control.no_overflow = false;
   last = last_stmt (exit->src);
   if (!last)
     return false;
@@ -2744,6 +2749,29 @@ record_estimate (struct loop *loop, tree bound, co
   record_niter_bound (loop, new_i_bound, realistic, upper);
 }
 
+/* Records the control iv analyzed in NITER for LOOP if the iv is valid
+   and doesn't overflow.  */
+
+static void
+record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
+{
+  struct control_iv *iv;
+
+  if (!niter->control.base || !niter->control.step)
+    return;
+
+  if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
+    return;
+
+  iv = ggc_alloc<control_iv> ();
+  iv->base = niter->control.base;
+  iv->step = niter->control.step;
+  iv->next = loop->control_ivs;
+  loop->control_ivs = iv;
+
+  return;
+}
+
 /* Record the estimate on number of iterations of LOOP based on the fact that
    the induction variable BASE + STEP * i evaluated in STMT does not wrap and
    its values belong to the range <LOW, HIGH>.  REALISTIC is true if the
@@ -3467,6 +3495,7 @@ estimate_numbers_of_iterations_loop (struct loop *
       record_estimate (loop, niter, niter_desc.max,
 		       last_stmt (ex->src),
 		       true, ex == likely_exit, true);
+      record_control_iv (loop, &niter_desc);
     }
   exits.release ();
 
@@ -3773,6 +3802,189 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Return true if we can prove LOOP is exited before evolution of induction
+   variabled {BASE, STEP} overflows with respect to its type bound.  */
+
+static bool
+loop_exits_before_overflow (tree base, tree step,
+			    gimple at_stmt, struct loop *loop)
+{
+  widest_int niter;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
+  tree e, delta, step_abs, unsigned_base;
+  tree type = TREE_TYPE (step);
+  tree unsigned_type, valid_niter;
+
+  /* Don't issue signed overflow warnings.  */
+  fold_defer_overflow_warnings ();
+
+  /* Compute the number of iterations before we reach the bound of the
+     type, and verify that the loop is exited before this occurs.  */
+  unsigned_type = unsigned_type_for (type);
+  unsigned_base = fold_convert (unsigned_type, base);
+
+  if (tree_int_cst_sign_bit (step))
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   lower_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
+      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
+			      fold_convert (unsigned_type, step));
+    }
+  else
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   upper_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
+      step_abs = fold_convert (unsigned_type, step);
+    }
+
+  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
+
+  estimate_numbers_of_iterations_loop (loop);
+
+  if (max_loop_iterations (loop, &niter)
+      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
+      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
+			   wide_int_to_tree (TREE_TYPE (valid_niter),
+					     niter))) != NULL
+      && integer_nonzerop (e))
+    {
+      fold_undefer_and_ignore_overflow_warnings ();
+      return true;
+    }
+  if (at_stmt)
+    for (bound = loop->bounds; bound; bound = bound->next)
+      {
+	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
+	  {
+	    fold_undefer_and_ignore_overflow_warnings ();
+	    return true;
+	  }
+      }
+  fold_undefer_and_ignore_overflow_warnings ();
+
+  /* Try to prove loop is exited before {base, step} overflows with the
+     help of analyzed loop control IV.  This is done only for IVs with
+     constant step because otherwise we don't have the information.  */
+  if (TREE_CODE (step) == INTEGER_CST)
+    for (civ = loop->control_ivs; civ; civ = civ->next)
+      {
+	enum tree_code code;
+	tree stepped, extreme, civ_type = TREE_TYPE (civ->step);
+
+	/* Have to consider type difference because operand_equal_p ignores
+	   that for constants.  */
+	if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
+	    || element_precision (type) != element_precision (civ_type))
+	  continue;
+
+	/* Only consider control IV with same step.  */
+	if (!operand_equal_p (step, civ->step, 0))
+	  continue;
+
+	/* Done proving if this is a no-overflow control IV.  */
+	if (operand_equal_p (base, civ->base, 0))
+	  return true;
+
+	/* If this is a before stepping control IV, in other words, we have
+
+	     {civ_base, step} = {base + step, step}
+
+	   Because civ {base + step, step} doesn't overflow during loop
+	   iterations, {base, step} will not overflow if we can prove the
+	   operation "base + step" does not overflow.  Specifically, we try
+	   to prove below conditions are satisfied:
+
+	     base <= UPPER_BOUND (type) - step  ;;step > 0
+	     base >= LOWER_BOUND (type) - step  ;;step < 0
+
+	   by proving the reverse conditions are false using loop's initial
+	   condition.  */
+	stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
+	if (operand_equal_p (stepped, civ->base, 0))
+	  {
+	    if (tree_int_cst_sign_bit (step))
+	      {
+		code = LT_EXPR;
+		extreme = lower_bound_in_type (type, type);
+	      }
+	    else
+	      {
+		code = GT_EXPR;
+		extreme = upper_bound_in_type (type, type);
+	      }
+	    extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	    e = fold_build2 (code, boolean_type_node, base, extreme);
+	    e = simplify_using_initial_conditions (loop, e);
+	    if (integer_zerop (e))
+	      return true;
+
+	    continue;
+	  }
+
+	/* Similar to above, only in this case we have:
+
+	     {civ_base, step} = {(signed T)((unsigned T)base + step), step}
+	     && TREE_TYPE (civ_base) = signed T.
+
+	   We prove that below condition is satisfied:
+
+	     (signed T)((unsigned T)base + step)
+	       == (signed T)(unsigned T)base + step
+	       == base + step
+
+	   because of exact the same reason as above.  This also proves
+	   there is no overflow in the operation "base + step", thus the
+	   induction variable {base, step} during loop iterations.
+
+	   This is necessary to handle cases as below:
+
+	     int foo (int *a, signed char s, signed char l)
+	       {
+		 signed char i;
+		 for (i = s; i < l; i++)
+		   a[i] = 0;
+		 return 0;
+	       }
+
+	   The variable I is firstly converted to type unsigned char,
+	   incremented, then converted back to type signed char.  */
+	if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type)
+	  continue;
+	e = TREE_OPERAND (civ->base, 0);
+	if (TREE_CODE (e) != PLUS_EXPR
+	    || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
+	    || !operand_equal_p (step,
+				 fold_convert (type,
+					       TREE_OPERAND (e, 1)), 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	gcc_assert (operand_equal_p (e, base, 0));
+	if (tree_int_cst_sign_bit (step))
+	  {
+	    code = LT_EXPR;
+	    extreme = lower_bound_in_type (type, type);
+	  }
+	else
+	  {
+	    code = GT_EXPR;
+	    extreme = upper_bound_in_type (type, type);
+	  }
+	extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	e = fold_build2 (code, boolean_type_node, base, extreme);
+	e = simplify_using_initial_conditions (loop, e);
+	if (integer_zerop (e))
+	  return true;
+      }
+
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3788,13 +4000,6 @@ scev_probably_wraps_p (tree base, tree step,
 		       gimple at_stmt, struct loop *loop,
 		       bool use_overflow_semantics)
 {
-  tree delta, step_abs;
-  tree unsigned_type, valid_niter;
-  tree type = TREE_TYPE (step);
-  tree e;
-  widest_int niter;
-  struct nb_iter_bound *bound;
-
   /* FIXME: We really need something like
      http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
 
@@ -3828,57 +4033,9 @@ scev_probably_wraps_p (tree base, tree step,
   if (TREE_CODE (step) != INTEGER_CST)
     return true;
 
-  /* Don't issue signed overflow warnings.  */
-  fold_defer_overflow_warnings ();
+  if (loop_exits_before_overflow (base, step, at_stmt, loop))
+    return false;
 
-  /* Otherwise, compute the number of iterations before we reach the
-     bound of the type, and verify that the loop is exited before this
-     occurs.  */
-  unsigned_type = unsigned_type_for (type);
-  base = fold_convert (unsigned_type, base);
-
-  if (tree_int_cst_sign_bit (step))
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   lower_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
-      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
-			      fold_convert (unsigned_type, step));
-    }
-  else
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   upper_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
-      step_abs = fold_convert (unsigned_type, step);
-    }
-
-  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
-
-  estimate_numbers_of_iterations_loop (loop);
-
-  if (max_loop_iterations (loop, &niter)
-      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
-      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
-			   wide_int_to_tree (TREE_TYPE (valid_niter),
-					     niter))) != NULL
-      && integer_nonzerop (e))
-    {
-      fold_undefer_and_ignore_overflow_warnings ();
-      return false;
-    }
-  if (at_stmt)
-    for (bound = loop->bounds; bound; bound = bound->next)
-      {
-	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
-	  {
-	    fold_undefer_and_ignore_overflow_warnings ();
-	    return false;
-	  }
-      }
-
-  fold_undefer_and_ignore_overflow_warnings ();
-
   /* At this point we still don't have a proof that the iv does not
      overflow: give up.  */
   return true;
@@ -3889,17 +4046,26 @@ scev_probably_wraps_p (tree base, tree step,
 void
 free_numbers_of_iterations_estimates_loop (struct loop *loop)
 {
-  struct nb_iter_bound *bound, *next;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
 
   loop->nb_iterations = NULL;
   loop->estimate_state = EST_NOT_COMPUTED;
-  for (bound = loop->bounds; bound; bound = next)
+  for (bound = loop->bounds; bound;)
     {
-      next = bound->next;
+      struct nb_iter_bound *next = bound->next;
       ggc_free (bound);
+      bound = next;
     }
+  loop->bounds = NULL;
 
-  loop->bounds = NULL;
+  for (civ = loop->control_ivs; civ;)
+    {
+      struct control_iv *next = civ->next;
+      ggc_free (civ);
+      civ = next;
+    }
+  loop->control_ivs = NULL;
 }
 
 /* Frees the information on upper bounds on numbers of iterations of loops.  */
Index: gcc/tree-ssa-loop-niter.h
===================================================================
--- gcc/tree-ssa-loop-niter.h	(revision 222758)
+++ gcc/tree-ssa-loop-niter.h	(working copy)
@@ -41,6 +41,7 @@ extern void estimate_numbers_of_iterations (void);
 extern bool stmt_dominates_stmt_p (gimple, gimple);
 extern bool nowrap_type_p (tree);
 extern bool scev_probably_wraps_p (tree, tree, gimple, struct loop *, bool);
+extern void free_loop_control_ivs (struct loop *);
 extern void free_numbers_of_iterations_estimates_loop (struct loop *);
 extern void free_numbers_of_iterations_estimates (void);
 extern void substitute_in_loop_info (struct loop *, tree, tree);
Index: gcc/testsuite/gcc.dg/vect/pr48052.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr48052.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr48052.c	(revision 0)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-8.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-8.c	(revision 0)
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo1 (long long s, long long l)
+{
+  long long i;
+
+  for (i = s; i < l; i++)
+    {
+      a[(short)i] = 0;
+    }
+  return 0;
+}
+
+int
+foo2 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i, step = 1;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[c];
+      c += step;
+    }
+
+  return sum;
+}
+
+int
+foo3 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i += c)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+int
+foo4 (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-not "use \[0-9\]\n  address" "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-10.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-10.c	(revision 0)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (signed char s, signed char l)
+{
+  signed char i;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
+
+
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-9.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-9.c	(revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i < l; i += 1)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
+
Index: gcc/cfgloop.h
===================================================================
--- gcc/cfgloop.h	(revision 222758)
+++ gcc/cfgloop.h	(working copy)
@@ -116,6 +116,14 @@ enum loop_estimation
   EST_LAST
 };
 
+/* The structure describing non-overflow control induction variable for
+   loop's exit edge.  */
+struct GTY ((chain_next ("%h.next"))) control_iv {
+  tree base;
+  tree step;
+  struct control_iv *next;
+};
+
 /* Structure to hold information for each natural loop.  */
 struct GTY ((chain_next ("%h.next"))) loop {
   /* Index into loops array.  */
@@ -203,6 +211,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
   /* Upper bound on number of iterations of a loop.  */
   struct nb_iter_bound *bounds;
 
+  /* Non-overflow control ivs of a loop.  */
+  struct control_iv *control_ivs;
+
   /* Head of the cyclic list of the exits of the loop.  */
   struct loop_exit *exits;
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II
  2015-06-02  3:37   ` Bin.Cheng
@ 2015-06-02  8:53     ` Richard Biener
  2015-06-02  9:33       ` Bin.Cheng
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2015-06-02  8:53 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: Bin Cheng, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 6253 bytes --]

On Tue, Jun 2, 2015 at 4:55 AM, Bin.Cheng <amker.cheng@gmail.com> wrote:
> On Mon, Jun 1, 2015 at 6:45 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Tue, May 26, 2015 at 1:04 PM, Bin Cheng <bin.cheng@arm.com> wrote:
>>> Hi,
>>> My first part patch improving how we handle overflow in scev is posted at
>>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
>>> second part patch.
>>>
>>> This patch does below improvements:
>>>   1) Computes and records control iv for each loop's exit edge.  This
>>> provides a way to compute overflow information in loop niter and use it in
>>> different customers.  It think it's useful, especially with option
>>> -funsafe-loop-optimizers.
>>>   2) Improve chrec_convert by adding new interface
>>> loop_exits_before_overflow.  It checks if a converted IV overflows wrto its
>>> type and loop using overflow information of loop's control iv.  This
>>> basically propagates no-overflow information from control iv to ivs
>>> converted from control iv.  Moreover, we can further improve the logic by
>>> using possible VRP information in the future.
>>
>> But 2) you already posted (and I have approved it but you didn't commit yet?).
>>
>> Can you commit that approved patch and only send the parts I didn't approve
>> yet?
>>
>> Thanks,
>> Richard.
>>
>>> With this patch, cases like scev-9.c and scev-10.c in patch can be handled
>>> now.  Cases reported in PR48052 can be vectorized too.
>>> Opinions?
>>>
>>> Thanks,
>>> bin
>>>
>>>
>>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>>
>>>         * cfgloop.h (struct control_iv): New.
>>>         (struct loop): New field control_ivs.
>>>         * tree-ssa-loop-niter.c : Include "stor-layout.h".
>>>         (number_of_iterations_lt): Set no_overflow information.
>>>         (number_of_iterations_exit): Init control iv in niter struct.
>>>         (record_control_iv): New.
>>>         (estimate_numbers_of_iterations_loop): Call record_control_iv.
>>>         (loop_exits_before_overflow): New.  Interface factored out of
>>>         scev_probably_wraps_p.
>>>         (scev_probably_wraps_p): Factor loop niter related code into
>>>         loop_exits_before_overflow.
>>>         (free_numbers_of_iterations_estimates_loop): Free control ivs.
>>>         * tree-ssa-loop-niter.h (free_loop_control_ivs): New.
>>>
>>> gcc/testsuite/ChangeLog
>>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>>
>>>         PR tree-optimization/48052
>>>         * gcc.dg/tree-ssa/scev-8.c: New.
>>>         * gcc.dg/tree-ssa/scev-9.c: New.
>>>         * gcc.dg/tree-ssa/scev-10.c: New.
>>>         * gcc.dg/vect/pr48052.c: New.
>>>
>
> Hi Richard,
> I think you replied the review message of this patch to another
> thread.  Sorry for being mis-leading.  S I copied and answered your
> review comments in this thread thus we can continue here.
>
>>> +       /* Done proving if this is a no-overflow control IV.  */
>>> +       if (operand_equal_p (base, civ->base, 0))
>>> +         return true;
>>
>> so all control IVs are no-overflow?
>
> This patch only records known no-overflow control ivs in loop
> structure, so it depends on loop niter analyzer.  For now, this patch
> (and the existing code) sets no-overflow flag only for two cases.  One
> is the step-1 case, the other one is in assert_no_overflow_lt.
> As a matter of fact, we may want to set no_overflow flag for all cases
> with -funsafe-loop-optimizations in the future.  In that case, we will
> assume all control IVs are no-overflow.
>
>>
>>> +            base <= UPPER_BOUND (type) - step  ;;step > 0
>>> +            base >= LOWER_BOUND (type) - step  ;;step < 0
>>> +
>>> +          by using loop's initial condition.  */
>>> +       stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
>>> +       if (operand_equal_p (stepped, civ->base, 0))
>>> +         {
>>> +           if (tree_int_cst_sign_bit (step))
>>> +             {
>>> +               code = LT_EXPR;
>>> +               extreme = lower_bound_in_type (type, type);
>>> +             }
>>> +           else
>>> +             {
>>> +               code = GT_EXPR;
>>> +               extreme = upper_bound_in_type (type, type);
>>> +             }
>>> +           extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
>>> +           e = fold_build2 (code, boolean_type_node, base, extreme);
>>
>> looks like you are actually computing base + step <= UPPER_BOUND (type)
>> so maybe adjust the comment.  But as both step and UPPER_BOUND  (type)
>> are constants why not compute it the way the comment specifies it?  Comparison
>> codes also don't match the comment and we try to prove the condition is false.
> I tried to prove the condition are satisfied by proving the reverse
> condition ("base > UPPER_BOUND (type) - step") is false here.  In the
> updated patch, I revised comments to reflect that logic.  Is it ok?
>
>>
>> This also reminds me of eventually pushing forward my idea of strengthening
>> simplify_using_initial_
>> conditions by using the VRP machinery (I have a small
>> prototype patch for that).
> Interesting.  If I understand correctly, VRP info is hold for ssa var
> on a global scope basis?  The loop's initial condition may strengthen
> var's range information, rather than the contrary.  Actually I tried
> to refine vrp info in scope of loop and used the refined vrp
> information in loop optimizer (In the thread which you replied this
> review message to).  Looking forward to seeing how it's handled in
> your patch.

Attached (don't remember the state of the patch, but of course some
caching is missing here).

>>
>>> +/* The structure describing a non-overflow control induction variable
>>> +   of loop's exit edge.  */
>>> +struct GTY ((chain_next ("%h.next"))) control_iv {
>>> +  tree base;
>>> +  tree step;
>>> +  struct control_iv *next;
>>> +};
>>
>> The comment suggests these don't exist for multiple edges?  Maybe you
>> can clarify this.
> As the linked list structure suggests, we can support one no-overflow
> iv for each loop's exit edge.  I revised the comment a little bit.
>
>>
>> Otherwise the patch looks ok.
>
> I attached the updated patch,  does it look fine?

Yes.

Thanks,
Richard.

> Thanks,
> bin
>>
>> Thanks,
>> Richard.

[-- Attachment #2: fix-pr62630-2 --]
[-- Type: application/octet-stream, Size: 5955 bytes --]

Index: gcc/tree-ssanames.h
===================================================================
*** gcc/tree-ssanames.h	(revision 220784)
--- gcc/tree-ssanames.h	(working copy)
*************** extern enum value_range_type get_range_i
*** 75,80 ****
--- 75,82 ----
  					     wide_int *);
  extern void set_nonzero_bits (tree, const wide_int_ref &);
  extern wide_int get_nonzero_bits (const_tree);
+ extern value_range_type determine_value_range (struct loop *, tree,
+ 					       wide_int *, wide_int *);
  extern void init_ssanames (struct function *, int);
  extern void fini_ssanames (void);
  extern void ssanames_print_statistics (void);
Index: gcc/tree-vrp.c
===================================================================
*** gcc/tree-vrp.c	(revision 220784)
--- gcc/tree-vrp.c	(working copy)
*************** make_pass_vrp (gcc::context *ctxt)
*** 10455,10457 ****
--- 10455,10593 ----
  {
    return new pass_vrp (ctxt);
  }
+ 
+ 
+ /* Worker for determine_value_range.  */
+ 
+ static void
+ determine_value_range_1 (value_range_t *vr, struct loop *loop, tree expr)
+ {
+   if (BINARY_CLASS_P (expr))
+     {
+       value_range_t vr0 = VR_INITIALIZER, vr1 = VR_INITIALIZER;
+       determine_value_range_1 (&vr0, loop, TREE_OPERAND (expr, 0));
+       determine_value_range_1 (&vr1, loop, TREE_OPERAND (expr, 1));
+       extract_range_from_binary_expr_1 (vr, TREE_CODE (expr), TREE_TYPE (expr),
+ 					&vr0, &vr1);
+     }
+   else if (UNARY_CLASS_P (expr))
+     {
+       value_range_t vr0 = VR_INITIALIZER;
+       determine_value_range_1 (&vr0, loop, TREE_OPERAND (expr, 0));
+       extract_range_from_unary_expr_1 (vr, TREE_CODE (expr), TREE_TYPE (expr),
+ 				       &vr0, TREE_TYPE (TREE_OPERAND (expr, 0)));
+     }
+   else if (TREE_CODE (expr) == INTEGER_CST)
+     set_value_range_to_value (vr, expr, NULL);
+   else
+     {
+       value_range_type kind;
+       wide_int min, max;
+       /* For SSA names try to extract range info computed by VRP.  Otherwise
+ 	 fall back to varying.  */
+       if (TREE_CODE (expr) == SSA_NAME
+ 	  && INTEGRAL_TYPE_P (expr)
+ 	  && (kind = get_range_info (expr, &min, &max)) != VR_VARYING)
+ 	set_value_range (vr, kind, wide_int_to_tree (TREE_TYPE (expr), min),
+ 			 wide_int_to_tree (TREE_TYPE (expr), max), NULL);
+       else
+ 	set_value_range_to_varying (vr);
+ 
+       if (TREE_CODE (expr) == SSA_NAME
+ 	  && INTEGRAL_TYPE_P (TREE_TYPE (expr))
+ 	  && loop)
+ 	{
+ 	  /* The maximum number of dominator BBs we search for conditions
+ 	     of loop header copies we use for simplifying a conditional
+ 	     expression.  */
+ #define MAX_DOMINATORS_TO_WALK 8
+ 
+ 	  /* Now walk the dominators of the loop header and use the entry
+ 	     guards to refine the estimates.  */
+ 	  unsigned cnt = 0;
+ 	  for (basic_block bb = loop->header;
+ 	       bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
+ 	       && cnt < MAX_DOMINATORS_TO_WALK;
+ 	       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
+ 	    {
+ 	      if (!single_pred_p (bb))
+ 		continue;
+ 	      edge e = single_pred_edge (bb);
+ 
+ 	      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
+ 		continue;
+ 
+ 	      gimple cond = last_stmt (e->src);
+ 	      tree c0 = gimple_cond_lhs (cond);
+ 	      if (c0 != expr)
+ 		continue;
+ 
+ 	      tree c1 = gimple_cond_rhs (cond);
+ 	      if (TREE_CODE (c1) != INTEGER_CST)
+ 		continue;
+ 
+ 	      enum tree_code cmp = gimple_cond_code (cond);
+ 	      if (e->flags & EDGE_FALSE_VALUE)
+ 		cmp = invert_tree_comparison (cmp, false);
+ 
+ 	      /* Ideally we'd be able to re-use a worker from
+ 		 extract_range_from_assert but just handle the simple
+ 		 X CMP CST cases here.  */
+ 	      value_range_t vr0 = VR_INITIALIZER;
+ 	      switch (cmp)
+ 		{
+ 		case EQ_EXPR:
+ 		  set_value_range (&vr0, VR_RANGE, c1, c1, NULL);
+ 		  break;
+ 		case NE_EXPR:
+ 		  set_value_range (&vr0, VR_ANTI_RANGE, c1, c1, NULL);
+ 		  break;
+ 		case GT_EXPR:
+ 		case GE_EXPR:
+ 		  set_value_range (&vr0, VR_RANGE,
+ 				   c1, vrp_val_max (TREE_TYPE (c0)), NULL);
+ 		  break;
+ 		case LT_EXPR:
+ 		case LE_EXPR:
+ 		  set_value_range (&vr0, VR_RANGE,
+ 				   vrp_val_min (TREE_TYPE (c0)), c1, NULL);
+ 		  break;
+ 
+ 		default:
+ 		  set_value_range_to_varying (&vr0);
+ 		}
+ 
+ 	      if (cmp == LT_EXPR || cmp == GT_EXPR)
+ 		{
+ 		  value_range_t vr1 = VR_INITIALIZER;
+ 		  set_value_range (&vr1, VR_ANTI_RANGE, c1, c1, NULL);
+ 		  vrp_intersect_ranges_1 (&vr0, &vr1);
+ 		}
+ 
+ 	      vrp_intersect_ranges (vr, &vr0);
+ 	      ++cnt;
+ 	    }
+ 	}
+     }
+ }
+ 
+ /* Interpret EXPR in the context of LOOP and compute a value-range for
+    its value and return it in *MIN and *MAX with returned kind.  */
+ 
+ enum value_range_type
+ determine_value_range (struct loop *loop, tree expr,
+ 		       wide_int *min, wide_int *max)
+ {
+   value_range_t vr = VR_INITIALIZER;
+   determine_value_range_1 (&vr, loop, expr);
+   if ((vr.type == VR_RANGE
+        || vr.type == VR_ANTI_RANGE)
+       && !symbolic_range_p (&vr))
+     {
+       *min = vr.min;
+       *max = vr.max;
+       return vr.type;
+     }
+   else
+     return VR_VARYING;
+ }
Index: gcc/tree-ssa-loop-niter.c
===================================================================
*** gcc/tree-ssa-loop-niter.c	(revision 220784)
--- gcc/tree-ssa-loop-niter.c	(working copy)
*************** bound_difference (struct loop *loop, tre
*** 508,513 ****
--- 517,535 ----
      }
    else
      {
+       if (INTEGRAL_TYPE_P (type))
+ 	{
+ 	  tree diff = fold_build2 (MINUS_EXPR, TREE_TYPE (x), x, y);
+ 	  wide_int min, max;
+ 
+ 	  if (determine_value_range (loop, diff, &min, &max) == VR_RANGE)
+ 	    {
+ 	      wi::to_mpz (min, bnds->below, TYPE_SIGN (TREE_TYPE (x)));
+ 	      wi::to_mpz (max, bnds->up, TYPE_SIGN (TREE_TYPE (x)));
+ 	      return;
+ 	    }
+ 	}
+  
        /* Otherwise, use the value ranges to determine the initial
  	 estimates on below and up.  */
        mpz_init (minx);


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II
  2015-06-02  8:53     ` Richard Biener
@ 2015-06-02  9:33       ` Bin.Cheng
  2015-06-02  9:42         ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Bin.Cheng @ 2015-06-02  9:33 UTC (permalink / raw)
  To: Richard Biener; +Cc: Bin Cheng, GCC Patches

On Tue, Jun 2, 2015 at 4:40 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Tue, Jun 2, 2015 at 4:55 AM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>> On Mon, Jun 1, 2015 at 6:45 PM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>> On Tue, May 26, 2015 at 1:04 PM, Bin Cheng <bin.cheng@arm.com> wrote:
>>>> Hi,
>>>> My first part patch improving how we handle overflow in scev is posted at
>>>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
>>>> second part patch.
>>>>
>>>> This patch does below improvements:
>>>>   1) Computes and records control iv for each loop's exit edge.  This
>>>> provides a way to compute overflow information in loop niter and use it in
>>>> different customers.  It think it's useful, especially with option
>>>> -funsafe-loop-optimizers.
>>>>   2) Improve chrec_convert by adding new interface
>>>> loop_exits_before_overflow.  It checks if a converted IV overflows wrto its
>>>> type and loop using overflow information of loop's control iv.  This
>>>> basically propagates no-overflow information from control iv to ivs
>>>> converted from control iv.  Moreover, we can further improve the logic by
>>>> using possible VRP information in the future.
>>>
>>> But 2) you already posted (and I have approved it but you didn't commit yet?).
>>>
>>> Can you commit that approved patch and only send the parts I didn't approve
>>> yet?
>>>
>>> Thanks,
>>> Richard.
>>>
>>>> With this patch, cases like scev-9.c and scev-10.c in patch can be handled
>>>> now.  Cases reported in PR48052 can be vectorized too.
>>>> Opinions?
>>>>
>>>> Thanks,
>>>> bin
>>>>
>>>>
>>>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>>>
>>>>         * cfgloop.h (struct control_iv): New.
>>>>         (struct loop): New field control_ivs.
>>>>         * tree-ssa-loop-niter.c : Include "stor-layout.h".
>>>>         (number_of_iterations_lt): Set no_overflow information.
>>>>         (number_of_iterations_exit): Init control iv in niter struct.
>>>>         (record_control_iv): New.
>>>>         (estimate_numbers_of_iterations_loop): Call record_control_iv.
>>>>         (loop_exits_before_overflow): New.  Interface factored out of
>>>>         scev_probably_wraps_p.
>>>>         (scev_probably_wraps_p): Factor loop niter related code into
>>>>         loop_exits_before_overflow.
>>>>         (free_numbers_of_iterations_estimates_loop): Free control ivs.
>>>>         * tree-ssa-loop-niter.h (free_loop_control_ivs): New.
>>>>
>>>> gcc/testsuite/ChangeLog
>>>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>>>
>>>>         PR tree-optimization/48052
>>>>         * gcc.dg/tree-ssa/scev-8.c: New.
>>>>         * gcc.dg/tree-ssa/scev-9.c: New.
>>>>         * gcc.dg/tree-ssa/scev-10.c: New.
>>>>         * gcc.dg/vect/pr48052.c: New.
>>>>
>>
>> Hi Richard,
>> I think you replied the review message of this patch to another
>> thread.  Sorry for being mis-leading.  S I copied and answered your
>> review comments in this thread thus we can continue here.
>>
>>>> +       /* Done proving if this is a no-overflow control IV.  */
>>>> +       if (operand_equal_p (base, civ->base, 0))
>>>> +         return true;
>>>
>>> so all control IVs are no-overflow?
>>
>> This patch only records known no-overflow control ivs in loop
>> structure, so it depends on loop niter analyzer.  For now, this patch
>> (and the existing code) sets no-overflow flag only for two cases.  One
>> is the step-1 case, the other one is in assert_no_overflow_lt.
>> As a matter of fact, we may want to set no_overflow flag for all cases
>> with -funsafe-loop-optimizations in the future.  In that case, we will
>> assume all control IVs are no-overflow.
>>
>>>
>>>> +            base <= UPPER_BOUND (type) - step  ;;step > 0
>>>> +            base >= LOWER_BOUND (type) - step  ;;step < 0
>>>> +
>>>> +          by using loop's initial condition.  */
>>>> +       stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
>>>> +       if (operand_equal_p (stepped, civ->base, 0))
>>>> +         {
>>>> +           if (tree_int_cst_sign_bit (step))
>>>> +             {
>>>> +               code = LT_EXPR;
>>>> +               extreme = lower_bound_in_type (type, type);
>>>> +             }
>>>> +           else
>>>> +             {
>>>> +               code = GT_EXPR;
>>>> +               extreme = upper_bound_in_type (type, type);
>>>> +             }
>>>> +           extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
>>>> +           e = fold_build2 (code, boolean_type_node, base, extreme);
>>>
>>> looks like you are actually computing base + step <= UPPER_BOUND (type)
>>> so maybe adjust the comment.  But as both step and UPPER_BOUND  (type)
>>> are constants why not compute it the way the comment specifies it?  Comparison
>>> codes also don't match the comment and we try to prove the condition is false.
>> I tried to prove the condition are satisfied by proving the reverse
>> condition ("base > UPPER_BOUND (type) - step") is false here.  In the
>> updated patch, I revised comments to reflect that logic.  Is it ok?
>>
>>>
>>> This also reminds me of eventually pushing forward my idea of strengthening
>>> simplify_using_initial_
>>> conditions by using the VRP machinery (I have a small
>>> prototype patch for that).
>> Interesting.  If I understand correctly, VRP info is hold for ssa var
>> on a global scope basis?  The loop's initial condition may strengthen
>> var's range information, rather than the contrary.  Actually I tried
>> to refine vrp info in scope of loop and used the refined vrp
>> information in loop optimizer (In the thread which you replied this
>> review message to).  Looking forward to seeing how it's handled in
>> your patch.
>
> Attached (don't remember the state of the patch, but of course some
> caching is missing here).
If I understand correctly, you are trying to compute/refine
control-flow sensitive vrp information by using loop's initial
conditions.  I did kind of same thing in the old patch, but yes, your
patch is of course more general.  One point is we can extend the idea
to non-loop control flows, for example, if-then-else.  I guess it
might be harder to develop a caching structure dealing with non-loop
control flows

>
>>>
>>>> +/* The structure describing a non-overflow control induction variable
>>>> +   of loop's exit edge.  */
>>>> +struct GTY ((chain_next ("%h.next"))) control_iv {
>>>> +  tree base;
>>>> +  tree step;
>>>> +  struct control_iv *next;
>>>> +};
>>>
>>> The comment suggests these don't exist for multiple edges?  Maybe you
>>> can clarify this.
>> As the linked list structure suggests, we can support one no-overflow
>> iv for each loop's exit edge.  I revised the comment a little bit.
>>
>>>
>>> Otherwise the patch looks ok.
>>
>> I attached the updated patch,  does it look fine?
>
> Yes.
Will apply the patch later.

Thanks,
bin
>
> Thanks,
> Richard.
>
>> Thanks,
>> bin
>>>
>>> Thanks,
>>> Richard.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II
  2015-06-02  9:33       ` Bin.Cheng
@ 2015-06-02  9:42         ` Richard Biener
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Biener @ 2015-06-02  9:42 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: Bin Cheng, GCC Patches

On Tue, Jun 2, 2015 at 11:30 AM, Bin.Cheng <amker.cheng@gmail.com> wrote:
> On Tue, Jun 2, 2015 at 4:40 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Tue, Jun 2, 2015 at 4:55 AM, Bin.Cheng <amker.cheng@gmail.com> wrote:
>>> On Mon, Jun 1, 2015 at 6:45 PM, Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>> On Tue, May 26, 2015 at 1:04 PM, Bin Cheng <bin.cheng@arm.com> wrote:
>>>>> Hi,
>>>>> My first part patch improving how we handle overflow in scev is posted at
>>>>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
>>>>> second part patch.
>>>>>
>>>>> This patch does below improvements:
>>>>>   1) Computes and records control iv for each loop's exit edge.  This
>>>>> provides a way to compute overflow information in loop niter and use it in
>>>>> different customers.  It think it's useful, especially with option
>>>>> -funsafe-loop-optimizers.
>>>>>   2) Improve chrec_convert by adding new interface
>>>>> loop_exits_before_overflow.  It checks if a converted IV overflows wrto its
>>>>> type and loop using overflow information of loop's control iv.  This
>>>>> basically propagates no-overflow information from control iv to ivs
>>>>> converted from control iv.  Moreover, we can further improve the logic by
>>>>> using possible VRP information in the future.
>>>>
>>>> But 2) you already posted (and I have approved it but you didn't commit yet?).
>>>>
>>>> Can you commit that approved patch and only send the parts I didn't approve
>>>> yet?
>>>>
>>>> Thanks,
>>>> Richard.
>>>>
>>>>> With this patch, cases like scev-9.c and scev-10.c in patch can be handled
>>>>> now.  Cases reported in PR48052 can be vectorized too.
>>>>> Opinions?
>>>>>
>>>>> Thanks,
>>>>> bin
>>>>>
>>>>>
>>>>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>>>>
>>>>>         * cfgloop.h (struct control_iv): New.
>>>>>         (struct loop): New field control_ivs.
>>>>>         * tree-ssa-loop-niter.c : Include "stor-layout.h".
>>>>>         (number_of_iterations_lt): Set no_overflow information.
>>>>>         (number_of_iterations_exit): Init control iv in niter struct.
>>>>>         (record_control_iv): New.
>>>>>         (estimate_numbers_of_iterations_loop): Call record_control_iv.
>>>>>         (loop_exits_before_overflow): New.  Interface factored out of
>>>>>         scev_probably_wraps_p.
>>>>>         (scev_probably_wraps_p): Factor loop niter related code into
>>>>>         loop_exits_before_overflow.
>>>>>         (free_numbers_of_iterations_estimates_loop): Free control ivs.
>>>>>         * tree-ssa-loop-niter.h (free_loop_control_ivs): New.
>>>>>
>>>>> gcc/testsuite/ChangeLog
>>>>> 2015-05-26  Bin Cheng  <bin.cheng@arm.com>
>>>>>
>>>>>         PR tree-optimization/48052
>>>>>         * gcc.dg/tree-ssa/scev-8.c: New.
>>>>>         * gcc.dg/tree-ssa/scev-9.c: New.
>>>>>         * gcc.dg/tree-ssa/scev-10.c: New.
>>>>>         * gcc.dg/vect/pr48052.c: New.
>>>>>
>>>
>>> Hi Richard,
>>> I think you replied the review message of this patch to another
>>> thread.  Sorry for being mis-leading.  S I copied and answered your
>>> review comments in this thread thus we can continue here.
>>>
>>>>> +       /* Done proving if this is a no-overflow control IV.  */
>>>>> +       if (operand_equal_p (base, civ->base, 0))
>>>>> +         return true;
>>>>
>>>> so all control IVs are no-overflow?
>>>
>>> This patch only records known no-overflow control ivs in loop
>>> structure, so it depends on loop niter analyzer.  For now, this patch
>>> (and the existing code) sets no-overflow flag only for two cases.  One
>>> is the step-1 case, the other one is in assert_no_overflow_lt.
>>> As a matter of fact, we may want to set no_overflow flag for all cases
>>> with -funsafe-loop-optimizations in the future.  In that case, we will
>>> assume all control IVs are no-overflow.
>>>
>>>>
>>>>> +            base <= UPPER_BOUND (type) - step  ;;step > 0
>>>>> +            base >= LOWER_BOUND (type) - step  ;;step < 0
>>>>> +
>>>>> +          by using loop's initial condition.  */
>>>>> +       stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
>>>>> +       if (operand_equal_p (stepped, civ->base, 0))
>>>>> +         {
>>>>> +           if (tree_int_cst_sign_bit (step))
>>>>> +             {
>>>>> +               code = LT_EXPR;
>>>>> +               extreme = lower_bound_in_type (type, type);
>>>>> +             }
>>>>> +           else
>>>>> +             {
>>>>> +               code = GT_EXPR;
>>>>> +               extreme = upper_bound_in_type (type, type);
>>>>> +             }
>>>>> +           extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
>>>>> +           e = fold_build2 (code, boolean_type_node, base, extreme);
>>>>
>>>> looks like you are actually computing base + step <= UPPER_BOUND (type)
>>>> so maybe adjust the comment.  But as both step and UPPER_BOUND  (type)
>>>> are constants why not compute it the way the comment specifies it?  Comparison
>>>> codes also don't match the comment and we try to prove the condition is false.
>>> I tried to prove the condition are satisfied by proving the reverse
>>> condition ("base > UPPER_BOUND (type) - step") is false here.  In the
>>> updated patch, I revised comments to reflect that logic.  Is it ok?
>>>
>>>>
>>>> This also reminds me of eventually pushing forward my idea of strengthening
>>>> simplify_using_initial_
>>>> conditions by using the VRP machinery (I have a small
>>>> prototype patch for that).
>>> Interesting.  If I understand correctly, VRP info is hold for ssa var
>>> on a global scope basis?  The loop's initial condition may strengthen
>>> var's range information, rather than the contrary.  Actually I tried
>>> to refine vrp info in scope of loop and used the refined vrp
>>> information in loop optimizer (In the thread which you replied this
>>> review message to).  Looking forward to seeing how it's handled in
>>> your patch.
>>
>> Attached (don't remember the state of the patch, but of course some
>> caching is missing here).
> If I understand correctly, you are trying to compute/refine
> control-flow sensitive vrp information by using loop's initial
> conditions.  I did kind of same thing in the old patch, but yes, your
> patch is of course more general.  One point is we can extend the idea
> to non-loop control flows, for example, if-then-else.  I guess it
> might be harder to develop a caching structure dealing with non-loop
> control flows

Well, the caching is of course dependent on the "context" BB for which
we could keep a lattice for SSA value-ranges we compute.  So a
caching interface could for example keep a <basic-block, SSA-name> ->
value-range
map and for a loop context just use the loop header as basic-block to key on.

Of course the code isn't very clever with determining context-based value-ranges
and I've strided off to start coding a dominator-based VRP instead of
a SSA-based
inserting assert-exprs one.  But I didn't get very far on that.  At
least it would
allow computing value-ranges on-demand like we'd like to have for the niter
use.

Richard.

>>
>>>>
>>>>> +/* The structure describing a non-overflow control induction variable
>>>>> +   of loop's exit edge.  */
>>>>> +struct GTY ((chain_next ("%h.next"))) control_iv {
>>>>> +  tree base;
>>>>> +  tree step;
>>>>> +  struct control_iv *next;
>>>>> +};
>>>>
>>>> The comment suggests these don't exist for multiple edges?  Maybe you
>>>> can clarify this.
>>> As the linked list structure suggests, we can support one no-overflow
>>> iv for each loop's exit edge.  I revised the comment a little bit.
>>>
>>>>
>>>> Otherwise the patch looks ok.
>>>
>>> I attached the updated patch,  does it look fine?
>>
>> Yes.
> Will apply the patch later.
>
> Thanks,
> bin
>>
>> Thanks,
>> Richard.
>>
>>> Thanks,
>>> bin
>>>>
>>>> Thanks,
>>>> Richard.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II
@ 2015-05-26 10:48 Bin Cheng
  0 siblings, 0 replies; 7+ messages in thread
From: Bin Cheng @ 2015-05-26 10:48 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1958 bytes --]

Hi,
My first part patch improving how we handle overflow in scev is posted at
https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
second part patch.

This patch does below improvements:
  1) Computes and records control iv for each loop's exit edge.  This
provides a way to compute overflow information in loop niter and use it in
different customers.  It think it's useful, especially with option
-funsafe-loop-optimizers.
  2) Improve chrec_convert by adding new interface
loop_exits_before_overflow.  It checks if a converted IV overflows wrto its
type and loop using overflow information of loop's control iv.  This
basically propagates no-overflow information from control iv to ivs
converted from control iv.  Moreover, we can further improve the logic by
using possible VRP information in the future.

With this patch, cases like scev-9.c and scev-10.c in patch can be handled
now.  Also cases reported in PR48052 now can be vectorized too.
Opinions?

Thanks,
bin


2015-05-26  Bin Cheng  <bin.cheng@arm.com>

	* cfgloop.h (struct control_iv): New.
	(struct loop): New field control_ivs.
	* tree-ssa-loop-niter.c : Include "stor-layout.h".
	(number_of_iterations_lt): Set no_overflow information.
	(number_of_iterations_exit): Init control iv in niter struct.
	(record_control_iv): New.
	(estimate_numbers_of_iterations_loop): Call record_control_iv.
	(loop_exits_before_overflow): New.  Interface factored out of
	scev_probably_wraps_p.
	(scev_probably_wraps_p): Factor loop niter related code into
	loop_exits_before_overflow.
	(free_numbers_of_iterations_estimates_loop): Free control ivs.
	* tree-ssa-loop-niter.h (free_loop_control_ivs): New.

gcc/testsuite/ChangeLog
2015-05-26  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/48052
	* gcc.dg/tree-ssa/scev-8.c: New.
	* gcc.dg/tree-ssa/scev-9.c: New.
	* gcc.dg/tree-ssa/scev-10.c: New.
	* gcc.dg/vect/pr48052.c: New.

[-- Attachment #2: improve-overlow-in-loop_niter-scev-20150526.txt --]
[-- Type: text/plain, Size: 17266 bytes --]

Index: gcc/tree-ssa-loop-niter.c
===================================================================
--- gcc/tree-ssa-loop-niter.c	(revision 222758)
+++ gcc/tree-ssa-loop-niter.c	(working copy)
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "wide-int.h"
 #include "inchash.h"
 #include "tree.h"
+#include "stor-layout.h"
 #include "fold-const.h"
 #include "calls.h"
 #include "hashtab.h"
@@ -1184,6 +1185,7 @@ number_of_iterations_lt (tree type, affine_iv *iv0
       niter->niter = delta;
       niter->max = widest_int::from (wi::from_mpz (niter_type, bnds->up, false),
 				     TYPE_SIGN (niter_type));
+      niter->control.no_overflow = true;
       return true;
     }
 
@@ -1965,6 +1967,9 @@ number_of_iterations_exit (struct loop *loop, edge
     return false;
 
   niter->assumptions = boolean_false_node;
+  niter->control.base = NULL_TREE;
+  niter->control.step = NULL_TREE;
+  niter->control.no_overflow = false;
   last = last_stmt (exit->src);
   if (!last)
     return false;
@@ -2744,6 +2749,29 @@ record_estimate (struct loop *loop, tree bound, co
   record_niter_bound (loop, new_i_bound, realistic, upper);
 }
 
+/* Records the control iv analyzed in NITER for LOOP if the iv is valid
+   and doesn't overflow.  */
+
+static void
+record_control_iv (struct loop *loop, struct tree_niter_desc *niter)
+{
+  struct control_iv *iv;
+
+  if (!niter->control.base || !niter->control.step)
+    return;
+
+  if (!integer_onep (niter->assumptions) || !niter->control.no_overflow)
+    return;
+
+  iv = ggc_alloc<control_iv> ();
+  iv->base = niter->control.base;
+  iv->step = niter->control.step;
+  iv->next = loop->control_ivs;
+  loop->control_ivs = iv;
+
+  return;
+}
+
 /* Record the estimate on number of iterations of LOOP based on the fact that
    the induction variable BASE + STEP * i evaluated in STMT does not wrap and
    its values belong to the range <LOW, HIGH>.  REALISTIC is true if the
@@ -3467,6 +3495,7 @@ estimate_numbers_of_iterations_loop (struct loop *
       record_estimate (loop, niter, niter_desc.max,
 		       last_stmt (ex->src),
 		       true, ex == likely_exit, true);
+      record_control_iv (loop, &niter_desc);
     }
   exits.release ();
 
@@ -3773,6 +3802,188 @@ nowrap_type_p (tree type)
   return false;
 }
 
+/* Return true if we can prove LOOP is exited before evolution of induction
+   variabled {BASE, STEP} overflows with respect to its type bound.  */
+
+static bool
+loop_exits_before_overflow (tree base, tree step,
+			    gimple at_stmt, struct loop *loop)
+{
+  widest_int niter;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
+  tree e, delta, step_abs, unsigned_base;
+  tree type = TREE_TYPE (step);
+  tree unsigned_type, valid_niter;
+
+  /* Don't issue signed overflow warnings.  */
+  fold_defer_overflow_warnings ();
+
+  /* Compute the number of iterations before we reach the bound of the
+     type, and verify that the loop is exited before this occurs.  */
+  unsigned_type = unsigned_type_for (type);
+  unsigned_base = fold_convert (unsigned_type, base);
+
+  if (tree_int_cst_sign_bit (step))
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   lower_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, unsigned_base, extreme);
+      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
+			      fold_convert (unsigned_type, step));
+    }
+  else
+    {
+      tree extreme = fold_convert (unsigned_type,
+				   upper_bound_in_type (type, type));
+      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, unsigned_base);
+      step_abs = fold_convert (unsigned_type, step);
+    }
+
+  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
+
+  estimate_numbers_of_iterations_loop (loop);
+
+  if (max_loop_iterations (loop, &niter)
+      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
+      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
+			   wide_int_to_tree (TREE_TYPE (valid_niter),
+					     niter))) != NULL
+      && integer_nonzerop (e))
+    {
+      fold_undefer_and_ignore_overflow_warnings ();
+      return true;
+    }
+  if (at_stmt)
+    for (bound = loop->bounds; bound; bound = bound->next)
+      {
+	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
+	  {
+	    fold_undefer_and_ignore_overflow_warnings ();
+	    return true;
+	  }
+      }
+  fold_undefer_and_ignore_overflow_warnings ();
+
+  /* Try to prove loop is exited before {base, step} overflows with the
+     help of analyzed loop control IV.  This is done only for IVs with
+     constant step because otherwise we don't have the information.  */
+  if (TREE_CODE (step) == INTEGER_CST)
+    for (civ = loop->control_ivs; civ; civ = civ->next)
+      {
+	enum tree_code code;
+	tree stepped, extreme, civ_type = TREE_TYPE (civ->step);
+
+	/* Have to consider type difference because operand_equal_p ignores
+	   that for constants.  */
+	if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (civ_type)
+	    || element_precision (type) != element_precision (civ_type))
+	  continue;
+
+	/* Only consider control IV with same step.  */
+	if (!operand_equal_p (step, civ->step, 0))
+	  continue;
+
+	/* Done proving if this is a no-overflow control IV.  */
+	if (operand_equal_p (base, civ->base, 0))
+	  return true;
+
+	/* If this is a before stepping control IV, in other words, we have
+
+	     {civ_base, step} = {base + step, step}
+
+	   Because civ {base + step, step} doesn't overflow during loop
+	   iterations, {base, step} will not overflow if we can prove the
+	   operation "base + step" does not overflow.  This is done by proving
+	   below conditions:
+
+	     base <= UPPER_BOUND (type) - step  ;;step > 0
+	     base >= LOWER_BOUND (type) - step  ;;step < 0
+
+	   by using loop's initial condition.  */
+	stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
+	if (operand_equal_p (stepped, civ->base, 0))
+	  {
+	    if (tree_int_cst_sign_bit (step))
+	      {
+		code = LT_EXPR;
+		extreme = lower_bound_in_type (type, type);
+	      }
+	    else
+	      {
+		code = GT_EXPR;
+		extreme = upper_bound_in_type (type, type);
+	      }
+	    extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	    e = fold_build2 (code, boolean_type_node, base, extreme);
+	    e = simplify_using_initial_conditions (loop, e);
+	    if (integer_zerop (e))
+	      return true;
+
+	    continue;
+	  }
+
+	/* Similar to above, only in this case we have:
+
+	     {civ_base, step} = {(signed T)((unsigned T)base + step), step}
+	     && TREE_TYPE (civ_base) = signed T.
+
+	   We prove that below condition is satisfied:
+
+	     (signed T)((unsigned T)base + step)
+	       == (signed T)(unsigned T)base + step
+	       == base + step
+
+	   because of exact the same reason as above.  This also proves
+	   there is no overflow in the operation "base + step", thus the
+	   induction variable {base, step} during loop iterations.
+
+	   This is necessary to handle cases as below:
+
+	     int foo (int *a, signed char s, signed char l)
+	       {
+		 signed char i;
+		 for (i = s; i < l; i++)
+		   a[i] = 0;
+		 return 0;
+	       }
+
+	   The variable I is firstly converted to type unsigned char,
+	   incremented, then converted back to type signed char.  */
+	if (!CONVERT_EXPR_P (civ->base) || TREE_TYPE (civ->base) != type)
+	  continue;
+	e = TREE_OPERAND (civ->base, 0);
+	if (TREE_CODE (e) != PLUS_EXPR
+	    || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
+	    || !operand_equal_p (step,
+				 fold_convert (type,
+					       TREE_OPERAND (e, 1)), 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	if (!CONVERT_EXPR_P (e) || !operand_equal_p (e, unsigned_base, 0))
+	  continue;
+	e = TREE_OPERAND (e, 0);
+	gcc_assert (operand_equal_p (e, base, 0));
+	if (tree_int_cst_sign_bit (step))
+	  {
+	    code = LT_EXPR;
+	    extreme = lower_bound_in_type (type, type);
+	  }
+	else
+	  {
+	    code = GT_EXPR;
+	    extreme = upper_bound_in_type (type, type);
+	  }
+	extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
+	e = fold_build2 (code, boolean_type_node, base, extreme);
+	e = simplify_using_initial_conditions (loop, e);
+	if (integer_zerop (e))
+	  return true;
+      }
+
+  return false;
+}
+
 /* Return false only when the induction variable BASE + STEP * I is
    known to not overflow: i.e. when the number of iterations is small
    enough with respect to the step and initial condition in order to
@@ -3788,13 +3999,6 @@ scev_probably_wraps_p (tree base, tree step,
 		       gimple at_stmt, struct loop *loop,
 		       bool use_overflow_semantics)
 {
-  tree delta, step_abs;
-  tree unsigned_type, valid_niter;
-  tree type = TREE_TYPE (step);
-  tree e;
-  widest_int niter;
-  struct nb_iter_bound *bound;
-
   /* FIXME: We really need something like
      http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
 
@@ -3828,57 +4032,9 @@ scev_probably_wraps_p (tree base, tree step,
   if (TREE_CODE (step) != INTEGER_CST)
     return true;
 
-  /* Don't issue signed overflow warnings.  */
-  fold_defer_overflow_warnings ();
+  if (loop_exits_before_overflow (base, step, at_stmt, loop))
+    return false;
 
-  /* Otherwise, compute the number of iterations before we reach the
-     bound of the type, and verify that the loop is exited before this
-     occurs.  */
-  unsigned_type = unsigned_type_for (type);
-  base = fold_convert (unsigned_type, base);
-
-  if (tree_int_cst_sign_bit (step))
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   lower_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
-      step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
-			      fold_convert (unsigned_type, step));
-    }
-  else
-    {
-      tree extreme = fold_convert (unsigned_type,
-				   upper_bound_in_type (type, type));
-      delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
-      step_abs = fold_convert (unsigned_type, step);
-    }
-
-  valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
-
-  estimate_numbers_of_iterations_loop (loop);
-
-  if (max_loop_iterations (loop, &niter)
-      && wi::fits_to_tree_p (niter, TREE_TYPE (valid_niter))
-      && (e = fold_binary (GT_EXPR, boolean_type_node, valid_niter,
-			   wide_int_to_tree (TREE_TYPE (valid_niter),
-					     niter))) != NULL
-      && integer_nonzerop (e))
-    {
-      fold_undefer_and_ignore_overflow_warnings ();
-      return false;
-    }
-  if (at_stmt)
-    for (bound = loop->bounds; bound; bound = bound->next)
-      {
-	if (n_of_executions_at_most (at_stmt, bound, valid_niter))
-	  {
-	    fold_undefer_and_ignore_overflow_warnings ();
-	    return false;
-	  }
-      }
-
-  fold_undefer_and_ignore_overflow_warnings ();
-
   /* At this point we still don't have a proof that the iv does not
      overflow: give up.  */
   return true;
@@ -3889,17 +4045,26 @@ scev_probably_wraps_p (tree base, tree step,
 void
 free_numbers_of_iterations_estimates_loop (struct loop *loop)
 {
-  struct nb_iter_bound *bound, *next;
+  struct control_iv *civ;
+  struct nb_iter_bound *bound;
 
   loop->nb_iterations = NULL;
   loop->estimate_state = EST_NOT_COMPUTED;
-  for (bound = loop->bounds; bound; bound = next)
+  for (bound = loop->bounds; bound;)
     {
-      next = bound->next;
+      struct nb_iter_bound *next = bound->next;
       ggc_free (bound);
+      bound = next;
     }
+  loop->bounds = NULL;
 
-  loop->bounds = NULL;
+  for (civ = loop->control_ivs; civ;)
+    {
+      struct control_iv *next = civ->next;
+      ggc_free (civ);
+      civ = next;
+    }
+  loop->control_ivs = NULL;
 }
 
 /* Frees the information on upper bounds on numbers of iterations of loops.  */
Index: gcc/tree-ssa-loop-niter.h
===================================================================
--- gcc/tree-ssa-loop-niter.h	(revision 222758)
+++ gcc/tree-ssa-loop-niter.h	(working copy)
@@ -41,6 +41,7 @@ extern void estimate_numbers_of_iterations (void);
 extern bool stmt_dominates_stmt_p (gimple, gimple);
 extern bool nowrap_type_p (tree);
 extern bool scev_probably_wraps_p (tree, tree, gimple, struct loop *, bool);
+extern void free_loop_control_ivs (struct loop *);
 extern void free_numbers_of_iterations_estimates_loop (struct loop *);
 extern void free_numbers_of_iterations_estimates (void);
 extern void substitute_in_loop_info (struct loop *, tree, tree);
Index: gcc/testsuite/gcc.dg/vect/pr48052.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr48052.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr48052.c	(revision 0)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+int foo(int* A, int* B,  unsigned start, unsigned BS)
+{
+  int s;
+  for (unsigned k = start;  k < start + BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+int bar(int* A, int* B, unsigned BS)
+{
+  int s;
+  for (unsigned k = 0;  k < BS; k++)
+    {
+      s += A[k] * B[k];
+    }
+
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-8.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-8.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-8.c	(revision 0)
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo1 (long long s, long long l)
+{
+  long long i;
+
+  for (i = s; i < l; i++)
+    {
+      a[(short)i] = 0;
+    }
+  return 0;
+}
+
+int
+foo2 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i, step = 1;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[c];
+      c += step;
+    }
+
+  return sum;
+}
+
+int
+foo3 (unsigned char s, unsigned char l, unsigned char c)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i += c)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+int
+foo4 (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i != l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-not "use \[0-9\]\n  address" "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-10.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-10.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-10.c	(revision 0)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (signed char s, signed char l)
+{
+  signed char i;
+  int sum = 0;
+
+  for (i = s; i < l; i++)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
+
+
Index: gcc/testsuite/gcc.dg/tree-ssa/scev-9.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/scev-9.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/scev-9.c	(revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int *a;
+
+int
+foo (unsigned char s, unsigned char l)
+{
+  unsigned char i;
+  int sum = 0;
+
+  for (i = s; i < l; i += 1)
+    {
+      sum += a[i];
+    }
+
+  return sum;
+}
+
+/* Address of array references are not scevs.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
+
Index: gcc/cfgloop.h
===================================================================
--- gcc/cfgloop.h	(revision 222758)
+++ gcc/cfgloop.h	(working copy)
@@ -116,6 +116,14 @@ enum loop_estimation
   EST_LAST
 };
 
+/* The structure describing a non-overflow control induction variable
+   of loop's exit edge.  */
+struct GTY ((chain_next ("%h.next"))) control_iv {
+  tree base;
+  tree step;
+  struct control_iv *next;
+};
+
 /* Structure to hold information for each natural loop.  */
 struct GTY ((chain_next ("%h.next"))) loop {
   /* Index into loops array.  */
@@ -203,6 +211,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
   /* Upper bound on number of iterations of a loop.  */
   struct nb_iter_bound *bounds;
 
+  /* Non-overflow control ivs of a loop.  */
+  struct control_iv *control_ivs;
+
   /* Head of the cyclic list of the exits of the loop.  */
   struct loop_exit *exits;
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-06-02  9:40 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-26 11:13 [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II Bin Cheng
2015-06-01 10:46 ` Richard Biener
2015-06-02  3:37   ` Bin.Cheng
2015-06-02  8:53     ` Richard Biener
2015-06-02  9:33       ` Bin.Cheng
2015-06-02  9:42         ` Richard Biener
  -- strict thread matches above, loose matches on Subject: below --
2015-05-26 10:48 Bin Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).