public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Analyze niter for until-wrap condition [PR101145]
@ 2021-07-01  2:05 Jiufu Guo
  2021-07-01  7:22 ` Bin.Cheng
  2021-07-01 12:35 ` Richard Biener
  0 siblings, 2 replies; 8+ messages in thread
From: Jiufu Guo @ 2021-07-01  2:05 UTC (permalink / raw)
  To: gcc-patches; +Cc: guojiufu, wschmidt, segher, dje.gcc, rguenther, jlaw, amker

For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
    cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, is this ok for trunk?

gcc/ChangeLog:

	PR tree-optimization/101145
	* tree-ssa-loop-niter.c
	(number_of_iterations_until_wrap): New function.
	(number_of_iterations_lt): Invoke above function.
	(adjust_cond_for_loop_until_wrap):
	Merge to number_of_iterations_until_wrap.
	(number_of_iterations_cond): Update invokes for
	adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

	PR tree-optimization/101145
	* gcc.dg/vect/pr101145.c: New test.
	* gcc.dg/vect/pr101145.inc: New test.
	* gcc.dg/vect/pr101145_1.c: New test.
	* gcc.dg/vect/pr101145_2.c: New test.
	* gcc.dg/vect/pr101145_3.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 +++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
 gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
 gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
 6 files changed, 380 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 00000000000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (UINT_MAX - 64 < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n <= ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{  // infininate 
+  while (0 <= ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (--l < n)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (--l < 64)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = 32;
+  while (--l < n)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+  unsigned l, n;
+  unsigned res;
+  /* l > n*/
+  n = UINT_MAX - 64;
+  l = n + 32;
+  res = foo (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n;
+  res = foo (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n - 1;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+    fail++;
+  
+  l = n - 32;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+    fail++;
+
+  l = UINT_MAX;
+  res = foo (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n + 32;
+  res = foo_1 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n + 32;
+  res = foo_2 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n;
+  res = foo_3 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n - 1;
+  res = foo_3 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n - 2;
+  res = foo_3 (a, b, l, n);
+  if (res != l + 1)
+    fail++;
+
+  res = foo_5 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  n = 64;
+  l = n - 32;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;
+
+  l = n;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;
+
+  l = n + 1;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != l)
+    fail++;
+
+  l = 0;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != l)
+    fail++;
+
+  l = 32;
+  res = bar_1 (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;  
+
+  res = bar_1 (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;  
+
+  if (fail)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc
new file mode 100644
index 00000000000..6eed3fa8aca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
@@ -0,0 +1,63 @@
+TYPE __attribute__ ((noinline))
+foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+  for (l = L_BASE; n < l; l += C)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+TYPE __attribute__ ((noinline))
+bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+  for (l = L_BASE_DOWN; l < n; l -= C)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
+
+int a[1000], b[1000];
+int fail;
+
+int
+main ()
+{
+  TYPE res;
+  TYPE l;
+  TYPE n;
+  n = N_BASE;
+  l = n - C;
+  res = foo_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  l = n;
+  res = foo_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  l = n + C;
+  res = foo_sign (a, b, l, n);
+  if (neq ((res - MIN) / C, 0))
+    fail++;
+
+  n = N_BASE_DOWN;
+  l = n - C;
+  res = bar_sign (a, b, l, n);
+  if (neq ((MAX - res) / C, 0))
+    fail++;
+
+  l = n;
+  res = bar_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  l = n + C;
+  res = bar_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  if (fail)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
new file mode 100644
index 00000000000..94f6b99b893
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE signed char
+#define MIN -128
+#define MAX 127
+#define N_BASE (MAX - 32)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 3
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
new file mode 100644
index 00000000000..d3cfc9e01e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE unsigned char
+#define MIN 0
+#define MAX 255
+#define N_BASE (MAX - 32 + 1)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 2
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
new file mode 100644
index 00000000000..e2cf9b1f7e6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE int *
+#define MIN ((TYPE)0)
+#define MAX ((TYPE)((long long)-1))
+#define N_BASE ((TYPE) (-32))
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 1
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b5add827018..06db6a36ef8 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
     }
 }
 
+/* Determines number of iterations of loop whose ending condition
+   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
+   The number of iterations is stored to NITER.  */
+
+static bool
+number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
+				 affine_iv *iv1, class tree_niter_desc *niter)
+{
+  tree niter_type = unsigned_type_for (type);
+  tree max, min;
+
+  if (POINTER_TYPE_P (type))
+    {
+      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
+      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
+    }
+  else
+    {
+      max = TYPE_MAX_VALUE (type);
+      min = TYPE_MIN_VALUE (type);
+    }
+
+  tree high = max, low = min, one = build_int_cst (niter_type, 1);
+  tree step;
+
+  /* n < {base, C}. */
+  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
+      && !tree_int_cst_sign_bit (iv1->step))
+    {
+      step = iv1->step;
+      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
+      if (TREE_CODE (iv1->base) == INTEGER_CST)
+	low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
+      else if (TREE_CODE (iv0->base) == INTEGER_CST)
+	low = iv0->base;
+    }
+  /* {base, -C} < n. */
+  else if (TREE_CODE (iv0->step) == INTEGER_CST
+	   && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
+    {
+      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
+      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
+      if (TREE_CODE (iv0->base) == INTEGER_CST)
+	high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
+      else if (TREE_CODE (iv1->base) == INTEGER_CST)
+	high = iv1->base;
+    }
+  else
+    return false;
+
+  /* (delta + step - 1) / step */
+  step = fold_convert (niter_type, step);
+  niter->niter = fold_convert (niter_type, niter->niter);
+  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step);
+  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, step);
+
+  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
+  m = fold_convert (niter_type, m);
+  mpz_t mstep, tmp, mmax;
+  mpz_init (mstep);
+  mpz_init (tmp);
+  mpz_init (mmax);
+  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
+  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
+  mpz_add (tmp, mmax, mstep);
+  mpz_sub_ui (tmp, tmp, 1);
+  mpz_fdiv_q (tmp, tmp, mstep);
+  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
+				 TYPE_SIGN (niter_type));
+  mpz_clear (mstep);
+  mpz_clear (tmp);
+
+  niter->may_be_zero
+    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
+
+  niter->control.no_overflow = false;
+
+  return true;
+}
+
 /* Determines number of iterations of loop whose ending condition
    is IV0 < IV1.  TYPE is the type of the iv.  The number of
    iterations is stored to NITER.  BNDS bounds the difference
@@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
       niter->bound = iv0->base;
     }
 
+  /* {base, -C} < n,  or n < {base, C} */
+  if (tree_int_cst_sign_bit (iv0->step)
+      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
+    return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
+
   delta = fold_build2 (MINUS_EXPR, niter_type,
 		       fold_convert (niter_type, iv1->base),
 		       fold_convert (niter_type, iv0->base));
@@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
     }
 }
 
-/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
-   the condition for loop-until-wrap cases.  For example:
-     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
-     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
-   Return true if condition is successfully adjusted.  */
-
-static bool
-adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
-				 affine_iv *iv1)
-{
-  /* Only support simple cases for the moment.  */
-  if (TREE_CODE (iv0->base) != INTEGER_CST
-      || TREE_CODE (iv1->base) != INTEGER_CST)
-    return false;
-
-  tree niter_type = unsigned_type_for (type), high, low;
-  /* Case: i-- < 10.  */
-  if (integer_zerop (iv1->step))
-    {
-      /* TODO: Should handle case in which abs(step) != 1.  */
-      if (!integer_minus_onep (iv0->step))
-	return false;
-      /* Give up on infinite loop.  */
-      if (*code == LE_EXPR
-	  && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
-	return false;
-      high = fold_build2 (PLUS_EXPR, niter_type,
-			  fold_convert (niter_type, iv0->base),
-			  build_int_cst (niter_type, 1));
-      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
-    }
-  else if (integer_zerop (iv0->step))
-    {
-      /* TODO: Should handle case in which abs(step) != 1.  */
-      if (!integer_onep (iv1->step))
-	return false;
-      /* Give up on infinite loop.  */
-      if (*code == LE_EXPR
-	  && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
-	return false;
-      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
-      low = fold_build2 (MINUS_EXPR, niter_type,
-			 fold_convert (niter_type, iv1->base),
-			 build_int_cst (niter_type, 1));
-    }
-  else
-    gcc_unreachable ();
-
-  iv0->base = low;
-  iv0->step = fold_convert (niter_type, integer_one_node);
-  iv1->base = high;
-  iv1->step = build_int_cst (niter_type, 0);
-  *code = NE_EXPR;
-  return true;
-}
-
 /* Determine the number of iterations according to condition (for staying
    inside loop) which compares two induction variables using comparison
    operator CODE.  The induction variable on left side of the comparison
@@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
       return true;
     }
 
-  /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
-     adjusting iv0, iv1 and code.  */
-  if (code != NE_EXPR
-      && (tree_int_cst_sign_bit (iv0->step)
-	  || (!integer_zerop (iv1->step)
-	      && !tree_int_cst_sign_bit (iv1->step)))
-      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
-    return false;
-
   /* OK, now we know we have a senseful loop.  Handle several cases, depending
      on what comparison operator is used.  */
   bound_difference (loop, iv1->base, iv0->base, &bnds);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
  2021-07-01  2:05 [PATCH] Analyze niter for until-wrap condition [PR101145] Jiufu Guo
@ 2021-07-01  7:22 ` Bin.Cheng
  2021-07-01 11:32   ` guojiufu
  2021-07-01 12:35 ` Richard Biener
  1 sibling, 1 reply; 8+ messages in thread
From: Bin.Cheng @ 2021-07-01  7:22 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: gcc-patches List, Richard Biener, amker, Segher Boessenkool,
	jlaw, Bill Schmidt, dje.gcc

On Thu, Jul 1, 2021 at 10:06 AM Jiufu Guo via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> For code like:
> unsigned foo(unsigned val, unsigned start)
> {
>   unsigned cnt = 0;
>   for (unsigned i = start; i > val; ++i)
>     cnt++;
>   return cnt;
> }
>
> The number of iterations should be about UINT_MAX - start.
>
> There is function adjust_cond_for_loop_until_wrap which
> handles similar work for const bases.
> Like adjust_cond_for_loop_until_wrap, this patch enhance
> function number_of_iterations_cond/number_of_iterations_lt
> to analyze number of iterations for this kind of loop.
>
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>
> gcc/ChangeLog:
>
>         PR tree-optimization/101145
>         * tree-ssa-loop-niter.c
>         (number_of_iterations_until_wrap): New function.
>         (number_of_iterations_lt): Invoke above function.
>         (adjust_cond_for_loop_until_wrap):
>         Merge to number_of_iterations_until_wrap.
>         (number_of_iterations_cond): Update invokes for
>         adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/101145
>         * gcc.dg/vect/pr101145.c: New test.
>         * gcc.dg/vect/pr101145.inc: New test.
>         * gcc.dg/vect/pr101145_1.c: New test.
>         * gcc.dg/vect/pr101145_2.c: New test.
>         * gcc.dg/vect/pr101145_3.c: New test.
> ---
>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 +++++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>  gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
>  6 files changed, 380 insertions(+), 65 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>

> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index b5add827018..06db6a36ef8 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
>      }
>  }
>
> +/* Determines number of iterations of loop whose ending condition
> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
> +   The number of iterations is stored to NITER.  */
> +
> +static bool
> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
> +                                affine_iv *iv1, class tree_niter_desc *niter)
> +{
> +  tree niter_type = unsigned_type_for (type);
> +  tree max, min;
> +
> +  if (POINTER_TYPE_P (type))
> +    {
> +      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
> +      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
> +    }
> +  else
> +    {
> +      max = TYPE_MAX_VALUE (type);
> +      min = TYPE_MIN_VALUE (type);
> +    }
> +
> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
> +  tree step;
> +
> +  /* n < {base, C}. */
> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
> +      && !tree_int_cst_sign_bit (iv1->step))
> +    {
> +      step = iv1->step;
> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
max/iv1->base could be of pointer type, not sure if this is canonical though.

> +      if (TREE_CODE (iv1->base) == INTEGER_CST)
> +       low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
> +      else if (TREE_CODE (iv0->base) == INTEGER_CST)
> +       low = iv0->base;
> +    }
> +  /* {base, -C} < n. */
> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
> +          && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
> +    {
> +      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
> +      if (TREE_CODE (iv0->base) == INTEGER_CST)
> +       high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
> +      else if (TREE_CODE (iv1->base) == INTEGER_CST)
> +       high = iv1->base;
> +    }
> +  else
> +    return false;
> +
> +  /* (delta + step - 1) / step */
> +  step = fold_convert (niter_type, step);
> +  niter->niter = fold_convert (niter_type, niter->niter);
> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step);
> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, step);
> +
> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
> +  m = fold_convert (niter_type, m);
> +  mpz_t mstep, tmp, mmax;
> +  mpz_init (mstep);
> +  mpz_init (tmp);
> +  mpz_init (mmax);
> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
> +  mpz_add (tmp, mmax, mstep);
> +  mpz_sub_ui (tmp, tmp, 1);
> +  mpz_fdiv_q (tmp, tmp, mstep);
> +  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
> +                                TYPE_SIGN (niter_type));
This computation is similar to function number_of_iterations_lt, could
we factor it out into an independent function?

> +  mpz_clear (mstep);
> +  mpz_clear (tmp);
> +
> +  niter->may_be_zero
> +    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
If iv0->base and iv1->base are constant and iv1->base <= iv0->base,
the number of iteration is actually zero, but here we rely on
may_be_zero (== true), which loses information.  Could we specially
handle this case and do a fast return?

Could you test this on some more targets(x86, aarch64) please?  Otherwise LGTM.

Thanks,
bin
> +
> +  niter->control.no_overflow = false;
> +
> +  return true;
> +}
> +
>  /* Determines number of iterations of loop whose ending condition
>     is IV0 < IV1.  TYPE is the type of the iv.  The number of
>     iterations is stored to NITER.  BNDS bounds the difference
> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
>        niter->bound = iv0->base;
>      }
>
> +  /* {base, -C} < n,  or n < {base, C} */
> +  if (tree_int_cst_sign_bit (iv0->step)
> +      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
> +    return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
> +
>    delta = fold_build2 (MINUS_EXPR, niter_type,
>                        fold_convert (niter_type, iv1->base),
>                        fold_convert (niter_type, iv0->base));
> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>      }
>  }
>
> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
> -   the condition for loop-until-wrap cases.  For example:
> -     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
> -     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
> -   Return true if condition is successfully adjusted.  */
> -
> -static bool
> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
> -                                affine_iv *iv1)
> -{
> -  /* Only support simple cases for the moment.  */
> -  if (TREE_CODE (iv0->base) != INTEGER_CST
> -      || TREE_CODE (iv1->base) != INTEGER_CST)
> -    return false;
> -
> -  tree niter_type = unsigned_type_for (type), high, low;
> -  /* Case: i-- < 10.  */
> -  if (integer_zerop (iv1->step))
> -    {
> -      /* TODO: Should handle case in which abs(step) != 1.  */
> -      if (!integer_minus_onep (iv0->step))
> -       return false;
> -      /* Give up on infinite loop.  */
> -      if (*code == LE_EXPR
> -         && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
> -       return false;
> -      high = fold_build2 (PLUS_EXPR, niter_type,
> -                         fold_convert (niter_type, iv0->base),
> -                         build_int_cst (niter_type, 1));
> -      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
> -    }
> -  else if (integer_zerop (iv0->step))
> -    {
> -      /* TODO: Should handle case in which abs(step) != 1.  */
> -      if (!integer_onep (iv1->step))
> -       return false;
> -      /* Give up on infinite loop.  */
> -      if (*code == LE_EXPR
> -         && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
> -       return false;
> -      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
> -      low = fold_build2 (MINUS_EXPR, niter_type,
> -                        fold_convert (niter_type, iv1->base),
> -                        build_int_cst (niter_type, 1));
> -    }
> -  else
> -    gcc_unreachable ();
> -
> -  iv0->base = low;
> -  iv0->step = fold_convert (niter_type, integer_one_node);
> -  iv1->base = high;
> -  iv1->step = build_int_cst (niter_type, 0);
> -  *code = NE_EXPR;
> -  return true;
> -}
> -
>  /* Determine the number of iterations according to condition (for staying
>     inside loop) which compares two induction variables using comparison
>     operator CODE.  The induction variable on left side of the comparison
> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>        return true;
>      }
>
> -  /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
> -     adjusting iv0, iv1 and code.  */
> -  if (code != NE_EXPR
> -      && (tree_int_cst_sign_bit (iv0->step)
> -         || (!integer_zerop (iv1->step)
> -             && !tree_int_cst_sign_bit (iv1->step)))
> -      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
> -    return false;
> -
>    /* OK, now we know we have a senseful loop.  Handle several cases, depending
>       on what comparison operator is used.  */
>    bound_difference (loop, iv1->base, iv0->base, &bnds);
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
  2021-07-01  7:22 ` Bin.Cheng
@ 2021-07-01 11:32   ` guojiufu
  0 siblings, 0 replies; 8+ messages in thread
From: guojiufu @ 2021-07-01 11:32 UTC (permalink / raw)
  To: Bin.Cheng
  Cc: gcc-patches List, Richard Biener, amker, Segher Boessenkool,
	jlaw, Bill Schmidt, dje.gcc

On 2021-07-01 15:22, Bin.Cheng wrote:
> On Thu, Jul 1, 2021 at 10:06 AM Jiufu Guo via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>> 
>> For code like:
>> unsigned foo(unsigned val, unsigned start)
>> {
>>   unsigned cnt = 0;
>>   for (unsigned i = start; i > val; ++i)
>>     cnt++;
>>   return cnt;
>> }
>> 
>> The number of iterations should be about UINT_MAX - start.
>> 
>> There is function adjust_cond_for_loop_until_wrap which
>> handles similar work for const bases.
>> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> function number_of_iterations_cond/number_of_iterations_lt
>> to analyze number of iterations for this kind of loop.
>> 
>> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>> 
>> gcc/ChangeLog:
>> 
>>         PR tree-optimization/101145
>>         * tree-ssa-loop-niter.c
>>         (number_of_iterations_until_wrap): New function.
>>         (number_of_iterations_lt): Invoke above function.
>>         (adjust_cond_for_loop_until_wrap):
>>         Merge to number_of_iterations_until_wrap.
>>         (number_of_iterations_cond): Update invokes for
>>         adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>         PR tree-optimization/101145
>>         * gcc.dg/vect/pr101145.c: New test.
>>         * gcc.dg/vect/pr101145.inc: New test.
>>         * gcc.dg/vect/pr101145_1.c: New test.
>>         * gcc.dg/vect/pr101145_2.c: New test.
>>         * gcc.dg/vect/pr101145_3.c: New test.
>> ---
>>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 
>> +++++++++++++++++++++++++
>>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
>>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>>  gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
>>  6 files changed, 380 insertions(+), 65 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> 
> 
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index b5add827018..06db6a36ef8 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv 
>> *iv0, affine_iv *iv1,
>>      }
>>  }
>> 
>> +/* Determines number of iterations of loop whose ending condition
>> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
>> +   The number of iterations is stored to NITER.  */
>> +
>> +static bool
>> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv 
>> *iv0,
>> +                                affine_iv *iv1, class tree_niter_desc 
>> *niter)
>> +{
>> +  tree niter_type = unsigned_type_for (type);
>> +  tree max, min;
>> +
>> +  if (POINTER_TYPE_P (type))
>> +    {
>> +      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> +      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> +    }
>> +  else
>> +    {
>> +      max = TYPE_MAX_VALUE (type);
>> +      min = TYPE_MIN_VALUE (type);
>> +    }
>> +
>> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> +  tree step;
>> +
>> +  /* n < {base, C}. */
>> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == 
>> INTEGER_CST
>> +      && !tree_int_cst_sign_bit (iv1->step))
>> +    {
>> +      step = iv1->step;
>> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, 
>> iv1->base);
> max/iv1->base could be of pointer type, not sure if this is canonical 
> though.
Thanks.  Pointer needs careful attention.  I added case pr101145_3.c for 
pointer,
as test, the iteration number is 7: 0xffffffffffffffe4 - 
0xffffffffffffffff,
where pointer type is pointer to int: "int *".  It works as expected.
I notice in number_of_iterations_lt, there are code likes:
     delta = fold_build2 (MINUS_EXPR, niter_type,
                          fold_convert (niter_type, iv1->base),
                          fold_convert (niter_type, iv0->base));
This would also be ok.

> 
>> +      if (TREE_CODE (iv1->base) == INTEGER_CST)
>> +       low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> +      else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> +       low = iv0->base;
>> +    }
>> +  /* {base, -C} < n. */
>> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
>> +          && tree_int_cst_sign_bit (iv0->step) && integer_zerop 
>> (iv1->step))
>> +    {
>> +      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), 
>> iv0->step);
>> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, 
>> min);
>> +      if (TREE_CODE (iv0->base) == INTEGER_CST)
>> +       high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> +      else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> +       high = iv1->base;
>> +    }
>> +  else
>> +    return false;
>> +
>> +  /* (delta + step - 1) / step */
>> +  step = fold_convert (niter_type, step);
>> +  niter->niter = fold_convert (niter_type, niter->niter);
>> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, 
>> step);
>> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, 
>> niter->niter, step);
>> +
>> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> +  m = fold_convert (niter_type, m);
>> +  mpz_t mstep, tmp, mmax;
>> +  mpz_init (mstep);
>> +  mpz_init (tmp);
>> +  mpz_init (mmax);
>> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> +  mpz_add (tmp, mmax, mstep);
>> +  mpz_sub_ui (tmp, tmp, 1);
>> +  mpz_fdiv_q (tmp, tmp, mstep);
>> +  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, 
>> false),
>> +                                TYPE_SIGN (niter_type));
> This computation is similar to function number_of_iterations_lt, could
> we factor it out into an independent function?
Thanks!  Will update as you said.

> 
>> +  mpz_clear (mstep);
>> +  mpz_clear (tmp);
>> +
>> +  niter->may_be_zero
>> +    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
> If iv0->base and iv1->base are constant and iv1->base <= iv0->base,
> the number of iteration is actually zero, but here we rely on
> may_be_zero (== true), which loses information.  Could we specially
> handle this case and do a fast return?
As tests, if iv1->base <= iv0->base in user source code, this function
will not be called, even number_of_iterations_exit_assumptions will not
be called.  But it is still may be possible to run into this function if
some previous optimizations generate constant bases.
Then a fast return would be useful, I will update the patch.

> 
> Could you test this on some more targets(x86, aarch64) please?  
> Otherwise LGTM.
I will have some tests on other targets. Hope there is no difference 
between
targets for this patch :)

BR,
Jiufu Guo

> 
> Thanks,
> bin
>> +
>> +  niter->control.no_overflow = false;
>> +
>> +  return true;
>> +}
>> +
>>  /* Determines number of iterations of loop whose ending condition
>>     is IV0 < IV1.  TYPE is the type of the iv.  The number of
>>     iterations is stored to NITER.  BNDS bounds the difference
>> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree 
>> type, affine_iv *iv0,
>>        niter->bound = iv0->base;
>>      }
>> 
>> +  /* {base, -C} < n,  or n < {base, C} */
>> +  if (tree_int_cst_sign_bit (iv0->step)
>> +      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit 
>> (iv1->step)))
>> +    return number_of_iterations_until_wrap (loop, type, iv0, iv1, 
>> niter);
>> +
>>    delta = fold_build2 (MINUS_EXPR, niter_type,
>>                        fold_convert (niter_type, iv1->base),
>>                        fold_convert (niter_type, iv0->base));
>> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>>      }
>>  }
>> 
>> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> -   the condition for loop-until-wrap cases.  For example:
>> -     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
>> -     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
>> -   Return true if condition is successfully adjusted.  */
>> -
>> -static bool
>> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code 
>> *code,
>> -                                affine_iv *iv1)
>> -{
>> -  /* Only support simple cases for the moment.  */
>> -  if (TREE_CODE (iv0->base) != INTEGER_CST
>> -      || TREE_CODE (iv1->base) != INTEGER_CST)
>> -    return false;
>> -
>> -  tree niter_type = unsigned_type_for (type), high, low;
>> -  /* Case: i-- < 10.  */
>> -  if (integer_zerop (iv1->step))
>> -    {
>> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> -      if (!integer_minus_onep (iv0->step))
>> -       return false;
>> -      /* Give up on infinite loop.  */
>> -      if (*code == LE_EXPR
>> -         && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> -       return false;
>> -      high = fold_build2 (PLUS_EXPR, niter_type,
>> -                         fold_convert (niter_type, iv0->base),
>> -                         build_int_cst (niter_type, 1));
>> -      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> -    }
>> -  else if (integer_zerop (iv0->step))
>> -    {
>> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> -      if (!integer_onep (iv1->step))
>> -       return false;
>> -      /* Give up on infinite loop.  */
>> -      if (*code == LE_EXPR
>> -         && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> -       return false;
>> -      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> -      low = fold_build2 (MINUS_EXPR, niter_type,
>> -                        fold_convert (niter_type, iv1->base),
>> -                        build_int_cst (niter_type, 1));
>> -    }
>> -  else
>> -    gcc_unreachable ();
>> -
>> -  iv0->base = low;
>> -  iv0->step = fold_convert (niter_type, integer_one_node);
>> -  iv1->base = high;
>> -  iv1->step = build_int_cst (niter_type, 0);
>> -  *code = NE_EXPR;
>> -  return true;
>> -}
>> -
>>  /* Determine the number of iterations according to condition (for 
>> staying
>>     inside loop) which compares two induction variables using 
>> comparison
>>     operator CODE.  The induction variable on left side of the 
>> comparison
>> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>>        return true;
>>      }
>> 
>> -  /* Handle special case loops: while (i-- < 10) and while (10 < i++) 
>> by
>> -     adjusting iv0, iv1 and code.  */
>> -  if (code != NE_EXPR
>> -      && (tree_int_cst_sign_bit (iv0->step)
>> -         || (!integer_zerop (iv1->step)
>> -             && !tree_int_cst_sign_bit (iv1->step)))
>> -      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> -    return false;
>> -
>>    /* OK, now we know we have a senseful loop.  Handle several cases, 
>> depending
>>       on what comparison operator is used.  */
>>    bound_difference (loop, iv1->base, iv0->base, &bnds);
>> --
>> 2.17.1
>> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
  2021-07-01  2:05 [PATCH] Analyze niter for until-wrap condition [PR101145] Jiufu Guo
  2021-07-01  7:22 ` Bin.Cheng
@ 2021-07-01 12:35 ` Richard Biener
  2021-07-01 14:12   ` guojiufu
  2021-07-02  7:55   ` guojiufu
  1 sibling, 2 replies; 8+ messages in thread
From: Richard Biener @ 2021-07-01 12:35 UTC (permalink / raw)
  To: Jiufu Guo; +Cc: gcc-patches, wschmidt, segher, dje.gcc, jlaw, amker

On Thu, 1 Jul 2021, Jiufu Guo wrote:

> For code like:
> unsigned foo(unsigned val, unsigned start)
> {
>   unsigned cnt = 0;
>   for (unsigned i = start; i > val; ++i)
>     cnt++;
>   return cnt;
> }
> 
> The number of iterations should be about UINT_MAX - start.

For

unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i >= val; ++i)
    cnt++;
  return cnt;
}

and val == 0 the loop never terminates.  I don't see anywhere
in the patch that you disregard GE_EXPR and I remember
the code handles GE as well as GT?  From a quick look this is
also not covered by a testcase you add - not exactly sure
how it would materialize in a miscompilation.

> There is function adjust_cond_for_loop_until_wrap which
> handles similar work for const bases.
> Like adjust_cond_for_loop_until_wrap, this patch enhance
> function number_of_iterations_cond/number_of_iterations_lt
> to analyze number of iterations for this kind of loop.
> 
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/101145
> 	* tree-ssa-loop-niter.c
> 	(number_of_iterations_until_wrap): New function.
> 	(number_of_iterations_lt): Invoke above function.
> 	(adjust_cond_for_loop_until_wrap):
> 	Merge to number_of_iterations_until_wrap.
> 	(number_of_iterations_cond): Update invokes for
> 	adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/101145
> 	* gcc.dg/vect/pr101145.c: New test.
> 	* gcc.dg/vect/pr101145.inc: New test.
> 	* gcc.dg/vect/pr101145_1.c: New test.
> 	* gcc.dg/vect/pr101145_2.c: New test.
> 	* gcc.dg/vect/pr101145_3.c: New test.
> ---
>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 +++++++++++++++++++++++++
>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>  gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
>  6 files changed, 380 insertions(+), 65 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c
> new file mode 100644
> index 00000000000..74031b031cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
> @@ -0,0 +1,187 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#include <limits.h>
> +
> +unsigned __attribute__ ((noinline))
> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  while (n < ++l)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
> +{
> +  while (UINT_MAX - 64 < ++l)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  l = UINT_MAX - 32;
> +  while (n < ++l)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  while (n <= ++l)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{  // infininate 
> +  while (0 <= ++l)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  //no loop
> +  l = UINT_MAX;
> +  while (n < ++l)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  while (--l < n)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
> +{
> +  while (--l < 64)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  l = 32;
> +  while (--l < n)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +
> +int a[3200], b[3200];
> +int fail;
> +
> +int
> +main ()
> +{
> +  unsigned l, n;
> +  unsigned res;
> +  /* l > n*/
> +  n = UINT_MAX - 64;
> +  l = n + 32;
> +  res = foo (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  l = n;
> +  res = foo (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  l = n - 1;
> +  res = foo (a, b, l, n);
> +  if (res != l + 1)
> +    fail++;
> +  
> +  l = n - 32;
> +  res = foo (a, b, l, n);
> +  if (res != l + 1)
> +    fail++;
> +
> +  l = UINT_MAX;
> +  res = foo (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  l = n + 32;
> +  res = foo_1 (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  l = n + 32;
> +  res = foo_2 (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  l = n;
> +  res = foo_3 (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  l = n - 1;
> +  res = foo_3 (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  l = n - 2;
> +  res = foo_3 (a, b, l, n);
> +  if (res != l + 1)
> +    fail++;
> +
> +  res = foo_5 (a, b, l, n);
> +  if (res != 0)
> +    fail++;
> +
> +  n = 64;
> +  l = n - 32;
> +  res = bar (a, b, l, n);
> +  res++;
> +  if (res != 0)
> +    fail++;
> +
> +  l = n;
> +  res = bar (a, b, l, n);
> +  res++;
> +  if (res != 0)
> +    fail++;
> +
> +  l = n + 1;
> +  res = bar (a, b, l, n);
> +  res++;
> +  if (res != l)
> +    fail++;
> +
> +  l = 0;
> +  res = bar (a, b, l, n);
> +  res++;
> +  if (res != l)
> +    fail++;
> +
> +  l = 32;
> +  res = bar_1 (a, b, l, n);
> +  res++;
> +  if (res != 0)
> +    fail++;  
> +
> +  res = bar_1 (a, b, l, n);
> +  res++;
> +  if (res != 0)
> +    fail++;  
> +
> +  if (fail)
> +    __builtin_abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> new file mode 100644
> index 00000000000..6eed3fa8aca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> @@ -0,0 +1,63 @@
> +TYPE __attribute__ ((noinline))
> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> +{
> +  for (l = L_BASE; n < l; l += C)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +TYPE __attribute__ ((noinline))
> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> +{
> +  for (l = L_BASE_DOWN; l < n; l -= C)
> +    *a++ = *b++ + 1;
> +  return l;
> +}
> +
> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
> +
> +int a[1000], b[1000];
> +int fail;
> +
> +int
> +main ()
> +{
> +  TYPE res;
> +  TYPE l;
> +  TYPE n;
> +  n = N_BASE;
> +  l = n - C;
> +  res = foo_sign (a, b, l, n);
> +  if (res != l)
> +    fail++;
> +
> +  l = n;
> +  res = foo_sign (a, b, l, n);
> +  if (res != l)
> +    fail++;
> +
> +  l = n + C;
> +  res = foo_sign (a, b, l, n);
> +  if (neq ((res - MIN) / C, 0))
> +    fail++;
> +
> +  n = N_BASE_DOWN;
> +  l = n - C;
> +  res = bar_sign (a, b, l, n);
> +  if (neq ((MAX - res) / C, 0))
> +    fail++;
> +
> +  l = n;
> +  res = bar_sign (a, b, l, n);
> +  if (res != l)
> +    fail++;
> +
> +  l = n + C;
> +  res = bar_sign (a, b, l, n);
> +  if (res != l)
> +    fail++;
> +
> +  if (fail)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> new file mode 100644
> index 00000000000..94f6b99b893
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> @@ -0,0 +1,15 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#define TYPE signed char
> +#define MIN -128
> +#define MAX 127
> +#define N_BASE (MAX - 32)
> +#define N_BASE_DOWN (MIN + 32)
> +
> +#define C 3
> +#define L_BASE l
> +#define L_BASE_DOWN l
> +
> +#include "pr101145.inc"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> new file mode 100644
> index 00000000000..d3cfc9e01e1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> @@ -0,0 +1,15 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#define TYPE unsigned char
> +#define MIN 0
> +#define MAX 255
> +#define N_BASE (MAX - 32 + 1)
> +#define N_BASE_DOWN (MIN + 32)
> +
> +#define C 2
> +#define L_BASE l
> +#define L_BASE_DOWN l
> +
> +#include "pr101145.inc"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> new file mode 100644
> index 00000000000..e2cf9b1f7e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> @@ -0,0 +1,15 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#define TYPE int *
> +#define MIN ((TYPE)0)
> +#define MAX ((TYPE)((long long)-1))
> +#define N_BASE ((TYPE) (-32))
> +#define N_BASE_DOWN (MIN + 32)
> +
> +#define C 1
> +#define L_BASE l
> +#define L_BASE_DOWN l
> +
> +#include "pr101145.inc"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index b5add827018..06db6a36ef8 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
>      }
>  }
>  
> +/* Determines number of iterations of loop whose ending condition
> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
> +   The number of iterations is stored to NITER.  */
> +
> +static bool
> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
> +				 affine_iv *iv1, class tree_niter_desc *niter)
> +{
> +  tree niter_type = unsigned_type_for (type);
> +  tree max, min;
> +
> +  if (POINTER_TYPE_P (type))
> +    {
> +      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
> +      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
> +    }
> +  else
> +    {
> +      max = TYPE_MAX_VALUE (type);
> +      min = TYPE_MIN_VALUE (type);
> +    }

It would be cheaper to use wide_int here - wi::min/max_value 
(TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
code below.

> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
> +  tree step;
> +
> +  /* n < {base, C}. */
> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
> +      && !tree_int_cst_sign_bit (iv1->step))
> +    {
> +      step = iv1->step;
> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
> +      if (TREE_CODE (iv1->base) == INTEGER_CST)
> +	low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
> +      else if (TREE_CODE (iv0->base) == INTEGER_CST)
> +	low = iv0->base;
> +    }
> +  /* {base, -C} < n. */
> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
> +	   && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
> +    {
> +      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
> +      if (TREE_CODE (iv0->base) == INTEGER_CST)
> +	high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
> +      else if (TREE_CODE (iv1->base) == INTEGER_CST)
> +	high = iv1->base;
> +    }
> +  else
> +    return false;
> +
> +  /* (delta + step - 1) / step */
> +  step = fold_convert (niter_type, step);
> +  niter->niter = fold_convert (niter_type, niter->niter);
> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step);
> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, step);
> +
> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
> +  m = fold_convert (niter_type, m);
> +  mpz_t mstep, tmp, mmax;
> +  mpz_init (mstep);
> +  mpz_init (tmp);
> +  mpz_init (mmax);
> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
> +  mpz_add (tmp, mmax, mstep);
> +  mpz_sub_ui (tmp, tmp, 1);
> +  mpz_fdiv_q (tmp, tmp, mstep);
> +  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
> +				 TYPE_SIGN (niter_type));
> +  mpz_clear (mstep);
> +  mpz_clear (tmp);

You forget to clear mmax?  I wonder why you need to involve GMP here
at all since this is just add/sub/divide - you could use
widest_int directly I think.  At least the back-and-forth between
trees, wide-int and mpz looks odd.

Richard.

> +  niter->may_be_zero
> +    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
> +
> +  niter->control.no_overflow = false;
> +
> +  return true;
> +}
> +
>  /* Determines number of iterations of loop whose ending condition
>     is IV0 < IV1.  TYPE is the type of the iv.  The number of
>     iterations is stored to NITER.  BNDS bounds the difference
> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
>        niter->bound = iv0->base;
>      }
>  
> +  /* {base, -C} < n,  or n < {base, C} */
> +  if (tree_int_cst_sign_bit (iv0->step)
> +      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
> +    return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
> +
>    delta = fold_build2 (MINUS_EXPR, niter_type,
>  		       fold_convert (niter_type, iv1->base),
>  		       fold_convert (niter_type, iv0->base));
> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>      }
>  }
>  
> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
> -   the condition for loop-until-wrap cases.  For example:
> -     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
> -     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
> -   Return true if condition is successfully adjusted.  */
> -
> -static bool
> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
> -				 affine_iv *iv1)
> -{
> -  /* Only support simple cases for the moment.  */
> -  if (TREE_CODE (iv0->base) != INTEGER_CST
> -      || TREE_CODE (iv1->base) != INTEGER_CST)
> -    return false;
> -
> -  tree niter_type = unsigned_type_for (type), high, low;
> -  /* Case: i-- < 10.  */
> -  if (integer_zerop (iv1->step))
> -    {
> -      /* TODO: Should handle case in which abs(step) != 1.  */
> -      if (!integer_minus_onep (iv0->step))
> -	return false;
> -      /* Give up on infinite loop.  */
> -      if (*code == LE_EXPR
> -	  && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
> -	return false;
> -      high = fold_build2 (PLUS_EXPR, niter_type,
> -			  fold_convert (niter_type, iv0->base),
> -			  build_int_cst (niter_type, 1));
> -      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
> -    }
> -  else if (integer_zerop (iv0->step))
> -    {
> -      /* TODO: Should handle case in which abs(step) != 1.  */
> -      if (!integer_onep (iv1->step))
> -	return false;
> -      /* Give up on infinite loop.  */
> -      if (*code == LE_EXPR
> -	  && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
> -	return false;
> -      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
> -      low = fold_build2 (MINUS_EXPR, niter_type,
> -			 fold_convert (niter_type, iv1->base),
> -			 build_int_cst (niter_type, 1));
> -    }
> -  else
> -    gcc_unreachable ();
> -
> -  iv0->base = low;
> -  iv0->step = fold_convert (niter_type, integer_one_node);
> -  iv1->base = high;
> -  iv1->step = build_int_cst (niter_type, 0);
> -  *code = NE_EXPR;
> -  return true;
> -}
> -
>  /* Determine the number of iterations according to condition (for staying
>     inside loop) which compares two induction variables using comparison
>     operator CODE.  The induction variable on left side of the comparison
> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>        return true;
>      }
>  
> -  /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
> -     adjusting iv0, iv1 and code.  */
> -  if (code != NE_EXPR
> -      && (tree_int_cst_sign_bit (iv0->step)
> -	  || (!integer_zerop (iv1->step)
> -	      && !tree_int_cst_sign_bit (iv1->step)))
> -      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
> -    return false;
> -
>    /* OK, now we know we have a senseful loop.  Handle several cases, depending
>       on what comparison operator is used.  */
>    bound_difference (loop, iv1->base, iv0->base, &bnds);
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
  2021-07-01 12:35 ` Richard Biener
@ 2021-07-01 14:12   ` guojiufu
  2021-07-02  0:51     ` Bin.Cheng
  2021-07-02  7:55   ` guojiufu
  1 sibling, 1 reply; 8+ messages in thread
From: guojiufu @ 2021-07-01 14:12 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, wschmidt, segher, dje.gcc, jlaw, amker

On 2021-07-01 20:35, Richard Biener wrote:
> On Thu, 1 Jul 2021, Jiufu Guo wrote:
> 
>> For code like:
>> unsigned foo(unsigned val, unsigned start)
>> {
>>   unsigned cnt = 0;
>>   for (unsigned i = start; i > val; ++i)
>>     cnt++;
>>   return cnt;
>> }
>> 
>> The number of iterations should be about UINT_MAX - start.
> 
> For
> 
> unsigned foo(unsigned val, unsigned start)
> {
>   unsigned cnt = 0;
>   for (unsigned i = start; i >= val; ++i)
>     cnt++;
>   return cnt;
> }
> 
> and val == 0 the loop never terminates.  I don't see anywhere
> in the patch that you disregard GE_EXPR and I remember
> the code handles GE as well as GT?  From a quick look this is
> also not covered by a testcase you add - not exactly sure
> how it would materialize in a miscompilation.

In number_of_iterations_cond, there is code:
    if (code == GE_EXPR || code == GT_EXPR
	|| (code == NE_EXPR && integer_zerop (iv0->step)))
       {
         std::swap (iv0, iv1);
         code = swap_tree_comparison (code);
       }
It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
and LE (val <= i) is converted to LT (val - 1 < i).
So, the code is added to number_of_iterations_lt.

But, this patch leads mis-compilation for unsigned "i >= val" as
above transforms: converting LE (val <= i) to LT (val - 1 < i)
seems not appropriate (e.g where val=0).
Thanks for pointing out this!!!

I would investigate a way to handle this correctly.
A possible way maybe just to return false for this kind of LE.

Any suggestions?

> 
>> There is function adjust_cond_for_loop_until_wrap which
>> handles similar work for const bases.
>> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> function number_of_iterations_cond/number_of_iterations_lt
>> to analyze number of iterations for this kind of loop.
>> 
>> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>> 
>> gcc/ChangeLog:
>> 
>> 	PR tree-optimization/101145
>> 	* tree-ssa-loop-niter.c
>> 	(number_of_iterations_until_wrap): New function.
>> 	(number_of_iterations_lt): Invoke above function.
>> 	(adjust_cond_for_loop_until_wrap):
>> 	Merge to number_of_iterations_until_wrap.
>> 	(number_of_iterations_cond): Update invokes for
>> 	adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	PR tree-optimization/101145
>> 	* gcc.dg/vect/pr101145.c: New test.
>> 	* gcc.dg/vect/pr101145.inc: New test.
>> 	* gcc.dg/vect/pr101145_1.c: New test.
>> 	* gcc.dg/vect/pr101145_2.c: New test.
>> 	* gcc.dg/vect/pr101145_3.c: New test.
>> ---
>>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 
>> +++++++++++++++++++++++++
>>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
>>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>>  gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
>>  6 files changed, 380 insertions(+), 65 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> 
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> new file mode 100644
>> index 00000000000..74031b031cf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> @@ -0,0 +1,187 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#include <limits.h>
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  while (n < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
>> unsigned)
>> +{
>> +  while (UINT_MAX - 64 < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  l = UINT_MAX - 32;
>> +  while (n < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  while (n <= ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{  // infininate
>> +  while (0 <= ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  //no loop
>> +  l = UINT_MAX;
>> +  while (n < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  while (--l < n)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
>> unsigned)
>> +{
>> +  while (--l < 64)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  l = 32;
>> +  while (--l < n)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +
>> +int a[3200], b[3200];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> +  unsigned l, n;
>> +  unsigned res;
>> +  /* l > n*/
>> +  n = UINT_MAX - 64;
>> +  l = n + 32;
>> +  res = foo (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = foo (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n - 1;
>> +  res = foo (a, b, l, n);
>> +  if (res != l + 1)
>> +    fail++;
>> +
>> +  l = n - 32;
>> +  res = foo (a, b, l, n);
>> +  if (res != l + 1)
>> +    fail++;
>> +
>> +  l = UINT_MAX;
>> +  res = foo (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n + 32;
>> +  res = foo_1 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n + 32;
>> +  res = foo_2 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = foo_3 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n - 1;
>> +  res = foo_3 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n - 2;
>> +  res = foo_3 (a, b, l, n);
>> +  if (res != l + 1)
>> +    fail++;
>> +
>> +  res = foo_5 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  n = 64;
>> +  l = n - 32;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n + 1;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = 0;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = 32;
>> +  res = bar_1 (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  res = bar_1 (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  if (fail)
>> +    __builtin_abort ();
>> +  return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } 
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc 
>> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> new file mode 100644
>> index 00000000000..6eed3fa8aca
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> @@ -0,0 +1,63 @@
>> +TYPE __attribute__ ((noinline))
>> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> +  for (l = L_BASE; n < l; l += C)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +TYPE __attribute__ ((noinline))
>> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> +  for (l = L_BASE_DOWN; l < n; l -= C)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
>> +
>> +int a[1000], b[1000];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> +  TYPE res;
>> +  TYPE l;
>> +  TYPE n;
>> +  n = N_BASE;
>> +  l = n - C;
>> +  res = foo_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = foo_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = n + C;
>> +  res = foo_sign (a, b, l, n);
>> +  if (neq ((res - MIN) / C, 0))
>> +    fail++;
>> +
>> +  n = N_BASE_DOWN;
>> +  l = n - C;
>> +  res = bar_sign (a, b, l, n);
>> +  if (neq ((MAX - res) / C, 0))
>> +    fail++;
>> +
>> +  l = n;
>> +  res = bar_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = n + C;
>> +  res = bar_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  if (fail)
>> +    __builtin_abort ();
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> new file mode 100644
>> index 00000000000..94f6b99b893
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE signed char
>> +#define MIN -128
>> +#define MAX 127
>> +#define N_BASE (MAX - 32)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 3
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } 
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> new file mode 100644
>> index 00000000000..d3cfc9e01e1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE unsigned char
>> +#define MIN 0
>> +#define MAX 255
>> +#define N_BASE (MAX - 32 + 1)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 2
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } 
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> new file mode 100644
>> index 00000000000..e2cf9b1f7e6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE int *
>> +#define MIN ((TYPE)0)
>> +#define MAX ((TYPE)((long long)-1))
>> +#define N_BASE ((TYPE) (-32))
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 1
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } 
>> } */
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index b5add827018..06db6a36ef8 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv 
>> *iv0, affine_iv *iv1,
>>      }
>>  }
>> 
>> +/* Determines number of iterations of loop whose ending condition
>> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
>> +   The number of iterations is stored to NITER.  */
>> +
>> +static bool
>> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv 
>> *iv0,
>> +				 affine_iv *iv1, class tree_niter_desc *niter)
>> +{
>> +  tree niter_type = unsigned_type_for (type);
>> +  tree max, min;
>> +
>> +  if (POINTER_TYPE_P (type))
>> +    {
>> +      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> +      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> +    }
>> +  else
>> +    {
>> +      max = TYPE_MAX_VALUE (type);
>> +      min = TYPE_MIN_VALUE (type);
>> +    }
> 
> It would be cheaper to use wide_int here - wi::min/max_value
> (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
> code below.

Thanks!  I think you mean to use wide_int for "tree high = max, low = 
min,".
I would check and update the patch to use wide_int.

The MAX/MIN would be in type 'tree' when binding to MINUS_EXPR with 
iv0/iv1->base,
for this, we may still need to use "TYPE_MAX/MIN_VALUE (type)", right?

> 
>> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> +  tree step;
>> +
>> +  /* n < {base, C}. */
>> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == 
>> INTEGER_CST
>> +      && !tree_int_cst_sign_bit (iv1->step))
>> +    {
>> +      step = iv1->step;
>> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, 
>> iv1->base);
>> +      if (TREE_CODE (iv1->base) == INTEGER_CST)
>> +	low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> +      else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> +	low = iv0->base;
>> +    }
>> +  /* {base, -C} < n. */
>> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
>> +	   && tree_int_cst_sign_bit (iv0->step) && integer_zerop 
>> (iv1->step))
>> +    {
>> +      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), 
>> iv0->step);
>> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, 
>> min);
>> +      if (TREE_CODE (iv0->base) == INTEGER_CST)
>> +	high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> +      else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> +	high = iv1->base;
>> +    }
>> +  else
>> +    return false;
>> +
>> +  /* (delta + step - 1) / step */
>> +  step = fold_convert (niter_type, step);
>> +  niter->niter = fold_convert (niter_type, niter->niter);
>> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, 
>> step);
>> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, 
>> niter->niter, step);
>> +
>> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> +  m = fold_convert (niter_type, m);
>> +  mpz_t mstep, tmp, mmax;
>> +  mpz_init (mstep);
>> +  mpz_init (tmp);
>> +  mpz_init (mmax);
>> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> +  mpz_add (tmp, mmax, mstep);
>> +  mpz_sub_ui (tmp, tmp, 1);
>> +  mpz_fdiv_q (tmp, tmp, mstep);
>> +  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, 
>> false),
>> +				 TYPE_SIGN (niter_type));
>> +  mpz_clear (mstep);
>> +  mpz_clear (tmp);
> 
> You forget to clear mmax?  I wonder why you need to involve GMP here
Oh, thanks for pointing out this :) need to clear mmax.

> at all since this is just add/sub/divide - you could use
> widest_int directly I think.  At least the back-and-forth between
> trees, wide-int and mpz looks odd.
I once use 'trees' to do the calculation, while I encounter the issue
that the may not hold the precision for some cases:
"mmax" is MAX(1<<64-1); step is 4, then mmax + 4 becomes 3.

I would check to use widest_int (e.g. long[4]) which seems enough to 
hold all types.


BR,
Jiufu Guo.

> 
> Richard.
> 
>> +  niter->may_be_zero
>> +    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
>> +
>> +  niter->control.no_overflow = false;
>> +
>> +  return true;
>> +}
>> +
>>  /* Determines number of iterations of loop whose ending condition
>>     is IV0 < IV1.  TYPE is the type of the iv.  The number of
>>     iterations is stored to NITER.  BNDS bounds the difference
>> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree 
>> type, affine_iv *iv0,
>>        niter->bound = iv0->base;
>>      }
>> 
>> +  /* {base, -C} < n,  or n < {base, C} */
>> +  if (tree_int_cst_sign_bit (iv0->step)
>> +      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit 
>> (iv1->step)))
>> +    return number_of_iterations_until_wrap (loop, type, iv0, iv1, 
>> niter);
>> +
>>    delta = fold_build2 (MINUS_EXPR, niter_type,
>>  		       fold_convert (niter_type, iv1->base),
>>  		       fold_convert (niter_type, iv0->base));
>> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>>      }
>>  }
>> 
>> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> -   the condition for loop-until-wrap cases.  For example:
>> -     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
>> -     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
>> -   Return true if condition is successfully adjusted.  */
>> -
>> -static bool
>> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code 
>> *code,
>> -				 affine_iv *iv1)
>> -{
>> -  /* Only support simple cases for the moment.  */
>> -  if (TREE_CODE (iv0->base) != INTEGER_CST
>> -      || TREE_CODE (iv1->base) != INTEGER_CST)
>> -    return false;
>> -
>> -  tree niter_type = unsigned_type_for (type), high, low;
>> -  /* Case: i-- < 10.  */
>> -  if (integer_zerop (iv1->step))
>> -    {
>> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> -      if (!integer_minus_onep (iv0->step))
>> -	return false;
>> -      /* Give up on infinite loop.  */
>> -      if (*code == LE_EXPR
>> -	  && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> -	return false;
>> -      high = fold_build2 (PLUS_EXPR, niter_type,
>> -			  fold_convert (niter_type, iv0->base),
>> -			  build_int_cst (niter_type, 1));
>> -      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> -    }
>> -  else if (integer_zerop (iv0->step))
>> -    {
>> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> -      if (!integer_onep (iv1->step))
>> -	return false;
>> -      /* Give up on infinite loop.  */
>> -      if (*code == LE_EXPR
>> -	  && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> -	return false;
>> -      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> -      low = fold_build2 (MINUS_EXPR, niter_type,
>> -			 fold_convert (niter_type, iv1->base),
>> -			 build_int_cst (niter_type, 1));
>> -    }
>> -  else
>> -    gcc_unreachable ();
>> -
>> -  iv0->base = low;
>> -  iv0->step = fold_convert (niter_type, integer_one_node);
>> -  iv1->base = high;
>> -  iv1->step = build_int_cst (niter_type, 0);
>> -  *code = NE_EXPR;
>> -  return true;
>> -}
>> -
>>  /* Determine the number of iterations according to condition (for 
>> staying
>>     inside loop) which compares two induction variables using 
>> comparison
>>     operator CODE.  The induction variable on left side of the 
>> comparison
>> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>>        return true;
>>      }
>> 
>> -  /* Handle special case loops: while (i-- < 10) and while (10 < i++) 
>> by
>> -     adjusting iv0, iv1 and code.  */
>> -  if (code != NE_EXPR
>> -      && (tree_int_cst_sign_bit (iv0->step)
>> -	  || (!integer_zerop (iv1->step)
>> -	      && !tree_int_cst_sign_bit (iv1->step)))
>> -      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> -    return false;
>> -
>>    /* OK, now we know we have a senseful loop.  Handle several cases, 
>> depending
>>       on what comparison operator is used.  */
>>    bound_difference (loop, iv1->base, iv0->base, &bnds);
>> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
  2021-07-01 14:12   ` guojiufu
@ 2021-07-02  0:51     ` Bin.Cheng
  2021-07-02  4:06       ` guojiufu
  0 siblings, 1 reply; 8+ messages in thread
From: Bin.Cheng @ 2021-07-02  0:51 UTC (permalink / raw)
  To: guojiufu
  Cc: Richard Biener, Segher Boessenkool, amker, gcc-patches List,
	Bill Schmidt, jlaw, dje.gcc

On Thu, Jul 1, 2021 at 10:15 PM guojiufu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On 2021-07-01 20:35, Richard Biener wrote:
> > On Thu, 1 Jul 2021, Jiufu Guo wrote:
> >
> >> For code like:
> >> unsigned foo(unsigned val, unsigned start)
> >> {
> >>   unsigned cnt = 0;
> >>   for (unsigned i = start; i > val; ++i)
> >>     cnt++;
> >>   return cnt;
> >> }
> >>
> >> The number of iterations should be about UINT_MAX - start.
> >
> > For
> >
> > unsigned foo(unsigned val, unsigned start)
> > {
> >   unsigned cnt = 0;
> >   for (unsigned i = start; i >= val; ++i)
> >     cnt++;
> >   return cnt;
> > }
> >
> > and val == 0 the loop never terminates.  I don't see anywhere
> > in the patch that you disregard GE_EXPR and I remember
> > the code handles GE as well as GT?  From a quick look this is
> > also not covered by a testcase you add - not exactly sure
> > how it would materialize in a miscompilation.
>
> In number_of_iterations_cond, there is code:
>     if (code == GE_EXPR || code == GT_EXPR
>         || (code == NE_EXPR && integer_zerop (iv0->step)))
>        {
>          std::swap (iv0, iv1);
>          code = swap_tree_comparison (code);
>        }
> It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
> and LE (val <= i) is converted to LT (val - 1 < i).
> So, the code is added to number_of_iterations_lt.
>
> But, this patch leads mis-compilation for unsigned "i >= val" as
> above transforms: converting LE (val <= i) to LT (val - 1 < i)
> seems not appropriate (e.g where val=0).
I don't know where the exact code is, but IIRC, number_of_iteration
handles boundary conditions when transforming <= into <.  You may
check it out.

> Thanks for pointing out this!!!
>
> I would investigate a way to handle this correctly.
> A possible way maybe just to return false for this kind of LE.
IIRC, it checks the boundary conditions, either returns false or
simply introduces more assumptions.
>
> Any suggestions?
>
> >
> >> There is function adjust_cond_for_loop_until_wrap which
> >> handles similar work for const bases.
> >> Like adjust_cond_for_loop_until_wrap, this patch enhance
> >> function number_of_iterations_cond/number_of_iterations_lt
> >> to analyze number of iterations for this kind of loop.
> >>
> >> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
> >>
> >> gcc/ChangeLog:
> >>
> >>      PR tree-optimization/101145
> >>      * tree-ssa-loop-niter.c
> >>      (number_of_iterations_until_wrap): New function.
> >>      (number_of_iterations_lt): Invoke above function.
> >>      (adjust_cond_for_loop_until_wrap):
> >>      Merge to number_of_iterations_until_wrap.
> >>      (number_of_iterations_cond): Update invokes for
> >>      adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>      PR tree-optimization/101145
> >>      * gcc.dg/vect/pr101145.c: New test.
> >>      * gcc.dg/vect/pr101145.inc: New test.
> >>      * gcc.dg/vect/pr101145_1.c: New test.
> >>      * gcc.dg/vect/pr101145_2.c: New test.
> >>      * gcc.dg/vect/pr101145_3.c: New test.
> >> ---
> >>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187
> >> +++++++++++++++++++++++++
> >>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
> >>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
> >>  gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
> >>  6 files changed, 380 insertions(+), 65 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> new file mode 100644
> >> index 00000000000..74031b031cf
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> @@ -0,0 +1,187 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#include <limits.h>
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  while (n < ++l)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
> >> unsigned)
> >> +{
> >> +  while (UINT_MAX - 64 < ++l)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  l = UINT_MAX - 32;
> >> +  while (n < ++l)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  while (n <= ++l)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{  // infininate
> >> +  while (0 <= ++l)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  //no loop
> >> +  l = UINT_MAX;
> >> +  while (n < ++l)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  while (--l < n)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
> >> unsigned)
> >> +{
> >> +  while (--l < 64)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  l = 32;
> >> +  while (--l < n)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +
> >> +int a[3200], b[3200];
> >> +int fail;
> >> +
> >> +int
> >> +main ()
> >> +{
> >> +  unsigned l, n;
> >> +  unsigned res;
> >> +  /* l > n*/
> >> +  n = UINT_MAX - 64;
> >> +  l = n + 32;
> >> +  res = foo (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n;
> >> +  res = foo (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n - 1;
> >> +  res = foo (a, b, l, n);
> >> +  if (res != l + 1)
> >> +    fail++;
> >> +
> >> +  l = n - 32;
> >> +  res = foo (a, b, l, n);
> >> +  if (res != l + 1)
> >> +    fail++;
> >> +
> >> +  l = UINT_MAX;
> >> +  res = foo (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n + 32;
> >> +  res = foo_1 (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n + 32;
> >> +  res = foo_2 (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n;
> >> +  res = foo_3 (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n - 1;
> >> +  res = foo_3 (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n - 2;
> >> +  res = foo_3 (a, b, l, n);
> >> +  if (res != l + 1)
> >> +    fail++;
> >> +
> >> +  res = foo_5 (a, b, l, n);
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  n = 64;
> >> +  l = n - 32;
> >> +  res = bar (a, b, l, n);
> >> +  res++;
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n;
> >> +  res = bar (a, b, l, n);
> >> +  res++;
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  l = n + 1;
> >> +  res = bar (a, b, l, n);
> >> +  res++;
> >> +  if (res != l)
> >> +    fail++;
> >> +
> >> +  l = 0;
> >> +  res = bar (a, b, l, n);
> >> +  res++;
> >> +  if (res != l)
> >> +    fail++;
> >> +
> >> +  l = 32;
> >> +  res = bar_1 (a, b, l, n);
> >> +  res++;
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  res = bar_1 (a, b, l, n);
> >> +  res++;
> >> +  if (res != 0)
> >> +    fail++;
> >> +
> >> +  if (fail)
> >> +    __builtin_abort ();
> >> +  return 0;
> >> +}
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" }
> >> } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> new file mode 100644
> >> index 00000000000..6eed3fa8aca
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> @@ -0,0 +1,63 @@
> >> +TYPE __attribute__ ((noinline))
> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> >> +{
> >> +  for (l = L_BASE; n < l; l += C)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +TYPE __attribute__ ((noinline))
> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> >> +{
> >> +  for (l = L_BASE_DOWN; l < n; l -= C)
> >> +    *a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
> >> +
> >> +int a[1000], b[1000];
> >> +int fail;
> >> +
> >> +int
> >> +main ()
> >> +{
> >> +  TYPE res;
> >> +  TYPE l;
> >> +  TYPE n;
> >> +  n = N_BASE;
> >> +  l = n - C;
> >> +  res = foo_sign (a, b, l, n);
> >> +  if (res != l)
> >> +    fail++;
> >> +
> >> +  l = n;
> >> +  res = foo_sign (a, b, l, n);
> >> +  if (res != l)
> >> +    fail++;
> >> +
> >> +  l = n + C;
> >> +  res = foo_sign (a, b, l, n);
> >> +  if (neq ((res - MIN) / C, 0))
> >> +    fail++;
> >> +
> >> +  n = N_BASE_DOWN;
> >> +  l = n - C;
> >> +  res = bar_sign (a, b, l, n);
> >> +  if (neq ((MAX - res) / C, 0))
> >> +    fail++;
> >> +
> >> +  l = n;
> >> +  res = bar_sign (a, b, l, n);
> >> +  if (res != l)
> >> +    fail++;
> >> +
> >> +  l = n + C;
> >> +  res = bar_sign (a, b, l, n);
> >> +  if (res != l)
> >> +    fail++;
> >> +
> >> +  if (fail)
> >> +    __builtin_abort ();
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> new file mode 100644
> >> index 00000000000..94f6b99b893
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> @@ -0,0 +1,15 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#define TYPE signed char
> >> +#define MIN -128
> >> +#define MAX 127
> >> +#define N_BASE (MAX - 32)
> >> +#define N_BASE_DOWN (MIN + 32)
> >> +
> >> +#define C 3
> >> +#define L_BASE l
> >> +#define L_BASE_DOWN l
> >> +
> >> +#include "pr101145.inc"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
> >> } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >> new file mode 100644
> >> index 00000000000..d3cfc9e01e1
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >> @@ -0,0 +1,15 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#define TYPE unsigned char
> >> +#define MIN 0
> >> +#define MAX 255
> >> +#define N_BASE (MAX - 32 + 1)
> >> +#define N_BASE_DOWN (MIN + 32)
> >> +
> >> +#define C 2
> >> +#define L_BASE l
> >> +#define L_BASE_DOWN l
> >> +
> >> +#include "pr101145.inc"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
> >> } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> new file mode 100644
> >> index 00000000000..e2cf9b1f7e6
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> @@ -0,0 +1,15 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#define TYPE int *
> >> +#define MIN ((TYPE)0)
> >> +#define MAX ((TYPE)((long long)-1))
> >> +#define N_BASE ((TYPE) (-32))
> >> +#define N_BASE_DOWN (MIN + 32)
> >> +
> >> +#define C 1
> >> +#define L_BASE l
> >> +#define L_BASE_DOWN l
> >> +
> >> +#include "pr101145.inc"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
> >> } */
> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> >> index b5add827018..06db6a36ef8 100644
> >> --- a/gcc/tree-ssa-loop-niter.c
> >> +++ b/gcc/tree-ssa-loop-niter.c
> >> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
> >> *iv0, affine_iv *iv1,
> >>      }
> >>  }
> >>
> >> +/* Determines number of iterations of loop whose ending condition
> >> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
> >> +   The number of iterations is stored to NITER.  */
> >> +
> >> +static bool
> >> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv
> >> *iv0,
> >> +                             affine_iv *iv1, class tree_niter_desc *niter)
> >> +{
> >> +  tree niter_type = unsigned_type_for (type);
> >> +  tree max, min;
> >> +
> >> +  if (POINTER_TYPE_P (type))
> >> +    {
> >> +      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
> >> +      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
> >> +    }
> >> +  else
> >> +    {
> >> +      max = TYPE_MAX_VALUE (type);
> >> +      min = TYPE_MIN_VALUE (type);
> >> +    }
> >
> > It would be cheaper to use wide_int here - wi::min/max_value
> > (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
> > code below.
>
> Thanks!  I think you mean to use wide_int for "tree high = max, low =
> min,".
> I would check and update the patch to use wide_int.
>
> The MAX/MIN would be in type 'tree' when binding to MINUS_EXPR with
> iv0/iv1->base,
> for this, we may still need to use "TYPE_MAX/MIN_VALUE (type)", right?
>
> >
> >> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
> >> +  tree step;
> >> +
> >> +  /* n < {base, C}. */
> >> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) ==
> >> INTEGER_CST
> >> +      && !tree_int_cst_sign_bit (iv1->step))
> >> +    {
> >> +      step = iv1->step;
> >> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max,
> >> iv1->base);
> >> +      if (TREE_CODE (iv1->base) == INTEGER_CST)
> >> +    low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
> >> +      else if (TREE_CODE (iv0->base) == INTEGER_CST)
> >> +    low = iv0->base;
> >> +    }
> >> +  /* {base, -C} < n. */
> >> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
> >> +       && tree_int_cst_sign_bit (iv0->step) && integer_zerop
> >> (iv1->step))
> >> +    {
> >> +      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
> >> iv0->step);
> >> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
> >> min);
> >> +      if (TREE_CODE (iv0->base) == INTEGER_CST)
> >> +    high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
> >> +      else if (TREE_CODE (iv1->base) == INTEGER_CST)
> >> +    high = iv1->base;
> >> +    }
> >> +  else
> >> +    return false;
> >> +
> >> +  /* (delta + step - 1) / step */
> >> +  step = fold_convert (niter_type, step);
> >> +  niter->niter = fold_convert (niter_type, niter->niter);
> >> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter,
> >> step);
> >> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type,
> >> niter->niter, step);
> >> +
> >> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
> >> +  m = fold_convert (niter_type, m);
> >> +  mpz_t mstep, tmp, mmax;
> >> +  mpz_init (mstep);
> >> +  mpz_init (tmp);
> >> +  mpz_init (mmax);
> >> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
> >> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
> >> +  mpz_add (tmp, mmax, mstep);
> >> +  mpz_sub_ui (tmp, tmp, 1);
> >> +  mpz_fdiv_q (tmp, tmp, mstep);
> >> +  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp,
> >> false),
> >> +                             TYPE_SIGN (niter_type));
> >> +  mpz_clear (mstep);
> >> +  mpz_clear (tmp);
> >
> > You forget to clear mmax?  I wonder why you need to involve GMP here
> Oh, thanks for pointing out this :) need to clear mmax.
>
> > at all since this is just add/sub/divide - you could use
> > widest_int directly I think.  At least the back-and-forth between
> > trees, wide-int and mpz looks odd.
> I once use 'trees' to do the calculation, while I encounter the issue
> that the may not hold the precision for some cases:
> "mmax" is MAX(1<<64-1); step is 4, then mmax + 4 becomes 3.
>
> I would check to use widest_int (e.g. long[4]) which seems enough to
> hold all types.
>
>
> BR,
> Jiufu Guo.
>
> >
> > Richard.
> >
> >> +  niter->may_be_zero
> >> +    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
> >> +
> >> +  niter->control.no_overflow = false;
> >> +
> >> +  return true;
> >> +}
> >> +
> >>  /* Determines number of iterations of loop whose ending condition
> >>     is IV0 < IV1.  TYPE is the type of the iv.  The number of
> >>     iterations is stored to NITER.  BNDS bounds the difference
> >> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree
> >> type, affine_iv *iv0,
> >>        niter->bound = iv0->base;
> >>      }
> >>
> >> +  /* {base, -C} < n,  or n < {base, C} */
> >> +  if (tree_int_cst_sign_bit (iv0->step)
> >> +      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
> >> (iv1->step)))
> >> +    return number_of_iterations_until_wrap (loop, type, iv0, iv1,
> >> niter);
> >> +
> >>    delta = fold_build2 (MINUS_EXPR, niter_type,
> >>                     fold_convert (niter_type, iv1->base),
> >>                     fold_convert (niter_type, iv0->base));
> >> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
> >>      }
> >>  }
> >>
> >> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
> >> -   the condition for loop-until-wrap cases.  For example:
> >> -     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
> >> -     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
> >> -   Return true if condition is successfully adjusted.  */
> >> -
> >> -static bool
> >> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
> >> *code,
> >> -                             affine_iv *iv1)
> >> -{
> >> -  /* Only support simple cases for the moment.  */
> >> -  if (TREE_CODE (iv0->base) != INTEGER_CST
> >> -      || TREE_CODE (iv1->base) != INTEGER_CST)
> >> -    return false;
> >> -
> >> -  tree niter_type = unsigned_type_for (type), high, low;
> >> -  /* Case: i-- < 10.  */
> >> -  if (integer_zerop (iv1->step))
> >> -    {
> >> -      /* TODO: Should handle case in which abs(step) != 1.  */
> >> -      if (!integer_minus_onep (iv0->step))
> >> -    return false;
> >> -      /* Give up on infinite loop.  */
> >> -      if (*code == LE_EXPR
> >> -      && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
> >> -    return false;
> >> -      high = fold_build2 (PLUS_EXPR, niter_type,
> >> -                      fold_convert (niter_type, iv0->base),
> >> -                      build_int_cst (niter_type, 1));
> >> -      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
> >> -    }
> >> -  else if (integer_zerop (iv0->step))
> >> -    {
> >> -      /* TODO: Should handle case in which abs(step) != 1.  */
> >> -      if (!integer_onep (iv1->step))
> >> -    return false;
> >> -      /* Give up on infinite loop.  */
> >> -      if (*code == LE_EXPR
> >> -      && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
> >> -    return false;
> >> -      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
> >> -      low = fold_build2 (MINUS_EXPR, niter_type,
> >> -                     fold_convert (niter_type, iv1->base),
> >> -                     build_int_cst (niter_type, 1));
> >> -    }
> >> -  else
> >> -    gcc_unreachable ();
> >> -
> >> -  iv0->base = low;
> >> -  iv0->step = fold_convert (niter_type, integer_one_node);
> >> -  iv1->base = high;
> >> -  iv1->step = build_int_cst (niter_type, 0);
> >> -  *code = NE_EXPR;
> >> -  return true;
> >> -}
> >> -
> >>  /* Determine the number of iterations according to condition (for
> >> staying
> >>     inside loop) which compares two induction variables using
> >> comparison
> >>     operator CODE.  The induction variable on left side of the
> >> comparison
> >> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
> >>        return true;
> >>      }
> >>
> >> -  /* Handle special case loops: while (i-- < 10) and while (10 < i++)
> >> by
> >> -     adjusting iv0, iv1 and code.  */
> >> -  if (code != NE_EXPR
> >> -      && (tree_int_cst_sign_bit (iv0->step)
> >> -      || (!integer_zerop (iv1->step)
> >> -          && !tree_int_cst_sign_bit (iv1->step)))
> >> -      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
> >> -    return false;
> >> -
> >>    /* OK, now we know we have a senseful loop.  Handle several cases,
> >> depending
> >>       on what comparison operator is used.  */
> >>    bound_difference (loop, iv1->base, iv0->base, &bnds);
> >>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
  2021-07-02  0:51     ` Bin.Cheng
@ 2021-07-02  4:06       ` guojiufu
  0 siblings, 0 replies; 8+ messages in thread
From: guojiufu @ 2021-07-02  4:06 UTC (permalink / raw)
  To: Bin.Cheng
  Cc: Richard Biener, Segher Boessenkool, amker, gcc-patches List,
	Bill Schmidt, jlaw, dje.gcc

On 2021-07-02 08:51, Bin.Cheng wrote:
> On Thu, Jul 1, 2021 at 10:15 PM guojiufu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>> 
>> On 2021-07-01 20:35, Richard Biener wrote:
>> > On Thu, 1 Jul 2021, Jiufu Guo wrote:
>> >
>> >> For code like:
>> >> unsigned foo(unsigned val, unsigned start)
>> >> {
>> >>   unsigned cnt = 0;
>> >>   for (unsigned i = start; i > val; ++i)
>> >>     cnt++;
>> >>   return cnt;
>> >> }
>> >>
>> >> The number of iterations should be about UINT_MAX - start.
>> >
>> > For
>> >
>> > unsigned foo(unsigned val, unsigned start)
>> > {
>> >   unsigned cnt = 0;
>> >   for (unsigned i = start; i >= val; ++i)
>> >     cnt++;
>> >   return cnt;
>> > }
>> >
>> > and val == 0 the loop never terminates.  I don't see anywhere
>> > in the patch that you disregard GE_EXPR and I remember
>> > the code handles GE as well as GT?  From a quick look this is
>> > also not covered by a testcase you add - not exactly sure
>> > how it would materialize in a miscompilation.
>> 
>> In number_of_iterations_cond, there is code:
>>     if (code == GE_EXPR || code == GT_EXPR
>>         || (code == NE_EXPR && integer_zerop (iv0->step)))
>>        {
>>          std::swap (iv0, iv1);
>>          code = swap_tree_comparison (code);
>>        }
>> It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
>> and LE (val <= i) is converted to LT (val - 1 < i).
>> So, the code is added to number_of_iterations_lt.
>> 
>> But, this patch leads mis-compilation for unsigned "i >= val" as
>> above transforms: converting LE (val <= i) to LT (val - 1 < i)
>> seems not appropriate (e.g where val=0).
> I don't know where the exact code is, but IIRC, number_of_iteration
> handles boundary conditions when transforming <= into <.  You may
> check it out.
Yes, in number_of_iterations_le, there is code to check MAX/MIN
         if (integer_nonzerop (iv0->step))
           assumption = fold_build2 (NE_EXPR, boolean_type_node,
                                     iv1->base, TYPE_MAX_VALUE (type));
         else
           assumption = fold_build2 (NE_EXPR, boolean_type_node,
                                     iv0->base, TYPE_MIN_VALUE (type));

Checking why this code does not help.
> 
>> Thanks for pointing out this!!!
>> 
>> I would investigate a way to handle this correctly.
>> A possible way maybe just to return false for this kind of LE.
> IIRC, it checks the boundary conditions, either returns false or
> simply introduces more assumptions.
Thanks! Adding more assumptions would help.
The below code also runs into infinite, more assumptions may help this 
code.

__attribute__ ((noinline))
unsigned foo(unsigned val, unsigned start)
{
   unsigned cnt = 0;
   for (unsigned i = start; val <= i; i+=16)
     cnt++;
   return cnt;
}

foo (4, 8);

Thanks again!


BR,
Jiufu Guo
>> 
>> Any suggestions?
>> 
>> >
>> >> There is function adjust_cond_for_loop_until_wrap which
>> >> handles similar work for const bases.
>> >> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> >> function number_of_iterations_cond/number_of_iterations_lt
>> >> to analyze number of iterations for this kind of loop.
>> >>
>> >> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>> >>
>> >> gcc/ChangeLog:
>> >>
>> >>      PR tree-optimization/101145
>> >>      * tree-ssa-loop-niter.c
>> >>      (number_of_iterations_until_wrap): New function.
>> >>      (number_of_iterations_lt): Invoke above function.
>> >>      (adjust_cond_for_loop_until_wrap):
>> >>      Merge to number_of_iterations_until_wrap.
>> >>      (number_of_iterations_cond): Update invokes for
>> >>      adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>> >>
>> >> gcc/testsuite/ChangeLog:
>> >>
>> >>      PR tree-optimization/101145
>> >>      * gcc.dg/vect/pr101145.c: New test.
>> >>      * gcc.dg/vect/pr101145.inc: New test.
>> >>      * gcc.dg/vect/pr101145_1.c: New test.
>> >>      * gcc.dg/vect/pr101145_2.c: New test.
>> >>      * gcc.dg/vect/pr101145_3.c: New test.
>> >> ---
>> >>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187
>> >> +++++++++++++++++++++++++
>> >>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
>> >>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>> >>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>> >>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>> >>  gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
>> >>  6 files changed, 380 insertions(+), 65 deletions(-)
>> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >>
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> >> new file mode 100644
>> >> index 00000000000..74031b031cf
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> >> @@ -0,0 +1,187 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#include <limits.h>
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> +  while (n < ++l)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> >> unsigned)
>> >> +{
>> >> +  while (UINT_MAX - 64 < ++l)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> +  l = UINT_MAX - 32;
>> >> +  while (n < ++l)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> +  while (n <= ++l)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{  // infininate
>> >> +  while (0 <= ++l)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> +  //no loop
>> >> +  l = UINT_MAX;
>> >> +  while (n < ++l)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> +  while (--l < n)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> >> unsigned)
>> >> +{
>> >> +  while (--l < 64)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> +  l = 32;
>> >> +  while (--l < n)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +
>> >> +int a[3200], b[3200];
>> >> +int fail;
>> >> +
>> >> +int
>> >> +main ()
>> >> +{
>> >> +  unsigned l, n;
>> >> +  unsigned res;
>> >> +  /* l > n*/
>> >> +  n = UINT_MAX - 64;
>> >> +  l = n + 32;
>> >> +  res = foo (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n;
>> >> +  res = foo (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n - 1;
>> >> +  res = foo (a, b, l, n);
>> >> +  if (res != l + 1)
>> >> +    fail++;
>> >> +
>> >> +  l = n - 32;
>> >> +  res = foo (a, b, l, n);
>> >> +  if (res != l + 1)
>> >> +    fail++;
>> >> +
>> >> +  l = UINT_MAX;
>> >> +  res = foo (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n + 32;
>> >> +  res = foo_1 (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n + 32;
>> >> +  res = foo_2 (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n;
>> >> +  res = foo_3 (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n - 1;
>> >> +  res = foo_3 (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n - 2;
>> >> +  res = foo_3 (a, b, l, n);
>> >> +  if (res != l + 1)
>> >> +    fail++;
>> >> +
>> >> +  res = foo_5 (a, b, l, n);
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  n = 64;
>> >> +  l = n - 32;
>> >> +  res = bar (a, b, l, n);
>> >> +  res++;
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n;
>> >> +  res = bar (a, b, l, n);
>> >> +  res++;
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  l = n + 1;
>> >> +  res = bar (a, b, l, n);
>> >> +  res++;
>> >> +  if (res != l)
>> >> +    fail++;
>> >> +
>> >> +  l = 0;
>> >> +  res = bar (a, b, l, n);
>> >> +  res++;
>> >> +  if (res != l)
>> >> +    fail++;
>> >> +
>> >> +  l = 32;
>> >> +  res = bar_1 (a, b, l, n);
>> >> +  res++;
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  res = bar_1 (a, b, l, n);
>> >> +  res++;
>> >> +  if (res != 0)
>> >> +    fail++;
>> >> +
>> >> +  if (fail)
>> >> +    __builtin_abort ();
>> >> +  return 0;
>> >> +}
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" }
>> >> } */
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >> new file mode 100644
>> >> index 00000000000..6eed3fa8aca
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >> @@ -0,0 +1,63 @@
>> >> +TYPE __attribute__ ((noinline))
>> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> >> +{
>> >> +  for (l = L_BASE; n < l; l += C)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +TYPE __attribute__ ((noinline))
>> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> >> +{
>> >> +  for (l = L_BASE_DOWN; l < n; l -= C)
>> >> +    *a++ = *b++ + 1;
>> >> +  return l;
>> >> +}
>> >> +
>> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
>> >> +
>> >> +int a[1000], b[1000];
>> >> +int fail;
>> >> +
>> >> +int
>> >> +main ()
>> >> +{
>> >> +  TYPE res;
>> >> +  TYPE l;
>> >> +  TYPE n;
>> >> +  n = N_BASE;
>> >> +  l = n - C;
>> >> +  res = foo_sign (a, b, l, n);
>> >> +  if (res != l)
>> >> +    fail++;
>> >> +
>> >> +  l = n;
>> >> +  res = foo_sign (a, b, l, n);
>> >> +  if (res != l)
>> >> +    fail++;
>> >> +
>> >> +  l = n + C;
>> >> +  res = foo_sign (a, b, l, n);
>> >> +  if (neq ((res - MIN) / C, 0))
>> >> +    fail++;
>> >> +
>> >> +  n = N_BASE_DOWN;
>> >> +  l = n - C;
>> >> +  res = bar_sign (a, b, l, n);
>> >> +  if (neq ((MAX - res) / C, 0))
>> >> +    fail++;
>> >> +
>> >> +  l = n;
>> >> +  res = bar_sign (a, b, l, n);
>> >> +  if (res != l)
>> >> +    fail++;
>> >> +
>> >> +  l = n + C;
>> >> +  res = bar_sign (a, b, l, n);
>> >> +  if (res != l)
>> >> +    fail++;
>> >> +
>> >> +  if (fail)
>> >> +    __builtin_abort ();
>> >> +  return 0;
>> >> +}
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >> new file mode 100644
>> >> index 00000000000..94f6b99b893
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >> @@ -0,0 +1,15 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#define TYPE signed char
>> >> +#define MIN -128
>> >> +#define MAX 127
>> >> +#define N_BASE (MAX - 32)
>> >> +#define N_BASE_DOWN (MIN + 32)
>> >> +
>> >> +#define C 3
>> >> +#define L_BASE l
>> >> +#define L_BASE_DOWN l
>> >> +
>> >> +#include "pr101145.inc"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> >> } */
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >> new file mode 100644
>> >> index 00000000000..d3cfc9e01e1
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >> @@ -0,0 +1,15 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#define TYPE unsigned char
>> >> +#define MIN 0
>> >> +#define MAX 255
>> >> +#define N_BASE (MAX - 32 + 1)
>> >> +#define N_BASE_DOWN (MIN + 32)
>> >> +
>> >> +#define C 2
>> >> +#define L_BASE l
>> >> +#define L_BASE_DOWN l
>> >> +
>> >> +#include "pr101145.inc"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> >> } */
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> new file mode 100644
>> >> index 00000000000..e2cf9b1f7e6
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> @@ -0,0 +1,15 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#define TYPE int *
>> >> +#define MIN ((TYPE)0)
>> >> +#define MAX ((TYPE)((long long)-1))
>> >> +#define N_BASE ((TYPE) (-32))
>> >> +#define N_BASE_DOWN (MIN + 32)
>> >> +
>> >> +#define C 1
>> >> +#define L_BASE l
>> >> +#define L_BASE_DOWN l
>> >> +
>> >> +#include "pr101145.inc"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> >> } */
>> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> >> index b5add827018..06db6a36ef8 100644
>> >> --- a/gcc/tree-ssa-loop-niter.c
>> >> +++ b/gcc/tree-ssa-loop-niter.c
>> >> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
>> >> *iv0, affine_iv *iv1,
>> >>      }
>> >>  }
>> >>
>> >> +/* Determines number of iterations of loop whose ending condition
>> >> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
>> >> +   The number of iterations is stored to NITER.  */
>> >> +
>> >> +static bool
>> >> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv
>> >> *iv0,
>> >> +                             affine_iv *iv1, class tree_niter_desc *niter)
>> >> +{
>> >> +  tree niter_type = unsigned_type_for (type);
>> >> +  tree max, min;
>> >> +
>> >> +  if (POINTER_TYPE_P (type))
>> >> +    {
>> >> +      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> >> +      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> >> +    }
>> >> +  else
>> >> +    {
>> >> +      max = TYPE_MAX_VALUE (type);
>> >> +      min = TYPE_MIN_VALUE (type);
>> >> +    }
>> >
>> > It would be cheaper to use wide_int here - wi::min/max_value
>> > (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
>> > code below.
>> 
>> Thanks!  I think you mean to use wide_int for "tree high = max, low =
>> min,".
>> I would check and update the patch to use wide_int.
>> 
>> The MAX/MIN would be in type 'tree' when binding to MINUS_EXPR with
>> iv0/iv1->base,
>> for this, we may still need to use "TYPE_MAX/MIN_VALUE (type)", right?
>> 
>> >
>> >> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> >> +  tree step;
>> >> +
>> >> +  /* n < {base, C}. */
>> >> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) ==
>> >> INTEGER_CST
>> >> +      && !tree_int_cst_sign_bit (iv1->step))
>> >> +    {
>> >> +      step = iv1->step;
>> >> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max,
>> >> iv1->base);
>> >> +      if (TREE_CODE (iv1->base) == INTEGER_CST)
>> >> +    low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> >> +      else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> >> +    low = iv0->base;
>> >> +    }
>> >> +  /* {base, -C} < n. */
>> >> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
>> >> +       && tree_int_cst_sign_bit (iv0->step) && integer_zerop
>> >> (iv1->step))
>> >> +    {
>> >> +      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
>> >> iv0->step);
>> >> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
>> >> min);
>> >> +      if (TREE_CODE (iv0->base) == INTEGER_CST)
>> >> +    high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> >> +      else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> >> +    high = iv1->base;
>> >> +    }
>> >> +  else
>> >> +    return false;
>> >> +
>> >> +  /* (delta + step - 1) / step */
>> >> +  step = fold_convert (niter_type, step);
>> >> +  niter->niter = fold_convert (niter_type, niter->niter);
>> >> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter,
>> >> step);
>> >> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type,
>> >> niter->niter, step);
>> >> +
>> >> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> >> +  m = fold_convert (niter_type, m);
>> >> +  mpz_t mstep, tmp, mmax;
>> >> +  mpz_init (mstep);
>> >> +  mpz_init (tmp);
>> >> +  mpz_init (mmax);
>> >> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> >> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> >> +  mpz_add (tmp, mmax, mstep);
>> >> +  mpz_sub_ui (tmp, tmp, 1);
>> >> +  mpz_fdiv_q (tmp, tmp, mstep);
>> >> +  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp,
>> >> false),
>> >> +                             TYPE_SIGN (niter_type));
>> >> +  mpz_clear (mstep);
>> >> +  mpz_clear (tmp);
>> >
>> > You forget to clear mmax?  I wonder why you need to involve GMP here
>> Oh, thanks for pointing out this :) need to clear mmax.
>> 
>> > at all since this is just add/sub/divide - you could use
>> > widest_int directly I think.  At least the back-and-forth between
>> > trees, wide-int and mpz looks odd.
>> I once use 'trees' to do the calculation, while I encounter the issue
>> that the may not hold the precision for some cases:
>> "mmax" is MAX(1<<64-1); step is 4, then mmax + 4 becomes 3.
>> 
>> I would check to use widest_int (e.g. long[4]) which seems enough to
>> hold all types.
>> 
>> 
>> BR,
>> Jiufu Guo.
>> 
>> >
>> > Richard.
>> >
>> >> +  niter->may_be_zero
>> >> +    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
>> >> +
>> >> +  niter->control.no_overflow = false;
>> >> +
>> >> +  return true;
>> >> +}
>> >> +
>> >>  /* Determines number of iterations of loop whose ending condition
>> >>     is IV0 < IV1.  TYPE is the type of the iv.  The number of
>> >>     iterations is stored to NITER.  BNDS bounds the difference
>> >> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree
>> >> type, affine_iv *iv0,
>> >>        niter->bound = iv0->base;
>> >>      }
>> >>
>> >> +  /* {base, -C} < n,  or n < {base, C} */
>> >> +  if (tree_int_cst_sign_bit (iv0->step)
>> >> +      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
>> >> (iv1->step)))
>> >> +    return number_of_iterations_until_wrap (loop, type, iv0, iv1,
>> >> niter);
>> >> +
>> >>    delta = fold_build2 (MINUS_EXPR, niter_type,
>> >>                     fold_convert (niter_type, iv1->base),
>> >>                     fold_convert (niter_type, iv0->base));
>> >> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>> >>      }
>> >>  }
>> >>
>> >> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> >> -   the condition for loop-until-wrap cases.  For example:
>> >> -     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
>> >> -     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
>> >> -   Return true if condition is successfully adjusted.  */
>> >> -
>> >> -static bool
>> >> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
>> >> *code,
>> >> -                             affine_iv *iv1)
>> >> -{
>> >> -  /* Only support simple cases for the moment.  */
>> >> -  if (TREE_CODE (iv0->base) != INTEGER_CST
>> >> -      || TREE_CODE (iv1->base) != INTEGER_CST)
>> >> -    return false;
>> >> -
>> >> -  tree niter_type = unsigned_type_for (type), high, low;
>> >> -  /* Case: i-- < 10.  */
>> >> -  if (integer_zerop (iv1->step))
>> >> -    {
>> >> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> >> -      if (!integer_minus_onep (iv0->step))
>> >> -    return false;
>> >> -      /* Give up on infinite loop.  */
>> >> -      if (*code == LE_EXPR
>> >> -      && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> >> -    return false;
>> >> -      high = fold_build2 (PLUS_EXPR, niter_type,
>> >> -                      fold_convert (niter_type, iv0->base),
>> >> -                      build_int_cst (niter_type, 1));
>> >> -      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> >> -    }
>> >> -  else if (integer_zerop (iv0->step))
>> >> -    {
>> >> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> >> -      if (!integer_onep (iv1->step))
>> >> -    return false;
>> >> -      /* Give up on infinite loop.  */
>> >> -      if (*code == LE_EXPR
>> >> -      && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> >> -    return false;
>> >> -      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> >> -      low = fold_build2 (MINUS_EXPR, niter_type,
>> >> -                     fold_convert (niter_type, iv1->base),
>> >> -                     build_int_cst (niter_type, 1));
>> >> -    }
>> >> -  else
>> >> -    gcc_unreachable ();
>> >> -
>> >> -  iv0->base = low;
>> >> -  iv0->step = fold_convert (niter_type, integer_one_node);
>> >> -  iv1->base = high;
>> >> -  iv1->step = build_int_cst (niter_type, 0);
>> >> -  *code = NE_EXPR;
>> >> -  return true;
>> >> -}
>> >> -
>> >>  /* Determine the number of iterations according to condition (for
>> >> staying
>> >>     inside loop) which compares two induction variables using
>> >> comparison
>> >>     operator CODE.  The induction variable on left side of the
>> >> comparison
>> >> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>> >>        return true;
>> >>      }
>> >>
>> >> -  /* Handle special case loops: while (i-- < 10) and while (10 < i++)
>> >> by
>> >> -     adjusting iv0, iv1 and code.  */
>> >> -  if (code != NE_EXPR
>> >> -      && (tree_int_cst_sign_bit (iv0->step)
>> >> -      || (!integer_zerop (iv1->step)
>> >> -          && !tree_int_cst_sign_bit (iv1->step)))
>> >> -      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> >> -    return false;
>> >> -
>> >>    /* OK, now we know we have a senseful loop.  Handle several cases,
>> >> depending
>> >>       on what comparison operator is used.  */
>> >>    bound_difference (loop, iv1->base, iv0->base, &bnds);
>> >>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
  2021-07-01 12:35 ` Richard Biener
  2021-07-01 14:12   ` guojiufu
@ 2021-07-02  7:55   ` guojiufu
  1 sibling, 0 replies; 8+ messages in thread
From: guojiufu @ 2021-07-02  7:55 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, wschmidt, segher, dje.gcc, jlaw, amker

On 2021-07-01 20:35, Richard Biener wrote:
> On Thu, 1 Jul 2021, Jiufu Guo wrote:
> 
>> For code like:
>> unsigned foo(unsigned val, unsigned start)
>> {
>>   unsigned cnt = 0;
>>   for (unsigned i = start; i > val; ++i)
>>     cnt++;
>>   return cnt;
>> }
>> 
>> The number of iterations should be about UINT_MAX - start.
> 
> For
> 
> unsigned foo(unsigned val, unsigned start)
> {
>   unsigned cnt = 0;
>   for (unsigned i = start; i >= val; ++i)
>     cnt++;
>   return cnt;
> }
> 
> and val == 0 the loop never terminates.  I don't see anywhere
> in the patch that you disregard GE_EXPR and I remember
> the code handles GE as well as GT?  From a quick look this is
> also not covered by a testcase you add - not exactly sure
> how it would materialize in a miscompilation.
> 
Find a similar issue on the below code with the trunk.
The below code should run infinite, but it exits quickly.

#include <limits.h>
__attribute__ ((noinline))
unsigned foo(unsigned val, unsigned start)
{
   unsigned cnt = 0;
   for (unsigned i = start; i <= val; i+=16)
     cnt++;
   return cnt;
}

int main()
{
   return foo (UINT_MAX-7, 8);
}

Just opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101291

BR,
Jiufu Guo.

>> There is function adjust_cond_for_loop_until_wrap which
>> handles similar work for const bases.
>> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> function number_of_iterations_cond/number_of_iterations_lt
>> to analyze number of iterations for this kind of loop.
>> 
>> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>> 
>> gcc/ChangeLog:
>> 
>> 	PR tree-optimization/101145
>> 	* tree-ssa-loop-niter.c
>> 	(number_of_iterations_until_wrap): New function.
>> 	(number_of_iterations_lt): Invoke above function.
>> 	(adjust_cond_for_loop_until_wrap):
>> 	Merge to number_of_iterations_until_wrap.
>> 	(number_of_iterations_cond): Update invokes for
>> 	adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	PR tree-optimization/101145
>> 	* gcc.dg/vect/pr101145.c: New test.
>> 	* gcc.dg/vect/pr101145.inc: New test.
>> 	* gcc.dg/vect/pr101145_1.c: New test.
>> 	* gcc.dg/vect/pr101145_2.c: New test.
>> 	* gcc.dg/vect/pr101145_3.c: New test.
>> ---
>>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 
>> +++++++++++++++++++++++++
>>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
>>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>>  gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
>>  6 files changed, 380 insertions(+), 65 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> 
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> new file mode 100644
>> index 00000000000..74031b031cf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> @@ -0,0 +1,187 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#include <limits.h>
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  while (n < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
>> unsigned)
>> +{
>> +  while (UINT_MAX - 64 < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  l = UINT_MAX - 32;
>> +  while (n < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  while (n <= ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{  // infininate
>> +  while (0 <= ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  //no loop
>> +  l = UINT_MAX;
>> +  while (n < ++l)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  while (--l < n)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
>> unsigned)
>> +{
>> +  while (--l < 64)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
>> n)
>> +{
>> +  l = 32;
>> +  while (--l < n)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +
>> +int a[3200], b[3200];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> +  unsigned l, n;
>> +  unsigned res;
>> +  /* l > n*/
>> +  n = UINT_MAX - 64;
>> +  l = n + 32;
>> +  res = foo (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = foo (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n - 1;
>> +  res = foo (a, b, l, n);
>> +  if (res != l + 1)
>> +    fail++;
>> +
>> +  l = n - 32;
>> +  res = foo (a, b, l, n);
>> +  if (res != l + 1)
>> +    fail++;
>> +
>> +  l = UINT_MAX;
>> +  res = foo (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n + 32;
>> +  res = foo_1 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n + 32;
>> +  res = foo_2 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = foo_3 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n - 1;
>> +  res = foo_3 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n - 2;
>> +  res = foo_3 (a, b, l, n);
>> +  if (res != l + 1)
>> +    fail++;
>> +
>> +  res = foo_5 (a, b, l, n);
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  n = 64;
>> +  l = n - 32;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  l = n + 1;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = 0;
>> +  res = bar (a, b, l, n);
>> +  res++;
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = 32;
>> +  res = bar_1 (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  res = bar_1 (a, b, l, n);
>> +  res++;
>> +  if (res != 0)
>> +    fail++;
>> +
>> +  if (fail)
>> +    __builtin_abort ();
>> +  return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } 
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc 
>> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> new file mode 100644
>> index 00000000000..6eed3fa8aca
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> @@ -0,0 +1,63 @@
>> +TYPE __attribute__ ((noinline))
>> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> +  for (l = L_BASE; n < l; l += C)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +TYPE __attribute__ ((noinline))
>> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> +  for (l = L_BASE_DOWN; l < n; l -= C)
>> +    *a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
>> +
>> +int a[1000], b[1000];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> +  TYPE res;
>> +  TYPE l;
>> +  TYPE n;
>> +  n = N_BASE;
>> +  l = n - C;
>> +  res = foo_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = n;
>> +  res = foo_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = n + C;
>> +  res = foo_sign (a, b, l, n);
>> +  if (neq ((res - MIN) / C, 0))
>> +    fail++;
>> +
>> +  n = N_BASE_DOWN;
>> +  l = n - C;
>> +  res = bar_sign (a, b, l, n);
>> +  if (neq ((MAX - res) / C, 0))
>> +    fail++;
>> +
>> +  l = n;
>> +  res = bar_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  l = n + C;
>> +  res = bar_sign (a, b, l, n);
>> +  if (res != l)
>> +    fail++;
>> +
>> +  if (fail)
>> +    __builtin_abort ();
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> new file mode 100644
>> index 00000000000..94f6b99b893
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE signed char
>> +#define MIN -128
>> +#define MAX 127
>> +#define N_BASE (MAX - 32)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 3
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } 
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> new file mode 100644
>> index 00000000000..d3cfc9e01e1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE unsigned char
>> +#define MIN 0
>> +#define MAX 255
>> +#define N_BASE (MAX - 32 + 1)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 2
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } 
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c 
>> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> new file mode 100644
>> index 00000000000..e2cf9b1f7e6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE int *
>> +#define MIN ((TYPE)0)
>> +#define MAX ((TYPE)((long long)-1))
>> +#define N_BASE ((TYPE) (-32))
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 1
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } 
>> } */
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index b5add827018..06db6a36ef8 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv 
>> *iv0, affine_iv *iv1,
>>      }
>>  }
>> 
>> +/* Determines number of iterations of loop whose ending condition
>> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
>> +   The number of iterations is stored to NITER.  */
>> +
>> +static bool
>> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv 
>> *iv0,
>> +				 affine_iv *iv1, class tree_niter_desc *niter)
>> +{
>> +  tree niter_type = unsigned_type_for (type);
>> +  tree max, min;
>> +
>> +  if (POINTER_TYPE_P (type))
>> +    {
>> +      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> +      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> +    }
>> +  else
>> +    {
>> +      max = TYPE_MAX_VALUE (type);
>> +      min = TYPE_MIN_VALUE (type);
>> +    }
> 
> It would be cheaper to use wide_int here - wi::min/max_value
> (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
> code below.
> 
>> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> +  tree step;
>> +
>> +  /* n < {base, C}. */
>> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == 
>> INTEGER_CST
>> +      && !tree_int_cst_sign_bit (iv1->step))
>> +    {
>> +      step = iv1->step;
>> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, 
>> iv1->base);
>> +      if (TREE_CODE (iv1->base) == INTEGER_CST)
>> +	low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> +      else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> +	low = iv0->base;
>> +    }
>> +  /* {base, -C} < n. */
>> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
>> +	   && tree_int_cst_sign_bit (iv0->step) && integer_zerop 
>> (iv1->step))
>> +    {
>> +      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), 
>> iv0->step);
>> +      niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, 
>> min);
>> +      if (TREE_CODE (iv0->base) == INTEGER_CST)
>> +	high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> +      else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> +	high = iv1->base;
>> +    }
>> +  else
>> +    return false;
>> +
>> +  /* (delta + step - 1) / step */
>> +  step = fold_convert (niter_type, step);
>> +  niter->niter = fold_convert (niter_type, niter->niter);
>> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, 
>> step);
>> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, 
>> niter->niter, step);
>> +
>> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> +  m = fold_convert (niter_type, m);
>> +  mpz_t mstep, tmp, mmax;
>> +  mpz_init (mstep);
>> +  mpz_init (tmp);
>> +  mpz_init (mmax);
>> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> +  mpz_add (tmp, mmax, mstep);
>> +  mpz_sub_ui (tmp, tmp, 1);
>> +  mpz_fdiv_q (tmp, tmp, mstep);
>> +  niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, 
>> false),
>> +				 TYPE_SIGN (niter_type));
>> +  mpz_clear (mstep);
>> +  mpz_clear (tmp);
> 
> You forget to clear mmax?  I wonder why you need to involve GMP here
> at all since this is just add/sub/divide - you could use
> widest_int directly I think.  At least the back-and-forth between
> trees, wide-int and mpz looks odd.
> 
> Richard.
> 
>> +  niter->may_be_zero
>> +    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
>> +
>> +  niter->control.no_overflow = false;
>> +
>> +  return true;
>> +}
>> +
>>  /* Determines number of iterations of loop whose ending condition
>>     is IV0 < IV1.  TYPE is the type of the iv.  The number of
>>     iterations is stored to NITER.  BNDS bounds the difference
>> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree 
>> type, affine_iv *iv0,
>>        niter->bound = iv0->base;
>>      }
>> 
>> +  /* {base, -C} < n,  or n < {base, C} */
>> +  if (tree_int_cst_sign_bit (iv0->step)
>> +      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit 
>> (iv1->step)))
>> +    return number_of_iterations_until_wrap (loop, type, iv0, iv1, 
>> niter);
>> +
>>    delta = fold_build2 (MINUS_EXPR, niter_type,
>>  		       fold_convert (niter_type, iv1->base),
>>  		       fold_convert (niter_type, iv0->base));
>> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>>      }
>>  }
>> 
>> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> -   the condition for loop-until-wrap cases.  For example:
>> -     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
>> -     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
>> -   Return true if condition is successfully adjusted.  */
>> -
>> -static bool
>> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code 
>> *code,
>> -				 affine_iv *iv1)
>> -{
>> -  /* Only support simple cases for the moment.  */
>> -  if (TREE_CODE (iv0->base) != INTEGER_CST
>> -      || TREE_CODE (iv1->base) != INTEGER_CST)
>> -    return false;
>> -
>> -  tree niter_type = unsigned_type_for (type), high, low;
>> -  /* Case: i-- < 10.  */
>> -  if (integer_zerop (iv1->step))
>> -    {
>> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> -      if (!integer_minus_onep (iv0->step))
>> -	return false;
>> -      /* Give up on infinite loop.  */
>> -      if (*code == LE_EXPR
>> -	  && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> -	return false;
>> -      high = fold_build2 (PLUS_EXPR, niter_type,
>> -			  fold_convert (niter_type, iv0->base),
>> -			  build_int_cst (niter_type, 1));
>> -      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> -    }
>> -  else if (integer_zerop (iv0->step))
>> -    {
>> -      /* TODO: Should handle case in which abs(step) != 1.  */
>> -      if (!integer_onep (iv1->step))
>> -	return false;
>> -      /* Give up on infinite loop.  */
>> -      if (*code == LE_EXPR
>> -	  && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> -	return false;
>> -      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> -      low = fold_build2 (MINUS_EXPR, niter_type,
>> -			 fold_convert (niter_type, iv1->base),
>> -			 build_int_cst (niter_type, 1));
>> -    }
>> -  else
>> -    gcc_unreachable ();
>> -
>> -  iv0->base = low;
>> -  iv0->step = fold_convert (niter_type, integer_one_node);
>> -  iv1->base = high;
>> -  iv1->step = build_int_cst (niter_type, 0);
>> -  *code = NE_EXPR;
>> -  return true;
>> -}
>> -
>>  /* Determine the number of iterations according to condition (for 
>> staying
>>     inside loop) which compares two induction variables using 
>> comparison
>>     operator CODE.  The induction variable on left side of the 
>> comparison
>> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>>        return true;
>>      }
>> 
>> -  /* Handle special case loops: while (i-- < 10) and while (10 < i++) 
>> by
>> -     adjusting iv0, iv1 and code.  */
>> -  if (code != NE_EXPR
>> -      && (tree_int_cst_sign_bit (iv0->step)
>> -	  || (!integer_zerop (iv1->step)
>> -	      && !tree_int_cst_sign_bit (iv1->step)))
>> -      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> -    return false;
>> -
>>    /* OK, now we know we have a senseful loop.  Handle several cases, 
>> depending
>>       on what comparison operator is used.  */
>>    bound_difference (loop, iv1->base, iv0->base, &bnds);
>> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-07-02  7:56 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-01  2:05 [PATCH] Analyze niter for until-wrap condition [PR101145] Jiufu Guo
2021-07-01  7:22 ` Bin.Cheng
2021-07-01 11:32   ` guojiufu
2021-07-01 12:35 ` Richard Biener
2021-07-01 14:12   ` guojiufu
2021-07-02  0:51     ` Bin.Cheng
2021-07-02  4:06       ` guojiufu
2021-07-02  7:55   ` guojiufu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).