public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3136] Analyze niter for until-wrap condition [PR101145]
@ 2021-08-25  8:39 Jiu Fu Guo
  0 siblings, 0 replies; only message in thread
From: Jiu Fu Guo @ 2021-08-25  8:39 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3673dcf6d6baeb67bb70ff03d4cb3f92beed0075

commit r12-3136-g3673dcf6d6baeb67bb70ff03d4cb3f92beed0075
Author: Jiufu Guo <guojiufu@linux.ibm.com>
Date:   Wed Jul 7 13:41:01 2021 +0800

    Analyze niter for until-wrap condition [PR101145]
    
    For code like:
    unsigned foo(unsigned val, unsigned start)
    {
      unsigned cnt = 0;
      for (unsigned i = start; i > val; ++i)
        cnt++;
      return cnt;
    }
    
    The number of iterations should be about UINT_MAX - start.
    
    There is function adjust_cond_for_loop_until_wrap which
    handles similar work for const bases.
    Like adjust_cond_for_loop_until_wrap, this patch enhance
    function number_of_iterations_cond/number_of_iterations_lt
    to analyze number of iterations for this kind of loop.
    
    gcc/ChangeLog:
    
    2021-08-25  Jiufu Guo  <guojiufu@linux.ibm.com>
    
            PR tree-optimization/101145
            * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
            New function.
            (number_of_iterations_lt): Invoke above function.
            (adjust_cond_for_loop_until_wrap):
            Merge to number_of_iterations_until_wrap.
            (number_of_iterations_cond): Update invokes for
            adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
    
    gcc/testsuite/ChangeLog:
    
    2021-08-25  Jiufu Guo  <guojiufu@linux.ibm.com>
    
            PR tree-optimization/101145
            * gcc.dg/vect/pr101145.c: New test.
            * gcc.dg/vect/pr101145.inc: New test.
            * gcc.dg/vect/pr101145_1.c: New test.
            * gcc.dg/vect/pr101145_2.c: New test.
            * gcc.dg/vect/pr101145_3.c: New test.
            * gcc.dg/vect/pr101145inf.c: New test.
            * gcc.dg/vect/pr101145inf.inc: New test.
            * gcc.dg/vect/pr101145inf_1.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr101145.c      | 187 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr101145.inc    |  65 +++++++++++
 gcc/testsuite/gcc.dg/vect/pr101145_1.c    |  13 +++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c    |  13 +++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c    |  13 +++
 gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 ++++
 gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 +++++
 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 ++++
 gcc/tree-ssa-loop-niter.c                 | 157 ++++++++++++++-----------
 9 files changed, 459 insertions(+), 65 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 00000000000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (UINT_MAX - 64 < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n <= ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{  // infininate 
+  while (0 <= ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (--l < n)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (--l < 64)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = 32;
+  while (--l < n)
+    *a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+  unsigned l, n;
+  unsigned res;
+  /* l > n*/
+  n = UINT_MAX - 64;
+  l = n + 32;
+  res = foo (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n;
+  res = foo (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n - 1;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+    fail++;
+  
+  l = n - 32;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+    fail++;
+
+  l = UINT_MAX;
+  res = foo (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n + 32;
+  res = foo_1 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n + 32;
+  res = foo_2 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n;
+  res = foo_3 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n - 1;
+  res = foo_3 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  l = n - 2;
+  res = foo_3 (a, b, l, n);
+  if (res != l + 1)
+    fail++;
+
+  res = foo_5 (a, b, l, n);
+  if (res != 0)
+    fail++;
+
+  n = 64;
+  l = n - 32;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;
+
+  l = n;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;
+
+  l = n + 1;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != l)
+    fail++;
+
+  l = 0;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != l)
+    fail++;
+
+  l = 32;
+  res = bar_1 (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;  
+
+  res = bar_1 (a, b, l, n);
+  res++;
+  if (res != 0)
+    fail++;  
+
+  if (fail)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc
new file mode 100644
index 00000000000..615d2e5e552
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
@@ -0,0 +1,65 @@
+TYPE __attribute__ ((noinline))
+foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+  TYPE i;
+  for (i = l; n < i; i += C)
+    *a++ = *b++ + 1;
+  return i;
+}
+
+TYPE __attribute__ ((noinline))
+bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+  TYPE i;
+  for (i = l; i < n; i -= C)
+    *a++ = *b++ + 1;
+  return i;
+}
+
+int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
+
+int a[1000], b[1000];
+int fail;
+
+int
+main ()
+{
+  TYPE res;
+  TYPE l;
+  TYPE n;
+  n = N_BASE;
+  l = n - C;
+  res = foo_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  l = n;
+  res = foo_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  l = n + C;
+  res = foo_sign (a, b, l, n);
+  if (neq ((res - MIN) / C, 0))
+    fail++;
+
+  n = N_BASE_DOWN;
+  l = n - C;
+  res = bar_sign (a, b, l, n);
+  if (neq ((MAX - res) / C, 0))
+    fail++;
+
+  l = n;
+  res = bar_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  l = n + C;
+  res = bar_sign (a, b, l, n);
+  if (res != l)
+    fail++;
+
+  if (fail)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
new file mode 100644
index 00000000000..8bc26e2436e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE signed char
+#define MIN -128
+#define MAX 127
+#define N_BASE (MAX - 32)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 3
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
new file mode 100644
index 00000000000..b14c4b4b519
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE unsigned char
+#define MIN 0
+#define MAX 255
+#define N_BASE (MAX - 32 + 1)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 2
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
new file mode 100644
index 00000000000..99289afec0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE int *
+#define MIN ((TYPE)0)
+#define MAX ((TYPE)((long long)-1))
+#define N_BASE (MIN - 32)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 1
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c b/gcc/testsuite/gcc.dg/vect/pr101145inf.c
new file mode 100644
index 00000000000..ed49f5670b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c
@@ -0,0 +1,25 @@
+/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */
+/* { dg-options "-O3" } */
+#include <limits.h>
+#include "pr101145inf.inc"
+
+__attribute__ ((noinline))
+unsigned foo(unsigned val, unsigned start)
+{
+  unsigned cnt = 0;
+  for (unsigned i = start; val <= i; i+=16)
+    cnt++;
+  return cnt;
+}
+
+void test_finite ()
+{
+  unsigned n = foo (16, UINT_MAX - 32);
+  if (n != 3)
+    __builtin_abort ();
+}
+
+void test_infinite ()
+{
+ foo (15, UINT_MAX - 32);
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc
new file mode 100644
index 00000000000..4aa3d049187
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc
@@ -0,0 +1,28 @@
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+
+void test_finite ();
+void test_infinite ();
+
+void do_exit (int i)
+{
+  exit (0);
+}
+
+int main(void)
+{
+  test_finite ();
+  struct sigaction s;
+  sigemptyset (&s.sa_mask);
+  s.sa_handler = do_exit;
+  s.sa_flags = 0;
+  sigaction (SIGALRM, &s, NULL);
+  alarm (1);
+
+  test_infinite ();
+
+  __builtin_abort ();
+  return 1;
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c
new file mode 100644
index 00000000000..4ee3e31c7fe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */
+/* { dg-options "-O3" } */
+#include <limits.h>
+#include "pr101145inf.inc"
+
+__attribute__ ((noinline))
+unsigned foo(unsigned val, unsigned start)
+{
+  unsigned cnt = 0;
+  for (unsigned i = start; i < val; i-=16)
+    cnt++;
+  return cnt;
+}
+
+void test_finite ()
+{
+  foo (UINT_MAX - 15, 32);
+}
+
+void test_infinite ()
+{
+  foo (UINT_MAX - 14, 32);
+}
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 466158a5eb1..7af92d1c893 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
     }
 }
 
+/* Determines number of iterations of loop whose ending condition
+   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
+   The number of iterations is stored to NITER.  */
+
+static bool
+number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
+				 affine_iv *iv1, class tree_niter_desc *niter)
+{
+  tree niter_type = unsigned_type_for (type);
+  tree step, num, assumptions, may_be_zero;
+  wide_int high, low, max, min;
+
+  may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
+  if (integer_onep (may_be_zero))
+    return false;
+
+  int prec = TYPE_PRECISION (type);
+  signop sgn = TYPE_SIGN (type);
+  min = wi::min_value (prec, sgn);
+  max = wi::max_value (prec, sgn);
+
+  /* n < {base, C}. */
+  if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step))
+    {
+      step = iv1->step;
+      /* MIN + C - 1 <= n.  */
+      tree last = wide_int_to_tree (type, min + wi::to_wide (step) - 1);
+      assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, iv0->base);
+      if (integer_zerop (assumptions))
+	return false;
+
+      num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree (type, max),
+			 iv1->base);
+      high = max;
+      if (TREE_CODE (iv1->base) == INTEGER_CST)
+	low = wi::to_wide (iv1->base) - 1;
+      else if (TREE_CODE (iv0->base) == INTEGER_CST)
+	low = wi::to_wide (iv0->base);
+      else
+	low = min;
+    }
+  /* {base, -C} < n.  */
+  else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
+    {
+      step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
+      /* MAX - C + 1 >= n.  */
+      tree last = wide_int_to_tree (type, max - wi::to_wide (step) + 1);
+      assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, iv1->base);
+      if (integer_zerop (assumptions))
+	return false;
+
+      num = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
+			 wide_int_to_tree (type, min));
+      low = min;
+      if (TREE_CODE (iv0->base) == INTEGER_CST)
+	high = wi::to_wide (iv0->base) + 1;
+      else if (TREE_CODE (iv1->base) == INTEGER_CST)
+	high = wi::to_wide (iv1->base);
+      else
+	high = max;
+    }
+  else
+    return false;
+
+  /* (delta + step - 1) / step */
+  step = fold_convert (niter_type, step);
+  num = fold_convert (niter_type, num);
+  num = fold_build2 (PLUS_EXPR, niter_type, num, step);
+  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step);
+
+  widest_int delta, s;
+  delta = widest_int::from (high, sgn) - widest_int::from (low, sgn);
+  s = wi::to_widest (step);
+  delta = delta + s - 1;
+  niter->max = wi::udiv_floor (delta, s);
+
+  niter->may_be_zero = may_be_zero;
+
+  if (!integer_nonzerop (assumptions))
+    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+				      niter->assumptions, assumptions);
+
+  niter->control.no_overflow = false;
+
+  return true;
+}
+
 /* Determines number of iterations of loop whose ending condition
    is IV0 < IV1.  TYPE is the type of the iv.  The number of
    iterations is stored to NITER.  BNDS bounds the difference
@@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
       niter->bound = iv0->base;
     }
 
+  /* {base, -C} < n,  or n < {base, C} */
+  if (tree_int_cst_sign_bit (iv0->step)
+      || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
+    return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
+
   delta = fold_build2 (MINUS_EXPR, niter_type,
 		       fold_convert (niter_type, iv1->base),
 		       fold_convert (niter_type, iv0->base));
@@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
     }
 }
 
-/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
-   the condition for loop-until-wrap cases.  For example:
-     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
-     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
-   Return true if condition is successfully adjusted.  */
-
-static bool
-adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
-				 affine_iv *iv1)
-{
-  /* Only support simple cases for the moment.  */
-  if (TREE_CODE (iv0->base) != INTEGER_CST
-      || TREE_CODE (iv1->base) != INTEGER_CST)
-    return false;
-
-  tree niter_type = unsigned_type_for (type), high, low;
-  /* Case: i-- < 10.  */
-  if (integer_zerop (iv1->step))
-    {
-      /* TODO: Should handle case in which abs(step) != 1.  */
-      if (!integer_minus_onep (iv0->step))
-	return false;
-      /* Give up on infinite loop.  */
-      if (*code == LE_EXPR
-	  && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
-	return false;
-      high = fold_build2 (PLUS_EXPR, niter_type,
-			  fold_convert (niter_type, iv0->base),
-			  build_int_cst (niter_type, 1));
-      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
-    }
-  else if (integer_zerop (iv0->step))
-    {
-      /* TODO: Should handle case in which abs(step) != 1.  */
-      if (!integer_onep (iv1->step))
-	return false;
-      /* Give up on infinite loop.  */
-      if (*code == LE_EXPR
-	  && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
-	return false;
-      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
-      low = fold_build2 (MINUS_EXPR, niter_type,
-			 fold_convert (niter_type, iv1->base),
-			 build_int_cst (niter_type, 1));
-    }
-  else
-    gcc_unreachable ();
-
-  iv0->base = low;
-  iv0->step = fold_convert (niter_type, integer_one_node);
-  iv1->base = high;
-  iv1->step = build_int_cst (niter_type, 0);
-  *code = NE_EXPR;
-  return true;
-}
-
 /* Determine the number of iterations according to condition (for staying
    inside loop) which compares two induction variables using comparison
    operator CODE.  The induction variable on left side of the comparison
@@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop,
       return true;
     }
 
-  /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
-     adjusting iv0, iv1 and code.  */
-  if (code != NE_EXPR
-      && (tree_int_cst_sign_bit (iv0->step)
-	  || (!integer_zerop (iv1->step)
-	      && !tree_int_cst_sign_bit (iv1->step)))
-      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
-    return false;
-
   /* OK, now we know we have a senseful loop.  Handle several cases, depending
      on what comparison operator is used.  */
   bound_difference (loop, iv1->base, iv0->base, &bnds);


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-08-25  8:39 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-25  8:39 [gcc r12-3136] Analyze niter for until-wrap condition [PR101145] Jiu Fu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).