* [PATCH] Analyze niter for until-wrap condition [PR101145]
@ 2021-07-01 2:05 Jiufu Guo
2021-07-01 7:22 ` Bin.Cheng
2021-07-01 12:35 ` Richard Biener
0 siblings, 2 replies; 8+ messages in thread
From: Jiufu Guo @ 2021-07-01 2:05 UTC (permalink / raw)
To: gcc-patches; +Cc: guojiufu, wschmidt, segher, dje.gcc, rguenther, jlaw, amker
For code like:
unsigned foo(unsigned val, unsigned start)
{
unsigned cnt = 0;
for (unsigned i = start; i > val; ++i)
cnt++;
return cnt;
}
The number of iterations should be about UINT_MAX - start.
There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.
Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
gcc/ChangeLog:
PR tree-optimization/101145
* tree-ssa-loop-niter.c
(number_of_iterations_until_wrap): New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
gcc/testsuite/ChangeLog:
PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr101145.c | 187 +++++++++++++++++++++++++
gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
6 files changed, 380 insertions(+), 65 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 00000000000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+ while (UINT_MAX - 64 < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ l = UINT_MAX - 32;
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ while (n <= ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{ // infininate
+ while (0 <= ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ //no loop
+ l = UINT_MAX;
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ while (--l < n)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+ while (--l < 64)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+ l = 32;
+ while (--l < n)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+ unsigned l, n;
+ unsigned res;
+ /* l > n*/
+ n = UINT_MAX - 64;
+ l = n + 32;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 1;
+ res = foo (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ l = n - 32;
+ res = foo (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ l = UINT_MAX;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n + 32;
+ res = foo_1 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n + 32;
+ res = foo_2 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = foo_3 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 1;
+ res = foo_3 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 2;
+ res = foo_3 (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ res = foo_5 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ n = 64;
+ l = n - 32;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ l = n + 1;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != l)
+ fail++;
+
+ l = 0;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != l)
+ fail++;
+
+ l = 32;
+ res = bar_1 (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ res = bar_1 (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc
new file mode 100644
index 00000000000..6eed3fa8aca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
@@ -0,0 +1,63 @@
+TYPE __attribute__ ((noinline))
+foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+ for (l = L_BASE; n < l; l += C)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+TYPE __attribute__ ((noinline))
+bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+ for (l = L_BASE_DOWN; l < n; l -= C)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
+
+int a[1000], b[1000];
+int fail;
+
+int
+main ()
+{
+ TYPE res;
+ TYPE l;
+ TYPE n;
+ n = N_BASE;
+ l = n - C;
+ res = foo_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n;
+ res = foo_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n + C;
+ res = foo_sign (a, b, l, n);
+ if (neq ((res - MIN) / C, 0))
+ fail++;
+
+ n = N_BASE_DOWN;
+ l = n - C;
+ res = bar_sign (a, b, l, n);
+ if (neq ((MAX - res) / C, 0))
+ fail++;
+
+ l = n;
+ res = bar_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n + C;
+ res = bar_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
new file mode 100644
index 00000000000..94f6b99b893
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE signed char
+#define MIN -128
+#define MAX 127
+#define N_BASE (MAX - 32)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 3
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
new file mode 100644
index 00000000000..d3cfc9e01e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE unsigned char
+#define MIN 0
+#define MAX 255
+#define N_BASE (MAX - 32 + 1)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 2
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
new file mode 100644
index 00000000000..e2cf9b1f7e6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE int *
+#define MIN ((TYPE)0)
+#define MAX ((TYPE)((long long)-1))
+#define N_BASE ((TYPE) (-32))
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 1
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b5add827018..06db6a36ef8 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
}
}
+/* Determines number of iterations of loop whose ending condition
+ is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
+ The number of iterations is stored to NITER. */
+
+static bool
+number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
+ affine_iv *iv1, class tree_niter_desc *niter)
+{
+ tree niter_type = unsigned_type_for (type);
+ tree max, min;
+
+ if (POINTER_TYPE_P (type))
+ {
+ max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
+ min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
+ }
+ else
+ {
+ max = TYPE_MAX_VALUE (type);
+ min = TYPE_MIN_VALUE (type);
+ }
+
+ tree high = max, low = min, one = build_int_cst (niter_type, 1);
+ tree step;
+
+ /* n < {base, C}. */
+ if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
+ && !tree_int_cst_sign_bit (iv1->step))
+ {
+ step = iv1->step;
+ niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
+ if (TREE_CODE (iv1->base) == INTEGER_CST)
+ low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
+ else if (TREE_CODE (iv0->base) == INTEGER_CST)
+ low = iv0->base;
+ }
+ /* {base, -C} < n. */
+ else if (TREE_CODE (iv0->step) == INTEGER_CST
+ && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
+ {
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
+ niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
+ if (TREE_CODE (iv0->base) == INTEGER_CST)
+ high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
+ else if (TREE_CODE (iv1->base) == INTEGER_CST)
+ high = iv1->base;
+ }
+ else
+ return false;
+
+ /* (delta + step - 1) / step */
+ step = fold_convert (niter_type, step);
+ niter->niter = fold_convert (niter_type, niter->niter);
+ niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step);
+ niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, step);
+
+ tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
+ m = fold_convert (niter_type, m);
+ mpz_t mstep, tmp, mmax;
+ mpz_init (mstep);
+ mpz_init (tmp);
+ mpz_init (mmax);
+ wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
+ wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
+ mpz_add (tmp, mmax, mstep);
+ mpz_sub_ui (tmp, tmp, 1);
+ mpz_fdiv_q (tmp, tmp, mstep);
+ niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
+ TYPE_SIGN (niter_type));
+ mpz_clear (mstep);
+ mpz_clear (tmp);
+
+ niter->may_be_zero
+ = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
+
+ niter->control.no_overflow = false;
+
+ return true;
+}
+
/* Determines number of iterations of loop whose ending condition
is IV0 < IV1. TYPE is the type of the iv. The number of
iterations is stored to NITER. BNDS bounds the difference
@@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
niter->bound = iv0->base;
}
+ /* {base, -C} < n, or n < {base, C} */
+ if (tree_int_cst_sign_bit (iv0->step)
+ || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
+ return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
+
delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base));
@@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
}
}
-/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
- the condition for loop-until-wrap cases. For example:
- (unsigned){8, -1}_loop < 10 => {0, 1} != 9
- 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
- Return true if condition is successfully adjusted. */
-
-static bool
-adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
- affine_iv *iv1)
-{
- /* Only support simple cases for the moment. */
- if (TREE_CODE (iv0->base) != INTEGER_CST
- || TREE_CODE (iv1->base) != INTEGER_CST)
- return false;
-
- tree niter_type = unsigned_type_for (type), high, low;
- /* Case: i-- < 10. */
- if (integer_zerop (iv1->step))
- {
- /* TODO: Should handle case in which abs(step) != 1. */
- if (!integer_minus_onep (iv0->step))
- return false;
- /* Give up on infinite loop. */
- if (*code == LE_EXPR
- && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
- return false;
- high = fold_build2 (PLUS_EXPR, niter_type,
- fold_convert (niter_type, iv0->base),
- build_int_cst (niter_type, 1));
- low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
- }
- else if (integer_zerop (iv0->step))
- {
- /* TODO: Should handle case in which abs(step) != 1. */
- if (!integer_onep (iv1->step))
- return false;
- /* Give up on infinite loop. */
- if (*code == LE_EXPR
- && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
- return false;
- high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
- low = fold_build2 (MINUS_EXPR, niter_type,
- fold_convert (niter_type, iv1->base),
- build_int_cst (niter_type, 1));
- }
- else
- gcc_unreachable ();
-
- iv0->base = low;
- iv0->step = fold_convert (niter_type, integer_one_node);
- iv1->base = high;
- iv1->step = build_int_cst (niter_type, 0);
- *code = NE_EXPR;
- return true;
-}
-
/* Determine the number of iterations according to condition (for staying
inside loop) which compares two induction variables using comparison
operator CODE. The induction variable on left side of the comparison
@@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
return true;
}
- /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
- adjusting iv0, iv1 and code. */
- if (code != NE_EXPR
- && (tree_int_cst_sign_bit (iv0->step)
- || (!integer_zerop (iv1->step)
- && !tree_int_cst_sign_bit (iv1->step)))
- && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
- return false;
-
/* OK, now we know we have a senseful loop. Handle several cases, depending
on what comparison operator is used. */
bound_difference (loop, iv1->base, iv0->base, &bnds);
--
2.17.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
2021-07-01 2:05 [PATCH] Analyze niter for until-wrap condition [PR101145] Jiufu Guo
@ 2021-07-01 7:22 ` Bin.Cheng
2021-07-01 11:32 ` guojiufu
2021-07-01 12:35 ` Richard Biener
1 sibling, 1 reply; 8+ messages in thread
From: Bin.Cheng @ 2021-07-01 7:22 UTC (permalink / raw)
To: Jiufu Guo
Cc: gcc-patches List, Richard Biener, amker, Segher Boessenkool,
jlaw, Bill Schmidt, dje.gcc
On Thu, Jul 1, 2021 at 10:06 AM Jiufu Guo via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> For code like:
> unsigned foo(unsigned val, unsigned start)
> {
> unsigned cnt = 0;
> for (unsigned i = start; i > val; ++i)
> cnt++;
> return cnt;
> }
>
> The number of iterations should be about UINT_MAX - start.
>
> There is function adjust_cond_for_loop_until_wrap which
> handles similar work for const bases.
> Like adjust_cond_for_loop_until_wrap, this patch enhance
> function number_of_iterations_cond/number_of_iterations_lt
> to analyze number of iterations for this kind of loop.
>
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>
> gcc/ChangeLog:
>
> PR tree-optimization/101145
> * tree-ssa-loop-niter.c
> (number_of_iterations_until_wrap): New function.
> (number_of_iterations_lt): Invoke above function.
> (adjust_cond_for_loop_until_wrap):
> Merge to number_of_iterations_until_wrap.
> (number_of_iterations_cond): Update invokes for
> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/101145
> * gcc.dg/vect/pr101145.c: New test.
> * gcc.dg/vect/pr101145.inc: New test.
> * gcc.dg/vect/pr101145_1.c: New test.
> * gcc.dg/vect/pr101145_2.c: New test.
> * gcc.dg/vect/pr101145_3.c: New test.
> ---
> gcc/testsuite/gcc.dg/vect/pr101145.c | 187 +++++++++++++++++++++++++
> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
> gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
> 6 files changed, 380 insertions(+), 65 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index b5add827018..06db6a36ef8 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
> }
> }
>
> +/* Determines number of iterations of loop whose ending condition
> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
> + The number of iterations is stored to NITER. */
> +
> +static bool
> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
> + affine_iv *iv1, class tree_niter_desc *niter)
> +{
> + tree niter_type = unsigned_type_for (type);
> + tree max, min;
> +
> + if (POINTER_TYPE_P (type))
> + {
> + max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
> + min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
> + }
> + else
> + {
> + max = TYPE_MAX_VALUE (type);
> + min = TYPE_MIN_VALUE (type);
> + }
> +
> + tree high = max, low = min, one = build_int_cst (niter_type, 1);
> + tree step;
> +
> + /* n < {base, C}. */
> + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
> + && !tree_int_cst_sign_bit (iv1->step))
> + {
> + step = iv1->step;
> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
max/iv1->base could be of pointer type, not sure if this is canonical though.
> + if (TREE_CODE (iv1->base) == INTEGER_CST)
> + low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
> + else if (TREE_CODE (iv0->base) == INTEGER_CST)
> + low = iv0->base;
> + }
> + /* {base, -C} < n. */
> + else if (TREE_CODE (iv0->step) == INTEGER_CST
> + && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
> + {
> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
> + if (TREE_CODE (iv0->base) == INTEGER_CST)
> + high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
> + else if (TREE_CODE (iv1->base) == INTEGER_CST)
> + high = iv1->base;
> + }
> + else
> + return false;
> +
> + /* (delta + step - 1) / step */
> + step = fold_convert (niter_type, step);
> + niter->niter = fold_convert (niter_type, niter->niter);
> + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step);
> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, step);
> +
> + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
> + m = fold_convert (niter_type, m);
> + mpz_t mstep, tmp, mmax;
> + mpz_init (mstep);
> + mpz_init (tmp);
> + mpz_init (mmax);
> + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
> + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
> + mpz_add (tmp, mmax, mstep);
> + mpz_sub_ui (tmp, tmp, 1);
> + mpz_fdiv_q (tmp, tmp, mstep);
> + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
> + TYPE_SIGN (niter_type));
This computation is similar to function number_of_iterations_lt, could
we factor it out into an independent function?
> + mpz_clear (mstep);
> + mpz_clear (tmp);
> +
> + niter->may_be_zero
> + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
If iv0->base and iv1->base are constant and iv1->base <= iv0->base,
the number of iteration is actually zero, but here we rely on
may_be_zero (== true), which loses information. Could we specially
handle this case and do a fast return?
Could you test this on some more targets(x86, aarch64) please? Otherwise LGTM.
Thanks,
bin
> +
> + niter->control.no_overflow = false;
> +
> + return true;
> +}
> +
> /* Determines number of iterations of loop whose ending condition
> is IV0 < IV1. TYPE is the type of the iv. The number of
> iterations is stored to NITER. BNDS bounds the difference
> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
> niter->bound = iv0->base;
> }
>
> + /* {base, -C} < n, or n < {base, C} */
> + if (tree_int_cst_sign_bit (iv0->step)
> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
> + return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
> +
> delta = fold_build2 (MINUS_EXPR, niter_type,
> fold_convert (niter_type, iv1->base),
> fold_convert (niter_type, iv0->base));
> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
> }
> }
>
> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
> - the condition for loop-until-wrap cases. For example:
> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9
> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
> - Return true if condition is successfully adjusted. */
> -
> -static bool
> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
> - affine_iv *iv1)
> -{
> - /* Only support simple cases for the moment. */
> - if (TREE_CODE (iv0->base) != INTEGER_CST
> - || TREE_CODE (iv1->base) != INTEGER_CST)
> - return false;
> -
> - tree niter_type = unsigned_type_for (type), high, low;
> - /* Case: i-- < 10. */
> - if (integer_zerop (iv1->step))
> - {
> - /* TODO: Should handle case in which abs(step) != 1. */
> - if (!integer_minus_onep (iv0->step))
> - return false;
> - /* Give up on infinite loop. */
> - if (*code == LE_EXPR
> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
> - return false;
> - high = fold_build2 (PLUS_EXPR, niter_type,
> - fold_convert (niter_type, iv0->base),
> - build_int_cst (niter_type, 1));
> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
> - }
> - else if (integer_zerop (iv0->step))
> - {
> - /* TODO: Should handle case in which abs(step) != 1. */
> - if (!integer_onep (iv1->step))
> - return false;
> - /* Give up on infinite loop. */
> - if (*code == LE_EXPR
> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
> - return false;
> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
> - low = fold_build2 (MINUS_EXPR, niter_type,
> - fold_convert (niter_type, iv1->base),
> - build_int_cst (niter_type, 1));
> - }
> - else
> - gcc_unreachable ();
> -
> - iv0->base = low;
> - iv0->step = fold_convert (niter_type, integer_one_node);
> - iv1->base = high;
> - iv1->step = build_int_cst (niter_type, 0);
> - *code = NE_EXPR;
> - return true;
> -}
> -
> /* Determine the number of iterations according to condition (for staying
> inside loop) which compares two induction variables using comparison
> operator CODE. The induction variable on left side of the comparison
> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
> return true;
> }
>
> - /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
> - adjusting iv0, iv1 and code. */
> - if (code != NE_EXPR
> - && (tree_int_cst_sign_bit (iv0->step)
> - || (!integer_zerop (iv1->step)
> - && !tree_int_cst_sign_bit (iv1->step)))
> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
> - return false;
> -
> /* OK, now we know we have a senseful loop. Handle several cases, depending
> on what comparison operator is used. */
> bound_difference (loop, iv1->base, iv0->base, &bnds);
> --
> 2.17.1
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
2021-07-01 7:22 ` Bin.Cheng
@ 2021-07-01 11:32 ` guojiufu
0 siblings, 0 replies; 8+ messages in thread
From: guojiufu @ 2021-07-01 11:32 UTC (permalink / raw)
To: Bin.Cheng
Cc: gcc-patches List, Richard Biener, amker, Segher Boessenkool,
jlaw, Bill Schmidt, dje.gcc
On 2021-07-01 15:22, Bin.Cheng wrote:
> On Thu, Jul 1, 2021 at 10:06 AM Jiufu Guo via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> For code like:
>> unsigned foo(unsigned val, unsigned start)
>> {
>> unsigned cnt = 0;
>> for (unsigned i = start; i > val; ++i)
>> cnt++;
>> return cnt;
>> }
>>
>> The number of iterations should be about UINT_MAX - start.
>>
>> There is function adjust_cond_for_loop_until_wrap which
>> handles similar work for const bases.
>> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> function number_of_iterations_cond/number_of_iterations_lt
>> to analyze number of iterations for this kind of loop.
>>
>> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>>
>> gcc/ChangeLog:
>>
>> PR tree-optimization/101145
>> * tree-ssa-loop-niter.c
>> (number_of_iterations_until_wrap): New function.
>> (number_of_iterations_lt): Invoke above function.
>> (adjust_cond_for_loop_until_wrap):
>> Merge to number_of_iterations_until_wrap.
>> (number_of_iterations_cond): Update invokes for
>> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>>
>> gcc/testsuite/ChangeLog:
>>
>> PR tree-optimization/101145
>> * gcc.dg/vect/pr101145.c: New test.
>> * gcc.dg/vect/pr101145.inc: New test.
>> * gcc.dg/vect/pr101145_1.c: New test.
>> * gcc.dg/vect/pr101145_2.c: New test.
>> * gcc.dg/vect/pr101145_3.c: New test.
>> ---
>> gcc/testsuite/gcc.dg/vect/pr101145.c | 187
>> +++++++++++++++++++++++++
>> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
>> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
>> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
>> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
>> gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
>> 6 files changed, 380 insertions(+), 65 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>
>
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index b5add827018..06db6a36ef8 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
>> *iv0, affine_iv *iv1,
>> }
>> }
>>
>> +/* Determines number of iterations of loop whose ending condition
>> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
>> + The number of iterations is stored to NITER. */
>> +
>> +static bool
>> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv
>> *iv0,
>> + affine_iv *iv1, class tree_niter_desc
>> *niter)
>> +{
>> + tree niter_type = unsigned_type_for (type);
>> + tree max, min;
>> +
>> + if (POINTER_TYPE_P (type))
>> + {
>> + max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> + min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> + }
>> + else
>> + {
>> + max = TYPE_MAX_VALUE (type);
>> + min = TYPE_MIN_VALUE (type);
>> + }
>> +
>> + tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> + tree step;
>> +
>> + /* n < {base, C}. */
>> + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) ==
>> INTEGER_CST
>> + && !tree_int_cst_sign_bit (iv1->step))
>> + {
>> + step = iv1->step;
>> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max,
>> iv1->base);
> max/iv1->base could be of pointer type, not sure if this is canonical
> though.
Thanks. Pointer needs careful attention. I added case pr101145_3.c for
pointer,
as test, the iteration number is 7: 0xffffffffffffffe4 -
0xffffffffffffffff,
where pointer type is pointer to int: "int *". It works as expected.
I notice in number_of_iterations_lt, there are code likes:
delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base));
This would also be ok.
>
>> + if (TREE_CODE (iv1->base) == INTEGER_CST)
>> + low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> + else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> + low = iv0->base;
>> + }
>> + /* {base, -C} < n. */
>> + else if (TREE_CODE (iv0->step) == INTEGER_CST
>> + && tree_int_cst_sign_bit (iv0->step) && integer_zerop
>> (iv1->step))
>> + {
>> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
>> iv0->step);
>> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
>> min);
>> + if (TREE_CODE (iv0->base) == INTEGER_CST)
>> + high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> + else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> + high = iv1->base;
>> + }
>> + else
>> + return false;
>> +
>> + /* (delta + step - 1) / step */
>> + step = fold_convert (niter_type, step);
>> + niter->niter = fold_convert (niter_type, niter->niter);
>> + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter,
>> step);
>> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type,
>> niter->niter, step);
>> +
>> + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> + m = fold_convert (niter_type, m);
>> + mpz_t mstep, tmp, mmax;
>> + mpz_init (mstep);
>> + mpz_init (tmp);
>> + mpz_init (mmax);
>> + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> + mpz_add (tmp, mmax, mstep);
>> + mpz_sub_ui (tmp, tmp, 1);
>> + mpz_fdiv_q (tmp, tmp, mstep);
>> + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp,
>> false),
>> + TYPE_SIGN (niter_type));
> This computation is similar to function number_of_iterations_lt, could
> we factor it out into an independent function?
Thanks! Will update as you said.
>
>> + mpz_clear (mstep);
>> + mpz_clear (tmp);
>> +
>> + niter->may_be_zero
>> + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
> If iv0->base and iv1->base are constant and iv1->base <= iv0->base,
> the number of iteration is actually zero, but here we rely on
> may_be_zero (== true), which loses information. Could we specially
> handle this case and do a fast return?
As tests, if iv1->base <= iv0->base in user source code, this function
will not be called, even number_of_iterations_exit_assumptions will not
be called. But it is still may be possible to run into this function if
some previous optimizations generate constant bases.
Then a fast return would be useful, I will update the patch.
>
> Could you test this on some more targets(x86, aarch64) please?
> Otherwise LGTM.
I will have some tests on other targets. Hope there is no difference
between
targets for this patch :)
BR,
Jiufu Guo
>
> Thanks,
> bin
>> +
>> + niter->control.no_overflow = false;
>> +
>> + return true;
>> +}
>> +
>> /* Determines number of iterations of loop whose ending condition
>> is IV0 < IV1. TYPE is the type of the iv. The number of
>> iterations is stored to NITER. BNDS bounds the difference
>> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree
>> type, affine_iv *iv0,
>> niter->bound = iv0->base;
>> }
>>
>> + /* {base, -C} < n, or n < {base, C} */
>> + if (tree_int_cst_sign_bit (iv0->step)
>> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
>> (iv1->step)))
>> + return number_of_iterations_until_wrap (loop, type, iv0, iv1,
>> niter);
>> +
>> delta = fold_build2 (MINUS_EXPR, niter_type,
>> fold_convert (niter_type, iv1->base),
>> fold_convert (niter_type, iv0->base));
>> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>> }
>> }
>>
>> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> - the condition for loop-until-wrap cases. For example:
>> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9
>> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
>> - Return true if condition is successfully adjusted. */
>> -
>> -static bool
>> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
>> *code,
>> - affine_iv *iv1)
>> -{
>> - /* Only support simple cases for the moment. */
>> - if (TREE_CODE (iv0->base) != INTEGER_CST
>> - || TREE_CODE (iv1->base) != INTEGER_CST)
>> - return false;
>> -
>> - tree niter_type = unsigned_type_for (type), high, low;
>> - /* Case: i-- < 10. */
>> - if (integer_zerop (iv1->step))
>> - {
>> - /* TODO: Should handle case in which abs(step) != 1. */
>> - if (!integer_minus_onep (iv0->step))
>> - return false;
>> - /* Give up on infinite loop. */
>> - if (*code == LE_EXPR
>> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> - return false;
>> - high = fold_build2 (PLUS_EXPR, niter_type,
>> - fold_convert (niter_type, iv0->base),
>> - build_int_cst (niter_type, 1));
>> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> - }
>> - else if (integer_zerop (iv0->step))
>> - {
>> - /* TODO: Should handle case in which abs(step) != 1. */
>> - if (!integer_onep (iv1->step))
>> - return false;
>> - /* Give up on infinite loop. */
>> - if (*code == LE_EXPR
>> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> - return false;
>> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> - low = fold_build2 (MINUS_EXPR, niter_type,
>> - fold_convert (niter_type, iv1->base),
>> - build_int_cst (niter_type, 1));
>> - }
>> - else
>> - gcc_unreachable ();
>> -
>> - iv0->base = low;
>> - iv0->step = fold_convert (niter_type, integer_one_node);
>> - iv1->base = high;
>> - iv1->step = build_int_cst (niter_type, 0);
>> - *code = NE_EXPR;
>> - return true;
>> -}
>> -
>> /* Determine the number of iterations according to condition (for
>> staying
>> inside loop) which compares two induction variables using
>> comparison
>> operator CODE. The induction variable on left side of the
>> comparison
>> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>> return true;
>> }
>>
>> - /* Handle special case loops: while (i-- < 10) and while (10 < i++)
>> by
>> - adjusting iv0, iv1 and code. */
>> - if (code != NE_EXPR
>> - && (tree_int_cst_sign_bit (iv0->step)
>> - || (!integer_zerop (iv1->step)
>> - && !tree_int_cst_sign_bit (iv1->step)))
>> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> - return false;
>> -
>> /* OK, now we know we have a senseful loop. Handle several cases,
>> depending
>> on what comparison operator is used. */
>> bound_difference (loop, iv1->base, iv0->base, &bnds);
>> --
>> 2.17.1
>>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
2021-07-01 2:05 [PATCH] Analyze niter for until-wrap condition [PR101145] Jiufu Guo
2021-07-01 7:22 ` Bin.Cheng
@ 2021-07-01 12:35 ` Richard Biener
2021-07-01 14:12 ` guojiufu
2021-07-02 7:55 ` guojiufu
1 sibling, 2 replies; 8+ messages in thread
From: Richard Biener @ 2021-07-01 12:35 UTC (permalink / raw)
To: Jiufu Guo; +Cc: gcc-patches, wschmidt, segher, dje.gcc, jlaw, amker
On Thu, 1 Jul 2021, Jiufu Guo wrote:
> For code like:
> unsigned foo(unsigned val, unsigned start)
> {
> unsigned cnt = 0;
> for (unsigned i = start; i > val; ++i)
> cnt++;
> return cnt;
> }
>
> The number of iterations should be about UINT_MAX - start.
For
unsigned foo(unsigned val, unsigned start)
{
unsigned cnt = 0;
for (unsigned i = start; i >= val; ++i)
cnt++;
return cnt;
}
and val == 0 the loop never terminates. I don't see anywhere
in the patch that you disregard GE_EXPR and I remember
the code handles GE as well as GT? From a quick look this is
also not covered by a testcase you add - not exactly sure
how it would materialize in a miscompilation.
> There is function adjust_cond_for_loop_until_wrap which
> handles similar work for const bases.
> Like adjust_cond_for_loop_until_wrap, this patch enhance
> function number_of_iterations_cond/number_of_iterations_lt
> to analyze number of iterations for this kind of loop.
>
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>
> gcc/ChangeLog:
>
> PR tree-optimization/101145
> * tree-ssa-loop-niter.c
> (number_of_iterations_until_wrap): New function.
> (number_of_iterations_lt): Invoke above function.
> (adjust_cond_for_loop_until_wrap):
> Merge to number_of_iterations_until_wrap.
> (number_of_iterations_cond): Update invokes for
> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/101145
> * gcc.dg/vect/pr101145.c: New test.
> * gcc.dg/vect/pr101145.inc: New test.
> * gcc.dg/vect/pr101145_1.c: New test.
> * gcc.dg/vect/pr101145_2.c: New test.
> * gcc.dg/vect/pr101145_3.c: New test.
> ---
> gcc/testsuite/gcc.dg/vect/pr101145.c | 187 +++++++++++++++++++++++++
> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
> gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
> 6 files changed, 380 insertions(+), 65 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c
> new file mode 100644
> index 00000000000..74031b031cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
> @@ -0,0 +1,187 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#include <limits.h>
> +
> +unsigned __attribute__ ((noinline))
> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> + while (n < ++l)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
> +{
> + while (UINT_MAX - 64 < ++l)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> + l = UINT_MAX - 32;
> + while (n < ++l)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> + while (n <= ++l)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{ // infininate
> + while (0 <= ++l)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> + //no loop
> + l = UINT_MAX;
> + while (n < ++l)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> + while (--l < n)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
> +{
> + while (--l < 64)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> + l = 32;
> + while (--l < n)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +
> +int a[3200], b[3200];
> +int fail;
> +
> +int
> +main ()
> +{
> + unsigned l, n;
> + unsigned res;
> + /* l > n*/
> + n = UINT_MAX - 64;
> + l = n + 32;
> + res = foo (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + l = n;
> + res = foo (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + l = n - 1;
> + res = foo (a, b, l, n);
> + if (res != l + 1)
> + fail++;
> +
> + l = n - 32;
> + res = foo (a, b, l, n);
> + if (res != l + 1)
> + fail++;
> +
> + l = UINT_MAX;
> + res = foo (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + l = n + 32;
> + res = foo_1 (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + l = n + 32;
> + res = foo_2 (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + l = n;
> + res = foo_3 (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + l = n - 1;
> + res = foo_3 (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + l = n - 2;
> + res = foo_3 (a, b, l, n);
> + if (res != l + 1)
> + fail++;
> +
> + res = foo_5 (a, b, l, n);
> + if (res != 0)
> + fail++;
> +
> + n = 64;
> + l = n - 32;
> + res = bar (a, b, l, n);
> + res++;
> + if (res != 0)
> + fail++;
> +
> + l = n;
> + res = bar (a, b, l, n);
> + res++;
> + if (res != 0)
> + fail++;
> +
> + l = n + 1;
> + res = bar (a, b, l, n);
> + res++;
> + if (res != l)
> + fail++;
> +
> + l = 0;
> + res = bar (a, b, l, n);
> + res++;
> + if (res != l)
> + fail++;
> +
> + l = 32;
> + res = bar_1 (a, b, l, n);
> + res++;
> + if (res != 0)
> + fail++;
> +
> + res = bar_1 (a, b, l, n);
> + res++;
> + if (res != 0)
> + fail++;
> +
> + if (fail)
> + __builtin_abort ();
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> new file mode 100644
> index 00000000000..6eed3fa8aca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> @@ -0,0 +1,63 @@
> +TYPE __attribute__ ((noinline))
> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> +{
> + for (l = L_BASE; n < l; l += C)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +TYPE __attribute__ ((noinline))
> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> +{
> + for (l = L_BASE_DOWN; l < n; l -= C)
> + *a++ = *b++ + 1;
> + return l;
> +}
> +
> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
> +
> +int a[1000], b[1000];
> +int fail;
> +
> +int
> +main ()
> +{
> + TYPE res;
> + TYPE l;
> + TYPE n;
> + n = N_BASE;
> + l = n - C;
> + res = foo_sign (a, b, l, n);
> + if (res != l)
> + fail++;
> +
> + l = n;
> + res = foo_sign (a, b, l, n);
> + if (res != l)
> + fail++;
> +
> + l = n + C;
> + res = foo_sign (a, b, l, n);
> + if (neq ((res - MIN) / C, 0))
> + fail++;
> +
> + n = N_BASE_DOWN;
> + l = n - C;
> + res = bar_sign (a, b, l, n);
> + if (neq ((MAX - res) / C, 0))
> + fail++;
> +
> + l = n;
> + res = bar_sign (a, b, l, n);
> + if (res != l)
> + fail++;
> +
> + l = n + C;
> + res = bar_sign (a, b, l, n);
> + if (res != l)
> + fail++;
> +
> + if (fail)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> new file mode 100644
> index 00000000000..94f6b99b893
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> @@ -0,0 +1,15 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#define TYPE signed char
> +#define MIN -128
> +#define MAX 127
> +#define N_BASE (MAX - 32)
> +#define N_BASE_DOWN (MIN + 32)
> +
> +#define C 3
> +#define L_BASE l
> +#define L_BASE_DOWN l
> +
> +#include "pr101145.inc"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> new file mode 100644
> index 00000000000..d3cfc9e01e1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> @@ -0,0 +1,15 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#define TYPE unsigned char
> +#define MIN 0
> +#define MAX 255
> +#define N_BASE (MAX - 32 + 1)
> +#define N_BASE_DOWN (MIN + 32)
> +
> +#define C 2
> +#define L_BASE l
> +#define L_BASE_DOWN l
> +
> +#include "pr101145.inc"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> new file mode 100644
> index 00000000000..e2cf9b1f7e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> @@ -0,0 +1,15 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#define TYPE int *
> +#define MIN ((TYPE)0)
> +#define MAX ((TYPE)((long long)-1))
> +#define N_BASE ((TYPE) (-32))
> +#define N_BASE_DOWN (MIN + 32)
> +
> +#define C 1
> +#define L_BASE l
> +#define L_BASE_DOWN l
> +
> +#include "pr101145.inc"
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index b5add827018..06db6a36ef8 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
> }
> }
>
> +/* Determines number of iterations of loop whose ending condition
> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
> + The number of iterations is stored to NITER. */
> +
> +static bool
> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
> + affine_iv *iv1, class tree_niter_desc *niter)
> +{
> + tree niter_type = unsigned_type_for (type);
> + tree max, min;
> +
> + if (POINTER_TYPE_P (type))
> + {
> + max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
> + min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
> + }
> + else
> + {
> + max = TYPE_MAX_VALUE (type);
> + min = TYPE_MIN_VALUE (type);
> + }
It would be cheaper to use wide_int here - wi::min/max_value
(TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
code below.
> + tree high = max, low = min, one = build_int_cst (niter_type, 1);
> + tree step;
> +
> + /* n < {base, C}. */
> + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
> + && !tree_int_cst_sign_bit (iv1->step))
> + {
> + step = iv1->step;
> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
> + if (TREE_CODE (iv1->base) == INTEGER_CST)
> + low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
> + else if (TREE_CODE (iv0->base) == INTEGER_CST)
> + low = iv0->base;
> + }
> + /* {base, -C} < n. */
> + else if (TREE_CODE (iv0->step) == INTEGER_CST
> + && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
> + {
> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
> + if (TREE_CODE (iv0->base) == INTEGER_CST)
> + high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
> + else if (TREE_CODE (iv1->base) == INTEGER_CST)
> + high = iv1->base;
> + }
> + else
> + return false;
> +
> + /* (delta + step - 1) / step */
> + step = fold_convert (niter_type, step);
> + niter->niter = fold_convert (niter_type, niter->niter);
> + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step);
> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, step);
> +
> + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
> + m = fold_convert (niter_type, m);
> + mpz_t mstep, tmp, mmax;
> + mpz_init (mstep);
> + mpz_init (tmp);
> + mpz_init (mmax);
> + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
> + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
> + mpz_add (tmp, mmax, mstep);
> + mpz_sub_ui (tmp, tmp, 1);
> + mpz_fdiv_q (tmp, tmp, mstep);
> + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
> + TYPE_SIGN (niter_type));
> + mpz_clear (mstep);
> + mpz_clear (tmp);
You forget to clear mmax? I wonder why you need to involve GMP here
at all since this is just add/sub/divide - you could use
widest_int directly I think. At least the back-and-forth between
trees, wide-int and mpz looks odd.
Richard.
> + niter->may_be_zero
> + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
> +
> + niter->control.no_overflow = false;
> +
> + return true;
> +}
> +
> /* Determines number of iterations of loop whose ending condition
> is IV0 < IV1. TYPE is the type of the iv. The number of
> iterations is stored to NITER. BNDS bounds the difference
> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
> niter->bound = iv0->base;
> }
>
> + /* {base, -C} < n, or n < {base, C} */
> + if (tree_int_cst_sign_bit (iv0->step)
> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step)))
> + return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
> +
> delta = fold_build2 (MINUS_EXPR, niter_type,
> fold_convert (niter_type, iv1->base),
> fold_convert (niter_type, iv0->base));
> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
> }
> }
>
> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
> - the condition for loop-until-wrap cases. For example:
> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9
> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
> - Return true if condition is successfully adjusted. */
> -
> -static bool
> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
> - affine_iv *iv1)
> -{
> - /* Only support simple cases for the moment. */
> - if (TREE_CODE (iv0->base) != INTEGER_CST
> - || TREE_CODE (iv1->base) != INTEGER_CST)
> - return false;
> -
> - tree niter_type = unsigned_type_for (type), high, low;
> - /* Case: i-- < 10. */
> - if (integer_zerop (iv1->step))
> - {
> - /* TODO: Should handle case in which abs(step) != 1. */
> - if (!integer_minus_onep (iv0->step))
> - return false;
> - /* Give up on infinite loop. */
> - if (*code == LE_EXPR
> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
> - return false;
> - high = fold_build2 (PLUS_EXPR, niter_type,
> - fold_convert (niter_type, iv0->base),
> - build_int_cst (niter_type, 1));
> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
> - }
> - else if (integer_zerop (iv0->step))
> - {
> - /* TODO: Should handle case in which abs(step) != 1. */
> - if (!integer_onep (iv1->step))
> - return false;
> - /* Give up on infinite loop. */
> - if (*code == LE_EXPR
> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
> - return false;
> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
> - low = fold_build2 (MINUS_EXPR, niter_type,
> - fold_convert (niter_type, iv1->base),
> - build_int_cst (niter_type, 1));
> - }
> - else
> - gcc_unreachable ();
> -
> - iv0->base = low;
> - iv0->step = fold_convert (niter_type, integer_one_node);
> - iv1->base = high;
> - iv1->step = build_int_cst (niter_type, 0);
> - *code = NE_EXPR;
> - return true;
> -}
> -
> /* Determine the number of iterations according to condition (for staying
> inside loop) which compares two induction variables using comparison
> operator CODE. The induction variable on left side of the comparison
> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
> return true;
> }
>
> - /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
> - adjusting iv0, iv1 and code. */
> - if (code != NE_EXPR
> - && (tree_int_cst_sign_bit (iv0->step)
> - || (!integer_zerop (iv1->step)
> - && !tree_int_cst_sign_bit (iv1->step)))
> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
> - return false;
> -
> /* OK, now we know we have a senseful loop. Handle several cases, depending
> on what comparison operator is used. */
> bound_difference (loop, iv1->base, iv0->base, &bnds);
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
2021-07-01 12:35 ` Richard Biener
@ 2021-07-01 14:12 ` guojiufu
2021-07-02 0:51 ` Bin.Cheng
2021-07-02 7:55 ` guojiufu
1 sibling, 1 reply; 8+ messages in thread
From: guojiufu @ 2021-07-01 14:12 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, wschmidt, segher, dje.gcc, jlaw, amker
On 2021-07-01 20:35, Richard Biener wrote:
> On Thu, 1 Jul 2021, Jiufu Guo wrote:
>
>> For code like:
>> unsigned foo(unsigned val, unsigned start)
>> {
>> unsigned cnt = 0;
>> for (unsigned i = start; i > val; ++i)
>> cnt++;
>> return cnt;
>> }
>>
>> The number of iterations should be about UINT_MAX - start.
>
> For
>
> unsigned foo(unsigned val, unsigned start)
> {
> unsigned cnt = 0;
> for (unsigned i = start; i >= val; ++i)
> cnt++;
> return cnt;
> }
>
> and val == 0 the loop never terminates. I don't see anywhere
> in the patch that you disregard GE_EXPR and I remember
> the code handles GE as well as GT? From a quick look this is
> also not covered by a testcase you add - not exactly sure
> how it would materialize in a miscompilation.
In number_of_iterations_cond, there is code:
if (code == GE_EXPR || code == GT_EXPR
|| (code == NE_EXPR && integer_zerop (iv0->step)))
{
std::swap (iv0, iv1);
code = swap_tree_comparison (code);
}
It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
and LE (val <= i) is converted to LT (val - 1 < i).
So, the code is added to number_of_iterations_lt.
But, this patch leads mis-compilation for unsigned "i >= val" as
above transforms: converting LE (val <= i) to LT (val - 1 < i)
seems not appropriate (e.g where val=0).
Thanks for pointing out this!!!
I would investigate a way to handle this correctly.
A possible way maybe just to return false for this kind of LE.
Any suggestions?
>
>> There is function adjust_cond_for_loop_until_wrap which
>> handles similar work for const bases.
>> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> function number_of_iterations_cond/number_of_iterations_lt
>> to analyze number of iterations for this kind of loop.
>>
>> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>>
>> gcc/ChangeLog:
>>
>> PR tree-optimization/101145
>> * tree-ssa-loop-niter.c
>> (number_of_iterations_until_wrap): New function.
>> (number_of_iterations_lt): Invoke above function.
>> (adjust_cond_for_loop_until_wrap):
>> Merge to number_of_iterations_until_wrap.
>> (number_of_iterations_cond): Update invokes for
>> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>>
>> gcc/testsuite/ChangeLog:
>>
>> PR tree-optimization/101145
>> * gcc.dg/vect/pr101145.c: New test.
>> * gcc.dg/vect/pr101145.inc: New test.
>> * gcc.dg/vect/pr101145_1.c: New test.
>> * gcc.dg/vect/pr101145_2.c: New test.
>> * gcc.dg/vect/pr101145_3.c: New test.
>> ---
>> gcc/testsuite/gcc.dg/vect/pr101145.c | 187
>> +++++++++++++++++++++++++
>> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
>> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
>> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
>> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
>> gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
>> 6 files changed, 380 insertions(+), 65 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> new file mode 100644
>> index 00000000000..74031b031cf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> @@ -0,0 +1,187 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#include <limits.h>
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + while (n < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> unsigned)
>> +{
>> + while (UINT_MAX - 64 < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + l = UINT_MAX - 32;
>> + while (n < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + while (n <= ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{ // infininate
>> + while (0 <= ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + //no loop
>> + l = UINT_MAX;
>> + while (n < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + while (--l < n)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> unsigned)
>> +{
>> + while (--l < 64)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + l = 32;
>> + while (--l < n)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +
>> +int a[3200], b[3200];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> + unsigned l, n;
>> + unsigned res;
>> + /* l > n*/
>> + n = UINT_MAX - 64;
>> + l = n + 32;
>> + res = foo (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n;
>> + res = foo (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n - 1;
>> + res = foo (a, b, l, n);
>> + if (res != l + 1)
>> + fail++;
>> +
>> + l = n - 32;
>> + res = foo (a, b, l, n);
>> + if (res != l + 1)
>> + fail++;
>> +
>> + l = UINT_MAX;
>> + res = foo (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n + 32;
>> + res = foo_1 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n + 32;
>> + res = foo_2 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n;
>> + res = foo_3 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n - 1;
>> + res = foo_3 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n - 2;
>> + res = foo_3 (a, b, l, n);
>> + if (res != l + 1)
>> + fail++;
>> +
>> + res = foo_5 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + n = 64;
>> + l = n - 32;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n + 1;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != l)
>> + fail++;
>> +
>> + l = 0;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != l)
>> + fail++;
>> +
>> + l = 32;
>> + res = bar_1 (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + res = bar_1 (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + if (fail)
>> + __builtin_abort ();
>> + return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" }
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> new file mode 100644
>> index 00000000000..6eed3fa8aca
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> @@ -0,0 +1,63 @@
>> +TYPE __attribute__ ((noinline))
>> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> + for (l = L_BASE; n < l; l += C)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +TYPE __attribute__ ((noinline))
>> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> + for (l = L_BASE_DOWN; l < n; l -= C)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
>> +
>> +int a[1000], b[1000];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> + TYPE res;
>> + TYPE l;
>> + TYPE n;
>> + n = N_BASE;
>> + l = n - C;
>> + res = foo_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + l = n;
>> + res = foo_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + l = n + C;
>> + res = foo_sign (a, b, l, n);
>> + if (neq ((res - MIN) / C, 0))
>> + fail++;
>> +
>> + n = N_BASE_DOWN;
>> + l = n - C;
>> + res = bar_sign (a, b, l, n);
>> + if (neq ((MAX - res) / C, 0))
>> + fail++;
>> +
>> + l = n;
>> + res = bar_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + l = n + C;
>> + res = bar_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + if (fail)
>> + __builtin_abort ();
>> + return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> new file mode 100644
>> index 00000000000..94f6b99b893
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE signed char
>> +#define MIN -128
>> +#define MAX 127
>> +#define N_BASE (MAX - 32)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 3
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> new file mode 100644
>> index 00000000000..d3cfc9e01e1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE unsigned char
>> +#define MIN 0
>> +#define MAX 255
>> +#define N_BASE (MAX - 32 + 1)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 2
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> new file mode 100644
>> index 00000000000..e2cf9b1f7e6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE int *
>> +#define MIN ((TYPE)0)
>> +#define MAX ((TYPE)((long long)-1))
>> +#define N_BASE ((TYPE) (-32))
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 1
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> } */
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index b5add827018..06db6a36ef8 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
>> *iv0, affine_iv *iv1,
>> }
>> }
>>
>> +/* Determines number of iterations of loop whose ending condition
>> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
>> + The number of iterations is stored to NITER. */
>> +
>> +static bool
>> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv
>> *iv0,
>> + affine_iv *iv1, class tree_niter_desc *niter)
>> +{
>> + tree niter_type = unsigned_type_for (type);
>> + tree max, min;
>> +
>> + if (POINTER_TYPE_P (type))
>> + {
>> + max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> + min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> + }
>> + else
>> + {
>> + max = TYPE_MAX_VALUE (type);
>> + min = TYPE_MIN_VALUE (type);
>> + }
>
> It would be cheaper to use wide_int here - wi::min/max_value
> (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
> code below.
Thanks! I think you mean to use wide_int for "tree high = max, low =
min,".
I would check and update the patch to use wide_int.
The MAX/MIN would be in type 'tree' when binding to MINUS_EXPR with
iv0/iv1->base,
for this, we may still need to use "TYPE_MAX/MIN_VALUE (type)", right?
>
>> + tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> + tree step;
>> +
>> + /* n < {base, C}. */
>> + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) ==
>> INTEGER_CST
>> + && !tree_int_cst_sign_bit (iv1->step))
>> + {
>> + step = iv1->step;
>> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max,
>> iv1->base);
>> + if (TREE_CODE (iv1->base) == INTEGER_CST)
>> + low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> + else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> + low = iv0->base;
>> + }
>> + /* {base, -C} < n. */
>> + else if (TREE_CODE (iv0->step) == INTEGER_CST
>> + && tree_int_cst_sign_bit (iv0->step) && integer_zerop
>> (iv1->step))
>> + {
>> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
>> iv0->step);
>> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
>> min);
>> + if (TREE_CODE (iv0->base) == INTEGER_CST)
>> + high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> + else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> + high = iv1->base;
>> + }
>> + else
>> + return false;
>> +
>> + /* (delta + step - 1) / step */
>> + step = fold_convert (niter_type, step);
>> + niter->niter = fold_convert (niter_type, niter->niter);
>> + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter,
>> step);
>> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type,
>> niter->niter, step);
>> +
>> + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> + m = fold_convert (niter_type, m);
>> + mpz_t mstep, tmp, mmax;
>> + mpz_init (mstep);
>> + mpz_init (tmp);
>> + mpz_init (mmax);
>> + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> + mpz_add (tmp, mmax, mstep);
>> + mpz_sub_ui (tmp, tmp, 1);
>> + mpz_fdiv_q (tmp, tmp, mstep);
>> + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp,
>> false),
>> + TYPE_SIGN (niter_type));
>> + mpz_clear (mstep);
>> + mpz_clear (tmp);
>
> You forget to clear mmax? I wonder why you need to involve GMP here
Oh, thanks for pointing out this :) need to clear mmax.
> at all since this is just add/sub/divide - you could use
> widest_int directly I think. At least the back-and-forth between
> trees, wide-int and mpz looks odd.
I once use 'trees' to do the calculation, while I encounter the issue
that the may not hold the precision for some cases:
"mmax" is MAX(1<<64-1); step is 4, then mmax + 4 becomes 3.
I would check to use widest_int (e.g. long[4]) which seems enough to
hold all types.
BR,
Jiufu Guo.
>
> Richard.
>
>> + niter->may_be_zero
>> + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
>> +
>> + niter->control.no_overflow = false;
>> +
>> + return true;
>> +}
>> +
>> /* Determines number of iterations of loop whose ending condition
>> is IV0 < IV1. TYPE is the type of the iv. The number of
>> iterations is stored to NITER. BNDS bounds the difference
>> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree
>> type, affine_iv *iv0,
>> niter->bound = iv0->base;
>> }
>>
>> + /* {base, -C} < n, or n < {base, C} */
>> + if (tree_int_cst_sign_bit (iv0->step)
>> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
>> (iv1->step)))
>> + return number_of_iterations_until_wrap (loop, type, iv0, iv1,
>> niter);
>> +
>> delta = fold_build2 (MINUS_EXPR, niter_type,
>> fold_convert (niter_type, iv1->base),
>> fold_convert (niter_type, iv0->base));
>> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>> }
>> }
>>
>> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> - the condition for loop-until-wrap cases. For example:
>> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9
>> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
>> - Return true if condition is successfully adjusted. */
>> -
>> -static bool
>> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
>> *code,
>> - affine_iv *iv1)
>> -{
>> - /* Only support simple cases for the moment. */
>> - if (TREE_CODE (iv0->base) != INTEGER_CST
>> - || TREE_CODE (iv1->base) != INTEGER_CST)
>> - return false;
>> -
>> - tree niter_type = unsigned_type_for (type), high, low;
>> - /* Case: i-- < 10. */
>> - if (integer_zerop (iv1->step))
>> - {
>> - /* TODO: Should handle case in which abs(step) != 1. */
>> - if (!integer_minus_onep (iv0->step))
>> - return false;
>> - /* Give up on infinite loop. */
>> - if (*code == LE_EXPR
>> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> - return false;
>> - high = fold_build2 (PLUS_EXPR, niter_type,
>> - fold_convert (niter_type, iv0->base),
>> - build_int_cst (niter_type, 1));
>> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> - }
>> - else if (integer_zerop (iv0->step))
>> - {
>> - /* TODO: Should handle case in which abs(step) != 1. */
>> - if (!integer_onep (iv1->step))
>> - return false;
>> - /* Give up on infinite loop. */
>> - if (*code == LE_EXPR
>> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> - return false;
>> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> - low = fold_build2 (MINUS_EXPR, niter_type,
>> - fold_convert (niter_type, iv1->base),
>> - build_int_cst (niter_type, 1));
>> - }
>> - else
>> - gcc_unreachable ();
>> -
>> - iv0->base = low;
>> - iv0->step = fold_convert (niter_type, integer_one_node);
>> - iv1->base = high;
>> - iv1->step = build_int_cst (niter_type, 0);
>> - *code = NE_EXPR;
>> - return true;
>> -}
>> -
>> /* Determine the number of iterations according to condition (for
>> staying
>> inside loop) which compares two induction variables using
>> comparison
>> operator CODE. The induction variable on left side of the
>> comparison
>> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>> return true;
>> }
>>
>> - /* Handle special case loops: while (i-- < 10) and while (10 < i++)
>> by
>> - adjusting iv0, iv1 and code. */
>> - if (code != NE_EXPR
>> - && (tree_int_cst_sign_bit (iv0->step)
>> - || (!integer_zerop (iv1->step)
>> - && !tree_int_cst_sign_bit (iv1->step)))
>> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> - return false;
>> -
>> /* OK, now we know we have a senseful loop. Handle several cases,
>> depending
>> on what comparison operator is used. */
>> bound_difference (loop, iv1->base, iv0->base, &bnds);
>>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
2021-07-01 14:12 ` guojiufu
@ 2021-07-02 0:51 ` Bin.Cheng
2021-07-02 4:06 ` guojiufu
0 siblings, 1 reply; 8+ messages in thread
From: Bin.Cheng @ 2021-07-02 0:51 UTC (permalink / raw)
To: guojiufu
Cc: Richard Biener, Segher Boessenkool, amker, gcc-patches List,
Bill Schmidt, jlaw, dje.gcc
On Thu, Jul 1, 2021 at 10:15 PM guojiufu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On 2021-07-01 20:35, Richard Biener wrote:
> > On Thu, 1 Jul 2021, Jiufu Guo wrote:
> >
> >> For code like:
> >> unsigned foo(unsigned val, unsigned start)
> >> {
> >> unsigned cnt = 0;
> >> for (unsigned i = start; i > val; ++i)
> >> cnt++;
> >> return cnt;
> >> }
> >>
> >> The number of iterations should be about UINT_MAX - start.
> >
> > For
> >
> > unsigned foo(unsigned val, unsigned start)
> > {
> > unsigned cnt = 0;
> > for (unsigned i = start; i >= val; ++i)
> > cnt++;
> > return cnt;
> > }
> >
> > and val == 0 the loop never terminates. I don't see anywhere
> > in the patch that you disregard GE_EXPR and I remember
> > the code handles GE as well as GT? From a quick look this is
> > also not covered by a testcase you add - not exactly sure
> > how it would materialize in a miscompilation.
>
> In number_of_iterations_cond, there is code:
> if (code == GE_EXPR || code == GT_EXPR
> || (code == NE_EXPR && integer_zerop (iv0->step)))
> {
> std::swap (iv0, iv1);
> code = swap_tree_comparison (code);
> }
> It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
> and LE (val <= i) is converted to LT (val - 1 < i).
> So, the code is added to number_of_iterations_lt.
>
> But, this patch leads mis-compilation for unsigned "i >= val" as
> above transforms: converting LE (val <= i) to LT (val - 1 < i)
> seems not appropriate (e.g where val=0).
I don't know where the exact code is, but IIRC, number_of_iteration
handles boundary conditions when transforming <= into <. You may
check it out.
> Thanks for pointing out this!!!
>
> I would investigate a way to handle this correctly.
> A possible way maybe just to return false for this kind of LE.
IIRC, it checks the boundary conditions, either returns false or
simply introduces more assumptions.
>
> Any suggestions?
>
> >
> >> There is function adjust_cond_for_loop_until_wrap which
> >> handles similar work for const bases.
> >> Like adjust_cond_for_loop_until_wrap, this patch enhance
> >> function number_of_iterations_cond/number_of_iterations_lt
> >> to analyze number of iterations for this kind of loop.
> >>
> >> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
> >>
> >> gcc/ChangeLog:
> >>
> >> PR tree-optimization/101145
> >> * tree-ssa-loop-niter.c
> >> (number_of_iterations_until_wrap): New function.
> >> (number_of_iterations_lt): Invoke above function.
> >> (adjust_cond_for_loop_until_wrap):
> >> Merge to number_of_iterations_until_wrap.
> >> (number_of_iterations_cond): Update invokes for
> >> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> PR tree-optimization/101145
> >> * gcc.dg/vect/pr101145.c: New test.
> >> * gcc.dg/vect/pr101145.inc: New test.
> >> * gcc.dg/vect/pr101145_1.c: New test.
> >> * gcc.dg/vect/pr101145_2.c: New test.
> >> * gcc.dg/vect/pr101145_3.c: New test.
> >> ---
> >> gcc/testsuite/gcc.dg/vect/pr101145.c | 187
> >> +++++++++++++++++++++++++
> >> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
> >> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
> >> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
> >> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
> >> gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
> >> 6 files changed, 380 insertions(+), 65 deletions(-)
> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> new file mode 100644
> >> index 00000000000..74031b031cf
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> @@ -0,0 +1,187 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#include <limits.h>
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> + while (n < ++l)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
> >> unsigned)
> >> +{
> >> + while (UINT_MAX - 64 < ++l)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> + l = UINT_MAX - 32;
> >> + while (n < ++l)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> + while (n <= ++l)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{ // infininate
> >> + while (0 <= ++l)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> + //no loop
> >> + l = UINT_MAX;
> >> + while (n < ++l)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> + while (--l < n)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
> >> unsigned)
> >> +{
> >> + while (--l < 64)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> + l = 32;
> >> + while (--l < n)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +
> >> +int a[3200], b[3200];
> >> +int fail;
> >> +
> >> +int
> >> +main ()
> >> +{
> >> + unsigned l, n;
> >> + unsigned res;
> >> + /* l > n*/
> >> + n = UINT_MAX - 64;
> >> + l = n + 32;
> >> + res = foo (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n;
> >> + res = foo (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n - 1;
> >> + res = foo (a, b, l, n);
> >> + if (res != l + 1)
> >> + fail++;
> >> +
> >> + l = n - 32;
> >> + res = foo (a, b, l, n);
> >> + if (res != l + 1)
> >> + fail++;
> >> +
> >> + l = UINT_MAX;
> >> + res = foo (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n + 32;
> >> + res = foo_1 (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n + 32;
> >> + res = foo_2 (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n;
> >> + res = foo_3 (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n - 1;
> >> + res = foo_3 (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n - 2;
> >> + res = foo_3 (a, b, l, n);
> >> + if (res != l + 1)
> >> + fail++;
> >> +
> >> + res = foo_5 (a, b, l, n);
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + n = 64;
> >> + l = n - 32;
> >> + res = bar (a, b, l, n);
> >> + res++;
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n;
> >> + res = bar (a, b, l, n);
> >> + res++;
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + l = n + 1;
> >> + res = bar (a, b, l, n);
> >> + res++;
> >> + if (res != l)
> >> + fail++;
> >> +
> >> + l = 0;
> >> + res = bar (a, b, l, n);
> >> + res++;
> >> + if (res != l)
> >> + fail++;
> >> +
> >> + l = 32;
> >> + res = bar_1 (a, b, l, n);
> >> + res++;
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + res = bar_1 (a, b, l, n);
> >> + res++;
> >> + if (res != 0)
> >> + fail++;
> >> +
> >> + if (fail)
> >> + __builtin_abort ();
> >> + return 0;
> >> +}
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" }
> >> } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> new file mode 100644
> >> index 00000000000..6eed3fa8aca
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> @@ -0,0 +1,63 @@
> >> +TYPE __attribute__ ((noinline))
> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> >> +{
> >> + for (l = L_BASE; n < l; l += C)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +TYPE __attribute__ ((noinline))
> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> >> +{
> >> + for (l = L_BASE_DOWN; l < n; l -= C)
> >> + *a++ = *b++ + 1;
> >> + return l;
> >> +}
> >> +
> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
> >> +
> >> +int a[1000], b[1000];
> >> +int fail;
> >> +
> >> +int
> >> +main ()
> >> +{
> >> + TYPE res;
> >> + TYPE l;
> >> + TYPE n;
> >> + n = N_BASE;
> >> + l = n - C;
> >> + res = foo_sign (a, b, l, n);
> >> + if (res != l)
> >> + fail++;
> >> +
> >> + l = n;
> >> + res = foo_sign (a, b, l, n);
> >> + if (res != l)
> >> + fail++;
> >> +
> >> + l = n + C;
> >> + res = foo_sign (a, b, l, n);
> >> + if (neq ((res - MIN) / C, 0))
> >> + fail++;
> >> +
> >> + n = N_BASE_DOWN;
> >> + l = n - C;
> >> + res = bar_sign (a, b, l, n);
> >> + if (neq ((MAX - res) / C, 0))
> >> + fail++;
> >> +
> >> + l = n;
> >> + res = bar_sign (a, b, l, n);
> >> + if (res != l)
> >> + fail++;
> >> +
> >> + l = n + C;
> >> + res = bar_sign (a, b, l, n);
> >> + if (res != l)
> >> + fail++;
> >> +
> >> + if (fail)
> >> + __builtin_abort ();
> >> + return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> new file mode 100644
> >> index 00000000000..94f6b99b893
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> @@ -0,0 +1,15 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#define TYPE signed char
> >> +#define MIN -128
> >> +#define MAX 127
> >> +#define N_BASE (MAX - 32)
> >> +#define N_BASE_DOWN (MIN + 32)
> >> +
> >> +#define C 3
> >> +#define L_BASE l
> >> +#define L_BASE_DOWN l
> >> +
> >> +#include "pr101145.inc"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
> >> } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >> new file mode 100644
> >> index 00000000000..d3cfc9e01e1
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >> @@ -0,0 +1,15 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#define TYPE unsigned char
> >> +#define MIN 0
> >> +#define MAX 255
> >> +#define N_BASE (MAX - 32 + 1)
> >> +#define N_BASE_DOWN (MIN + 32)
> >> +
> >> +#define C 2
> >> +#define L_BASE l
> >> +#define L_BASE_DOWN l
> >> +
> >> +#include "pr101145.inc"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
> >> } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> new file mode 100644
> >> index 00000000000..e2cf9b1f7e6
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >> @@ -0,0 +1,15 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#define TYPE int *
> >> +#define MIN ((TYPE)0)
> >> +#define MAX ((TYPE)((long long)-1))
> >> +#define N_BASE ((TYPE) (-32))
> >> +#define N_BASE_DOWN (MIN + 32)
> >> +
> >> +#define C 1
> >> +#define L_BASE l
> >> +#define L_BASE_DOWN l
> >> +
> >> +#include "pr101145.inc"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
> >> } */
> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> >> index b5add827018..06db6a36ef8 100644
> >> --- a/gcc/tree-ssa-loop-niter.c
> >> +++ b/gcc/tree-ssa-loop-niter.c
> >> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
> >> *iv0, affine_iv *iv1,
> >> }
> >> }
> >>
> >> +/* Determines number of iterations of loop whose ending condition
> >> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
> >> + The number of iterations is stored to NITER. */
> >> +
> >> +static bool
> >> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv
> >> *iv0,
> >> + affine_iv *iv1, class tree_niter_desc *niter)
> >> +{
> >> + tree niter_type = unsigned_type_for (type);
> >> + tree max, min;
> >> +
> >> + if (POINTER_TYPE_P (type))
> >> + {
> >> + max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
> >> + min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
> >> + }
> >> + else
> >> + {
> >> + max = TYPE_MAX_VALUE (type);
> >> + min = TYPE_MIN_VALUE (type);
> >> + }
> >
> > It would be cheaper to use wide_int here - wi::min/max_value
> > (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
> > code below.
>
> Thanks! I think you mean to use wide_int for "tree high = max, low =
> min,".
> I would check and update the patch to use wide_int.
>
> The MAX/MIN would be in type 'tree' when binding to MINUS_EXPR with
> iv0/iv1->base,
> for this, we may still need to use "TYPE_MAX/MIN_VALUE (type)", right?
>
> >
> >> + tree high = max, low = min, one = build_int_cst (niter_type, 1);
> >> + tree step;
> >> +
> >> + /* n < {base, C}. */
> >> + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) ==
> >> INTEGER_CST
> >> + && !tree_int_cst_sign_bit (iv1->step))
> >> + {
> >> + step = iv1->step;
> >> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max,
> >> iv1->base);
> >> + if (TREE_CODE (iv1->base) == INTEGER_CST)
> >> + low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
> >> + else if (TREE_CODE (iv0->base) == INTEGER_CST)
> >> + low = iv0->base;
> >> + }
> >> + /* {base, -C} < n. */
> >> + else if (TREE_CODE (iv0->step) == INTEGER_CST
> >> + && tree_int_cst_sign_bit (iv0->step) && integer_zerop
> >> (iv1->step))
> >> + {
> >> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
> >> iv0->step);
> >> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
> >> min);
> >> + if (TREE_CODE (iv0->base) == INTEGER_CST)
> >> + high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
> >> + else if (TREE_CODE (iv1->base) == INTEGER_CST)
> >> + high = iv1->base;
> >> + }
> >> + else
> >> + return false;
> >> +
> >> + /* (delta + step - 1) / step */
> >> + step = fold_convert (niter_type, step);
> >> + niter->niter = fold_convert (niter_type, niter->niter);
> >> + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter,
> >> step);
> >> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type,
> >> niter->niter, step);
> >> +
> >> + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
> >> + m = fold_convert (niter_type, m);
> >> + mpz_t mstep, tmp, mmax;
> >> + mpz_init (mstep);
> >> + mpz_init (tmp);
> >> + mpz_init (mmax);
> >> + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
> >> + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
> >> + mpz_add (tmp, mmax, mstep);
> >> + mpz_sub_ui (tmp, tmp, 1);
> >> + mpz_fdiv_q (tmp, tmp, mstep);
> >> + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp,
> >> false),
> >> + TYPE_SIGN (niter_type));
> >> + mpz_clear (mstep);
> >> + mpz_clear (tmp);
> >
> > You forget to clear mmax? I wonder why you need to involve GMP here
> Oh, thanks for pointing out this :) need to clear mmax.
>
> > at all since this is just add/sub/divide - you could use
> > widest_int directly I think. At least the back-and-forth between
> > trees, wide-int and mpz looks odd.
> I once use 'trees' to do the calculation, while I encounter the issue
> that the may not hold the precision for some cases:
> "mmax" is MAX(1<<64-1); step is 4, then mmax + 4 becomes 3.
>
> I would check to use widest_int (e.g. long[4]) which seems enough to
> hold all types.
>
>
> BR,
> Jiufu Guo.
>
> >
> > Richard.
> >
> >> + niter->may_be_zero
> >> + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
> >> +
> >> + niter->control.no_overflow = false;
> >> +
> >> + return true;
> >> +}
> >> +
> >> /* Determines number of iterations of loop whose ending condition
> >> is IV0 < IV1. TYPE is the type of the iv. The number of
> >> iterations is stored to NITER. BNDS bounds the difference
> >> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree
> >> type, affine_iv *iv0,
> >> niter->bound = iv0->base;
> >> }
> >>
> >> + /* {base, -C} < n, or n < {base, C} */
> >> + if (tree_int_cst_sign_bit (iv0->step)
> >> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
> >> (iv1->step)))
> >> + return number_of_iterations_until_wrap (loop, type, iv0, iv1,
> >> niter);
> >> +
> >> delta = fold_build2 (MINUS_EXPR, niter_type,
> >> fold_convert (niter_type, iv1->base),
> >> fold_convert (niter_type, iv0->base));
> >> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
> >> }
> >> }
> >>
> >> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
> >> - the condition for loop-until-wrap cases. For example:
> >> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9
> >> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
> >> - Return true if condition is successfully adjusted. */
> >> -
> >> -static bool
> >> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
> >> *code,
> >> - affine_iv *iv1)
> >> -{
> >> - /* Only support simple cases for the moment. */
> >> - if (TREE_CODE (iv0->base) != INTEGER_CST
> >> - || TREE_CODE (iv1->base) != INTEGER_CST)
> >> - return false;
> >> -
> >> - tree niter_type = unsigned_type_for (type), high, low;
> >> - /* Case: i-- < 10. */
> >> - if (integer_zerop (iv1->step))
> >> - {
> >> - /* TODO: Should handle case in which abs(step) != 1. */
> >> - if (!integer_minus_onep (iv0->step))
> >> - return false;
> >> - /* Give up on infinite loop. */
> >> - if (*code == LE_EXPR
> >> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
> >> - return false;
> >> - high = fold_build2 (PLUS_EXPR, niter_type,
> >> - fold_convert (niter_type, iv0->base),
> >> - build_int_cst (niter_type, 1));
> >> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
> >> - }
> >> - else if (integer_zerop (iv0->step))
> >> - {
> >> - /* TODO: Should handle case in which abs(step) != 1. */
> >> - if (!integer_onep (iv1->step))
> >> - return false;
> >> - /* Give up on infinite loop. */
> >> - if (*code == LE_EXPR
> >> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
> >> - return false;
> >> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
> >> - low = fold_build2 (MINUS_EXPR, niter_type,
> >> - fold_convert (niter_type, iv1->base),
> >> - build_int_cst (niter_type, 1));
> >> - }
> >> - else
> >> - gcc_unreachable ();
> >> -
> >> - iv0->base = low;
> >> - iv0->step = fold_convert (niter_type, integer_one_node);
> >> - iv1->base = high;
> >> - iv1->step = build_int_cst (niter_type, 0);
> >> - *code = NE_EXPR;
> >> - return true;
> >> -}
> >> -
> >> /* Determine the number of iterations according to condition (for
> >> staying
> >> inside loop) which compares two induction variables using
> >> comparison
> >> operator CODE. The induction variable on left side of the
> >> comparison
> >> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
> >> return true;
> >> }
> >>
> >> - /* Handle special case loops: while (i-- < 10) and while (10 < i++)
> >> by
> >> - adjusting iv0, iv1 and code. */
> >> - if (code != NE_EXPR
> >> - && (tree_int_cst_sign_bit (iv0->step)
> >> - || (!integer_zerop (iv1->step)
> >> - && !tree_int_cst_sign_bit (iv1->step)))
> >> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
> >> - return false;
> >> -
> >> /* OK, now we know we have a senseful loop. Handle several cases,
> >> depending
> >> on what comparison operator is used. */
> >> bound_difference (loop, iv1->base, iv0->base, &bnds);
> >>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
2021-07-02 0:51 ` Bin.Cheng
@ 2021-07-02 4:06 ` guojiufu
0 siblings, 0 replies; 8+ messages in thread
From: guojiufu @ 2021-07-02 4:06 UTC (permalink / raw)
To: Bin.Cheng
Cc: Richard Biener, Segher Boessenkool, amker, gcc-patches List,
Bill Schmidt, jlaw, dje.gcc
On 2021-07-02 08:51, Bin.Cheng wrote:
> On Thu, Jul 1, 2021 at 10:15 PM guojiufu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> On 2021-07-01 20:35, Richard Biener wrote:
>> > On Thu, 1 Jul 2021, Jiufu Guo wrote:
>> >
>> >> For code like:
>> >> unsigned foo(unsigned val, unsigned start)
>> >> {
>> >> unsigned cnt = 0;
>> >> for (unsigned i = start; i > val; ++i)
>> >> cnt++;
>> >> return cnt;
>> >> }
>> >>
>> >> The number of iterations should be about UINT_MAX - start.
>> >
>> > For
>> >
>> > unsigned foo(unsigned val, unsigned start)
>> > {
>> > unsigned cnt = 0;
>> > for (unsigned i = start; i >= val; ++i)
>> > cnt++;
>> > return cnt;
>> > }
>> >
>> > and val == 0 the loop never terminates. I don't see anywhere
>> > in the patch that you disregard GE_EXPR and I remember
>> > the code handles GE as well as GT? From a quick look this is
>> > also not covered by a testcase you add - not exactly sure
>> > how it would materialize in a miscompilation.
>>
>> In number_of_iterations_cond, there is code:
>> if (code == GE_EXPR || code == GT_EXPR
>> || (code == NE_EXPR && integer_zerop (iv0->step)))
>> {
>> std::swap (iv0, iv1);
>> code = swap_tree_comparison (code);
>> }
>> It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
>> and LE (val <= i) is converted to LT (val - 1 < i).
>> So, the code is added to number_of_iterations_lt.
>>
>> But, this patch leads mis-compilation for unsigned "i >= val" as
>> above transforms: converting LE (val <= i) to LT (val - 1 < i)
>> seems not appropriate (e.g where val=0).
> I don't know where the exact code is, but IIRC, number_of_iteration
> handles boundary conditions when transforming <= into <. You may
> check it out.
Yes, in number_of_iterations_le, there is code to check MAX/MIN
if (integer_nonzerop (iv0->step))
assumption = fold_build2 (NE_EXPR, boolean_type_node,
iv1->base, TYPE_MAX_VALUE (type));
else
assumption = fold_build2 (NE_EXPR, boolean_type_node,
iv0->base, TYPE_MIN_VALUE (type));
Checking why this code does not help.
>
>> Thanks for pointing out this!!!
>>
>> I would investigate a way to handle this correctly.
>> A possible way maybe just to return false for this kind of LE.
> IIRC, it checks the boundary conditions, either returns false or
> simply introduces more assumptions.
Thanks! Adding more assumptions would help.
The below code also runs into infinite, more assumptions may help this
code.
__attribute__ ((noinline))
unsigned foo(unsigned val, unsigned start)
{
unsigned cnt = 0;
for (unsigned i = start; val <= i; i+=16)
cnt++;
return cnt;
}
foo (4, 8);
Thanks again!
BR,
Jiufu Guo
>>
>> Any suggestions?
>>
>> >
>> >> There is function adjust_cond_for_loop_until_wrap which
>> >> handles similar work for const bases.
>> >> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> >> function number_of_iterations_cond/number_of_iterations_lt
>> >> to analyze number of iterations for this kind of loop.
>> >>
>> >> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>> >>
>> >> gcc/ChangeLog:
>> >>
>> >> PR tree-optimization/101145
>> >> * tree-ssa-loop-niter.c
>> >> (number_of_iterations_until_wrap): New function.
>> >> (number_of_iterations_lt): Invoke above function.
>> >> (adjust_cond_for_loop_until_wrap):
>> >> Merge to number_of_iterations_until_wrap.
>> >> (number_of_iterations_cond): Update invokes for
>> >> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>> >>
>> >> gcc/testsuite/ChangeLog:
>> >>
>> >> PR tree-optimization/101145
>> >> * gcc.dg/vect/pr101145.c: New test.
>> >> * gcc.dg/vect/pr101145.inc: New test.
>> >> * gcc.dg/vect/pr101145_1.c: New test.
>> >> * gcc.dg/vect/pr101145_2.c: New test.
>> >> * gcc.dg/vect/pr101145_3.c: New test.
>> >> ---
>> >> gcc/testsuite/gcc.dg/vect/pr101145.c | 187
>> >> +++++++++++++++++++++++++
>> >> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
>> >> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
>> >> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
>> >> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
>> >> gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
>> >> 6 files changed, 380 insertions(+), 65 deletions(-)
>> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >>
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> >> new file mode 100644
>> >> index 00000000000..74031b031cf
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> >> @@ -0,0 +1,187 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#include <limits.h>
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> + while (n < ++l)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> >> unsigned)
>> >> +{
>> >> + while (UINT_MAX - 64 < ++l)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> + l = UINT_MAX - 32;
>> >> + while (n < ++l)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> + while (n <= ++l)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{ // infininate
>> >> + while (0 <= ++l)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> + //no loop
>> >> + l = UINT_MAX;
>> >> + while (n < ++l)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> + while (--l < n)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> >> unsigned)
>> >> +{
>> >> + while (--l < 64)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +unsigned __attribute__ ((noinline))
>> >> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> >> n)
>> >> +{
>> >> + l = 32;
>> >> + while (--l < n)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +
>> >> +int a[3200], b[3200];
>> >> +int fail;
>> >> +
>> >> +int
>> >> +main ()
>> >> +{
>> >> + unsigned l, n;
>> >> + unsigned res;
>> >> + /* l > n*/
>> >> + n = UINT_MAX - 64;
>> >> + l = n + 32;
>> >> + res = foo (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n;
>> >> + res = foo (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n - 1;
>> >> + res = foo (a, b, l, n);
>> >> + if (res != l + 1)
>> >> + fail++;
>> >> +
>> >> + l = n - 32;
>> >> + res = foo (a, b, l, n);
>> >> + if (res != l + 1)
>> >> + fail++;
>> >> +
>> >> + l = UINT_MAX;
>> >> + res = foo (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n + 32;
>> >> + res = foo_1 (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n + 32;
>> >> + res = foo_2 (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n;
>> >> + res = foo_3 (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n - 1;
>> >> + res = foo_3 (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n - 2;
>> >> + res = foo_3 (a, b, l, n);
>> >> + if (res != l + 1)
>> >> + fail++;
>> >> +
>> >> + res = foo_5 (a, b, l, n);
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + n = 64;
>> >> + l = n - 32;
>> >> + res = bar (a, b, l, n);
>> >> + res++;
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n;
>> >> + res = bar (a, b, l, n);
>> >> + res++;
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + l = n + 1;
>> >> + res = bar (a, b, l, n);
>> >> + res++;
>> >> + if (res != l)
>> >> + fail++;
>> >> +
>> >> + l = 0;
>> >> + res = bar (a, b, l, n);
>> >> + res++;
>> >> + if (res != l)
>> >> + fail++;
>> >> +
>> >> + l = 32;
>> >> + res = bar_1 (a, b, l, n);
>> >> + res++;
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + res = bar_1 (a, b, l, n);
>> >> + res++;
>> >> + if (res != 0)
>> >> + fail++;
>> >> +
>> >> + if (fail)
>> >> + __builtin_abort ();
>> >> + return 0;
>> >> +}
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" }
>> >> } */
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >> new file mode 100644
>> >> index 00000000000..6eed3fa8aca
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> >> @@ -0,0 +1,63 @@
>> >> +TYPE __attribute__ ((noinline))
>> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> >> +{
>> >> + for (l = L_BASE; n < l; l += C)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +TYPE __attribute__ ((noinline))
>> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> >> +{
>> >> + for (l = L_BASE_DOWN; l < n; l -= C)
>> >> + *a++ = *b++ + 1;
>> >> + return l;
>> >> +}
>> >> +
>> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
>> >> +
>> >> +int a[1000], b[1000];
>> >> +int fail;
>> >> +
>> >> +int
>> >> +main ()
>> >> +{
>> >> + TYPE res;
>> >> + TYPE l;
>> >> + TYPE n;
>> >> + n = N_BASE;
>> >> + l = n - C;
>> >> + res = foo_sign (a, b, l, n);
>> >> + if (res != l)
>> >> + fail++;
>> >> +
>> >> + l = n;
>> >> + res = foo_sign (a, b, l, n);
>> >> + if (res != l)
>> >> + fail++;
>> >> +
>> >> + l = n + C;
>> >> + res = foo_sign (a, b, l, n);
>> >> + if (neq ((res - MIN) / C, 0))
>> >> + fail++;
>> >> +
>> >> + n = N_BASE_DOWN;
>> >> + l = n - C;
>> >> + res = bar_sign (a, b, l, n);
>> >> + if (neq ((MAX - res) / C, 0))
>> >> + fail++;
>> >> +
>> >> + l = n;
>> >> + res = bar_sign (a, b, l, n);
>> >> + if (res != l)
>> >> + fail++;
>> >> +
>> >> + l = n + C;
>> >> + res = bar_sign (a, b, l, n);
>> >> + if (res != l)
>> >> + fail++;
>> >> +
>> >> + if (fail)
>> >> + __builtin_abort ();
>> >> + return 0;
>> >> +}
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >> new file mode 100644
>> >> index 00000000000..94f6b99b893
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> >> @@ -0,0 +1,15 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#define TYPE signed char
>> >> +#define MIN -128
>> >> +#define MAX 127
>> >> +#define N_BASE (MAX - 32)
>> >> +#define N_BASE_DOWN (MIN + 32)
>> >> +
>> >> +#define C 3
>> >> +#define L_BASE l
>> >> +#define L_BASE_DOWN l
>> >> +
>> >> +#include "pr101145.inc"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> >> } */
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >> new file mode 100644
>> >> index 00000000000..d3cfc9e01e1
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> >> @@ -0,0 +1,15 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#define TYPE unsigned char
>> >> +#define MIN 0
>> >> +#define MAX 255
>> >> +#define N_BASE (MAX - 32 + 1)
>> >> +#define N_BASE_DOWN (MIN + 32)
>> >> +
>> >> +#define C 2
>> >> +#define L_BASE l
>> >> +#define L_BASE_DOWN l
>> >> +
>> >> +#include "pr101145.inc"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> >> } */
>> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> new file mode 100644
>> >> index 00000000000..e2cf9b1f7e6
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> >> @@ -0,0 +1,15 @@
>> >> +/* { dg-require-effective-target vect_int } */
>> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> >> +#define TYPE int *
>> >> +#define MIN ((TYPE)0)
>> >> +#define MAX ((TYPE)((long long)-1))
>> >> +#define N_BASE ((TYPE) (-32))
>> >> +#define N_BASE_DOWN (MIN + 32)
>> >> +
>> >> +#define C 1
>> >> +#define L_BASE l
>> >> +#define L_BASE_DOWN l
>> >> +
>> >> +#include "pr101145.inc"
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> >> } */
>> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> >> index b5add827018..06db6a36ef8 100644
>> >> --- a/gcc/tree-ssa-loop-niter.c
>> >> +++ b/gcc/tree-ssa-loop-niter.c
>> >> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
>> >> *iv0, affine_iv *iv1,
>> >> }
>> >> }
>> >>
>> >> +/* Determines number of iterations of loop whose ending condition
>> >> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
>> >> + The number of iterations is stored to NITER. */
>> >> +
>> >> +static bool
>> >> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv
>> >> *iv0,
>> >> + affine_iv *iv1, class tree_niter_desc *niter)
>> >> +{
>> >> + tree niter_type = unsigned_type_for (type);
>> >> + tree max, min;
>> >> +
>> >> + if (POINTER_TYPE_P (type))
>> >> + {
>> >> + max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> >> + min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> >> + }
>> >> + else
>> >> + {
>> >> + max = TYPE_MAX_VALUE (type);
>> >> + min = TYPE_MIN_VALUE (type);
>> >> + }
>> >
>> > It would be cheaper to use wide_int here - wi::min/max_value
>> > (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
>> > code below.
>>
>> Thanks! I think you mean to use wide_int for "tree high = max, low =
>> min,".
>> I would check and update the patch to use wide_int.
>>
>> The MAX/MIN would be in type 'tree' when binding to MINUS_EXPR with
>> iv0/iv1->base,
>> for this, we may still need to use "TYPE_MAX/MIN_VALUE (type)", right?
>>
>> >
>> >> + tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> >> + tree step;
>> >> +
>> >> + /* n < {base, C}. */
>> >> + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) ==
>> >> INTEGER_CST
>> >> + && !tree_int_cst_sign_bit (iv1->step))
>> >> + {
>> >> + step = iv1->step;
>> >> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max,
>> >> iv1->base);
>> >> + if (TREE_CODE (iv1->base) == INTEGER_CST)
>> >> + low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> >> + else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> >> + low = iv0->base;
>> >> + }
>> >> + /* {base, -C} < n. */
>> >> + else if (TREE_CODE (iv0->step) == INTEGER_CST
>> >> + && tree_int_cst_sign_bit (iv0->step) && integer_zerop
>> >> (iv1->step))
>> >> + {
>> >> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
>> >> iv0->step);
>> >> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
>> >> min);
>> >> + if (TREE_CODE (iv0->base) == INTEGER_CST)
>> >> + high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> >> + else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> >> + high = iv1->base;
>> >> + }
>> >> + else
>> >> + return false;
>> >> +
>> >> + /* (delta + step - 1) / step */
>> >> + step = fold_convert (niter_type, step);
>> >> + niter->niter = fold_convert (niter_type, niter->niter);
>> >> + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter,
>> >> step);
>> >> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type,
>> >> niter->niter, step);
>> >> +
>> >> + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> >> + m = fold_convert (niter_type, m);
>> >> + mpz_t mstep, tmp, mmax;
>> >> + mpz_init (mstep);
>> >> + mpz_init (tmp);
>> >> + mpz_init (mmax);
>> >> + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> >> + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> >> + mpz_add (tmp, mmax, mstep);
>> >> + mpz_sub_ui (tmp, tmp, 1);
>> >> + mpz_fdiv_q (tmp, tmp, mstep);
>> >> + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp,
>> >> false),
>> >> + TYPE_SIGN (niter_type));
>> >> + mpz_clear (mstep);
>> >> + mpz_clear (tmp);
>> >
>> > You forget to clear mmax? I wonder why you need to involve GMP here
>> Oh, thanks for pointing out this :) need to clear mmax.
>>
>> > at all since this is just add/sub/divide - you could use
>> > widest_int directly I think. At least the back-and-forth between
>> > trees, wide-int and mpz looks odd.
>> I once use 'trees' to do the calculation, while I encounter the issue
>> that the may not hold the precision for some cases:
>> "mmax" is MAX(1<<64-1); step is 4, then mmax + 4 becomes 3.
>>
>> I would check to use widest_int (e.g. long[4]) which seems enough to
>> hold all types.
>>
>>
>> BR,
>> Jiufu Guo.
>>
>> >
>> > Richard.
>> >
>> >> + niter->may_be_zero
>> >> + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
>> >> +
>> >> + niter->control.no_overflow = false;
>> >> +
>> >> + return true;
>> >> +}
>> >> +
>> >> /* Determines number of iterations of loop whose ending condition
>> >> is IV0 < IV1. TYPE is the type of the iv. The number of
>> >> iterations is stored to NITER. BNDS bounds the difference
>> >> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree
>> >> type, affine_iv *iv0,
>> >> niter->bound = iv0->base;
>> >> }
>> >>
>> >> + /* {base, -C} < n, or n < {base, C} */
>> >> + if (tree_int_cst_sign_bit (iv0->step)
>> >> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
>> >> (iv1->step)))
>> >> + return number_of_iterations_until_wrap (loop, type, iv0, iv1,
>> >> niter);
>> >> +
>> >> delta = fold_build2 (MINUS_EXPR, niter_type,
>> >> fold_convert (niter_type, iv1->base),
>> >> fold_convert (niter_type, iv0->base));
>> >> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>> >> }
>> >> }
>> >>
>> >> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> >> - the condition for loop-until-wrap cases. For example:
>> >> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9
>> >> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
>> >> - Return true if condition is successfully adjusted. */
>> >> -
>> >> -static bool
>> >> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
>> >> *code,
>> >> - affine_iv *iv1)
>> >> -{
>> >> - /* Only support simple cases for the moment. */
>> >> - if (TREE_CODE (iv0->base) != INTEGER_CST
>> >> - || TREE_CODE (iv1->base) != INTEGER_CST)
>> >> - return false;
>> >> -
>> >> - tree niter_type = unsigned_type_for (type), high, low;
>> >> - /* Case: i-- < 10. */
>> >> - if (integer_zerop (iv1->step))
>> >> - {
>> >> - /* TODO: Should handle case in which abs(step) != 1. */
>> >> - if (!integer_minus_onep (iv0->step))
>> >> - return false;
>> >> - /* Give up on infinite loop. */
>> >> - if (*code == LE_EXPR
>> >> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> >> - return false;
>> >> - high = fold_build2 (PLUS_EXPR, niter_type,
>> >> - fold_convert (niter_type, iv0->base),
>> >> - build_int_cst (niter_type, 1));
>> >> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> >> - }
>> >> - else if (integer_zerop (iv0->step))
>> >> - {
>> >> - /* TODO: Should handle case in which abs(step) != 1. */
>> >> - if (!integer_onep (iv1->step))
>> >> - return false;
>> >> - /* Give up on infinite loop. */
>> >> - if (*code == LE_EXPR
>> >> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> >> - return false;
>> >> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> >> - low = fold_build2 (MINUS_EXPR, niter_type,
>> >> - fold_convert (niter_type, iv1->base),
>> >> - build_int_cst (niter_type, 1));
>> >> - }
>> >> - else
>> >> - gcc_unreachable ();
>> >> -
>> >> - iv0->base = low;
>> >> - iv0->step = fold_convert (niter_type, integer_one_node);
>> >> - iv1->base = high;
>> >> - iv1->step = build_int_cst (niter_type, 0);
>> >> - *code = NE_EXPR;
>> >> - return true;
>> >> -}
>> >> -
>> >> /* Determine the number of iterations according to condition (for
>> >> staying
>> >> inside loop) which compares two induction variables using
>> >> comparison
>> >> operator CODE. The induction variable on left side of the
>> >> comparison
>> >> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>> >> return true;
>> >> }
>> >>
>> >> - /* Handle special case loops: while (i-- < 10) and while (10 < i++)
>> >> by
>> >> - adjusting iv0, iv1 and code. */
>> >> - if (code != NE_EXPR
>> >> - && (tree_int_cst_sign_bit (iv0->step)
>> >> - || (!integer_zerop (iv1->step)
>> >> - && !tree_int_cst_sign_bit (iv1->step)))
>> >> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> >> - return false;
>> >> -
>> >> /* OK, now we know we have a senseful loop. Handle several cases,
>> >> depending
>> >> on what comparison operator is used. */
>> >> bound_difference (loop, iv1->base, iv0->base, &bnds);
>> >>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] Analyze niter for until-wrap condition [PR101145]
2021-07-01 12:35 ` Richard Biener
2021-07-01 14:12 ` guojiufu
@ 2021-07-02 7:55 ` guojiufu
1 sibling, 0 replies; 8+ messages in thread
From: guojiufu @ 2021-07-02 7:55 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, wschmidt, segher, dje.gcc, jlaw, amker
On 2021-07-01 20:35, Richard Biener wrote:
> On Thu, 1 Jul 2021, Jiufu Guo wrote:
>
>> For code like:
>> unsigned foo(unsigned val, unsigned start)
>> {
>> unsigned cnt = 0;
>> for (unsigned i = start; i > val; ++i)
>> cnt++;
>> return cnt;
>> }
>>
>> The number of iterations should be about UINT_MAX - start.
>
> For
>
> unsigned foo(unsigned val, unsigned start)
> {
> unsigned cnt = 0;
> for (unsigned i = start; i >= val; ++i)
> cnt++;
> return cnt;
> }
>
> and val == 0 the loop never terminates. I don't see anywhere
> in the patch that you disregard GE_EXPR and I remember
> the code handles GE as well as GT? From a quick look this is
> also not covered by a testcase you add - not exactly sure
> how it would materialize in a miscompilation.
>
Find a similar issue on the below code with the trunk.
The below code should run infinite, but it exits quickly.
#include <limits.h>
__attribute__ ((noinline))
unsigned foo(unsigned val, unsigned start)
{
unsigned cnt = 0;
for (unsigned i = start; i <= val; i+=16)
cnt++;
return cnt;
}
int main()
{
return foo (UINT_MAX-7, 8);
}
Just opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101291
BR,
Jiufu Guo.
>> There is function adjust_cond_for_loop_until_wrap which
>> handles similar work for const bases.
>> Like adjust_cond_for_loop_until_wrap, this patch enhance
>> function number_of_iterations_cond/number_of_iterations_lt
>> to analyze number of iterations for this kind of loop.
>>
>> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>>
>> gcc/ChangeLog:
>>
>> PR tree-optimization/101145
>> * tree-ssa-loop-niter.c
>> (number_of_iterations_until_wrap): New function.
>> (number_of_iterations_lt): Invoke above function.
>> (adjust_cond_for_loop_until_wrap):
>> Merge to number_of_iterations_until_wrap.
>> (number_of_iterations_cond): Update invokes for
>> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>>
>> gcc/testsuite/ChangeLog:
>>
>> PR tree-optimization/101145
>> * gcc.dg/vect/pr101145.c: New test.
>> * gcc.dg/vect/pr101145.inc: New test.
>> * gcc.dg/vect/pr101145_1.c: New test.
>> * gcc.dg/vect/pr101145_2.c: New test.
>> * gcc.dg/vect/pr101145_3.c: New test.
>> ---
>> gcc/testsuite/gcc.dg/vect/pr101145.c | 187
>> +++++++++++++++++++++++++
>> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
>> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
>> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
>> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
>> gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
>> 6 files changed, 380 insertions(+), 65 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> new file mode 100644
>> index 00000000000..74031b031cf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> @@ -0,0 +1,187 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#include <limits.h>
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + while (n < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> unsigned)
>> +{
>> + while (UINT_MAX - 64 < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + l = UINT_MAX - 32;
>> + while (n < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + while (n <= ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{ // infininate
>> + while (0 <= ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + //no loop
>> + l = UINT_MAX;
>> + while (n < ++l)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + while (--l < n)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
>> unsigned)
>> +{
>> + while (--l < 64)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +unsigned __attribute__ ((noinline))
>> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> + l = 32;
>> + while (--l < n)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +
>> +int a[3200], b[3200];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> + unsigned l, n;
>> + unsigned res;
>> + /* l > n*/
>> + n = UINT_MAX - 64;
>> + l = n + 32;
>> + res = foo (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n;
>> + res = foo (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n - 1;
>> + res = foo (a, b, l, n);
>> + if (res != l + 1)
>> + fail++;
>> +
>> + l = n - 32;
>> + res = foo (a, b, l, n);
>> + if (res != l + 1)
>> + fail++;
>> +
>> + l = UINT_MAX;
>> + res = foo (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n + 32;
>> + res = foo_1 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n + 32;
>> + res = foo_2 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n;
>> + res = foo_3 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n - 1;
>> + res = foo_3 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n - 2;
>> + res = foo_3 (a, b, l, n);
>> + if (res != l + 1)
>> + fail++;
>> +
>> + res = foo_5 (a, b, l, n);
>> + if (res != 0)
>> + fail++;
>> +
>> + n = 64;
>> + l = n - 32;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + l = n + 1;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != l)
>> + fail++;
>> +
>> + l = 0;
>> + res = bar (a, b, l, n);
>> + res++;
>> + if (res != l)
>> + fail++;
>> +
>> + l = 32;
>> + res = bar_1 (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + res = bar_1 (a, b, l, n);
>> + res++;
>> + if (res != 0)
>> + fail++;
>> +
>> + if (fail)
>> + __builtin_abort ();
>> + return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" }
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> new file mode 100644
>> index 00000000000..6eed3fa8aca
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> @@ -0,0 +1,63 @@
>> +TYPE __attribute__ ((noinline))
>> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> + for (l = L_BASE; n < l; l += C)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +TYPE __attribute__ ((noinline))
>> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> + for (l = L_BASE_DOWN; l < n; l -= C)
>> + *a++ = *b++ + 1;
>> + return l;
>> +}
>> +
>> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
>> +
>> +int a[1000], b[1000];
>> +int fail;
>> +
>> +int
>> +main ()
>> +{
>> + TYPE res;
>> + TYPE l;
>> + TYPE n;
>> + n = N_BASE;
>> + l = n - C;
>> + res = foo_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + l = n;
>> + res = foo_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + l = n + C;
>> + res = foo_sign (a, b, l, n);
>> + if (neq ((res - MIN) / C, 0))
>> + fail++;
>> +
>> + n = N_BASE_DOWN;
>> + l = n - C;
>> + res = bar_sign (a, b, l, n);
>> + if (neq ((MAX - res) / C, 0))
>> + fail++;
>> +
>> + l = n;
>> + res = bar_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + l = n + C;
>> + res = bar_sign (a, b, l, n);
>> + if (res != l)
>> + fail++;
>> +
>> + if (fail)
>> + __builtin_abort ();
>> + return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> new file mode 100644
>> index 00000000000..94f6b99b893
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE signed char
>> +#define MIN -128
>> +#define MAX 127
>> +#define N_BASE (MAX - 32)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 3
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> new file mode 100644
>> index 00000000000..d3cfc9e01e1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE unsigned char
>> +#define MIN 0
>> +#define MAX 255
>> +#define N_BASE (MAX - 32 + 1)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 2
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> new file mode 100644
>> index 00000000000..e2cf9b1f7e6
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE int *
>> +#define MIN ((TYPE)0)
>> +#define MAX ((TYPE)((long long)-1))
>> +#define N_BASE ((TYPE) (-32))
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 1
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +
>> +#include "pr101145.inc"
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
>> } */
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index b5add827018..06db6a36ef8 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
>> *iv0, affine_iv *iv1,
>> }
>> }
>>
>> +/* Determines number of iterations of loop whose ending condition
>> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
>> + The number of iterations is stored to NITER. */
>> +
>> +static bool
>> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv
>> *iv0,
>> + affine_iv *iv1, class tree_niter_desc *niter)
>> +{
>> + tree niter_type = unsigned_type_for (type);
>> + tree max, min;
>> +
>> + if (POINTER_TYPE_P (type))
>> + {
>> + max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
>> + min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
>> + }
>> + else
>> + {
>> + max = TYPE_MAX_VALUE (type);
>> + min = TYPE_MIN_VALUE (type);
>> + }
>
> It would be cheaper to use wide_int here - wi::min/max_value
> (TYPE_PRECISION (...), TYPE_SIGN (...)) and in the simplification
> code below.
>
>> + tree high = max, low = min, one = build_int_cst (niter_type, 1);
>> + tree step;
>> +
>> + /* n < {base, C}. */
>> + if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) ==
>> INTEGER_CST
>> + && !tree_int_cst_sign_bit (iv1->step))
>> + {
>> + step = iv1->step;
>> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, max,
>> iv1->base);
>> + if (TREE_CODE (iv1->base) == INTEGER_CST)
>> + low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
>> + else if (TREE_CODE (iv0->base) == INTEGER_CST)
>> + low = iv0->base;
>> + }
>> + /* {base, -C} < n. */
>> + else if (TREE_CODE (iv0->step) == INTEGER_CST
>> + && tree_int_cst_sign_bit (iv0->step) && integer_zerop
>> (iv1->step))
>> + {
>> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step),
>> iv0->step);
>> + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base,
>> min);
>> + if (TREE_CODE (iv0->base) == INTEGER_CST)
>> + high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
>> + else if (TREE_CODE (iv1->base) == INTEGER_CST)
>> + high = iv1->base;
>> + }
>> + else
>> + return false;
>> +
>> + /* (delta + step - 1) / step */
>> + step = fold_convert (niter_type, step);
>> + niter->niter = fold_convert (niter_type, niter->niter);
>> + niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter,
>> step);
>> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type,
>> niter->niter, step);
>> +
>> + tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
>> + m = fold_convert (niter_type, m);
>> + mpz_t mstep, tmp, mmax;
>> + mpz_init (mstep);
>> + mpz_init (tmp);
>> + mpz_init (mmax);
>> + wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
>> + wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
>> + mpz_add (tmp, mmax, mstep);
>> + mpz_sub_ui (tmp, tmp, 1);
>> + mpz_fdiv_q (tmp, tmp, mstep);
>> + niter->max = widest_int::from (wi::from_mpz (niter_type, tmp,
>> false),
>> + TYPE_SIGN (niter_type));
>> + mpz_clear (mstep);
>> + mpz_clear (tmp);
>
> You forget to clear mmax? I wonder why you need to involve GMP here
> at all since this is just add/sub/divide - you could use
> widest_int directly I think. At least the back-and-forth between
> trees, wide-int and mpz looks odd.
>
> Richard.
>
>> + niter->may_be_zero
>> + = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
>> +
>> + niter->control.no_overflow = false;
>> +
>> + return true;
>> +}
>> +
>> /* Determines number of iterations of loop whose ending condition
>> is IV0 < IV1. TYPE is the type of the iv. The number of
>> iterations is stored to NITER. BNDS bounds the difference
>> @@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree
>> type, affine_iv *iv0,
>> niter->bound = iv0->base;
>> }
>>
>> + /* {base, -C} < n, or n < {base, C} */
>> + if (tree_int_cst_sign_bit (iv0->step)
>> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit
>> (iv1->step)))
>> + return number_of_iterations_until_wrap (loop, type, iv0, iv1,
>> niter);
>> +
>> delta = fold_build2 (MINUS_EXPR, niter_type,
>> fold_convert (niter_type, iv1->base),
>> fold_convert (niter_type, iv0->base));
>> @@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>> }
>> }
>>
>> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
>> - the condition for loop-until-wrap cases. For example:
>> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9
>> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8
>> - Return true if condition is successfully adjusted. */
>> -
>> -static bool
>> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code
>> *code,
>> - affine_iv *iv1)
>> -{
>> - /* Only support simple cases for the moment. */
>> - if (TREE_CODE (iv0->base) != INTEGER_CST
>> - || TREE_CODE (iv1->base) != INTEGER_CST)
>> - return false;
>> -
>> - tree niter_type = unsigned_type_for (type), high, low;
>> - /* Case: i-- < 10. */
>> - if (integer_zerop (iv1->step))
>> - {
>> - /* TODO: Should handle case in which abs(step) != 1. */
>> - if (!integer_minus_onep (iv0->step))
>> - return false;
>> - /* Give up on infinite loop. */
>> - if (*code == LE_EXPR
>> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
>> - return false;
>> - high = fold_build2 (PLUS_EXPR, niter_type,
>> - fold_convert (niter_type, iv0->base),
>> - build_int_cst (niter_type, 1));
>> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
>> - }
>> - else if (integer_zerop (iv0->step))
>> - {
>> - /* TODO: Should handle case in which abs(step) != 1. */
>> - if (!integer_onep (iv1->step))
>> - return false;
>> - /* Give up on infinite loop. */
>> - if (*code == LE_EXPR
>> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
>> - return false;
>> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
>> - low = fold_build2 (MINUS_EXPR, niter_type,
>> - fold_convert (niter_type, iv1->base),
>> - build_int_cst (niter_type, 1));
>> - }
>> - else
>> - gcc_unreachable ();
>> -
>> - iv0->base = low;
>> - iv0->step = fold_convert (niter_type, integer_one_node);
>> - iv1->base = high;
>> - iv1->step = build_int_cst (niter_type, 0);
>> - *code = NE_EXPR;
>> - return true;
>> -}
>> -
>> /* Determine the number of iterations according to condition (for
>> staying
>> inside loop) which compares two induction variables using
>> comparison
>> operator CODE. The induction variable on left side of the
>> comparison
>> @@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
>> return true;
>> }
>>
>> - /* Handle special case loops: while (i-- < 10) and while (10 < i++)
>> by
>> - adjusting iv0, iv1 and code. */
>> - if (code != NE_EXPR
>> - && (tree_int_cst_sign_bit (iv0->step)
>> - || (!integer_zerop (iv1->step)
>> - && !tree_int_cst_sign_bit (iv1->step)))
>> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
>> - return false;
>> -
>> /* OK, now we know we have a senseful loop. Handle several cases,
>> depending
>> on what comparison operator is used. */
>> bound_difference (loop, iv1->base, iv0->base, &bnds);
>>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2021-07-02 7:56 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-01 2:05 [PATCH] Analyze niter for until-wrap condition [PR101145] Jiufu Guo
2021-07-01 7:22 ` Bin.Cheng
2021-07-01 11:32 ` guojiufu
2021-07-01 12:35 ` Richard Biener
2021-07-01 14:12 ` guojiufu
2021-07-02 0:51 ` Bin.Cheng
2021-07-02 4:06 ` guojiufu
2021-07-02 7:55 ` guojiufu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).