* [RFC] Overflow check in simplifying exit cond comparing two IVs.
@ 2021-10-18 13:37 Jiufu Guo
2021-10-28 2:19 ` guojiufu
2021-10-28 9:13 ` Richard Biener
0 siblings, 2 replies; 6+ messages in thread
From: Jiufu Guo @ 2021-10-18 13:37 UTC (permalink / raw)
To: gcc-patches
Cc: amker.cheng, rguenther, guojiufu, wschmidt, segher, dje.gcc, jlaw
With reference to the discussions in:
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html
https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html
https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html
Based on the patches in the above discussions, we may draft a patch to fix the
issue.
In this patch, to make sure it is OK to change '{b0,s0} op {b1,s1}' to
'{b0,s0-s1} op {b1,0}', we also compute a condition under which we can assume
that neither of the two IVs overflows/wraps: the niter "of '{b0,s0-s1} op {b1,0}'"
< the niter "until iv0 or iv1 wraps".
Does this patch make sense?
BR,
Jiufu Guo
gcc/ChangeLog:
PR tree-optimization/100740
* tree-ssa-loop-niter.c (number_of_iterations_cond): Add
assume condition for combining of two IVs
gcc/testsuite/ChangeLog:
* gcc.c-torture/execute/pr100740.c: New test.
---
gcc/tree-ssa-loop-niter.c | 103 +++++++++++++++---
.../gcc.c-torture/execute/pr100740.c | 11 ++
2 files changed, 99 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 75109407124..f2987a4448d 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1863,29 +1863,102 @@ number_of_iterations_cond (class loop *loop,
provided that either below condition is satisfied:
- a) the test is NE_EXPR;
- b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
+ a) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
+ b) assumptions in below table also need to be satisfied.
+
+ | iv0 | iv1 | assum (iv0<iv1) | assum (iv0!=iv1) |
+ |---------+---------+---------------------+---------------------|
+ | (b0,2) | (b1,1) | before iv1 overflow | before iv1 overflow |
+ | (b0,2) | (b1,-1) | true | true |
+ | (b0,-1) | (b1,-2) | before iv0 overflow | before iv0 overflow |
+ | | | | |
+ | (b0,1) | (b1,2) | false | before iv0 overflow |
+ | (b0,-1) | (b1,2) | false | true |
+ | (b0,-2) | (b1,-1) | false | before iv1 overflow |
+ 'true' in above table means no need additional condition.
+ 'false' means this case cannot satisfy the transform.
+ The first three rows: iv0->step > iv1->step;
+ The second three rows: iv0->step < iv1->step.
This rarely occurs in practice, but it is simple enough to manage. */
if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
{
+ if (TREE_CODE (iv0->step) != INTEGER_CST
+ || TREE_CODE (iv1->step) != INTEGER_CST)
+ return false;
+ if (!iv0->no_overflow || !iv1->no_overflow)
+ return false;
+
tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
- tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
- iv0->step, iv1->step);
-
- /* No need to check sign of the new step since below code takes care
- of this well. */
- if (code != NE_EXPR
- && (TREE_CODE (step) != INTEGER_CST
- || !iv0->no_overflow || !iv1->no_overflow))
+ tree step
+ = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step);
+
+ if (code != NE_EXPR && tree_int_cst_sign_bit (step))
return false;
- iv0->step = step;
- if (!POINTER_TYPE_P (type))
- iv0->no_overflow = false;
+ bool positive0 = !tree_int_cst_sign_bit (iv0->step);
+ bool positive1 = !tree_int_cst_sign_bit (iv1->step);
- iv1->step = build_int_cst (step_type, 0);
- iv1->no_overflow = true;
+ /* Cases in rows 2 and 4 of above table. */
+ if ((positive0 && !positive1) || (!positive0 && positive1))
+ {
+ iv0->step = step;
+ iv1->step = build_int_cst (step_type, 0);
+ return number_of_iterations_cond (loop, type, iv0, code, iv1,
+ niter, only_exit, every_iteration);
+ }
+
+ affine_iv i_0, i_1;
+ class tree_niter_desc num;
+ i_0 = *iv0;
+ i_1 = *iv1;
+ i_0.step = step;
+ i_1.step = build_int_cst (step_type, 0);
+ if (!number_of_iterations_cond (loop, type, &i_0, code, &i_1, &num,
+ only_exit, every_iteration))
+ return false;
+
+ affine_iv i0, i1;
+ class tree_niter_desc num_wrap;
+ i0 = *iv0;
+ i1 = *iv1;
+
+ /* Reset iv0 and iv1 to calculate the niter which cause overflow. */
+ if (tree_int_cst_lt (i1.step, i0.step))
+ {
+ if (positive0 && positive1)
+ i0.step = build_int_cst (step_type, 0);
+ else if (!positive0 && !positive1)
+ i1.step = build_int_cst (step_type, 0);
+ if (code == NE_EXPR)
+ code = LT_EXPR;
+ }
+ else
+ {
+ if (positive0 && positive1)
+ i1.step = build_int_cst (step_type, 0);
+ else if (!positive0 && !positive1)
+ i0.step = build_int_cst (step_type, 0);
+ gcc_assert (code == NE_EXPR);
+ code = GT_EXPR;
+ }
+
+ /* Calculate the niter which cause overflow. */
+ if (!number_of_iterations_cond (loop, type, &i0, code, &i1, &num_wrap,
+ only_exit, every_iteration))
+ return false;
+
+ /* Make assumption there is no overflow. */
+ tree assum
+ = fold_build2 (LE_EXPR, boolean_type_node, num.niter,
+ fold_convert (TREE_TYPE (num.niter), num_wrap.niter));
+ num.assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+ num.assumptions, assum);
+
+ *iv0 = i_0;
+ *iv1 = i_1;
+ *niter = num;
+ return true;
}
/* If the result of the comparison is a constant, the loop is weird. More
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
new file mode 100644
index 00000000000..8fcdaffef3b
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
@@ -0,0 +1,11 @@
+/* PR tree-optimization/100740 */
+
+unsigned a, b;
+int main() {
+ unsigned c = 0;
+ for (a = 0; a < 2; a++)
+ for (b = 0; b < 2; b++)
+ if (++c < a)
+ __builtin_abort ();
+ return 0;
+}
--
2.17.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] Overflow check in simplifying exit cond comparing two IVs.
2021-10-18 13:37 [RFC] Overflow check in simplifying exit cond comparing two IVs Jiufu Guo
@ 2021-10-28 2:19 ` guojiufu
2021-10-28 9:13 ` Richard Biener
1 sibling, 0 replies; 6+ messages in thread
From: guojiufu @ 2021-10-28 2:19 UTC (permalink / raw)
To: gcc-patches; +Cc: amker.cheng, rguenther, wschmidt, segher, dje.gcc, jlaw
I just tested on ppc64le; this patch passes bootstrap and regtest.
Is this patch OK for trunk?
Thanks for any comments.
BR,
Jiufu
On 2021-10-18 21:37, Jiufu Guo wrote:
> With reference the discussions in:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html
>
> Base on the patches in above discussion, we may draft a patch to fix
> the
> issue.
>
> In this patch, to make sure it is ok to change '{b0,s0} op {b1,s1}' to
> '{b0,s0-s1} op {b1,0}', we also compute the condition which could
> assume
> both 2 ivs are not overflow/wrap: the niter "of '{b0,s0-s1} op {b1,0}'"
> < the niter "of untill wrap for iv0 or iv1".
>
> Does this patch make sense?
>
> BR,
> Jiufu Guo
>
> gcc/ChangeLog:
>
> PR tree-optimization/100740
> * tree-ssa-loop-niter.c (number_of_iterations_cond): Add
> assume condition for combining of two IVs
>
> gcc/testsuite/ChangeLog:
>
> * gcc.c-torture/execute/pr100740.c: New test.
> ---
> gcc/tree-ssa-loop-niter.c | 103 +++++++++++++++---
> .../gcc.c-torture/execute/pr100740.c | 11 ++
> 2 files changed, 99 insertions(+), 15 deletions(-)
> create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
>
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index 75109407124..f2987a4448d 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1863,29 +1863,102 @@ number_of_iterations_cond (class loop *loop,
>
> provided that either below condition is satisfied:
>
> - a) the test is NE_EXPR;
> - b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
> + a) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
> + b) assumptions in below table also need to be satisfied.
> +
> + | iv0 | iv1 | assum (iv0<iv1) | assum (iv0!=iv1) |
> + |---------+---------+---------------------+---------------------|
> + | (b0,2) | (b1,1) | before iv1 overflow | before iv1 overflow |
> + | (b0,2) | (b1,-1) | true | true |
> + | (b0,-1) | (b1,-2) | before iv0 overflow | before iv0 overflow |
> + | | | | |
> + | (b0,1) | (b1,2) | false | before iv0 overflow |
> + | (b0,-1) | (b1,2) | false | true |
> + | (b0,-2) | (b1,-1) | false | before iv1 overflow |
> + 'true' in above table means no need additional condition.
> + 'false' means this case can not satify the transform.
> + The first three rows: iv0->step > iv1->step;
> + The second three rows: iv0->step < iv1->step.
>
> This rarely occurs in practice, but it is simple enough to
> manage. */
> if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
> {
> + if (TREE_CODE (iv0->step) != INTEGER_CST
> + || TREE_CODE (iv1->step) != INTEGER_CST)
> + return false;
> + if (!iv0->no_overflow || !iv1->no_overflow)
> + return false;
> +
> tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
> - tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
> - iv0->step, iv1->step);
> -
> - /* No need to check sign of the new step since below code takes
> care
> - of this well. */
> - if (code != NE_EXPR
> - && (TREE_CODE (step) != INTEGER_CST
> - || !iv0->no_overflow || !iv1->no_overflow))
> + tree step
> + = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step,
> iv1->step);
> +
> + if (code != NE_EXPR && tree_int_cst_sign_bit (step))
> return false;
>
> - iv0->step = step;
> - if (!POINTER_TYPE_P (type))
> - iv0->no_overflow = false;
> + bool positive0 = !tree_int_cst_sign_bit (iv0->step);
> + bool positive1 = !tree_int_cst_sign_bit (iv1->step);
>
> - iv1->step = build_int_cst (step_type, 0);
> - iv1->no_overflow = true;
> + /* Cases in rows 2 and 4 of above table. */
> + if ((positive0 && !positive1) || (!positive0 && positive1))
> + {
> + iv0->step = step;
> + iv1->step = build_int_cst (step_type, 0);
> + return number_of_iterations_cond (loop, type, iv0, code, iv1,
> + niter, only_exit, every_iteration);
> + }
> +
> + affine_iv i_0, i_1;
> + class tree_niter_desc num;
> + i_0 = *iv0;
> + i_1 = *iv1;
> + i_0.step = step;
> + i_1.step = build_int_cst (step_type, 0);
> + if (!number_of_iterations_cond (loop, type, &i_0, code, &i_1,
> &num,
> + only_exit, every_iteration))
> + return false;
> +
> + affine_iv i0, i1;
> + class tree_niter_desc num_wrap;
> + i0 = *iv0;
> + i1 = *iv1;
> +
> + /* Reset iv0 and iv1 to calculate the niter which cause
> overflow. */
> + if (tree_int_cst_lt (i1.step, i0.step))
> + {
> + if (positive0 && positive1)
> + i0.step = build_int_cst (step_type, 0);
> + else if (!positive0 && !positive1)
> + i1.step = build_int_cst (step_type, 0);
> + if (code == NE_EXPR)
> + code = LT_EXPR;
> + }
> + else
> + {
> + if (positive0 && positive1)
> + i1.step = build_int_cst (step_type, 0);
> + else if (!positive0 && !positive1)
> + i0.step = build_int_cst (step_type, 0);
> + gcc_assert (code == NE_EXPR);
> + code = GT_EXPR;
> + }
> +
> + /* Calculate the niter which cause overflow. */
> + if (!number_of_iterations_cond (loop, type, &i0, code, &i1,
> &num_wrap,
> + only_exit, every_iteration))
> + return false;
> +
> + /* Make assumption there is no overflow. */
> + tree assum
> + = fold_build2 (LE_EXPR, boolean_type_node, num.niter,
> + fold_convert (TREE_TYPE (num.niter), num_wrap.niter));
> + num.assumptions = fold_build2 (TRUTH_AND_EXPR,
> boolean_type_node,
> + num.assumptions, assum);
> +
> + *iv0 = i_0;
> + *iv1 = i_1;
> + *niter = num;
> + return true;
> }
>
> /* If the result of the comparison is a constant, the loop is
> weird. More
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c
> b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
> new file mode 100644
> index 00000000000..8fcdaffef3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
> @@ -0,0 +1,11 @@
> +/* PR tree-optimization/100740 */
> +
> +unsigned a, b;
> +int main() {
> + unsigned c = 0;
> + for (a = 0; a < 2; a++)
> + for (b = 0; b < 2; b++)
> + if (++c < a)
> + __builtin_abort ();
> + return 0;
> +}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] Overflow check in simplifying exit cond comparing two IVs.
2021-10-18 13:37 [RFC] Overflow check in simplifying exit cond comparing two IVs Jiufu Guo
2021-10-28 2:19 ` guojiufu
@ 2021-10-28 9:13 ` Richard Biener
2021-12-09 6:53 ` Jiufu Guo
1 sibling, 1 reply; 6+ messages in thread
From: Richard Biener @ 2021-10-28 9:13 UTC (permalink / raw)
To: Jiufu Guo; +Cc: gcc-patches, amker.cheng, wschmidt, segher, dje.gcc, jlaw
On Mon, 18 Oct 2021, Jiufu Guo wrote:
> With reference the discussions in:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html
> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html
>
> Base on the patches in above discussion, we may draft a patch to fix the
> issue.
>
> In this patch, to make sure it is ok to change '{b0,s0} op {b1,s1}' to
> '{b0,s0-s1} op {b1,0}', we also compute the condition which could assume
> both 2 ivs are not overflow/wrap: the niter "of '{b0,s0-s1} op {b1,0}'"
> < the niter "of untill wrap for iv0 or iv1".
>
> Does this patch make sense?
Hum, the patch is mighty complex :/ I'm not sure we can throw
artificial IVs at number_of_iterations_cond and expect a meaningful
result.
ISTR the problem is with number_of_iterations_ne[_max], but I would
have to go and dig in myself again for a full recap of the problem.
I did plan to do that, but not before stage3 starts.
Thanks,
Richard.
> BR,
> Jiufu Guo
>
> gcc/ChangeLog:
>
> PR tree-optimization/100740
> * tree-ssa-loop-niter.c (number_of_iterations_cond): Add
> assume condition for combining of two IVs
>
> gcc/testsuite/ChangeLog:
>
> * gcc.c-torture/execute/pr100740.c: New test.
> ---
> gcc/tree-ssa-loop-niter.c | 103 +++++++++++++++---
> .../gcc.c-torture/execute/pr100740.c | 11 ++
> 2 files changed, 99 insertions(+), 15 deletions(-)
> create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
>
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index 75109407124..f2987a4448d 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1863,29 +1863,102 @@ number_of_iterations_cond (class loop *loop,
>
> provided that either below condition is satisfied:
>
> - a) the test is NE_EXPR;
> - b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
> + a) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
> + b) assumptions in below table also need to be satisfied.
> +
> + | iv0 | iv1 | assum (iv0<iv1) | assum (iv0!=iv1) |
> + |---------+---------+---------------------+---------------------|
> + | (b0,2) | (b1,1) | before iv1 overflow | before iv1 overflow |
> + | (b0,2) | (b1,-1) | true | true |
> + | (b0,-1) | (b1,-2) | before iv0 overflow | before iv0 overflow |
> + | | | | |
> + | (b0,1) | (b1,2) | false | before iv0 overflow |
> + | (b0,-1) | (b1,2) | false | true |
> + | (b0,-2) | (b1,-1) | false | before iv1 overflow |
> + 'true' in above table means no need additional condition.
> + 'false' means this case can not satify the transform.
> + The first three rows: iv0->step > iv1->step;
> + The second three rows: iv0->step < iv1->step.
>
> This rarely occurs in practice, but it is simple enough to manage. */
> if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
> {
> + if (TREE_CODE (iv0->step) != INTEGER_CST
> + || TREE_CODE (iv1->step) != INTEGER_CST)
> + return false;
> + if (!iv0->no_overflow || !iv1->no_overflow)
> + return false;
> +
> tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
> - tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
> - iv0->step, iv1->step);
> -
> - /* No need to check sign of the new step since below code takes care
> - of this well. */
> - if (code != NE_EXPR
> - && (TREE_CODE (step) != INTEGER_CST
> - || !iv0->no_overflow || !iv1->no_overflow))
> + tree step
> + = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step);
> +
> + if (code != NE_EXPR && tree_int_cst_sign_bit (step))
> return false;
>
> - iv0->step = step;
> - if (!POINTER_TYPE_P (type))
> - iv0->no_overflow = false;
> + bool positive0 = !tree_int_cst_sign_bit (iv0->step);
> + bool positive1 = !tree_int_cst_sign_bit (iv1->step);
>
> - iv1->step = build_int_cst (step_type, 0);
> - iv1->no_overflow = true;
> + /* Cases in rows 2 and 4 of above table. */
> + if ((positive0 && !positive1) || (!positive0 && positive1))
> + {
> + iv0->step = step;
> + iv1->step = build_int_cst (step_type, 0);
> + return number_of_iterations_cond (loop, type, iv0, code, iv1,
> + niter, only_exit, every_iteration);
> + }
> +
> + affine_iv i_0, i_1;
> + class tree_niter_desc num;
> + i_0 = *iv0;
> + i_1 = *iv1;
> + i_0.step = step;
> + i_1.step = build_int_cst (step_type, 0);
> + if (!number_of_iterations_cond (loop, type, &i_0, code, &i_1, &num,
> + only_exit, every_iteration))
> + return false;
> +
> + affine_iv i0, i1;
> + class tree_niter_desc num_wrap;
> + i0 = *iv0;
> + i1 = *iv1;
> +
> + /* Reset iv0 and iv1 to calculate the niter which cause overflow. */
> + if (tree_int_cst_lt (i1.step, i0.step))
> + {
> + if (positive0 && positive1)
> + i0.step = build_int_cst (step_type, 0);
> + else if (!positive0 && !positive1)
> + i1.step = build_int_cst (step_type, 0);
> + if (code == NE_EXPR)
> + code = LT_EXPR;
> + }
> + else
> + {
> + if (positive0 && positive1)
> + i1.step = build_int_cst (step_type, 0);
> + else if (!positive0 && !positive1)
> + i0.step = build_int_cst (step_type, 0);
> + gcc_assert (code == NE_EXPR);
> + code = GT_EXPR;
> + }
> +
> + /* Calculate the niter which cause overflow. */
> + if (!number_of_iterations_cond (loop, type, &i0, code, &i1, &num_wrap,
> + only_exit, every_iteration))
> + return false;
> +
> + /* Make assumption there is no overflow. */
> + tree assum
> + = fold_build2 (LE_EXPR, boolean_type_node, num.niter,
> + fold_convert (TREE_TYPE (num.niter), num_wrap.niter));
> + num.assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
> + num.assumptions, assum);
> +
> + *iv0 = i_0;
> + *iv1 = i_1;
> + *niter = num;
> + return true;
> }
>
> /* If the result of the comparison is a constant, the loop is weird. More
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
> new file mode 100644
> index 00000000000..8fcdaffef3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
> @@ -0,0 +1,11 @@
> +/* PR tree-optimization/100740 */
> +
> +unsigned a, b;
> +int main() {
> + unsigned c = 0;
> + for (a = 0; a < 2; a++)
> + for (b = 0; b < 2; b++)
> + if (++c < a)
> + __builtin_abort ();
> + return 0;
> +}
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] Overflow check in simplifying exit cond comparing two IVs.
2021-10-28 9:13 ` Richard Biener
@ 2021-12-09 6:53 ` Jiufu Guo
2021-12-10 4:28 ` Jiufu Guo
0 siblings, 1 reply; 6+ messages in thread
From: Jiufu Guo @ 2021-12-09 6:53 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, amker.cheng, wschmidt, segher, dje.gcc, jlaw
Richard Biener <rguenther@suse.de> writes:
> On Mon, 18 Oct 2021, Jiufu Guo wrote:
>
>> With reference the discussions in:
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html
>>
>> Base on the patches in above discussion, we may draft a patch to fix the
>> issue.
>>
>> In this patch, to make sure it is ok to change '{b0,s0} op {b1,s1}' to
>> '{b0,s0-s1} op {b1,0}', we also compute the condition which could assume
>> both 2 ivs are not overflow/wrap: the niter "of '{b0,s0-s1} op {b1,0}'"
>> < the niter "of untill wrap for iv0 or iv1".
>>
>> Does this patch make sense?
>
> Hum, the patch is mightly complex :/ I'm not sure we can throw
> artficial IVs at number_of_iterations_cond and expect a meaningful
> result.
>
> ISTR the problem is with number_of_iterations_ne[_max], but I would
> have to go and dig in myself again for a full recap of the problem.
> I did plan to do that, but not before stage3 starts.
>
> Thanks,
> Richard.
Hi Richard,
Thanks for your comment! It is really complex, using artificial IVs and
recursively calling number_of_iterations_cond. We may use a simpler way.
Not sure if you had started to dig into the problem. I refined a patch.
Hope this patch is helpful. This patch enhances the conditions in some
aspects. Attached are two test cases that could be handled.
---
gcc/tree-ssa-loop-niter.c | 92 +++++++++++++++----
.../gcc.c-torture/execute/pr100740.c | 11 +++
gcc/testsuite/gcc.dg/vect/pr102131.c | 47 ++++++++++
3 files changed, 134 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr102131.c
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 06954e437f5..ee1d7293c5c 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1788,6 +1788,70 @@ dump_affine_iv (FILE *file, affine_iv *iv)
}
}
+/* Generate expr: (HIGH - LOW) / STEP, under UTYPE. */
+
+static tree
+get_step_count (tree high, tree low, tree step, tree utype,
+ bool end_inclusive = false)
+{
+ tree delta = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
+ delta = fold_convert (utype,delta);
+ if (end_inclusive)
+ delta = fold_build2 (PLUS_EXPR, utype, delta, build_one_cst (utype));
+
+ if (tree_int_cst_sign_bit (step))
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
+ step = fold_convert (utype, step);
+
+ return fold_build2 (FLOOR_DIV_EXPR, utype, delta, step);
+}
+
+/* Get the additional assumption needed when both steps are nonzero.
+ The assumption ensures that neither iv0 nor iv1 overflows or wraps
+ while one IV is catching up with the other. */
+
+static tree
+extra_iv_chase_assumption (affine_iv *iv0, affine_iv *iv1, tree step,
+ enum tree_code code)
+{
+ /* No need additional assumptions. */
+ if (code == NE_EXPR)
+ return boolean_true_node;
+
+ /* It is not safe to transform {b0, 1} < {b1, 2}. */
+ if (tree_int_cst_sign_bit (step))
+ return boolean_false_node;
+
+ /* No need addition assumption for pointer. */
+ tree type = TREE_TYPE (iv0->base);
+ if (POINTER_TYPE_P (type))
+ return boolean_true_node;
+
+ bool positive0 = !tree_int_cst_sign_bit (iv0->step);
+ bool positive1 = !tree_int_cst_sign_bit (iv1->step);
+ bool positive = !tree_int_cst_sign_bit (step);
+ tree utype = unsigned_type_for (type);
+ bool add1 = code == LE_EXPR;
+ tree niter = positive
+ ? get_step_count (iv1->base, iv0->base, step, utype, add1)
+ : get_step_count (iv0->base, iv1->base, step, utype, add1);
+
+ int prec = TYPE_PRECISION (type);
+ signop sgn = TYPE_SIGN (type);
+ tree max = wide_int_to_tree (type, wi::max_value (prec, sgn));
+ tree min = wide_int_to_tree (type, wi::min_value (prec, sgn));
+ tree valid_niter0, valid_niter1;
+
+ valid_niter0 = positive0 ? get_step_count (max, iv0->base, iv0->step, utype)
+ : get_step_count (iv0->base, min, iv0->step, utype);
+ valid_niter1 = positive1 ? get_step_count (max, iv1->base, iv1->step, utype)
+ : get_step_count (iv1->base, min, iv1->step, utype);
+
+ tree e0 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter0);
+ tree e1 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter1);
+ return fold_build2 (TRUTH_AND_EXPR, boolean_type_node, e0, e1);
+}
+
/* Determine the number of iterations according to condition (for staying
inside loop) which compares two induction variables using comparison
operator CODE. The induction variable on left side of the comparison
@@ -1879,30 +1943,26 @@ number_of_iterations_cond (class loop *loop,
{iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}
provided that either below condition is satisfied:
+ a. iv0.step and iv1.step are integer.
+ b. Additional condition: before iv0 catches up with iv1, iv0 and iv1 should
+ not step past the min or max of the type. */
- a) the test is NE_EXPR;
- b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
-
- This rarely occurs in practice, but it is simple enough to manage. */
if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
{
+ if (TREE_CODE (iv0->step) != INTEGER_CST
+ || TREE_CODE (iv1->step) != INTEGER_CST)
+ return false;
+
tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
- tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
- iv0->step, iv1->step);
-
- /* No need to check sign of the new step since below code takes care
- of this well. */
- if (code != NE_EXPR
- && (TREE_CODE (step) != INTEGER_CST
- || !iv0->no_overflow || !iv1->no_overflow))
+ tree step
+ = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step);
+
+ niter->assumptions = extra_iv_chase_assumption (iv0, iv1, step, code);
+ if (integer_zerop (niter->assumptions))
return false;
iv0->step = step;
- if (!POINTER_TYPE_P (type))
- iv0->no_overflow = false;
-
iv1->step = build_int_cst (step_type, 0);
- iv1->no_overflow = true;
}
/* If the result of the comparison is a constant, the loop is weird. More
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
new file mode 100644
index 00000000000..8fcdaffef3b
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
@@ -0,0 +1,11 @@
+/* PR tree-optimization/100740 */
+
+unsigned a, b;
+int main() {
+ unsigned c = 0;
+ for (a = 0; a < 2; a++)
+ for (b = 0; b < 2; b++)
+ if (++c < a)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr102131.c b/gcc/testsuite/gcc.dg/vect/pr102131.c
new file mode 100644
index 00000000000..23975cfeadb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr102131.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+#define MAX ((unsigned int) 0xffffffff)
+#define MIN ((unsigned int) (0))
+
+int arr[512];
+
+#define FUNC(NAME, CODE, S0, S1) \
+ unsigned __attribute__ ((noinline)) NAME (unsigned int b0, unsigned int b1) \
+ { \
+ unsigned int n = 0; \
+ unsigned int i0, i1; \
+ int *p = arr; \
+ for (i0 = b0, i1 = b1; i0 CODE i1; i0 += S0, i1 += S1) \
+ { \
+ n++; \
+ *p++ = i0 + i1; \
+ } \
+ return n; \
+ }
+
+FUNC (lt_5_1, <, 5, 1);
+FUNC (le_1_m5, <=, 1, -5);
+FUNC (lt_1_10, <, 1, 10);
+
+int
+main ()
+{
+ int fail = 0;
+ if (lt_5_1 (MAX - 124, MAX - 27) != 28)
+ fail++;
+
+ /* to save time, do not run this. */
+ /*
+ if (le_1_m5 (MIN + 1, MIN + 9) != 715827885)
+ fail++; */
+
+ if (lt_1_10 (MAX - 1000, MAX - 500) != 51)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
--
2.17.1
>
>
>> BR,
>> Jiufu Guo
>>
>> gcc/ChangeLog:
>>
>> PR tree-optimization/100740
>> * tree-ssa-loop-niter.c (number_of_iterations_cond): Add
>> assume condition for combining of two IVs
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.c-torture/execute/pr100740.c: New test.
>> ---
>> gcc/tree-ssa-loop-niter.c | 103 +++++++++++++++---
>> .../gcc.c-torture/execute/pr100740.c | 11 ++
>> 2 files changed, 99 insertions(+), 15 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
>>
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index 75109407124..f2987a4448d 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1863,29 +1863,102 @@ number_of_iterations_cond (class loop *loop,
>>
>> provided that either below condition is satisfied:
>>
>> - a) the test is NE_EXPR;
>> - b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
>> + a) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
>> + b) assumptions in below table also need to be satisfied.
>> +
>> + | iv0 | iv1 | assum (iv0<iv1) | assum (iv0!=iv1) |
>> + |---------+---------+---------------------+---------------------|
>> + | (b0,2) | (b1,1) | before iv1 overflow | before iv1 overflow |
>> + | (b0,2) | (b1,-1) | true | true |
>> + | (b0,-1) | (b1,-2) | before iv0 overflow | before iv0 overflow |
>> + | | | | |
>> + | (b0,1) | (b1,2) | false | before iv0 overflow |
>> + | (b0,-1) | (b1,2) | false | true |
>> + | (b0,-2) | (b1,-1) | false | before iv1 overflow |
>> + 'true' in above table means no need additional condition.
>> + 'false' means this case can not satify the transform.
>> + The first three rows: iv0->step > iv1->step;
>> + The second three rows: iv0->step < iv1->step.
>>
>> This rarely occurs in practice, but it is simple enough to manage. */
>> if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
>> {
>> + if (TREE_CODE (iv0->step) != INTEGER_CST
>> + || TREE_CODE (iv1->step) != INTEGER_CST)
>> + return false;
>> + if (!iv0->no_overflow || !iv1->no_overflow)
>> + return false;
>> +
>> tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
>> - tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
>> - iv0->step, iv1->step);
>> -
>> - /* No need to check sign of the new step since below code takes care
>> - of this well. */
>> - if (code != NE_EXPR
>> - && (TREE_CODE (step) != INTEGER_CST
>> - || !iv0->no_overflow || !iv1->no_overflow))
>> + tree step
>> + = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step);
>> +
>> + if (code != NE_EXPR && tree_int_cst_sign_bit (step))
>> return false;
>>
>> - iv0->step = step;
>> - if (!POINTER_TYPE_P (type))
>> - iv0->no_overflow = false;
>> + bool positive0 = !tree_int_cst_sign_bit (iv0->step);
>> + bool positive1 = !tree_int_cst_sign_bit (iv1->step);
>>
>> - iv1->step = build_int_cst (step_type, 0);
>> - iv1->no_overflow = true;
>> + /* Cases in rows 2 and 4 of above table. */
>> + if ((positive0 && !positive1) || (!positive0 && positive1))
>> + {
>> + iv0->step = step;
>> + iv1->step = build_int_cst (step_type, 0);
>> + return number_of_iterations_cond (loop, type, iv0, code, iv1,
>> + niter, only_exit, every_iteration);
>> + }
>> +
>> + affine_iv i_0, i_1;
>> + class tree_niter_desc num;
>> + i_0 = *iv0;
>> + i_1 = *iv1;
>> + i_0.step = step;
>> + i_1.step = build_int_cst (step_type, 0);
>> + if (!number_of_iterations_cond (loop, type, &i_0, code, &i_1, &num,
>> + only_exit, every_iteration))
>> + return false;
>> +
>> + affine_iv i0, i1;
>> + class tree_niter_desc num_wrap;
>> + i0 = *iv0;
>> + i1 = *iv1;
>> +
>> + /* Reset iv0 and iv1 to calculate the niter which cause overflow. */
>> + if (tree_int_cst_lt (i1.step, i0.step))
>> + {
>> + if (positive0 && positive1)
>> + i0.step = build_int_cst (step_type, 0);
>> + else if (!positive0 && !positive1)
>> + i1.step = build_int_cst (step_type, 0);
>> + if (code == NE_EXPR)
>> + code = LT_EXPR;
>> + }
>> + else
>> + {
>> + if (positive0 && positive1)
>> + i1.step = build_int_cst (step_type, 0);
>> + else if (!positive0 && !positive1)
>> + i0.step = build_int_cst (step_type, 0);
>> + gcc_assert (code == NE_EXPR);
>> + code = GT_EXPR;
>> + }
>> +
>> + /* Calculate the niter which cause overflow. */
>> + if (!number_of_iterations_cond (loop, type, &i0, code, &i1, &num_wrap,
>> + only_exit, every_iteration))
>> + return false;
>> +
>> + /* Make assumption there is no overflow. */
>> + tree assum
>> + = fold_build2 (LE_EXPR, boolean_type_node, num.niter,
>> + fold_convert (TREE_TYPE (num.niter), num_wrap.niter));
>> + num.assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
>> + num.assumptions, assum);
>> +
>> + *iv0 = i_0;
>> + *iv1 = i_1;
>> + *niter = num;
>> + return true;
>> }
>>
>> /* If the result of the comparison is a constant, the loop is weird. More
>> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
>> new file mode 100644
>> index 00000000000..8fcdaffef3b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
>> @@ -0,0 +1,11 @@
>> +/* PR tree-optimization/100740 */
>> +
>> +unsigned a, b;
>> +int main() {
>> + unsigned c = 0;
>> + for (a = 0; a < 2; a++)
>> + for (b = 0; b < 2; b++)
>> + if (++c < a)
>> + __builtin_abort ();
>> + return 0;
>> +}
>>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] Overflow check in simplifying exit cond comparing two IVs.
2021-12-09 6:53 ` Jiufu Guo
@ 2021-12-10 4:28 ` Jiufu Guo
2021-12-17 2:09 ` Jiufu Guo
0 siblings, 1 reply; 6+ messages in thread
From: Jiufu Guo @ 2021-12-10 4:28 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, amker.cheng, wschmidt, segher, dje.gcc, jlaw
Jiufu Guo <guojiufu@linux.ibm.com> writes:
> Richard Biener <rguenther@suse.de> writes:
>
>> On Mon, 18 Oct 2021, Jiufu Guo wrote:
>>
>>> With reference the discussions in:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html
>>>
>>> Base on the patches in above discussion, we may draft a patch to fix the
>>> issue.
>>>
>>> In this patch, to make sure it is ok to change '{b0,s0} op {b1,s1}' to
>>> '{b0,s0-s1} op {b1,0}', we also compute the condition which could assume
>>> both 2 ivs are not overflow/wrap: the niter "of '{b0,s0-s1} op {b1,0}'"
>>> < the niter "of untill wrap for iv0 or iv1".
>>>
>>> Does this patch make sense?
>>
>> Hum, the patch is mightly complex :/ I'm not sure we can throw
>> artficial IVs at number_of_iterations_cond and expect a meaningful
>> result.
>>
>> ISTR the problem is with number_of_iterations_ne[_max], but I would
>> have to go and dig in myself again for a full recap of the problem.
>> I did plan to do that, but not before stage3 starts.
>>
>> Thanks,
>> Richard.
>
> Hi Richard,
>
> Thanks for your comment! It is really complex, using artificial IVs and
> recursively calling number_of_iterations_cond. We may use a simpler way.
> Not sure if you had started to dig into the problem. I refined a patch.
> Hope this patch is helpful. This patch enhances the conditions in some
> aspects. Attached are two test cases that could be handled.
I have some questions I would like to ask here; the answers may help to make
the patch work better.
- 1. For signed types, I'm wondering whether we can rely on the idea of
"UB on signed overflow" at the point where number_of_iterations_cond is
called, which may be far from the user's source code.
If we can, we may simply drop the assumption for signed types.
But then there would be inconsistent behavior between no optimization (-O0)
and optimization (e.g. -O2/-O3). For example:
"{INT_MAX-124, +5} < {INT_MAX-27, +1}".
At -O0, 'niter' would be 28, while at -O3 it may come out as 26.
- 2. For NEQ, which you may also be concerned about, the assumption
"delta % step == 0" would make it safe. It seems that currently we handle
NEQ where no_overflow is true for both iv0 and iv1.
- 3. In the current patch a division (FLOOR_DIV_EXPR) is used, and its cost
may be high in some cases. I'm wondering if the idea below is workable:
extend to a longer type and use MULT instead of DIV, for example:
a < b/c ===> a*c < b. a*c may need a longer type than 'a'.
-- 3.1 For some special cases, e.g. "{b0, 5} < {b1, -5}", the assumption
may be simplified. For the general case, I am still thinking about how to
reduce the runtime cost of the assumption.
Thanks again!
BR,
Jiufu
>
> ---
> gcc/tree-ssa-loop-niter.c | 92 +++++++++++++++----
> .../gcc.c-torture/execute/pr100740.c | 11 +++
> gcc/testsuite/gcc.dg/vect/pr102131.c | 47 ++++++++++
> 3 files changed, 134 insertions(+), 16 deletions(-)
> create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr102131.c
>
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index 06954e437f5..ee1d7293c5c 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1788,6 +1788,70 @@ dump_affine_iv (FILE *file, affine_iv *iv)
> }
> }
>
> +/* Generate expr: (HIGH - LOW) / STEP, under UTYPE. */
> +
> +static tree
> +get_step_count (tree high, tree low, tree step, tree utype,
> + bool end_inclusive = false)
> +{
> + tree delta = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
> + delta = fold_convert (utype,delta);
> + if (end_inclusive)
> + delta = fold_build2 (PLUS_EXPR, utype, delta, build_one_cst (utype));
> +
> + if (tree_int_cst_sign_bit (step))
> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
> + step = fold_convert (utype, step);
> +
> + return fold_build2 (FLOOR_DIV_EXPR, utype, delta, step);
> +}
> +
> +/* Get the additional assumption if both two steps are not zero.
> + Assumptions satisfy that there is no overflow or wrap during
> + v0 and v1 chasing. */
> +
> +static tree
> +extra_iv_chase_assumption (affine_iv *iv0, affine_iv *iv1, tree step,
> + enum tree_code code)
> +{
> + /* No need additional assumptions. */
> + if (code == NE_EXPR)
> + return boolean_true_node;
> +
> + /* it not safe to transform {b0, 1} < {b1, 2}. */
> + if (tree_int_cst_sign_bit (step))
> + return boolean_false_node;
> +
> + /* No need addition assumption for pointer. */
> + tree type = TREE_TYPE (iv0->base);
> + if (POINTER_TYPE_P (type))
> + return boolean_true_node;
> +
> + bool positive0 = !tree_int_cst_sign_bit (iv0->step);
> + bool positive1 = !tree_int_cst_sign_bit (iv1->step);
> + bool positive = !tree_int_cst_sign_bit (step);
> + tree utype = unsigned_type_for (type);
> + bool add1 = code == LE_EXPR;
> + tree niter = positive
> + ? get_step_count (iv1->base, iv0->base, step, utype, add1)
> + : get_step_count (iv0->base, iv1->base, step, utype, add1);
> +
> + int prec = TYPE_PRECISION (type);
> + signop sgn = TYPE_SIGN (type);
> + tree max = wide_int_to_tree (type, wi::max_value (prec, sgn));
> + tree min = wide_int_to_tree (type, wi::min_value (prec, sgn));
> + tree valid_niter0, valid_niter1;
> +
> + valid_niter0 = positive0 ? get_step_count (max, iv0->base, iv0->step, utype)
> + : get_step_count (iv0->base, min, iv0->step, utype);
> + valid_niter1 = positive1 ? get_step_count (max, iv1->base, iv1->step, utype)
> + : get_step_count (iv1->base, min, iv1->step, utype);
> +
> + tree e0 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter0);
> + tree e1 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter1);
> + return fold_build2 (TRUTH_AND_EXPR, boolean_type_node, e0, e1);
> +}
> +
> /* Determine the number of iterations according to condition (for staying
> inside loop) which compares two induction variables using comparison
> operator CODE. The induction variable on left side of the comparison
> @@ -1879,30 +1943,26 @@ number_of_iterations_cond (class loop *loop,
> {iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}
>
> provided that either below condition is satisfied:
> + a. iv0.step and iv1.step are integer.
> + b. Additional condition: before iv0 chase up v1, iv0 and iv1 should not
> + step over min or max of the type. */
>
> - a) the test is NE_EXPR;
> - b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
> -
> - This rarely occurs in practice, but it is simple enough to manage. */
> if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
> {
> + if (TREE_CODE (iv0->step) != INTEGER_CST
> + || TREE_CODE (iv1->step) != INTEGER_CST)
> + return false;
> +
> tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
> - tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
> - iv0->step, iv1->step);
> -
> - /* No need to check sign of the new step since below code takes care
> - of this well. */
> - if (code != NE_EXPR
> - && (TREE_CODE (step) != INTEGER_CST
> - || !iv0->no_overflow || !iv1->no_overflow))
> + tree step
> + = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step);
> +
> + niter->assumptions = extra_iv_chase_assumption (iv0, iv1, step, code);
> + if (integer_zerop (niter->assumptions))
> return false;
>
> iv0->step = step;
> - if (!POINTER_TYPE_P (type))
> - iv0->no_overflow = false;
> -
> iv1->step = build_int_cst (step_type, 0);
> - iv1->no_overflow = true;
> }
>
> /* If the result of the comparison is a constant, the loop is weird. More
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
> new file mode 100644
> index 00000000000..8fcdaffef3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
> @@ -0,0 +1,11 @@
> +/* PR tree-optimization/100740 */
> +
> +unsigned a, b;
> +int main() {
> + unsigned c = 0;
> + for (a = 0; a < 2; a++)
> + for (b = 0; b < 2; b++)
> + if (++c < a)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/pr102131.c b/gcc/testsuite/gcc.dg/vect/pr102131.c
> new file mode 100644
> index 00000000000..23975cfeadb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr102131.c
> @@ -0,0 +1,47 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +#define MAX ((unsigned int) 0xffffffff)
> +#define MIN ((unsigned int) (0))
> +
> +int arr[512];
> +
> +#define FUNC(NAME, CODE, S0, S1) \
> + unsigned __attribute__ ((noinline)) NAME (unsigned int b0, unsigned int b1) \
> + { \
> + unsigned int n = 0; \
> + unsigned int i0, i1; \
> + int *p = arr; \
> + for (i0 = b0, i1 = b1; i0 CODE i1; i0 += S0, i1 += S1) \
> + { \
> + n++; \
> + *p++ = i0 + i1; \
> + } \
> + return n; \
> + }
> +
> +FUNC (lt_5_1, <, 5, 1);
> +FUNC (le_1_m5, <=, 1, -5);
> +FUNC (lt_1_10, <, 1, 10);
> +
> +int
> +main ()
> +{
> + int fail = 0;
> + if (lt_5_1 (MAX - 124, MAX - 27) != 28)
> + fail++;
> +
> + /* to save time, do not run this. */
> + /*
> + if (le_1_m5 (MIN + 1, MIN + 9) != 715827885)
> + fail++; */
> +
> + if (lt_1_10 (MAX - 1000, MAX - 500) != 51)
> + fail++;
> +
> + if (fail)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] Overflow check in simplifying exit cond comparing two IVs.
2021-12-10 4:28 ` Jiufu Guo
@ 2021-12-17 2:09 ` Jiufu Guo
0 siblings, 0 replies; 6+ messages in thread
From: Jiufu Guo @ 2021-12-17 2:09 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, amker.cheng, wschmidt, segher, dje.gcc, jlaw
Jiufu Guo <guojiufu@linux.ibm.com> writes:
> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
>> Richard Biener <rguenther@suse.de> writes:
>>
>>> On Mon, 18 Oct 2021, Jiufu Guo wrote:
>>>
>>>> With reference the discussions in:
>>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html
>>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html
>>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html
>>>>
>>>> Base on the patches in above discussion, we may draft a patch to fix the
>>>> issue.
>>>>
>>>> In this patch, to make sure it is ok to change '{b0,s0} op {b1,s1}' to
>>>> '{b0,s0-s1} op {b1,0}', we also compute the condition which could assume
>>>> both 2 ivs are not overflow/wrap: the niter "of '{b0,s0-s1} op {b1,0}'"
>>>> < the niter "of untill wrap for iv0 or iv1".
>>>>
>>>> Does this patch make sense?
>>>
>>> Hum, the patch is mightly complex :/ I'm not sure we can throw
>>> artficial IVs at number_of_iterations_cond and expect a meaningful
>>> result.
>>>
>>> ISTR the problem is with number_of_iterations_ne[_max], but I would
>>> have to go and dig in myself again for a full recap of the problem.
>>> I did plan to do that, but not before stage3 starts.
>>>
>>> Thanks,
>>> Richard.
>>
>> Hi Richard,
>>
>> Thanks for your comment! It is really complex, using artificial IVs and
>> recursively calling number_of_iterations_cond. We may use a simpler way.
>> Not sure if you had started to dig into the problem. I refined a patch.
>> Hope this patch is helpful. This patch enhances the conditions in some
>> aspects. Attached are two test cases that could be handled.
>
> Some questions, I want to consult here, it may help to make the patch
> works better.
>
> - 1. For signed type, I'm wondering if we could leverage the idea about
> "UB on signed overflow" in the phase to call number_of_iterations_cond
> where may be far from user source code.
> If we can, we may just ignore the assumption for signed type.
> But then, there would be inconsitent behavior between noopt(-O0) and
> opt (e.g. -O2/-O3). For example:
> "{INT_MAX-124, +5} < {INT_MAX-27, +1}".
> At -O0, the 'niter' would be 28; while, at -O3, it may result as 26.
>
> - 2. For NEQ, which you may also concern, the assumption
> "delta % step == 0" would make it safe. It seems current, we handle
> NEQ where no_overflow is true for both iv0 and iv1.
Regarding overflow behavior on signed types, here is a test case. It runs for
a long time when built without optimization. With optimization (e.g. -O3), it
finishes quickly and returns an iteration count of 25.
------
#define TYPE int
#define FUNC(NAME, CODE, S0, S1) \
TYPE __attribute__ ((noinline)) NAME (TYPE b0, TYPE b1) \
{ \
__builtin_printf ("%s %d, %d\n", __FUNCTION__, b0, b1); \
TYPE n = 0; \
TYPE i0, i1; \
for (i0 = b0, i1 = b1; i0 CODE i1; i0 += S0, i1 += S1) \
n++; \
return n; \
}
FUNC (ne_4_0, !=, 4, 0);
int
main ()
{
TYPE r = ne_4_0 (1000, 1103); /* b0 < b1, niter % s != 0 */
__builtin_printf ("res: %ld\n", r);
return r;
}
----------
If TYPE is unsigned, it runs for a long time, even when built with -O3.
For unsigned types, the assumption check "delta % step == 0" is added,
while for signed types there is no assumption check. Here, signed
overflow is treated as UB. With the option -fwrapv, it also runs for
a long time, since this option defines the behavior on overflow.
So, in some respects, the current behavior seems reasonable, including
the fact that it returns a niter of 25.
We may therefore keep the current behavior for questions 1 and 2.
Thanks for comments!
BR,
Jiufu
>
> - 3. In the current patch, DIV_EXPR is used, the cost may be high in
> some cases. I'm wondering if the below idea is workable:
> Extent to longer type, and using MULT instead DIV, for example:
> a < b/c ===> a*c < b. a*c may be need to use longer type than 'a'.
>
> -- 3.1 For some special case, e.g. "{b0, 5} < {b1, -5}", the assumption
> may be able to simplied. For general case, still thinking to reduce
> the runtime cost from assumption.
>
>
> Thanks again!
>
> BR,
> Jiufu
>
>>
>> ---
>> gcc/tree-ssa-loop-niter.c | 92 +++++++++++++++----
>> .../gcc.c-torture/execute/pr100740.c | 11 +++
>> gcc/testsuite/gcc.dg/vect/pr102131.c | 47 ++++++++++
>> 3 files changed, 134 insertions(+), 16 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
>> create mode 100644 gcc/testsuite/gcc.dg/vect/pr102131.c
>>
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index 06954e437f5..ee1d7293c5c 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -1788,6 +1788,70 @@ dump_affine_iv (FILE *file, affine_iv *iv)
>> }
>> }
>>
>> +/* Generate expr: (HIGH - LOW) / STEP, under UTYPE. */
>> +
>> +static tree
>> +get_step_count (tree high, tree low, tree step, tree utype,
>> + bool end_inclusive = false)
>> +{
>> + tree delta = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
>> + delta = fold_convert (utype,delta);
>> + if (end_inclusive)
>> + delta = fold_build2 (PLUS_EXPR, utype, delta, build_one_cst (utype));
>> +
>> + if (tree_int_cst_sign_bit (step))
>> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
>> + step = fold_convert (utype, step);
>> +
>> + return fold_build2 (FLOOR_DIV_EXPR, utype, delta, step);
>> +}
>> +
>> +/* Get the additional assumption if both two steps are not zero.
>> + Assumptions satisfy that there is no overflow or wrap during
>> + v0 and v1 chasing. */
>> +
>> +static tree
>> +extra_iv_chase_assumption (affine_iv *iv0, affine_iv *iv1, tree step,
>> + enum tree_code code)
>> +{
>> + /* No need additional assumptions. */
>> + if (code == NE_EXPR)
>> + return boolean_true_node;
>> +
>> + /* it not safe to transform {b0, 1} < {b1, 2}. */
>> + if (tree_int_cst_sign_bit (step))
>> + return boolean_false_node;
>> +
>> + /* No need addition assumption for pointer. */
>> + tree type = TREE_TYPE (iv0->base);
>> + if (POINTER_TYPE_P (type))
>> + return boolean_true_node;
>> +
>> + bool positive0 = !tree_int_cst_sign_bit (iv0->step);
>> + bool positive1 = !tree_int_cst_sign_bit (iv1->step);
>> + bool positive = !tree_int_cst_sign_bit (step);
>> + tree utype = unsigned_type_for (type);
>> + bool add1 = code == LE_EXPR;
>> + tree niter = positive
>> + ? get_step_count (iv1->base, iv0->base, step, utype, add1)
>> + : get_step_count (iv0->base, iv1->base, step, utype, add1);
>> +
>> + int prec = TYPE_PRECISION (type);
>> + signop sgn = TYPE_SIGN (type);
>> + tree max = wide_int_to_tree (type, wi::max_value (prec, sgn));
>> + tree min = wide_int_to_tree (type, wi::min_value (prec, sgn));
>> + tree valid_niter0, valid_niter1;
>> +
>> + valid_niter0 = positive0 ? get_step_count (max, iv0->base, iv0->step, utype)
>> + : get_step_count (iv0->base, min, iv0->step, utype);
>> + valid_niter1 = positive1 ? get_step_count (max, iv1->base, iv1->step, utype)
>> + : get_step_count (iv1->base, min, iv1->step, utype);
>> +
>> + tree e0 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter0);
>> + tree e1 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter1);
>> + return fold_build2 (TRUTH_AND_EXPR, boolean_type_node, e0, e1);
>> +}
>> +
>> /* Determine the number of iterations according to condition (for staying
>> inside loop) which compares two induction variables using comparison
>> operator CODE. The induction variable on left side of the comparison
>> @@ -1879,30 +1943,26 @@ number_of_iterations_cond (class loop *loop,
>> {iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}
>>
>> provided that either below condition is satisfied:
>> + a. iv0.step and iv1.step are integer.
>> + b. Additional condition: before iv0 chase up v1, iv0 and iv1 should not
>> + step over min or max of the type. */
>>
>> - a) the test is NE_EXPR;
>> - b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
>> -
>> - This rarely occurs in practice, but it is simple enough to manage. */
>> if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
>> {
>> + if (TREE_CODE (iv0->step) != INTEGER_CST
>> + || TREE_CODE (iv1->step) != INTEGER_CST)
>> + return false;
>> +
>> tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
>> - tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
>> - iv0->step, iv1->step);
>> -
>> - /* No need to check sign of the new step since below code takes care
>> - of this well. */
>> - if (code != NE_EXPR
>> - && (TREE_CODE (step) != INTEGER_CST
>> - || !iv0->no_overflow || !iv1->no_overflow))
>> + tree step
>> + = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step);
>> +
>> + niter->assumptions = extra_iv_chase_assumption (iv0, iv1, step, code);
>> + if (integer_zerop (niter->assumptions))
>> return false;
>>
>> iv0->step = step;
>> - if (!POINTER_TYPE_P (type))
>> - iv0->no_overflow = false;
>> -
>> iv1->step = build_int_cst (step_type, 0);
>> - iv1->no_overflow = true;
>> }
>>
>> /* If the result of the comparison is a constant, the loop is weird. More
>> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
>> new file mode 100644
>> index 00000000000..8fcdaffef3b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
>> @@ -0,0 +1,11 @@
>> +/* PR tree-optimization/100740 */
>> +
>> +unsigned a, b;
>> +int main() {
>> + unsigned c = 0;
>> + for (a = 0; a < 2; a++)
>> + for (b = 0; b < 2; b++)
>> + if (++c < a)
>> + __builtin_abort ();
>> + return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr102131.c b/gcc/testsuite/gcc.dg/vect/pr102131.c
>> new file mode 100644
>> index 00000000000..23975cfeadb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr102131.c
>> @@ -0,0 +1,47 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-additional-options "-O3" } */
>> +#define MAX ((unsigned int) 0xffffffff)
>> +#define MIN ((unsigned int) (0))
>> +
>> +int arr[512];
>> +
>> +#define FUNC(NAME, CODE, S0, S1) \
>> + unsigned __attribute__ ((noinline)) NAME (unsigned int b0, unsigned int b1) \
>> + { \
>> + unsigned int n = 0; \
>> + unsigned int i0, i1; \
>> + int *p = arr; \
>> + for (i0 = b0, i1 = b1; i0 CODE i1; i0 += S0, i1 += S1) \
>> + { \
>> + n++; \
>> + *p++ = i0 + i1; \
>> + } \
>> + return n; \
>> + }
>> +
>> +FUNC (lt_5_1, <, 5, 1);
>> +FUNC (le_1_m5, <=, 1, -5);
>> +FUNC (lt_1_10, <, 1, 10);
>> +
>> +int
>> +main ()
>> +{
>> + int fail = 0;
>> + if (lt_5_1 (MAX - 124, MAX - 27) != 28)
>> + fail++;
>> +
>> + /* to save time, do not run this. */
>> + /*
>> + if (le_1_m5 (MIN + 1, MIN + 9) != 715827885)
>> + fail++; */
>> +
>> + if (lt_1_10 (MAX - 1000, MAX - 500) != 51)
>> + fail++;
>> +
>> + if (fail)
>> + __builtin_abort ();
>> +
>> + return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2021-12-17 2:09 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-18 13:37 [RFC] Overflow check in simplifying exit cond comparing two IVs Jiufu Guo
2021-10-28 2:19 ` guojiufu
2021-10-28 9:13 ` Richard Biener
2021-12-09 6:53 ` Jiufu Guo
2021-12-10 4:28 ` Jiufu Guo
2021-12-17 2:09 ` Jiufu Guo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).