public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR 110539: missed optimization after moving two_value to match.pd
@ 2023-07-07 17:56 Andrew Pinski
  2023-07-10  7:49 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Andrew Pinski @ 2023-07-07 17:56 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

When I moved two_value to match.pd, I removed the check for the {0,+-1}
as I had placed it after the {0,+-1} case for cond in match.pd.
In the case of {0,+-1} and non boolean, before we would optmize those
case to just `(convert)a` but after we would get `(convert)(a != 0)`
which was not handled anyways to just `(convert)a`.
So this adds a pattern to match `(convert)(zeroone != 0)` and simplify
to `(convert)zeroone`.

In the bug report, we do finally optimize `(convert)(zeroone != 0)` to
`zeroone` in VRP2 but that in itself is too late and we miss other
optimizations that would have happened.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

	PR tree-optimization/110539
	* match.pd ((convert)(zeroone !=/== 0)): Match
	and simplify to ((convert)zeroone)){,^1}.

gcc/testsuite/ChangeLog:

	PR tree-optimization/110539
	* gcc.dg/tree-ssa/pr110539-1.c: New test.
	* gcc.dg/tree-ssa/pr110539-2.c: New test.
	* gcc.dg/tree-ssa/pr110539-3.c: New test.
---
 gcc/match.pd                               | 15 +++++
 gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c | 12 ++++
 gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c | 12 ++++
 gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c | 70 ++++++++++++++++++++++
 4 files changed, 109 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c

diff --git a/gcc/match.pd b/gcc/match.pd
index c709153217a..87767a7778b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2060,6 +2060,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop))
     (vec_cond @0 @2 @3)))
 
+#if GIMPLE
+/* This cannot be done on generic as fold has the
+   exact opposite transformation:
+   `Fold ~X & 1 as (X & 1) == 0.`
+   `Fold (X ^ 1) & 1 as (X & 1) == 0.`  */
+/* (convert)(zeroone != 0) into (convert)zeroone */
+/* (convert)(zeroone == 0) into (convert)(zeroone^1) */
+(for neeq (ne eq)
+ (simplify
+  (convert (neeq zero_one_valued_p@0 integer_zerop))
+  (if (neeq == NE_EXPR)
+   (convert @0)
+   (convert (bit_xor @0 { build_one_cst (TREE_TYPE (@0)); } )))))
+#endif
+
 /* Transform X & -Y into X * Y when Y is { 0 or 1 }.  */
 (simplify
  (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
new file mode 100644
index 00000000000..6ba864cdd13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+int f(int a)
+{
+        int b = a & 1;
+        int c = b != 0;
+        return c == b;
+}
+
+/* This should be optimized to just return 1; */
+/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
+/* { dg-final { scan-tree-dump "return 1;" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
new file mode 100644
index 00000000000..17874d349ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+int f(int a)
+{
+        int b = a & 1;
+        int c = b == 0;
+        return c == b;
+}
+
+/* This should be optimized to just return 0; */
+/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
new file mode 100644
index 00000000000..c8ef6f56dcd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void foo(void);
+static int a, c = 1;
+static short b;
+static int *d = &c, *e = &a;
+static int **f = &d;
+void __assert_fail() __attribute__((__noreturn__));
+static void g(short h) {
+    if (*d)
+        ;
+    else {
+        if (e) __assert_fail();
+        if (a) {
+            __builtin_unreachable();
+        } else
+            __assert_fail();
+    }
+    if ((((0, 0) || h) == h) + b) *f = 0;
+}
+int main() {
+    int i = 0 != 10 & a;
+    g(i);
+    *e = 9;
+    e = 0;
+    if (d == 0)
+        ;
+    else
+        foo();
+    ;
+}
+/* The call to foo should be optimized away. */
+/* The missed optimization at -O2 here was:
+        int b = a & 1;
+        int c = b != 0;
+        int d = c == b;
+  not being optimized to 1 early enough, it is done in vrp2 but
+  that is too late.
+  In phiopt2 we got:
+    _17 = i_7 != 0;
+    _12 = (int) _17;
+    if (i_7 == _12)
+      goto <bb 9>; [50.00%]
+    else
+      goto <bb 10>; [50.00%]
+
+    <bb 9> [local count: 268435456]:
+    d = 0B;
+
+    <bb 10> [local count: 536870913]:
+    e.1_3 = e;
+    *e.1_3 = 9;
+    e = 0B;
+    d.2_4 = d;
+    if (d.2_4 == 0B)
+
+  The first if is not optimized before, until vrp2 which is
+  too late as there are no passes which will then find the
+  load of d in `d.2_4 = d;` was `0B` after vrp2.
+
+  Now we optimize dom2 because phiopt2 we just get:
+    _12 = i_7;
+    if (i_7 == _12)
+      goto <bb 9>; [50.00%]
+    else
+      goto <bb 10>; [50.00%]
+ */
+
+/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix PR 110539: missed optimization after moving two_value to match.pd
  2023-07-07 17:56 [PATCH] Fix PR 110539: missed optimization after moving two_value to match.pd Andrew Pinski
@ 2023-07-10  7:49 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2023-07-10  7:49 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: gcc-patches

On Fri, Jul 7, 2023 at 7:57 PM Andrew Pinski via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> When I moved two_value to match.pd, I removed the check for the {0,+-1}
> as I had placed it after the {0,+-1} case for cond in match.pd.
> In the case of {0,+-1} and non boolean, before we would optmize those
> case to just `(convert)a` but after we would get `(convert)(a != 0)`
> which was not handled anyways to just `(convert)a`.
> So this adds a pattern to match `(convert)(zeroone != 0)` and simplify
> to `(convert)zeroone`.
>
> In the bug report, we do finally optimize `(convert)(zeroone != 0)` to
> `zeroone` in VRP2 but that in itself is too late and we miss other
> optimizations that would have happened.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> gcc/ChangeLog:
>
>         PR tree-optimization/110539
>         * match.pd ((convert)(zeroone !=/== 0)): Match
>         and simplify to ((convert)zeroone)){,^1}.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/110539
>         * gcc.dg/tree-ssa/pr110539-1.c: New test.
>         * gcc.dg/tree-ssa/pr110539-2.c: New test.
>         * gcc.dg/tree-ssa/pr110539-3.c: New test.
> ---
>  gcc/match.pd                               | 15 +++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c | 12 ++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c | 12 ++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c | 70 ++++++++++++++++++++++
>  4 files changed, 109 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c709153217a..87767a7778b 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -2060,6 +2060,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop))
>      (vec_cond @0 @2 @3)))
>
> +#if GIMPLE
> +/* This cannot be done on generic as fold has the
> +   exact opposite transformation:
> +   `Fold ~X & 1 as (X & 1) == 0.`
> +   `Fold (X ^ 1) & 1 as (X & 1) == 0.`  */
> +/* (convert)(zeroone != 0) into (convert)zeroone */

Not sure how the comment on GENERIC applies to this one?

> +/* (convert)(zeroone == 0) into (convert)(zeroone^1) */

This OTOH is a canonicalization, and I'm not sure a very good one
since (convert)(zeroone == 0) might be more easily recognized
as setCC and a zero flag might even be present in the def of zeroone?

> +(for neeq (ne eq)
> + (simplify
> +  (convert (neeq zero_one_valued_p@0 integer_zerop))
> +  (if (neeq == NE_EXPR)
> +   (convert @0)
> +   (convert (bit_xor @0 { build_one_cst (TREE_TYPE (@0)); } )))))
> +#endif
> +
>  /* Transform X & -Y into X * Y when Y is { 0 or 1 }.  */
>  (simplify
>   (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
> new file mode 100644
> index 00000000000..6ba864cdd13
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-optimized" } */
> +int f(int a)
> +{
> +        int b = a & 1;
> +        int c = b != 0;
> +        return c == b;
> +}
> +
> +/* This should be optimized to just return 1; */
> +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
> +/* { dg-final { scan-tree-dump "return 1;" "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
> new file mode 100644
> index 00000000000..17874d349ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-optimized" } */
> +int f(int a)
> +{
> +        int b = a & 1;
> +        int c = b == 0;
> +        return c == b;
> +}
> +
> +/* This should be optimized to just return 0; */
> +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
> +/* { dg-final { scan-tree-dump "return 0;" "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
> new file mode 100644
> index 00000000000..c8ef6f56dcd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
> @@ -0,0 +1,70 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +void foo(void);
> +static int a, c = 1;
> +static short b;
> +static int *d = &c, *e = &a;
> +static int **f = &d;
> +void __assert_fail() __attribute__((__noreturn__));
> +static void g(short h) {
> +    if (*d)
> +        ;
> +    else {
> +        if (e) __assert_fail();
> +        if (a) {
> +            __builtin_unreachable();
> +        } else
> +            __assert_fail();
> +    }
> +    if ((((0, 0) || h) == h) + b) *f = 0;
> +}
> +int main() {
> +    int i = 0 != 10 & a;
> +    g(i);
> +    *e = 9;
> +    e = 0;
> +    if (d == 0)
> +        ;
> +    else
> +        foo();
> +    ;
> +}
> +/* The call to foo should be optimized away. */
> +/* The missed optimization at -O2 here was:
> +        int b = a & 1;
> +        int c = b != 0;
> +        int d = c == b;
> +  not being optimized to 1 early enough, it is done in vrp2 but
> +  that is too late.
> +  In phiopt2 we got:
> +    _17 = i_7 != 0;
> +    _12 = (int) _17;
> +    if (i_7 == _12)
> +      goto <bb 9>; [50.00%]
> +    else
> +      goto <bb 10>; [50.00%]
> +
> +    <bb 9> [local count: 268435456]:
> +    d = 0B;
> +
> +    <bb 10> [local count: 536870913]:
> +    e.1_3 = e;
> +    *e.1_3 = 9;
> +    e = 0B;
> +    d.2_4 = d;
> +    if (d.2_4 == 0B)
> +
> +  The first if is not optimized before, until vrp2 which is
> +  too late as there are no passes which will then find the
> +  load of d in `d.2_4 = d;` was `0B` after vrp2.
> +
> +  Now we optimize dom2 because phiopt2 we just get:
> +    _12 = i_7;
> +    if (i_7 == _12)
> +      goto <bb 9>; [50.00%]
> +    else
> +      goto <bb 10>; [50.00%]
> + */
> +
> +/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */
> --
> 2.31.1
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-07-10  7:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-07 17:56 [PATCH] Fix PR 110539: missed optimization after moving two_value to match.pd Andrew Pinski
2023-07-10  7:49 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).