public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-2501] Fix part of PR 110293: `A NEEQ (A NEEQ CST)` part
@ 2023-07-13 14:55 Andrew Pinski
0 siblings, 0 replies; only message in thread
From: Andrew Pinski @ 2023-07-13 14:55 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:285c9d042e90a7425b37697edc9ec93a1b03b486
commit r14-2501-g285c9d042e90a7425b37697edc9ec93a1b03b486
Author: Andrew Pinski <apinski@marvell.com>
Date: Wed Jul 12 00:33:14 2023 -0700
Fix part of PR 110293: `A NEEQ (A NEEQ CST)` part
This fixes part of PR 110293, for the outer comparison case
being `!=` or `==`. In turn PR 110539 is able to be optimized
again as the if statement for `(a&1) == ((a & 1) != 0)` gets optimized
to `true` early enough to allow FRE/DOM to do a CSE for memory store/load.
OK? Bootstrapped and tested on x86_64-linux with no regressions.
gcc/ChangeLog:
PR tree-optimization/110293
PR tree-optimization/110539
* match.pd: Expand the `x != (typeof x)(x == 0)`
pattern to handle where the inner and outer comparisons
are either `!=` or `==` and handle other constants
than 0.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr110293-1.c: New test.
* gcc.dg/tree-ssa/pr110539-1.c: New test.
* gcc.dg/tree-ssa/pr110539-2.c: New test.
* gcc.dg/tree-ssa/pr110539-3.c: New test.
* gcc.dg/tree-ssa/pr110539-4.c: New test.
Diff:
---
gcc/match.pd | 39 ++++++++++++--
gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c | 58 +++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c | 12 +++++
gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c | 12 +++++
gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c | 75 +++++++++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c | 82 ++++++++++++++++++++++++++++++
6 files changed, 274 insertions(+), 4 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 8543f777a28..351d9285e92 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6429,10 +6429,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (TYPE_UNSIGNED (TREE_TYPE (@0)))
{ constant_boolean_node (false, type); }))
-/* x != (typeof x)(x == 0) is always true. */
-(simplify
- (ne:c @0 (convert (eq @0 integer_zerop)))
- { constant_boolean_node (true, type); })
+/* x != (typeof x)(x == CST) -> CST == 0 ? 1 : (CST == 1 ? (x!=0&&x!=1) : x != 0) */
+/* x != (typeof x)(x != CST) -> CST == 1 ? 1 : (CST == 0 ? (x!=0&&x!=1) : x != 1) */
+/* x == (typeof x)(x == CST) -> CST == 0 ? 0 : (CST == 1 ? (x==0||x==1) : x == 0) */
+/* x == (typeof x)(x != CST) -> CST == 1 ? 0 : (CST == 0 ? (x==0||x==1) : x == 1) */
+(for outer (ne eq)
+ (for inner (ne eq)
+ (simplify
+ (outer:c @0 (convert (inner @0 INTEGER_CST@1)))
+ (with {
+ bool cst1 = integer_onep (@1);
+ bool cst0 = integer_zerop (@1);
+ bool innereq = inner == EQ_EXPR;
+ bool outereq = outer == EQ_EXPR;
+ }
+ (switch
+ (if (innereq ? cst0 : cst1) /* x can never equal the inner result.  */
+ { constant_boolean_node (!outereq, type); })
+ (if (innereq ? cst1 : cst0) /* x equals the inner result iff x is 0 or 1.  */
+ (with {
+ tree utype = unsigned_type_for (TREE_TYPE (@0));
+ tree ucst1 = build_one_cst (utype);
+ }
+ (if (!outereq)
+ (gt (convert:utype @0) { ucst1; })
+ (le (convert:utype @0) { ucst1; })
+ )
+ )
+ )
+ (if (innereq) /* Other CST: keep OUTER, do not force NE (x == (x == 2) is x == 0).  */
+ (outer @0 { build_zero_cst (TREE_TYPE (@0)); }))
+ (outer @0 { build_one_cst (TREE_TYPE (@0)); }))
+ )
+ )
+ )
+)
(for cmp (unordered ordered unlt unle ungt unge uneq ltgt)
/* If the second operand is NaN, the result is constant. */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c
new file mode 100644
index 00000000000..24aea1a2d03
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110293-1.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
+
+_Bool eqeq0(unsigned x)
+{
+ return x == (x == 0);
+}
+_Bool eqeq1(unsigned x)
+{
+ return x == (x == 1);
+}
+_Bool eqeq2(unsigned x)
+{
+ return x == (x == 2);
+}
+
+_Bool neeq0(unsigned x)
+{
+ return x != (x == 0);
+}
+_Bool neeq1(unsigned x)
+{
+ return x != (x == 1);
+}
+_Bool neeq2(unsigned x)
+{
+ return x != (x == 2);
+}
+
+_Bool eqne0(unsigned x)
+{
+ return x == (x != 0);
+}
+_Bool eqne1(unsigned x)
+{
+ return x == (x != 1);
+}
+_Bool eqne2(unsigned x)
+{
+ return x == (x != 2);
+}
+
+_Bool nene0(unsigned x)
+{
+ return x != (x != 0);
+}
+_Bool nene1(unsigned x)
+{
+ return x != (x != 1);
+}
+_Bool nene2(unsigned x)
+{
+ return x != (x != 2);
+}
+
+/* All of these functions should have had the innermost comparison removed, which
+ means all of the conversions from bool to unsigned should have been removed too. */
+/* { dg-final { scan-tree-dump-not "nop_expr," "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
new file mode 100644
index 00000000000..6ba864cdd13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+int f(int a)
+{
+ int b = a & 1;
+ int c = b != 0;
+ return c == b;
+}
+
+/* This should be optimized to just return 1; */
+/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
+/* { dg-final { scan-tree-dump "return 1;" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
new file mode 100644
index 00000000000..17874d349ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+int f(int a)
+{
+ int b = a & 1;
+ int c = b == 0;
+ return c == b;
+}
+
+/* This should be optimized to just return 0; */
+/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
+/* { dg-final { scan-tree-dump "return 0;" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
new file mode 100644
index 00000000000..e2bd4dfb45d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-3.c
@@ -0,0 +1,75 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void foo(void);
+static int a, c = 1;
+static short b;
+static int *d = &c, *e = &a;
+static int **f = &d;
+void __assert_fail() __attribute__((__noreturn__));
+static void g(short h) {
+ if (*d)
+ ;
+ else {
+ if (e) __assert_fail();
+ if (a) {
+ __builtin_unreachable();
+ } else
+ __assert_fail();
+ }
+ if ((((0, 0) || h) == h) + b) *f = 0;
+}
+int main() {
+ int i = 0 != 10 & a;
+ g(i);
+ *e = 9;
+ e = 0;
+ if (d == 0)
+ ;
+ else
+ foo();
+ ;
+}
+/* The call to foo should be optimized away. */
+/* The missed optimization at -O2 here was:
+ int b = a & 1;
+ int c = b != 0;
+ int d = c == b;
+ not being optimized to 1 early enough, it is done in vrp2 but
+ that is too late.
+ In phiopt2 we got:
+ _17 = i_7 != 0;
+ _12 = (int) _17;
+ if (i_7 == _12)
+ goto <bb 9>; [50.00%]
+ else
+ goto <bb 10>; [50.00%]
+
+ <bb 9> [local count: 268435456]:
+ d = 0B;
+
+ <bb 10> [local count: 536870913]:
+ e.1_3 = e;
+ *e.1_3 = 9;
+ e = 0B;
+ d.2_4 = d;
+ if (d.2_4 == 0B)
+
+ The first if is not optimized before, until vrp2 which is
+ too late as there are no passes which will then find the
+ load of d in `d.2_4 = d;` was `0B` after vrp2.
+
+ Now in forwprop3 (after phiopt2), we optimize:
+ _17 = i_7 != 0;
+ _12 = (int) _17;
+ if (i_7 == _12)
+ into just:
+ _t = (unsigned)i_7;
+ if (_t <= 1)
+
+ And then during ccp3, that is optimized away and that is optimized
+ early enough now that the load `d.2_4 = d;` is optimized to just
+ `d.2_4 = 0B;`
+ */
+
+/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c
new file mode 100644
index 00000000000..2c03dcc87fa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110539-4.c
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fstrict-aliasing -fdump-tree-optimized" } */
+
+/* This is a small variant of pr110539-3.c using -O1 -fstrict-aliasing
+ rather than -O2. Just to show VRP and PRE is not needed to optimize
+ the call to foo away. */
+
+
+void foo(void);
+static int a, c = 1;
+static short b;
+static int *d = &c, *e = &a;
+static int **f = &d;
+void __assert_fail() __attribute__((__noreturn__));
+static void g(int h) {
+ if (*d)
+ ;
+ else {
+ if (e) __assert_fail();
+ if (a) {
+ __builtin_unreachable();
+ } else
+ __assert_fail();
+ }
+ if (((h!=0) == h) + b) *f = 0;
+}
+
+int main() {
+ int i = 0 != 10 & a;
+ g(i);
+ *e = 9;
+ e = 0;
+ if (d == 0)
+ ;
+ else
+ foo();
+ ;
+}
+
+/* The call to foo should be optimized away. */
+/* The missed optimization at -O1 here was:
+ int b = a & 1;
+ int c = b != 0;
+ int d = c == b;
+ not being optimized to 1 early enough, it is done in vrp2 but
+ that is too late.
+ In phiopt2 we got:
+ _17 = i_7 != 0;
+ _12 = (int) _17;
+ if (i_7 == _12)
+ goto <bb 9>; [50.00%]
+ else
+ goto <bb 10>; [50.00%]
+
+ <bb 9> [local count: 268435456]:
+ d = 0B;
+
+ <bb 10> [local count: 536870913]:
+ e.1_3 = e;
+ *e.1_3 = 9;
+ e = 0B;
+ d.2_4 = d;
+ if (d.2_4 == 0B)
+
+ The first if is not optimized before, until vrp2 which is
+ too late as there are no passes which will then find the
+ load of d in `d.2_4 = d;` was `0B` after vrp2.
+
+ Now in forwprop3 (after phiopt2), we optimize:
+ _17 = i_7 != 0;
+ _12 = (int) _17;
+ if (i_7 == _12)
+ into just:
+ _t = (unsigned)i_7;
+ if (_t <= 1)
+
+ And then during ccp3, that is optimized away and that is optimized
+ early enough now that the load `d.2_4 = d;` is optimized to just
+ `d.2_4 = 0B;`
+ */
+
+/* { dg-final { scan-tree-dump-not "foo \\(\\)" "optimized"} } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-07-13 14:55 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-13 14:55 [gcc r14-2501] Fix part of PR 110293: `A NEEQ (A NEEQ CST)` part Andrew Pinski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).