public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] match.pd: Implement missed optimization ((x ^ y) & z) | x -> (z & y) | x [PR109938]
@ 2023-08-04 21:48 Drew Ross
  2023-08-08 13:18 ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Drew Ross @ 2023-08-04 21:48 UTC (permalink / raw)
  To: gcc-patches; +Cc: Drew Ross

Adds a simplification that folds ((x ^ y) & z) | x into
(z & y) | x. Merges this simplification with ((x | y) & z) | x -> (z & y) | x
to avoid a duplicate pattern. Tested successfully on x86_64 and x86 targets.

        PR tree-opt/109938

gcc/ChangeLog:

        * match.pd ((x ^ y) & z) | x -> (z & y) | x: New simplification.

gcc/testsuite/ChangeLog:

        * gcc.c-torture/execute/pr109938.c: New test.
        * gcc.dg/tree-ssa/pr109938.c: New test.
---
 gcc/match.pd                                  |  10 +-
 .../gcc.c-torture/execute/pr109938.c          |  33 +++++
 gcc/testsuite/gcc.dg/tree-ssa/pr109938.c      | 125 ++++++++++++++++++
 3 files changed, 164 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr109938.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109938.c

diff --git a/gcc/match.pd b/gcc/match.pd
index ee6cef6b09d..884dc622b25 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1946,10 +1946,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (bitop:c (rbitop:c (bit_not @0) @1) @0)
   (bitop @0 @1)))
 
-/* ((x | y) & z) | x -> (z & y) | x */
-(simplify
-  (bit_ior:c (bit_and:cs (bit_ior:cs @0 @1) @2) @0)
-  (bit_ior (bit_and @2 @1) @0))
+/* ((x |^ y) & z) | x -> (z & y) | x  */
+(for op (bit_ior bit_xor)
+ (simplify
+  (bit_ior:c (nop_convert1? (bit_and:c (nop_convert2? (op:c @0 @1)) @2)) @3)
+  (if (bitwise_equal_p (@0, @3))
+   (convert (bit_ior (bit_and @1 (convert @2)) (convert @0))))))
 
 /* (x | CST1) & CST2 -> (x & CST2) | (CST1 & CST2) */
 (simplify
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr109938.c b/gcc/testsuite/gcc.c-torture/execute/pr109938.c
new file mode 100644
index 00000000000..a65d13b305d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr109938.c
@@ -0,0 +1,33 @@
+/* PR tree-opt/109938 */
+
+#include "../../gcc.dg/tree-ssa/pr109938.c"
+
+int 
+main ()
+{
+  if (t1 (29789, 29477, 23942) != 30045) __builtin_abort ();
+  if (t2 (-20196, 18743, -32901) != -1729) __builtin_abort ();
+  if (t3 (2136614690L, 1136698390L, 2123767997L) != 2145003318UL) __builtin_abort ();
+  if (t4 (-4878, 9977, 23313) != 61171) __builtin_abort ();
+  if (t5 (127, 99, 43) != 127) __builtin_abort ();
+  if (t6 (9176690219839792930LL, 3176690219839721234LL, 5671738468274920831LL)
+      != 9177833729112616754LL) __builtin_abort ();
+  if (t7 (29789, 29477, 23942) != 30045) __builtin_abort ();
+  if (t8 (23489, 99477, 87942) != 90053) __builtin_abort ();
+  if (t9 (10489, 66477, -73313) != 10749) __builtin_abort ();
+  if (t10 (2136614690L, -1136614690L, 4136614690UL) != 4284131106UL)
+    __builtin_abort ();
+  if (t11 (29789, 29477, 12345) != 29821) __builtin_abort ();
+  if (t12 (-120, 98, -73) != 170) __builtin_abort ();
+  if (t13 (9176690219839792930ULL, -3176690219839721234LL, 5671738468274920831ULL)
+      != 9221726284835125102ULL) __builtin_abort ();
+  v4si a1 = {29789, -20196, 23489, 10489};
+  v4si a2 = {29477, 18743, 99477, 66477}; 
+  v4si a3 = {23942, -32901, 87942, -73313};
+  v4si r1 = {30045, 63807, 90053, 10749}; 
+  v4si b1 = t14 (a1, a2, a3);
+  v4si b2 = t15 (a1, a2, a3);
+  if (__builtin_memcmp (&b1,  &r1,  sizeof (b1) != 0)) __builtin_abort();	
+  if (__builtin_memcmp (&b2,  &r1,  sizeof (b2) != 0)) __builtin_abort();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109938.c b/gcc/testsuite/gcc.dg/tree-ssa/pr109938.c
new file mode 100644
index 00000000000..0cae55886c6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109938.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse1 -Wno-psabi" } */
+
+typedef int v4si __attribute__((vector_size(4 * sizeof(int))));
+
+/* Generic */
+__attribute__((noipa)) int
+t1 (int a, int b, int c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned int
+t2 (int a, unsigned int b, int c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned long
+t3 (unsigned long a, long b, unsigned long c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned short
+t4 (short a, unsigned short b, unsigned short c)
+{
+  return (unsigned short) ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned char
+t5 (unsigned char a, signed char b, signed char c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) long long
+t6 (long long a, long long b, long long c)
+{
+  return ((a ^ c) & (unsigned long long) b) | a;
+}
+
+/* Gimple */
+__attribute__((noipa)) int
+t7 (int a, int b, int c)
+{
+  int t1 = a ^ c;
+  int t2 = t1 & b;
+  int t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) int
+t8 (int a, unsigned int b, unsigned int c)
+{
+  unsigned int t1 = a ^ c;
+  int t2 = t1 & b;
+  int t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned int
+t9 (unsigned int a, unsigned int b, int c)
+{
+  unsigned int t1 = a ^ c;
+  unsigned int t2 = t1 & b;
+  unsigned int t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned long
+t10 (unsigned long a, long b, unsigned long c)
+{
+  unsigned long t1 = a ^ c;
+  unsigned long t2 = t1 & b;
+  unsigned long t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned short
+t11 (short a, unsigned short b, short c)
+{
+  short t1 = a ^ c;
+  unsigned short t2 = t1 & b;
+  unsigned short t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned char
+t12 (signed char a, unsigned char b, signed char c)
+{
+  unsigned char t1 = a ^ c;
+  unsigned char t2 = t1 & b;
+  unsigned char t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned long long
+t13 (unsigned long long a, long long b, unsigned long long c)
+{
+  long long t1 = a ^ c;
+  long long t2 = t1 & b;
+  unsigned long long t3 = t2 | a;
+  return t3;
+}
+
+/* Vectors */
+__attribute__((noipa)) v4si
+t14 (v4si a, v4si b, v4si c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) v4si
+t15 (v4si a, v4si b, v4si c)
+{
+  v4si t1 = a ^ c;
+  v4si t2 = t1 & b;
+  v4si t3 = t2 | a;
+  return t3;
+}
+
+/* { dg-final { scan-tree-dump-not " \\\^ " "dse1" } } */
+/* { dg-final { scan-tree-dump-times " \\\| " 15 "dse1" } } */
+/* { dg-final { scan-tree-dump-times " & " 15 "dse1" } } */
-- 
2.39.3


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] match.pd: Implement missed optimization ((x ^ y) & z) | x -> (z & y) | x [PR109938]
  2023-08-04 21:48 [PATCH] match.pd: Implement missed optimization ((x ^ y) & z) | x -> (z & y) | x [PR109938] Drew Ross
@ 2023-08-08 13:18 ` Richard Biener
  2023-08-10 10:28   ` Jakub Jelinek
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2023-08-08 13:18 UTC (permalink / raw)
  To: Drew Ross; +Cc: gcc-patches

On Fri, Aug 4, 2023 at 11:49 PM Drew Ross via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Adds a simplification for ((x ^ y) & z) | x to be folded into
> (z & y) | x. Merges this simplification with ((x | y) & z) | x -> (z & y) | x
> to prevent duplicate pattern. Tested successfully on x86_64 and x86 targets.

OK.

> +  (bit_ior:c (nop_convert1? (bit_and:c (nop_convert2?

All these nop_converts make me think that _maybe_ we could relax
type constraints on bitwise operations to allow different signs of
operands (and result).  Maybe we could simply transparently strip
them when matching expressions in the code generated by genmatch?
So when we match a bitwise operation the operands get stripped of
sign conversions which of course also means that for example

 (bit_ior (bit_and @0 @1) @0)

could have @0 and @1 having different types so on the code
generation side we'd either need to manually add (convert ..)s
or have genmatch recognize match operand places that can get
nop-conversions stripped and apply the convert itself.

For

 (plus (bit_ior @0 @1) @1)

with @1 being (nop_convert @2), that might leave us unable to match up
the operands unless we also adjust that part.  So maybe it's
not really worth the trouble ...

Maybe it would be instead simpler to special case code generation
for conditionals that are singleton within an optional group, those
could be expanded "inline" (but we'd have to record the number of uses).

Richard.

>         PR tree-opt/109938
>
> gcc/ChangeLog:
>
>         * match.pd ((x ^ y) & z) | x -> (z & y) | x: New simplification.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.c-torture/execute/pr109938.c: New test.
>         * gcc.dg/tree-ssa/pr109938.c: New test.
> ---
>  gcc/match.pd                                  |  10 +-
>  .../gcc.c-torture/execute/pr109938.c          |  33 +++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr109938.c      | 125 ++++++++++++++++++
>  3 files changed, 164 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr109938.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109938.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ee6cef6b09d..884dc622b25 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1946,10 +1946,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (bitop:c (rbitop:c (bit_not @0) @1) @0)
>    (bitop @0 @1)))
>
> -/* ((x | y) & z) | x -> (z & y) | x */
> -(simplify
> -  (bit_ior:c (bit_and:cs (bit_ior:cs @0 @1) @2) @0)
> -  (bit_ior (bit_and @2 @1) @0))
> +/* ((x |^ y) & z) | x -> (z & y) | x  */
> +(for op (bit_ior bit_xor)
> + (simplify
> +  (bit_ior:c (nop_convert1? (bit_and:c (nop_convert2? (op:c @0 @1)) @2)) @3)
> +  (if (bitwise_equal_p (@0, @3))
> +   (convert (bit_ior (bit_and @1 (convert @2)) (convert @0))))))
>
>  /* (x | CST1) & CST2 -> (x & CST2) | (CST1 & CST2) */
>  (simplify
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr109938.c b/gcc/testsuite/gcc.c-torture/execute/pr109938.c
> new file mode 100644
> index 00000000000..a65d13b305d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr109938.c
> @@ -0,0 +1,33 @@
> +/* PR tree-opt/109938 */
> +
> +#include "../../gcc.dg/tree-ssa/pr109938.c"
> +
> +int
> +main ()
> +{
> +  if (t1 (29789, 29477, 23942) != 30045) __builtin_abort ();
> +  if (t2 (-20196, 18743, -32901) != -1729) __builtin_abort ();
> +  if (t3 (2136614690L, 1136698390L, 2123767997L) != 2145003318UL) __builtin_abort ();
> +  if (t4 (-4878, 9977, 23313) != 61171) __builtin_abort ();
> +  if (t5 (127, 99, 43) != 127) __builtin_abort ();
> +  if (t6 (9176690219839792930LL, 3176690219839721234LL, 5671738468274920831LL)
> +      != 9177833729112616754LL) __builtin_abort ();
> +  if (t7 (29789, 29477, 23942) != 30045) __builtin_abort ();
> +  if (t8 (23489, 99477, 87942) != 90053) __builtin_abort ();
> +  if (t9 (10489, 66477, -73313) != 10749) __builtin_abort ();
> +  if (t10 (2136614690L, -1136614690L, 4136614690UL) != 4284131106UL)
> +    __builtin_abort ();
> +  if (t11 (29789, 29477, 12345) != 29821) __builtin_abort ();
> +  if (t12 (-120, 98, -73) != 170) __builtin_abort ();
> +  if (t13 (9176690219839792930ULL, -3176690219839721234LL, 5671738468274920831ULL)
> +      != 9221726284835125102ULL) __builtin_abort ();
> +  v4si a1 = {29789, -20196, 23489, 10489};
> +  v4si a2 = {29477, 18743, 99477, 66477};
> +  v4si a3 = {23942, -32901, 87942, -73313};
> +  v4si r1 = {30045, 63807, 90053, 10749};
> +  v4si b1 = t14 (a1, a2, a3);
> +  v4si b2 = t15 (a1, a2, a3);
> +  if (__builtin_memcmp (&b1,  &r1,  sizeof (b1) != 0)) __builtin_abort();
> +  if (__builtin_memcmp (&b2,  &r1,  sizeof (b2) != 0)) __builtin_abort();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109938.c b/gcc/testsuite/gcc.dg/tree-ssa/pr109938.c
> new file mode 100644
> index 00000000000..0cae55886c6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109938.c
> @@ -0,0 +1,125 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1 -Wno-psabi" } */
> +
> +typedef int v4si __attribute__((vector_size(4 * sizeof(int))));
> +
> +/* Generic */
> +__attribute__((noipa)) int
> +t1 (int a, int b, int c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned int
> +t2 (int a, unsigned int b, int c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned long
> +t3 (unsigned long a, long b, unsigned long c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned short
> +t4 (short a, unsigned short b, unsigned short c)
> +{
> +  return (unsigned short) ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned char
> +t5 (unsigned char a, signed char b, signed char c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) long long
> +t6 (long long a, long long b, long long c)
> +{
> +  return ((a ^ c) & (unsigned long long) b) | a;
> +}
> +
> +/* Gimple */
> +__attribute__((noipa)) int
> +t7 (int a, int b, int c)
> +{
> +  int t1 = a ^ c;
> +  int t2 = t1 & b;
> +  int t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) int
> +t8 (int a, unsigned int b, unsigned int c)
> +{
> +  unsigned int t1 = a ^ c;
> +  int t2 = t1 & b;
> +  int t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned int
> +t9 (unsigned int a, unsigned int b, int c)
> +{
> +  unsigned int t1 = a ^ c;
> +  unsigned int t2 = t1 & b;
> +  unsigned int t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned long
> +t10 (unsigned long a, long b, unsigned long c)
> +{
> +  unsigned long t1 = a ^ c;
> +  unsigned long t2 = t1 & b;
> +  unsigned long t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned short
> +t11 (short a, unsigned short b, short c)
> +{
> +  short t1 = a ^ c;
> +  unsigned short t2 = t1 & b;
> +  unsigned short t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned char
> +t12 (signed char a, unsigned char b, signed char c)
> +{
> +  unsigned char t1 = a ^ c;
> +  unsigned char t2 = t1 & b;
> +  unsigned char t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned long long
> +t13 (unsigned long long a, long long b, unsigned long long c)
> +{
> +  long long t1 = a ^ c;
> +  long long t2 = t1 & b;
> +  unsigned long long t3 = t2 | a;
> +  return t3;
> +}
> +
> +/* Vectors */
> +__attribute__((noipa)) v4si
> +t14 (v4si a, v4si b, v4si c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) v4si
> +t15 (v4si a, v4si b, v4si c)
> +{
> +  v4si t1 = a ^ c;
> +  v4si t2 = t1 & b;
> +  v4si t3 = t2 | a;
> +  return t3;
> +}
> +
> +/* { dg-final { scan-tree-dump-not " \\\^ " "dse1" } } */
> +/* { dg-final { scan-tree-dump-times " \\\| " 15 "dse1" } } */
> +/* { dg-final { scan-tree-dump-times " & " 15 "dse1" } } */
> --
> 2.39.3
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] match.pd: Implement missed optimization ((x ^ y) & z) | x -> (z & y) | x [PR109938]
  2023-08-08 13:18 ` Richard Biener
@ 2023-08-10 10:28   ` Jakub Jelinek
  2023-08-10 15:43     ` [PATCH] match.pd, v2: " Jakub Jelinek
  0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2023-08-10 10:28 UTC (permalink / raw)
  To: Richard Biener; +Cc: Drew Ross, gcc-patches

On Tue, Aug 08, 2023 at 03:18:51PM +0200, Richard Biener via Gcc-patches wrote:
> On Fri, Aug 4, 2023 at 11:49 PM Drew Ross via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Adds a simplification for ((x ^ y) & z) | x to be folded into
> > (z & y) | x. Merges this simplification with ((x | y) & z) | x -> (z & y) | x
> > to prevent duplicate pattern. Tested successfully on x86_64 and x86 targets.
> 
> OK.

Shouldn't
  (bit_ior:c (bit_and:cs (bit_ior:cs @0 @1) @2) @0)
be changed to
  (bit_ior:c (nop_convert1?:s
	       (bit_and:cs (nop_convert2?:s (op:cs @0 @1)) @2)) @3)
rather than
  (bit_ior:c (nop_convert1? (bit_and:c (nop_convert2? (op:c @0 @1)) @2)) @3)
in the patch?
I mean the :s modifiers were there for a reason: if some of the
intermediates have more than one use, then the simplification doesn't simplify
anything and can even make things larger.

	Jakub


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] match.pd, v2: Implement missed optimization ((x ^ y) & z) | x -> (z & y) | x [PR109938]
  2023-08-10 10:28   ` Jakub Jelinek
@ 2023-08-10 15:43     ` Jakub Jelinek
  2023-08-11  7:36       ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2023-08-10 15:43 UTC (permalink / raw)
  To: Richard Biener, Drew Ross; +Cc: gcc-patches

Hi!

On Thu, Aug 10, 2023 at 12:28:24PM +0200, Jakub Jelinek via Gcc-patches wrote:
> On Tue, Aug 08, 2023 at 03:18:51PM +0200, Richard Biener via Gcc-patches wrote:
> > On Fri, Aug 4, 2023 at 11:49 PM Drew Ross via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Adds a simplification for ((x ^ y) & z) | x to be folded into
> > > (z & y) | x. Merges this simplification with ((x | y) & z) | x -> (z & y) | x
> > > to prevent duplicate pattern. Tested successfully on x86_64 and x86 targets.
> > 
> > OK.
> 
> Shouldn't
>   (bit_ior:c (bit_and:cs (bit_ior:cs @0 @1) @2) @0)
> be changed to
>   (bit_ior:c (nop_convert1?:s
> 	       (bit_and:cs (nop_convert2?:s (op:cs @0 @1)) @2)) @3)
> rather than
>   (bit_ior:c (nop_convert1? (bit_and:c (nop_convert2? (op:c @0 @1)) @2)) @3)
> in the patch?
> I mean the :s modifiers were there for a reason, if some of the
> intermediates aren't a single use, then the simplification doesn't simplify
> anything and can even make things larger.

Here it is in patch form.  Bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?

2023-08-10  Drew Ross  <drross@redhat.com>
	    Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/109938
        * match.pd (((x ^ y) & z) | x -> (z & y) | x): New simplification.

	* gcc.c-torture/execute/pr109938.c: New test.
	* gcc.dg/tree-ssa/pr109938.c: New test.

--- gcc/match.pd.jj	2023-08-10 09:26:19.390805079 +0200
+++ gcc/match.pd	2023-08-10 13:33:17.959654775 +0200
@@ -1972,10 +1972,14 @@ (define_operator_list SYNC_FETCH_AND_AND
   (if (bitwise_inverted_equal_p (@0, @2))
    (bitop @0 @1))))
 
-/* ((x | y) & z) | x -> (z & y) | x */
-(simplify
-  (bit_ior:c (bit_and:cs (bit_ior:cs @0 @1) @2) @0)
-  (bit_ior (bit_and @2 @1) @0))
+/* ((x | y) & z) | x -> (z & y) | x
+   ((x ^ y) & z) | x -> (z & y) | x  */
+(for op (bit_ior bit_xor)
+ (simplify
+  (bit_ior:c (nop_convert1?:s
+	       (bit_and:cs (nop_convert2?:s (op:cs @0 @1)) @2)) @3)
+  (if (bitwise_equal_p (@0, @3))
+   (convert (bit_ior (bit_and @1 (convert @2)) (convert @0))))))
 
 /* (x | CST1) & CST2 -> (x & CST2) | (CST1 & CST2) */
 (simplify
--- gcc/testsuite/gcc.dg/tree-ssa/pr109938.c.jj	2023-08-10 13:22:19.513095403 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr109938.c	2023-08-10 13:35:24.428841774 +0200
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse2 -Wno-psabi" } */
+
+typedef int v4si __attribute__((vector_size(4 * sizeof(int))));
+
+/* Generic */
+__attribute__((noipa)) int
+t1 (int a, int b, int c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned int
+t2 (int a, unsigned int b, int c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned long
+t3 (unsigned long a, long b, unsigned long c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned short
+t4 (short a, unsigned short b, unsigned short c)
+{
+  return (unsigned short) ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) unsigned char
+t5 (unsigned char a, signed char b, signed char c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) long long
+t6 (long long a, long long b, long long c)
+{
+  return ((a ^ c) & (unsigned long long) b) | a;
+}
+
+/* Gimple */
+__attribute__((noipa)) int
+t7 (int a, int b, int c)
+{
+  int t1 = a ^ c;
+  int t2 = t1 & b;
+  int t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) int
+t8 (int a, unsigned int b, unsigned int c)
+{
+  unsigned int t1 = a ^ c;
+  int t2 = t1 & b;
+  int t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned int
+t9 (unsigned int a, unsigned int b, int c)
+{
+  unsigned int t1 = a ^ c;
+  unsigned int t2 = t1 & b;
+  unsigned int t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned long
+t10 (unsigned long a, long b, unsigned long c)
+{
+  unsigned long t1 = a ^ c;
+  unsigned long t2 = t1 & b;
+  unsigned long t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned short
+t11 (short a, unsigned short b, short c)
+{
+  short t1 = a ^ c;
+  unsigned short t2 = t1 & b;
+  unsigned short t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned char
+t12 (signed char a, unsigned char b, signed char c)
+{
+  unsigned char t1 = a ^ c;
+  unsigned char t2 = t1 & b;
+  unsigned char t3 = t2 | a;
+  return t3;
+}
+
+__attribute__((noipa)) unsigned long long
+t13 (unsigned long long a, long long b, unsigned long long c)
+{
+  long long t1 = a ^ c;
+  long long t2 = t1 & b;
+  unsigned long long t3 = t2 | a;
+  return t3;
+}
+
+/* Vectors */
+__attribute__((noipa)) v4si
+t14 (v4si a, v4si b, v4si c)
+{
+  return ((a ^ c) & b) | a;
+}
+
+__attribute__((noipa)) v4si
+t15 (v4si a, v4si b, v4si c)
+{
+  v4si t1 = a ^ c;
+  v4si t2 = t1 & b;
+  v4si t3 = t2 | a;
+  return t3;
+}
+
+/* { dg-final { scan-tree-dump-not " \\\^ " "dse2" } } */
+/* { dg-final { scan-tree-dump-times " \\\| " 15 "dse2" } } */
+/* { dg-final { scan-tree-dump-times " & " 15 "dse2" } } */
--- gcc/testsuite/gcc.c-torture/execute/pr109938.c.jj	2023-08-10 13:22:19.513095403 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr109938.c	2023-08-10 13:22:19.513095403 +0200
@@ -0,0 +1,33 @@
+/* PR tree-opt/109938 */
+
+#include "../../gcc.dg/tree-ssa/pr109938.c"
+
+int 
+main ()
+{
+  if (t1 (29789, 29477, 23942) != 30045) __builtin_abort ();
+  if (t2 (-20196, 18743, -32901) != -1729) __builtin_abort ();
+  if (t3 (2136614690L, 1136698390L, 2123767997L) != 2145003318UL) __builtin_abort ();
+  if (t4 (-4878, 9977, 23313) != 61171) __builtin_abort ();
+  if (t5 (127, 99, 43) != 127) __builtin_abort ();
+  if (t6 (9176690219839792930LL, 3176690219839721234LL, 5671738468274920831LL)
+      != 9177833729112616754LL) __builtin_abort ();
+  if (t7 (29789, 29477, 23942) != 30045) __builtin_abort ();
+  if (t8 (23489, 99477, 87942) != 90053) __builtin_abort ();
+  if (t9 (10489, 66477, -73313) != 10749) __builtin_abort ();
+  if (t10 (2136614690L, -1136614690L, 4136614690UL) != 4284131106UL)
+    __builtin_abort ();
+  if (t11 (29789, 29477, 12345) != 29821) __builtin_abort ();
+  if (t12 (-120, 98, -73) != 170) __builtin_abort ();
+  if (t13 (9176690219839792930ULL, -3176690219839721234LL, 5671738468274920831ULL)
+      != 9221726284835125102ULL) __builtin_abort ();
+  v4si a1 = {29789, -20196, 23489, 10489};
+  v4si a2 = {29477, 18743, 99477, 66477}; 
+  v4si a3 = {23942, -32901, 87942, -73313};
+  v4si r1 = {30045, 63807, 90053, 10749}; 
+  v4si b1 = t14 (a1, a2, a3);
+  v4si b2 = t15 (a1, a2, a3);
+  if (__builtin_memcmp (&b1,  &r1,  sizeof (b1) != 0)) __builtin_abort();	
+  if (__builtin_memcmp (&b2,  &r1,  sizeof (b2) != 0)) __builtin_abort();
+  return 0;
+}

	Jakub


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] match.pd, v2: Implement missed optimization ((x ^ y) & z) | x -> (z & y) | x [PR109938]
  2023-08-10 15:43     ` [PATCH] match.pd, v2: " Jakub Jelinek
@ 2023-08-11  7:36       ` Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2023-08-11  7:36 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Drew Ross, gcc-patches

On Thu, Aug 10, 2023 at 5:43 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> On Thu, Aug 10, 2023 at 12:28:24PM +0200, Jakub Jelinek via Gcc-patches wrote:
> > On Tue, Aug 08, 2023 at 03:18:51PM +0200, Richard Biener via Gcc-patches wrote:
> > > On Fri, Aug 4, 2023 at 11:49 PM Drew Ross via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > Adds a simplification for ((x ^ y) & z) | x to be folded into
> > > > (z & y) | x. Merges this simplification with ((x | y) & z) | x -> (z & y) | x
> > > > to prevent duplicate pattern. Tested successfully on x86_64 and x86 targets.
> > >
> > > OK.
> >
> > Shouldn't
> >   (bit_ior:c (bit_and:cs (bit_ior:cs @0 @1) @2) @0)
> > be changed to
> >   (bit_ior:c (nop_convert1?:s
> >              (bit_and:cs (nop_convert2?:s (op:cs @0 @1)) @2)) @3)
> > rather than
> >   (bit_ior:c (nop_convert1? (bit_and:c (nop_convert2? (op:c @0 @1)) @2)) @3)
> > in the patch?
> > I mean the :s modifiers were there for a reason, if some of the
> > intermediates aren't a single use, then the simplification doesn't simplify
> > anything and can even make things larger.
>
> Here it is in patch form.  Bootstrapped/regtested on x86_64-linux and
> i686-linux, ok for trunk?

OK.

> 2023-08-10  Drew Ross  <drross@redhat.com>
>             Jakub Jelinek  <jakub@redhat.com>
>
>         PR tree-optimization/109938
>         * match.pd (((x ^ y) & z) | x -> (z & y) | x): New simplification.
>
>         * gcc.c-torture/execute/pr109938.c: New test.
>         * gcc.dg/tree-ssa/pr109938.c: New test.
>
> --- gcc/match.pd.jj     2023-08-10 09:26:19.390805079 +0200
> +++ gcc/match.pd        2023-08-10 13:33:17.959654775 +0200
> @@ -1972,10 +1972,14 @@ (define_operator_list SYNC_FETCH_AND_AND
>    (if (bitwise_inverted_equal_p (@0, @2))
>     (bitop @0 @1))))
>
> -/* ((x | y) & z) | x -> (z & y) | x */
> -(simplify
> -  (bit_ior:c (bit_and:cs (bit_ior:cs @0 @1) @2) @0)
> -  (bit_ior (bit_and @2 @1) @0))
> +/* ((x | y) & z) | x -> (z & y) | x
> +   ((x ^ y) & z) | x -> (z & y) | x  */
> +(for op (bit_ior bit_xor)
> + (simplify
> +  (bit_ior:c (nop_convert1?:s
> +              (bit_and:cs (nop_convert2?:s (op:cs @0 @1)) @2)) @3)
> +  (if (bitwise_equal_p (@0, @3))
> +   (convert (bit_ior (bit_and @1 (convert @2)) (convert @0))))))
>
>  /* (x | CST1) & CST2 -> (x & CST2) | (CST1 & CST2) */
>  (simplify
> --- gcc/testsuite/gcc.dg/tree-ssa/pr109938.c.jj 2023-08-10 13:22:19.513095403 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr109938.c    2023-08-10 13:35:24.428841774 +0200
> @@ -0,0 +1,125 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse2 -Wno-psabi" } */
> +
> +typedef int v4si __attribute__((vector_size(4 * sizeof(int))));
> +
> +/* Generic */
> +__attribute__((noipa)) int
> +t1 (int a, int b, int c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned int
> +t2 (int a, unsigned int b, int c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned long
> +t3 (unsigned long a, long b, unsigned long c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned short
> +t4 (short a, unsigned short b, unsigned short c)
> +{
> +  return (unsigned short) ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) unsigned char
> +t5 (unsigned char a, signed char b, signed char c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) long long
> +t6 (long long a, long long b, long long c)
> +{
> +  return ((a ^ c) & (unsigned long long) b) | a;
> +}
> +
> +/* Gimple */
> +__attribute__((noipa)) int
> +t7 (int a, int b, int c)
> +{
> +  int t1 = a ^ c;
> +  int t2 = t1 & b;
> +  int t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) int
> +t8 (int a, unsigned int b, unsigned int c)
> +{
> +  unsigned int t1 = a ^ c;
> +  int t2 = t1 & b;
> +  int t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned int
> +t9 (unsigned int a, unsigned int b, int c)
> +{
> +  unsigned int t1 = a ^ c;
> +  unsigned int t2 = t1 & b;
> +  unsigned int t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned long
> +t10 (unsigned long a, long b, unsigned long c)
> +{
> +  unsigned long t1 = a ^ c;
> +  unsigned long t2 = t1 & b;
> +  unsigned long t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned short
> +t11 (short a, unsigned short b, short c)
> +{
> +  short t1 = a ^ c;
> +  unsigned short t2 = t1 & b;
> +  unsigned short t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned char
> +t12 (signed char a, unsigned char b, signed char c)
> +{
> +  unsigned char t1 = a ^ c;
> +  unsigned char t2 = t1 & b;
> +  unsigned char t3 = t2 | a;
> +  return t3;
> +}
> +
> +__attribute__((noipa)) unsigned long long
> +t13 (unsigned long long a, long long b, unsigned long long c)
> +{
> +  long long t1 = a ^ c;
> +  long long t2 = t1 & b;
> +  unsigned long long t3 = t2 | a;
> +  return t3;
> +}
> +
> +/* Vectors */
> +__attribute__((noipa)) v4si
> +t14 (v4si a, v4si b, v4si c)
> +{
> +  return ((a ^ c) & b) | a;
> +}
> +
> +__attribute__((noipa)) v4si
> +t15 (v4si a, v4si b, v4si c)
> +{
> +  v4si t1 = a ^ c;
> +  v4si t2 = t1 & b;
> +  v4si t3 = t2 | a;
> +  return t3;
> +}
> +
> +/* { dg-final { scan-tree-dump-not " \\\^ " "dse2" } } */
> +/* { dg-final { scan-tree-dump-times " \\\| " 15 "dse2" } } */
> +/* { dg-final { scan-tree-dump-times " & " 15 "dse2" } } */
> --- gcc/testsuite/gcc.c-torture/execute/pr109938.c.jj   2023-08-10 13:22:19.513095403 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr109938.c      2023-08-10 13:22:19.513095403 +0200
> @@ -0,0 +1,33 @@
> +/* PR tree-opt/109938 */
> +
> +#include "../../gcc.dg/tree-ssa/pr109938.c"
> +
> +int
> +main ()
> +{
> +  if (t1 (29789, 29477, 23942) != 30045) __builtin_abort ();
> +  if (t2 (-20196, 18743, -32901) != -1729) __builtin_abort ();
> +  if (t3 (2136614690L, 1136698390L, 2123767997L) != 2145003318UL) __builtin_abort ();
> +  if (t4 (-4878, 9977, 23313) != 61171) __builtin_abort ();
> +  if (t5 (127, 99, 43) != 127) __builtin_abort ();
> +  if (t6 (9176690219839792930LL, 3176690219839721234LL, 5671738468274920831LL)
> +      != 9177833729112616754LL) __builtin_abort ();
> +  if (t7 (29789, 29477, 23942) != 30045) __builtin_abort ();
> +  if (t8 (23489, 99477, 87942) != 90053) __builtin_abort ();
> +  if (t9 (10489, 66477, -73313) != 10749) __builtin_abort ();
> +  if (t10 (2136614690L, -1136614690L, 4136614690UL) != 4284131106UL)
> +    __builtin_abort ();
> +  if (t11 (29789, 29477, 12345) != 29821) __builtin_abort ();
> +  if (t12 (-120, 98, -73) != 170) __builtin_abort ();
> +  if (t13 (9176690219839792930ULL, -3176690219839721234LL, 5671738468274920831ULL)
> +      != 9221726284835125102ULL) __builtin_abort ();
> +  v4si a1 = {29789, -20196, 23489, 10489};
> +  v4si a2 = {29477, 18743, 99477, 66477};
> +  v4si a3 = {23942, -32901, 87942, -73313};
> +  v4si r1 = {30045, 63807, 90053, 10749};
> +  v4si b1 = t14 (a1, a2, a3);
> +  v4si b2 = t15 (a1, a2, a3);
> +  if (__builtin_memcmp (&b1,  &r1,  sizeof (b1) != 0)) __builtin_abort();
> +  if (__builtin_memcmp (&b2,  &r1,  sizeof (b2) != 0)) __builtin_abort();
> +  return 0;
> +}
>
>         Jakub
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-08-11  7:36 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-04 21:48 [PATCH] match.pd: Implement missed optimization ((x ^ y) & z) | x -> (z & y) | x [PR109938] Drew Ross
2023-08-08 13:18 ` Richard Biener
2023-08-10 10:28   ` Jakub Jelinek
2023-08-10 15:43     ` [PATCH] match.pd, v2: " Jakub Jelinek
2023-08-11  7:36       ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).