public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR 108874: aarch64 code regression with shift and ands
@ 2023-03-10  5:59 Andrew Pinski
  2023-03-10 10:56 ` Richard Sandiford
  0 siblings, 1 reply; 2+ messages in thread
From: Andrew Pinski @ 2023-03-10  5:59 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andrew Pinski

After r6-2044-g98e30e515f184b, code like "((x & 0xff00ff00U) >> 8)"
is optimized to "(x >> 8) & 0xff00ffU", which is normally better.
On aarch64, however, the shift right could in some cases have been
combined with another operation. So we need to add a few define_splits
to the aarch64 backend that match "((x >> shift) & CST0) OP Y"
and split it into:
TMP = X & CST1
(TMP >> shift) OP Y

Note this also gets us back to matching rev16, so I added a
testcase to make sure we don't lose that matching again.
Note that when the generic patch to recognize those as bswap ROT 16
goes in, we might regress again and need to add a few more patterns to
the aarch64 backend, but we will deal with that once that happens.

OK? Bootstrapped and tested on aarch64 with no regressions.

gcc/ChangeLog:

	* config/aarch64/aarch64.md: Add a new define_split
	to help combine.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/rev16_2.c: New test.
	* gcc.target/aarch64/shift_and_operator-1.c: New test.
---
 gcc/config/aarch64/aarch64.md                 | 21 ++++++++++
 gcc/testsuite/gcc.target/aarch64/rev16_2.c    | 39 +++++++++++++++++++
 .../gcc.target/aarch64/shift_and_operator-1.c | 22 +++++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/rev16_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index af9087508ac..41cc563f10c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4656,6 +4656,27 @@ (define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3"
   [(set_attr "type" "logic_shift_imm")]
 )
 
+(define_split
+  [(set (match_operand:GPI 0 "register_operand")
+	(LOGICAL_OR_PLUS:GPI
+	  (and:GPI
+	    (lshiftrt:GPI (match_operand:GPI 1 "register_operand")
+			  (match_operand:QI 2 "aarch64_shift_imm_<mode>"))
+	    (match_operand:GPI 3 "aarch64_logical_immediate"))
+	  (match_operand:GPI 4 "register_operand")))]
+  "can_create_pseudo_p ()
+   && aarch64_bitmask_imm (UINTVAL (operands[3]) << UINTVAL (operands[2]), <MODE>mode)"
+  [(set (match_dup 5) (and:GPI (match_dup 1) (match_dup 6)))
+   (set (match_dup 0) (match_dup 7))]
+  {
+    operands[5] = gen_reg_rtx (<MODE>mode);
+    operands[6] = gen_int_mode (UINTVAL (operands[3]) << UINTVAL (operands[2]), <MODE>mode);
+    rtx shift = gen_rtx_LSHIFTRT (<MODE>mode, operands[5], operands[2]);
+    rtx_code new_code = <CODE>;
+    operands[7] = gen_rtx_fmt_ee (new_code, <MODE>mode, shift, operands[4]);
+  }
+)
+
 (define_split
   [(set (match_operand:GPI 0 "register_operand")
 	(LOGICAL_OR_PLUS:GPI
diff --git a/gcc/testsuite/gcc.target/aarch64/rev16_2.c b/gcc/testsuite/gcc.target/aarch64/rev16_2.c
new file mode 100644
index 00000000000..621eb5dfbf0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/rev16_2.c
@@ -0,0 +1,39 @@
+/* { dg-options "-O2" } */
+/* { dg-do compile } */
+
+extern void abort (void);
+
+typedef unsigned int __u32;
+
+__u32
+__rev16_32_alt (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
+         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
+}
+
+__u32
+__rev16_32 (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
+         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
+}
+
+typedef unsigned long long __u64;
+
+__u64
+__rev16_64_alt (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8)
+         | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8);
+}
+
+__u64
+__rev16_64 (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
+         | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
+}
+
+/* { dg-final { scan-assembler-times "rev16\\tx\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "rev16\\tw\[0-9\]+" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c b/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c
new file mode 100644
index 00000000000..49152c5495a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c
@@ -0,0 +1,22 @@
+/* { dg-options "-O2" } */
+/* { dg-do compile } */
+
+unsigned f(unsigned x, unsigned b)
+{
+  return ((x & 0xff00ff00U) >> 8) | b;
+}
+
+unsigned f0(unsigned x, unsigned b)
+{
+  return ((x & 0xff00ff00U) >> 8) ^ b;
+}
+unsigned f1(unsigned x, unsigned b)
+{
+  return ((x & 0xff00ff00U) >> 8) + b;
+}
+
+/* { dg-final { scan-assembler-times "lsr\\tw\[0-9\]+" 0 } } */
+/* { dg-final { scan-assembler-times "lsr 8" 3 } } */
+/* { dg-final { scan-assembler-times "eor\\tw\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "add\\tw\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "orr\\tw\[0-9\]+" 1 } } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix PR 108874: aarch64 code regression with shift and ands
  2023-03-10  5:59 [PATCH] Fix PR 108874: aarch64 code regression with shift and ands Andrew Pinski
@ 2023-03-10 10:56 ` Richard Sandiford
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2023-03-10 10:56 UTC (permalink / raw)
  To: Andrew Pinski via Gcc-patches; +Cc: Andrew Pinski

Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> After r6-2044-g98e30e515f184b, code like "((x & 0xff00ff00U) >> 8)"
> would be optimized like (x >> 8) & 0xff00ffU which is normally better
> except on aarch64, the shift right could be combined with another
> operation in some cases. So we need to add a few define_splits
> to the aarch64 backends that match "((x >> shift) & CST0) OP Y"
> and splits it to:
> TMP = X & CST1
> (TMP >> shift) OP Y
>
> Note this also gets us to matching rev16 back too so I added a
> testcase to make sure we don't lose that matching any more.
> Note when the generic patch to recognize those as bswap ROT 16,
> we might regress again and need to add a few more patterns to
> the aarch64 backend but will deal with that once that happens.
>
> OK? Bootstrapped and tested on aarch64 with no regressions.
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.md: Add a new define_split
> 	to help combine.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/rev16_2.c: New test.
> 	* gcc.target/aarch64/shift_and_operator-1.c: New test.
> ---
>  gcc/config/aarch64/aarch64.md                 | 21 ++++++++++
>  gcc/testsuite/gcc.target/aarch64/rev16_2.c    | 39 +++++++++++++++++++
>  .../gcc.target/aarch64/shift_and_operator-1.c | 22 +++++++++++
>  3 files changed, 82 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/rev16_2.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index af9087508ac..41cc563f10c 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -4656,6 +4656,27 @@ (define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3"
>    [(set_attr "type" "logic_shift_imm")]
>  )
>  
> +(define_split
> +  [(set (match_operand:GPI 0 "register_operand")
> +	(LOGICAL_OR_PLUS:GPI
> +	  (and:GPI
> +	    (lshiftrt:GPI (match_operand:GPI 1 "register_operand")
> +			  (match_operand:QI 2 "aarch64_shift_imm_<mode>"))
> +	    (match_operand:GPI 3 "aarch64_logical_immediate"))
> +	  (match_operand:GPI 4 "register_operand")))]
> +  "can_create_pseudo_p ()
> +   && aarch64_bitmask_imm (UINTVAL (operands[3]) << UINTVAL (operands[2]), <MODE>mode)"

Formatting nit: long line

> +  [(set (match_dup 5) (and:GPI (match_dup 1) (match_dup 6)))
> +   (set (match_dup 0) (match_dup 7))]
> +  {
> +    operands[5] = gen_reg_rtx (<MODE>mode);
> +    operands[6] = gen_int_mode (UINTVAL (operands[3]) << UINTVAL (operands[2]), <MODE>mode);

Here too.

> +    rtx shift = gen_rtx_LSHIFTRT (<MODE>mode, operands[5], operands[2]);
> +    rtx_code new_code = <CODE>;
> +    operands[7] = gen_rtx_fmt_ee (new_code, <MODE>mode, shift, operands[4]);

It should be possible to do the last three statements in the
rtl pattern, e.g. as:

  [(set (match_dup 5) (and:GPI (match_dup 1) (match_dup 6)))
   (set (match_dup 0) (LOGICAL_OR_PLUS:GPI
			(lshiftrt:GPI (match_dup 5) (match_dup 2))
			(match_dup 4)))]

OK with those changes, thanks.

Richard

> +  }
> +)
> +
>  (define_split
>    [(set (match_operand:GPI 0 "register_operand")
>  	(LOGICAL_OR_PLUS:GPI
> diff --git a/gcc/testsuite/gcc.target/aarch64/rev16_2.c b/gcc/testsuite/gcc.target/aarch64/rev16_2.c
> new file mode 100644
> index 00000000000..621eb5dfbf0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/rev16_2.c
> @@ -0,0 +1,39 @@
> +/* { dg-options "-O2" } */
> +/* { dg-do compile } */
> +
> +extern void abort (void);
> +
> +typedef unsigned int __u32;
> +
> +__u32
> +__rev16_32_alt (__u32 x)
> +{
> +  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
> +         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
> +}
> +
> +__u32
> +__rev16_32 (__u32 x)
> +{
> +  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
> +         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
> +}
> +
> +typedef unsigned long long __u64;
> +
> +__u64
> +__rev16_64_alt (__u64 x)
> +{
> +  return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8)
> +         | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8);
> +}
> +
> +__u64
> +__rev16_64 (__u64 x)
> +{
> +  return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
> +         | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
> +}
> +
> +/* { dg-final { scan-assembler-times "rev16\\tx\[0-9\]+" 2 } } */
> +/* { dg-final { scan-assembler-times "rev16\\tw\[0-9\]+" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c b/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c
> new file mode 100644
> index 00000000000..49152c5495a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c
> @@ -0,0 +1,22 @@
> +/* { dg-options "-O2" } */
> +/* { dg-do compile } */
> +
> +unsigned f(unsigned x, unsigned b)
> +{
> +  return ((x & 0xff00ff00U) >> 8) | b;
> +}
> +
> +unsigned f0(unsigned x, unsigned b)
> +{
> +  return ((x & 0xff00ff00U) >> 8) ^ b;
> +}
> +unsigned f1(unsigned x, unsigned b)
> +{
> +  return ((x & 0xff00ff00U) >> 8) + b;
> +}
> +
> +/* { dg-final { scan-assembler-times "lsr\\tw\[0-9\]+" 0 } } */
> +/* { dg-final { scan-assembler-times "lsr 8" 3 } } */
> +/* { dg-final { scan-assembler-times "eor\\tw\[0-9\]+" 1 } } */
> +/* { dg-final { scan-assembler-times "add\\tw\[0-9\]+" 1 } } */
> +/* { dg-final { scan-assembler-times "orr\\tw\[0-9\]+" 1 } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-03-10 10:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-10  5:59 [PATCH] Fix PR 108874: aarch64 code regression with shift and ands Andrew Pinski
2023-03-10 10:56 ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).