public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Add zero_extract support for rv64gc
@ 2024-05-06 20:40 Christoph Müllner
  2024-05-06 21:24 ` Jeff Law
  2024-05-06 21:42 ` Vineet Gupta
  0 siblings, 2 replies; 6+ messages in thread
From: Christoph Müllner @ 2024-05-06 20:40 UTC (permalink / raw)
  To: gcc-patches, Kito Cheng, Jim Wilson, Palmer Dabbelt,
	Andrew Waterman, Philipp Tomsich, Jeff Law, Vineet Gupta
  Cc: Christoph Müllner

The combiner attempts to optimize a zero-extension of a logical right shift
using zero_extract. We already utilize this optimization for those cases
that result in a single instructions.  Let's add a insn_and_split
pattern that also matches the generic case, where we can emit an
optimized sequence of a slli/srli.

Tested with SPEC CPU 2017 (rv64gc).

	PR 111501

gcc/ChangeLog:

	* config/riscv/riscv.md (*lshr<GPR:mode>3_zero_extend_4): New
	pattern for zero-extraction.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/pr111501.c: New test.
	* gcc.target/riscv/zero-extend-rshift-32.c: New test.
	* gcc.target/riscv/zero-extend-rshift-64.c: New test.
	* gcc.target/riscv/zero-extend-rshift.c: New test.

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
 gcc/config/riscv/riscv.md                     |  30 +++++
 gcc/testsuite/gcc.target/riscv/pr111501.c     |  32 +++++
 .../gcc.target/riscv/zero-extend-rshift-32.c  |  37 ++++++
 .../gcc.target/riscv/zero-extend-rshift-64.c  |  63 ++++++++++
 .../gcc.target/riscv/zero-extend-rshift.c     | 119 ++++++++++++++++++
 5 files changed, 281 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr111501.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index d4676507b45..80cbecb78e8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3"
   [(set_attr "type" "shift")
    (set_attr "mode" "SI")])
 
+;; Canonical form for a zero-extend of a logical right shift.
+;; Special cases are handled above.
+;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
+(define_insn_and_split "*lshr<GPR:mode>3_zero_extend_4"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	 (zero_extract:GPR
+       (match_operand:GPR 1 "register_operand" " r")
+       (match_operand     2 "const_int_operand")
+       (match_operand     3 "const_int_operand")))
+   (clobber (match_scratch:GPR  4 "=&r"))]
+  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
+   && !TARGET_XTHEADBB"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+     (ashift:GPR (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+     (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
+{
+  int regbits = GET_MODE_BITSIZE (GET_MODE (operands[0])).to_constant ();
+  int sizebits = INTVAL (operands[2]);
+  int startbits = INTVAL (operands[3]);
+  int lshamt = regbits - sizebits - startbits;
+  int rshamt = lshamt + startbits;
+  operands[2] = GEN_INT (lshamt);
+  operands[3] = GEN_INT (rshamt);
+}
+  [(set_attr "type" "shift")
+   (set_attr "mode" "<GPR:MODE>")])
+
 ;; Handle AND with 2^N-1 for N from 12 to XLEN.  This can be split into
 ;; two logical shifts.  Otherwise it requires 3 instructions: lui,
 ;; xor/addi/srli, and.
diff --git a/gcc/testsuite/gcc.target/riscv/pr111501.c b/gcc/testsuite/gcc.target/riscv/pr111501.c
new file mode 100644
index 00000000000..9355be242e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr111501.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-allow-blank-lines-in-output 1 } */
+
+/*
+**do_shift:
+**    ...
+**    slli\ta[0-9],a[0-9],16
+**    srli\ta[0-9],a[0-9],48
+**    ...
+*/
+unsigned int
+do_shift(unsigned long csum)
+{
+  return (unsigned short)(csum >> 32);
+}
+
+/*
+**do_shift2:
+**    ...
+**    slli\ta[0-9],a[0-9],16
+**    srli\ta[0-9],a[0-9],48
+**    ...
+*/
+unsigned int
+do_shift2(unsigned long csum)
+{
+  return (csum << 16) >> 48;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
new file mode 100644
index 00000000000..2824d6fe074
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-options "-march=rv32gc" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT)				\
+unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v)	\
+{									\
+    return (unsigned CT)(v >> N);					\
+}
+
+#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long)
+#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long)
+
+/*
+**ulong_ze_ushort_rshift_9_ulong:
+**    slli\ta[0-9],a[0-9],7
+**    srli\ta[0-9],a[0-9],16
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(9)
+
+/*
+**ulong_ze_ushort_rshift_14_ulong:
+**    slli\ta[0-9],a[0-9],2
+**    srli\ta[0-9],a[0-9],16
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(14)
+
+/*
+**ulong_ze_uint_rshift_23_ulong:
+**    srli\ta[0-9],a[0-9],23
+**    ret
+*/
+ULONG_ZE_UINT_RSHIFT_N_ULONG(23)
diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
new file mode 100644
index 00000000000..ec5c2745561
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-options "-march=rv64gc" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT)				\
+unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v)	\
+{									\
+    return (unsigned CT)(v >> N);					\
+}
+
+#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long)
+#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long)
+#define UINT_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,int)
+#define ULONG_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,int)
+
+/*
+**ulong_ze_ushort_rshift_9_ulong:
+**    slli\ta[0-9],a[0-9],39
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(9)
+
+/*
+**ulong_ze_ushort_rshift_14_ulong:
+**    slli\ta[0-9],a[0-9],34
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(14)
+
+/*
+**ulong_ze_ushort_rshift_51_ulong:
+**    srli\ta[0-9],a[0-9],51
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(51)
+
+/*
+**ulong_ze_uint_rshift_23_ulong:
+**    slli\ta[0-9],a[0-9],9
+**    srli\ta[0-9],a[0-9],32
+**    ret
+*/
+ULONG_ZE_UINT_RSHIFT_N_ULONG(23)
+
+/*
+**uint_ze_ushort_rshift_15_uint:
+**    slli\ta[0-9],a[0-9],33
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+UINT_ZE_USHORT_RSHIFT_N_UINT(15)
+
+/*
+**ulong_ze_ushort_rshift_15_uint:
+**    slli\ta[0-9],a[0-9],33
+**    srli\ta[0-9],a[0-9],48
+**    ret
+*/
+ULONG_ZE_USHORT_RSHIFT_N_UINT(15)
diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c
new file mode 100644
index 00000000000..706264c8ff1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+// Tests for merging rshifts into zero-extensions.
+// u8-casts are not tested as they can be done with one instruction (andi 0xff).
+
+#define URT_ZE_UCT_RSHIFT_N_UAT(RT,CT,N,AT)				\
+unsigned RT u##RT##_ze_u##CT##_rshift_##N##_u##AT(unsigned AT v)	\
+{									\
+    return (unsigned CT)(v >> N);					\
+}
+
+#define ULONG_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,long)
+
+// Below "slli (16-N); srli 16" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(1)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(7)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(8)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(9)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(15)
+// Below "srli 16" for rv32
+// Below "srliw 16" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(16)
+// Below "srli N" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(17)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(23)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(24)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(25)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(31)
+// Below compiler warning for rv32
+#if __riscv_xlen == 64
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(32)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(33)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(39)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(40)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(41)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(47)
+// Below "srli N" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(48)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(49)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(55)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(56)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(57)
+ULONG_ZE_USHORT_RSHIFT_N_ULONG(63)
+#endif /* __riscv_xlen == 64 */
+
+#define ULONG_ZE_UINT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(long,int,N,long)
+
+// Below "srli N" for rv32
+// Below "slli (32-N); srli 32" for rv64
+ULONG_ZE_UINT_RSHIFT_N_ULONG(1)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(7)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(8)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(9)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(15)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(16)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(17)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(23)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(24)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(25)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(31)
+// Below compiler warning for rv32
+#if __riscv_xlen == 64
+// Below "srli N" for rv64
+ULONG_ZE_UINT_RSHIFT_N_ULONG(32)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(33)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(39)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(40)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(41)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(47)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(48)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(49)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(55)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(56)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(57)
+ULONG_ZE_UINT_RSHIFT_N_ULONG(63)
+#endif /* __riscv_xlen == 64 */
+
+#define UINT_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,int)
+
+#if __riscv_xlen == 64
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+UINT_ZE_USHORT_RSHIFT_N_UINT(1)
+UINT_ZE_USHORT_RSHIFT_N_UINT(7)
+UINT_ZE_USHORT_RSHIFT_N_UINT(8)
+UINT_ZE_USHORT_RSHIFT_N_UINT(9)
+UINT_ZE_USHORT_RSHIFT_N_UINT(15)
+// Below "srliw N" for rv64
+UINT_ZE_USHORT_RSHIFT_N_UINT(16)
+UINT_ZE_USHORT_RSHIFT_N_UINT(17)
+UINT_ZE_USHORT_RSHIFT_N_UINT(23)
+UINT_ZE_USHORT_RSHIFT_N_UINT(24)
+UINT_ZE_USHORT_RSHIFT_N_UINT(25)
+UINT_ZE_USHORT_RSHIFT_N_UINT(31)
+#endif /* __riscv_xlen == 64 */
+
+#define UINT_ZE_USHORT_RSHIFT_N_ULONG(N) URT_ZE_UCT_RSHIFT_N_UAT(int,short,N,long)
+// Below "slli (16-N); srli 16" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+UINT_ZE_USHORT_RSHIFT_N_ULONG(9)
+UINT_ZE_USHORT_RSHIFT_N_ULONG(15)
+
+#define ULONG_ZE_USHORT_RSHIFT_N_UINT(N) URT_ZE_UCT_RSHIFT_N_UAT(long,short,N,int)
+// Below "slli (16-N); srli 16" for rv32
+// Below "slli ((32+16)-N); srli (32+16)" for rv64
+ULONG_ZE_USHORT_RSHIFT_N_UINT(9)
+ULONG_ZE_USHORT_RSHIFT_N_UINT(15)
+
+/* { dg-final { scan-assembler-times "slli\t" 9 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "srli\t" 26 { target { rv32 } } } } */
+
+/* { dg-final { scan-assembler-times "slli\t" 36 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "srli\t" 54 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "srliw\t" 7 { target { rv64 } } } } */
-- 
2.44.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Add zero_extract support for rv64gc
  2024-05-06 20:40 [PATCH] RISC-V: Add zero_extract support for rv64gc Christoph Müllner
@ 2024-05-06 21:24 ` Jeff Law
  2024-05-08  7:44   ` Christoph Müllner
  2024-05-06 21:42 ` Vineet Gupta
  1 sibling, 1 reply; 6+ messages in thread
From: Jeff Law @ 2024-05-06 21:24 UTC (permalink / raw)
  To: Christoph Müllner, gcc-patches, Kito Cheng, Jim Wilson,
	Palmer Dabbelt, Andrew Waterman, Philipp Tomsich, Vineet Gupta



On 5/6/24 2:40 PM, Christoph Müllner wrote:
> The combiner attempts to optimize a zero-extension of a logical right shift
> using zero_extract. We already utilize this optimization for those cases
> that result in a single instructions.  Let's add a insn_and_split
> pattern that also matches the generic case, where we can emit an
> optimized sequence of a slli/srli.
> 
> Tested with SPEC CPU 2017 (rv64gc).
> 
> 	PR 111501
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv.md (*lshr<GPR:mode>3_zero_extend_4): New
> 	pattern for zero-extraction.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/pr111501.c: New test.
> 	* gcc.target/riscv/zero-extend-rshift-32.c: New test.
> 	* gcc.target/riscv/zero-extend-rshift-64.c: New test.
> 	* gcc.target/riscv/zero-extend-rshift.c: New test.
So I had Lyut looking in this space as well.  Mostly because there's a 
desire to avoid the srl+and approach and instead represent this stuff as 
shifts (which are fusible in our uarch).  SO I've already got some state...


> 
> Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> ---
>   gcc/config/riscv/riscv.md                     |  30 +++++
>   gcc/testsuite/gcc.target/riscv/pr111501.c     |  32 +++++
>   .../gcc.target/riscv/zero-extend-rshift-32.c  |  37 ++++++
>   .../gcc.target/riscv/zero-extend-rshift-64.c  |  63 ++++++++++
>   .../gcc.target/riscv/zero-extend-rshift.c     | 119 ++++++++++++++++++
>   5 files changed, 281 insertions(+)
>   create mode 100644 gcc/testsuite/gcc.target/riscv/pr111501.c
>   create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
>   create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
>   create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c
> 
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index d4676507b45..80cbecb78e8 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3"
>     [(set_attr "type" "shift")
>      (set_attr "mode" "SI")])
>   
> +;; Canonical form for a zero-extend of a logical right shift.
> +;; Special cases are handled above.
> +;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
> +(define_insn_and_split "*lshr<GPR:mode>3_zero_extend_4"
> +  [(set (match_operand:GPR 0 "register_operand" "=r")
> +	 (zero_extract:GPR
> +       (match_operand:GPR 1 "register_operand" " r")
> +       (match_operand     2 "const_int_operand")
> +       (match_operand     3 "const_int_operand")))
> +   (clobber (match_scratch:GPR  4 "=&r"))]
> +  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
> +   && !TARGET_XTHEADBB"
> +  "#"
> +  "&& reload_completed"
> +  [(set (match_dup 4)
> +     (ashift:GPR (match_dup 1) (match_dup 2)))
> +   (set (match_dup 0)
> +     (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
Consider adding support for signed extractions as well.  You just need 
an iterator across zero_extract/sign_extract and suitable selection of 
arithmetic vs logical right shift step.

A nit on the condition.   Bring the && INTVAL (operands[2]) == 1 down to 
a new line like you've gone with !TARGET_XTHEADBB.

You also want to make sure the condition rejects the cases handled by 
this pattern (or merge your pattern with this one):

> ;; Canonical form for a zero-extend of a logical right shift.
> (define_insn "*lshrsi3_zero_extend_2" 
>   [(set (match_operand:DI                   0 "register_operand" "=r")
>         (zero_extract:DI (match_operand:DI  1 "register_operand" " r")
>                          (match_operand     2 "const_int_operand")
>                          (match_operand     3 "const_int_operand")))]
>   "(TARGET_64BIT && (INTVAL (operands[3]) > 0)
>     && (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))"
> {
>   return "srliw\t%0,%1,%3";
> }
>   [(set_attr "type" "shift")
>    (set_attr "mode" "SI")])

So generally going the right direction.  But needs another iteration.

Jeff


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Add zero_extract support for rv64gc
  2024-05-06 20:40 [PATCH] RISC-V: Add zero_extract support for rv64gc Christoph Müllner
  2024-05-06 21:24 ` Jeff Law
@ 2024-05-06 21:42 ` Vineet Gupta
  2024-05-06 21:46   ` Jeff Law
  2024-05-08  7:45   ` Christoph Müllner
  1 sibling, 2 replies; 6+ messages in thread
From: Vineet Gupta @ 2024-05-06 21:42 UTC (permalink / raw)
  To: Christoph Müllner, gcc-patches, Kito Cheng, Jim Wilson,
	Palmer Dabbelt, Andrew Waterman, Philipp Tomsich, Jeff Law



On 5/6/24 13:40, Christoph Müllner wrote:
> The combiner attempts to optimize a zero-extension of a logical right shift
> using zero_extract. We already utilize this optimization for those cases
> that result in a single instructions.  Let's add a insn_and_split
> pattern that also matches the generic case, where we can emit an
> optimized sequence of a slli/srli.
>
> ...
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index d4676507b45..80cbecb78e8 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3"
>    [(set_attr "type" "shift")
>     (set_attr "mode" "SI")])
>  
> +;; Canonical form for a zero-extend of a logical right shift.
> +;; Special cases are handled above.
> +;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)

Dumb question: Why not for Zbs: Zb[abs] is going to be very common going
fwd and will end up being unused.

> +(define_insn_and_split "*lshr<GPR:mode>3_zero_extend_4"
> +  [(set (match_operand:GPR 0 "register_operand" "=r")
> +	 (zero_extract:GPR
> +       (match_operand:GPR 1 "register_operand" " r")
> +       (match_operand     2 "const_int_operand")
> +       (match_operand     3 "const_int_operand")))
> +   (clobber (match_scratch:GPR  4 "=&r"))]
> +  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
> +   && !TARGET_XTHEADBB"
> +  "#"
> +  "&& reload_completed"
> +  [(set (match_dup 4)
> +     (ashift:GPR (match_dup 1) (match_dup 2)))
> +   (set (match_dup 0)
> +     (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
> +{
> +  int regbits = GET_MODE_BITSIZE (GET_MODE (operands[0])).to_constant ();
> +  int sizebits = INTVAL (operands[2]);
> +  int startbits = INTVAL (operands[3]);
> +  int lshamt = regbits - sizebits - startbits;
> +  int rshamt = lshamt + startbits;
> +  operands[2] = GEN_INT (lshamt);
> +  operands[3] = GEN_INT (rshamt);
> +}
> +  [(set_attr "type" "shift")
> +   (set_attr "mode" "<GPR:MODE>")])
> +
>  ;; Handle AND with 2^N-1 for N from 12 to XLEN.  This can be split into
>  ;; two logical shifts.  Otherwise it requires 3 instructions: lui,
>  ;; xor/addi/srli, and.
> diff --git a/gcc/testsuite/gcc.target/riscv/pr111501.c b/gcc/testsuite/gcc.target/riscv/pr111501.c
> new file mode 100644
> index 00000000000..9355be242e7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr111501.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target rv64 } */
> +/* { dg-options "-march=rv64gc" { target { rv64 } } } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
> +/* { dg-final { check-function-bodies "**" "" } } */

Is function body check really needed: isn't count of srli and slli each
sufficient ?
Last year we saw a lot of false failures due to unrelated scheduling
changes as such tripping these up.

> +/* { dg-allow-blank-lines-in-output 1 } */
> +
> +/*
> +**do_shift:
> +**    ...
> +**    slli\ta[0-9],a[0-9],16
> +**    srli\ta[0-9],a[0-9],48
> +**    ...
> +*/
> +unsigned int
> +do_shift(unsigned long csum)
> +{
> +  return (unsigned short)(csum >> 32);
> +}
> +
> +/*
> +**do_shift2:
> +**    ...
> +**    slli\ta[0-9],a[0-9],16
> +**    srli\ta[0-9],a[0-9],48
> +**    ...
> +*/
> +unsigned int
> +do_shift2(unsigned long csum)
> +{
> +  return (csum << 16) >> 48;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
> new file mode 100644
> index 00000000000..2824d6fe074
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target rv32 } */
> +/* { dg-options "-march=rv32gc" } */
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
> +/* { dg-final { check-function-bodies "**" "" } } */

Same as above, counts where possible.

-Vineet


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Add zero_extract support for rv64gc
  2024-05-06 21:42 ` Vineet Gupta
@ 2024-05-06 21:46   ` Jeff Law
  2024-05-08  7:45   ` Christoph Müllner
  1 sibling, 0 replies; 6+ messages in thread
From: Jeff Law @ 2024-05-06 21:46 UTC (permalink / raw)
  To: Vineet Gupta, Christoph Müllner, gcc-patches, Kito Cheng,
	Jim Wilson, Palmer Dabbelt, Andrew Waterman, Philipp Tomsich



On 5/6/24 3:42 PM, Vineet Gupta wrote:
> 
> 
> On 5/6/24 13:40, Christoph Müllner wrote:
>> The combiner attempts to optimize a zero-extension of a logical right shift
>> using zero_extract. We already utilize this optimization for those cases
>> that result in a single instructions.  Let's add a insn_and_split
>> pattern that also matches the generic case, where we can emit an
>> optimized sequence of a slli/srli.
>>
>> ...
>>
>> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
>> index d4676507b45..80cbecb78e8 100644
>> --- a/gcc/config/riscv/riscv.md
>> +++ b/gcc/config/riscv/riscv.md
>> @@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3"
>>     [(set_attr "type" "shift")
>>      (set_attr "mode" "SI")])
>>   
>> +;; Canonical form for a zero-extend of a logical right shift.
>> +;; Special cases are handled above.
>> +;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
> 
> Dumb question: Why not for Zbs: Zb[abs] is going to be very common going
> fwd and will end up being unused.
Zbs only handles single bit extractions.  The pattern rejects that case 
allowing the single bit patterns from bitmanip.md and thead.md to match 
them.

Jeff



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Add zero_extract support for rv64gc
  2024-05-06 21:24 ` Jeff Law
@ 2024-05-08  7:44   ` Christoph Müllner
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Müllner @ 2024-05-08  7:44 UTC (permalink / raw)
  To: Jeff Law
  Cc: gcc-patches, Kito Cheng, Jim Wilson, Palmer Dabbelt,
	Andrew Waterman, Philipp Tomsich, Vineet Gupta

On Mon, May 6, 2024 at 11:24 PM Jeff Law <jeffreyalaw@gmail.com> wrote:
>
>
>
> On 5/6/24 2:40 PM, Christoph Müllner wrote:
> > The combiner attempts to optimize a zero-extension of a logical right shift
> > using zero_extract. We already utilize this optimization for those cases
> > that result in a single instructions.  Let's add a insn_and_split
> > pattern that also matches the generic case, where we can emit an
> > optimized sequence of a slli/srli.
> >
> > Tested with SPEC CPU 2017 (rv64gc).
> >
> >       PR 111501
> >
> > gcc/ChangeLog:
> >
> >       * config/riscv/riscv.md (*lshr<GPR:mode>3_zero_extend_4): New
> >       pattern for zero-extraction.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * gcc.target/riscv/pr111501.c: New test.
> >       * gcc.target/riscv/zero-extend-rshift-32.c: New test.
> >       * gcc.target/riscv/zero-extend-rshift-64.c: New test.
> >       * gcc.target/riscv/zero-extend-rshift.c: New test.
> So I had Lyut looking in this space as well.  Mostly because there's a
> desire to avoid the srl+and approach and instead represent this stuff as
> shifts (which are fusible in our uarch).  SO I've already got some state...
>
>
> >
> > Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
> > ---
> >   gcc/config/riscv/riscv.md                     |  30 +++++
> >   gcc/testsuite/gcc.target/riscv/pr111501.c     |  32 +++++
> >   .../gcc.target/riscv/zero-extend-rshift-32.c  |  37 ++++++
> >   .../gcc.target/riscv/zero-extend-rshift-64.c  |  63 ++++++++++
> >   .../gcc.target/riscv/zero-extend-rshift.c     | 119 ++++++++++++++++++
> >   5 files changed, 281 insertions(+)
> >   create mode 100644 gcc/testsuite/gcc.target/riscv/pr111501.c
> >   create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
> >   create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift-64.c
> >   create mode 100644 gcc/testsuite/gcc.target/riscv/zero-extend-rshift.c
> >
> > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> > index d4676507b45..80cbecb78e8 100644
> > --- a/gcc/config/riscv/riscv.md
> > +++ b/gcc/config/riscv/riscv.md
> > @@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3"
> >     [(set_attr "type" "shift")
> >      (set_attr "mode" "SI")])
> >
> > +;; Canonical form for a zero-extend of a logical right shift.
> > +;; Special cases are handled above.
> > +;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
> > +(define_insn_and_split "*lshr<GPR:mode>3_zero_extend_4"
> > +  [(set (match_operand:GPR 0 "register_operand" "=r")
> > +      (zero_extract:GPR
> > +       (match_operand:GPR 1 "register_operand" " r")
> > +       (match_operand     2 "const_int_operand")
> > +       (match_operand     3 "const_int_operand")))
> > +   (clobber (match_scratch:GPR  4 "=&r"))]
> > +  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
> > +   && !TARGET_XTHEADBB"
> > +  "#"
> > +  "&& reload_completed"
> > +  [(set (match_dup 4)
> > +     (ashift:GPR (match_dup 1) (match_dup 2)))
> > +   (set (match_dup 0)
> > +     (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
> Consider adding support for signed extractions as well.  You just need
> an iterator across zero_extract/sign_extract and suitable selection of
> arithmetic vs logical right shift step.

The sign-extension/extraction code was worse than the
zero-extension/extraction code.
So, I ended up doing some initial work for addressing corner cases first, before
converting this pattern using an any_extract iterator for the v2
(already on the list).

>
> A nit on the condition.   Bring the && INTVAL (operands[2]) == 1 down to
> a new line like you've gone with !TARGET_XTHEADBB.
>
> You also want to make sure the condition rejects the cases handled by
> this pattern (or merge your pattern with this one):

I kept the pattern, but added sign_extract support.

>
> > ;; Canonical form for a zero-extend of a logical right shift.
> > (define_insn "*lshrsi3_zero_extend_2"
> >   [(set (match_operand:DI                   0 "register_operand" "=r")
> >         (zero_extract:DI (match_operand:DI  1 "register_operand" " r")
> >                          (match_operand     2 "const_int_operand")
> >                          (match_operand     3 "const_int_operand")))]
> >   "(TARGET_64BIT && (INTVAL (operands[3]) > 0)
> >     && (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))"
> > {
> >   return "srliw\t%0,%1,%3";
> > }
> >   [(set_attr "type" "shift")
> >    (set_attr "mode" "SI")])
>
> So generally going the right direction.  But needs another iteration.

Thanks for the review!

>
> Jeff
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] RISC-V: Add zero_extract support for rv64gc
  2024-05-06 21:42 ` Vineet Gupta
  2024-05-06 21:46   ` Jeff Law
@ 2024-05-08  7:45   ` Christoph Müllner
  1 sibling, 0 replies; 6+ messages in thread
From: Christoph Müllner @ 2024-05-08  7:45 UTC (permalink / raw)
  To: Vineet Gupta
  Cc: gcc-patches, Kito Cheng, Jim Wilson, Palmer Dabbelt,
	Andrew Waterman, Philipp Tomsich, Jeff Law

On Mon, May 6, 2024 at 11:43 PM Vineet Gupta <vineetg@rivosinc.com> wrote:
>
>
>
> On 5/6/24 13:40, Christoph Müllner wrote:
> > The combiner attempts to optimize a zero-extension of a logical right shift
> > using zero_extract. We already utilize this optimization for those cases
> > that result in a single instructions.  Let's add a insn_and_split
> > pattern that also matches the generic case, where we can emit an
> > optimized sequence of a slli/srli.
> >
> > ...
> >
> > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> > index d4676507b45..80cbecb78e8 100644
> > --- a/gcc/config/riscv/riscv.md
> > +++ b/gcc/config/riscv/riscv.md
> > @@ -2792,6 +2792,36 @@ (define_insn "*lshrsi3_zero_extend_3"
> >    [(set_attr "type" "shift")
> >     (set_attr "mode" "SI")])
> >
> > +;; Canonical form for a zero-extend of a logical right shift.
> > +;; Special cases are handled above.
> > +;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
>
> Dumb question: Why not for Zbs: Zb[abs] is going to be very common going
> fwd and will end up being unused.
>
> > +(define_insn_and_split "*lshr<GPR:mode>3_zero_extend_4"
> > +  [(set (match_operand:GPR 0 "register_operand" "=r")
> > +      (zero_extract:GPR
> > +       (match_operand:GPR 1 "register_operand" " r")
> > +       (match_operand     2 "const_int_operand")
> > +       (match_operand     3 "const_int_operand")))
> > +   (clobber (match_scratch:GPR  4 "=&r"))]
> > +  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
> > +   && !TARGET_XTHEADBB"
> > +  "#"
> > +  "&& reload_completed"
> > +  [(set (match_dup 4)
> > +     (ashift:GPR (match_dup 1) (match_dup 2)))
> > +   (set (match_dup 0)
> > +     (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
> > +{
> > +  int regbits = GET_MODE_BITSIZE (GET_MODE (operands[0])).to_constant ();
> > +  int sizebits = INTVAL (operands[2]);
> > +  int startbits = INTVAL (operands[3]);
> > +  int lshamt = regbits - sizebits - startbits;
> > +  int rshamt = lshamt + startbits;
> > +  operands[2] = GEN_INT (lshamt);
> > +  operands[3] = GEN_INT (rshamt);
> > +}
> > +  [(set_attr "type" "shift")
> > +   (set_attr "mode" "<GPR:MODE>")])
> > +
> >  ;; Handle AND with 2^N-1 for N from 12 to XLEN.  This can be split into
> >  ;; two logical shifts.  Otherwise it requires 3 instructions: lui,
> >  ;; xor/addi/srli, and.
> > diff --git a/gcc/testsuite/gcc.target/riscv/pr111501.c b/gcc/testsuite/gcc.target/riscv/pr111501.c
> > new file mode 100644
> > index 00000000000..9355be242e7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/pr111501.c
> > @@ -0,0 +1,32 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target rv64 } */
> > +/* { dg-options "-march=rv64gc" { target { rv64 } } } */
> > +/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
> > +/* { dg-final { check-function-bodies "**" "" } } */
>
> Is function body check really needed: isn't count of srli and slli each
> sufficient ?
> Last year we saw a lot of false failures due to unrelated scheduling
> changes as such tripping these up.

I've dropped the check-function-bodies in the v2.

Thanks!

>
> > +/* { dg-allow-blank-lines-in-output 1 } */
> > +
> > +/*
> > +**do_shift:
> > +**    ...
> > +**    slli\ta[0-9],a[0-9],16
> > +**    srli\ta[0-9],a[0-9],48
> > +**    ...
> > +*/
> > +unsigned int
> > +do_shift(unsigned long csum)
> > +{
> > +  return (unsigned short)(csum >> 32);
> > +}
> > +
> > +/*
> > +**do_shift2:
> > +**    ...
> > +**    slli\ta[0-9],a[0-9],16
> > +**    srli\ta[0-9],a[0-9],48
> > +**    ...
> > +*/
> > +unsigned int
> > +do_shift2(unsigned long csum)
> > +{
> > +  return (csum << 16) >> 48;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
> > new file mode 100644
> > index 00000000000..2824d6fe074
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/zero-extend-rshift-32.c
> > @@ -0,0 +1,37 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target rv32 } */
> > +/* { dg-options "-march=rv32gc" } */
> > +/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
> > +/* { dg-final { check-function-bodies "**" "" } } */
>
> Same as above, counts where possible.
>
> -Vineet
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-05-08  7:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-06 20:40 [PATCH] RISC-V: Add zero_extract support for rv64gc Christoph Müllner
2024-05-06 21:24 ` Jeff Law
2024-05-08  7:44   ` Christoph Müllner
2024-05-06 21:42 ` Vineet Gupta
2024-05-06 21:46   ` Jeff Law
2024-05-08  7:45   ` Christoph Müllner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).