Do you mean changing all the split patterns like this? ;; This helps to match zero_extend + sign_extend + fma. (define_insn_and_split "*zero_sign_extend_fma" [(set (match_operand:VWEXTI 0 "register_operand") (plus:VWEXTI (mult:VWEXTI (zero_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")) (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))) (match_operand:VWEXTI 1 "register_operand")))] "TARGET_VECTOR && can_create_pseudo_p ()" "#" "&& 1" [(const_int 0)] juzhe.zhong@rivai.ai From: Kito Cheng Date: 2023-06-12 20:37 To: juzhe.zhong@rivai.ai CC: gcc-patches; Kito.cheng; palmer; palmer; jeffreyalaw; Robin Dapp Subject: Re: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization We have two styles of condition for those define_insn_and_split patterns: "TARGET_VECTOR"/"&& can_create_pseudo_p ()" and "TARGET_VECTOR && can_create_pseudo_p ()"/"&& 1". Could you unify them all to the latter form? I feel that would be safer, since those patterns are really only valid before RA (can_create_pseudo_p() == true). They are mostly used by the combine pass, so it's mostly safe, but IMO we should fix this soon rather than wait until we hit a problem later. This patch is OK as it is, and I would like to have a separate patch to fix all those issues. On Mon, Jun 12, 2023 at 8:27 PM juzhe.zhong@rivai.ai wrote: > > Is this patch ok for trunk? 
> > > > juzhe.zhong@rivai.ai > > From: juzhe.zhong > Date: 2023-06-12 10:41 > To: gcc-patches > CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong > Subject: [PATCH] RISC-V: Add RVV narrow shift right lowering auto-vectorization > From: Juzhe-Zhong > > Optimize the following auto-vectorization codes: > void foo (int16_t * __restrict a, int32_t * __restrict b, int32_t c, int n) > { > for (int i = 0; i < n; i++) > a[i] = b[i] >> c; > } > > Before this patch: > foo: > ble a3,zero,.L5 > .L3: > vsetvli a5,a3,e32,m1,ta,ma > vle32.v v1,0(a1) > vsetvli a4,zero,e32,m1,ta,ma > vsra.vx v1,v1,a2 > vsetvli zero,zero,e16,mf2,ta,ma > slli a7,a5,2 > vncvt.x.x.w v1,v1 > slli a6,a5,1 > vsetvli zero,a5,e16,mf2,ta,ma > sub a3,a3,a5 > vse16.v v1,0(a0) > add a1,a1,a7 > add a0,a0,a6 > bne a3,zero,.L3 > .L5: > ret > > After this patch: > foo: > ble a3,zero,.L5 > .L3: > vsetvli a5,a3,e32,m1,ta,ma > vle32.v v1,0(a1) > vsetvli a7,zero,e16,mf2,ta,ma > slli a6,a5,2 > vnsra.wx v1,v1,a2 > slli a4,a5,1 > vsetvli zero,a5,e16,mf2,ta,ma > sub a3,a3,a5 > vse16.v v1,0(a0) > add a1,a1,a6 > add a0,a0,a4 > bne a3,zero,.L3 > .L5: > ret > > gcc/ChangeLog: > > * config/riscv/autovec-opt.md (*vtrunc): New pattern. > (*trunc): Ditto. > * config/riscv/autovec.md (3): Change to define_insn_and_split. > (v3): Ditto. > (trunc2): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/binop/narrow-1.c: New test. > * gcc.target/riscv/rvv/autovec/binop/narrow-2.c: New test. > * gcc.target/riscv/rvv/autovec/binop/narrow-3.c: New test. > * gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c: New test. > * gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c: New test. > * gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c: New test. 
> > --- > gcc/config/riscv/autovec-opt.md | 46 +++++++++++++++++ > gcc/config/riscv/autovec.md | 43 ++++++++++------ > .../riscv/rvv/autovec/binop/narrow-1.c | 31 ++++++++++++ > .../riscv/rvv/autovec/binop/narrow-2.c | 32 ++++++++++++ > .../riscv/rvv/autovec/binop/narrow-3.c | 31 ++++++++++++ > .../riscv/rvv/autovec/binop/narrow_run-1.c | 50 +++++++++++++++++++ > .../riscv/rvv/autovec/binop/narrow_run-2.c | 46 +++++++++++++++++ > .../riscv/rvv/autovec/binop/narrow_run-3.c | 46 +++++++++++++++++ > 8 files changed, 311 insertions(+), 14 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c > > diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md > index 7bb93eed220..aef28e445e1 100644 > --- a/gcc/config/riscv/autovec-opt.md > +++ b/gcc/config/riscv/autovec-opt.md > @@ -330,3 +330,49 @@ > } > [(set_attr "type" "viwmuladd") > (set_attr "mode" "")]) > + > +;; ------------------------------------------------------------------------- > +;; ---- [INT] Binary narrow shifts. 
> +;; ------------------------------------------------------------------------- > +;; Includes: > +;; - vnsrl.wv/vnsrl.wx/vnsrl.wi > +;; - vnsra.wv/vnsra.wx/vnsra.wi > +;; ------------------------------------------------------------------------- > + > +(define_insn_and_split "*vtrunc" > + [(set (match_operand: 0 "register_operand" "=vr,vr") > + (truncate: > + (any_shiftrt:VWEXTI > + (match_operand:VWEXTI 1 "register_operand" " vr,vr") > + (any_extend:VWEXTI > + (match_operand: 2 "vector_shift_operand" " vr,vk")))))] > + "TARGET_VECTOR" > + "#" > + "&& can_create_pseudo_p ()" > + [(const_int 0)] > +{ > + insn_code icode = code_for_pred_narrow (, mode); > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands); > + DONE; > +} > + [(set_attr "type" "vnshift") > + (set_attr "mode" "")]) > + > +(define_insn_and_split "*trunc" > + [(set (match_operand: 0 "register_operand" "=vr") > + (truncate: > + (any_shiftrt:VWEXTI > + (match_operand:VWEXTI 1 "register_operand" " vr") > + (match_operand: 2 "csr_operand" " rK"))))] > + "TARGET_VECTOR" > + "#" > + "&& can_create_pseudo_p ()" > + [(const_int 0)] > +{ > + operands[2] = gen_lowpart (Pmode, operands[2]); > + insn_code icode = code_for_pred_narrow_scalar (, mode); > + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands); > + DONE; > +} > + [(set_attr "type" "vnshift") > + (set_attr "mode" "")]) > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md > index b7070099f29..eadc2c5b595 100644 > --- a/gcc/config/riscv/autovec.md > +++ b/gcc/config/riscv/autovec.md > @@ -150,18 +150,23 @@ > ;; - vsll.vi/vsra.vi/vsrl.vi > ;; ------------------------------------------------------------------------- > -(define_expand "3" > - [(set (match_operand:VI 0 "register_operand") > +(define_insn_and_split "3" > + [(set (match_operand:VI 0 "register_operand" "=vr") > (any_shift:VI > - (match_operand:VI 1 "register_operand") > - (match_operand: 2 "csr_operand")))] > + (match_operand:VI 1 
"register_operand" " vr") > + (match_operand: 2 "csr_operand" " rK")))] > "TARGET_VECTOR" > + "#" > + "&& can_create_pseudo_p ()" > + [(const_int 0)] > { > operands[2] = gen_lowpart (Pmode, operands[2]); > riscv_vector::emit_vlmax_insn (code_for_pred_scalar (, mode), > riscv_vector::RVV_BINOP, operands); > DONE; > -}) > +} > + [(set_attr "type" "vshift") > + (set_attr "mode" "")]) > ;; ------------------------------------------------------------------------- > ;; ---- [INT] Binary shifts by scalar. > @@ -170,17 +175,22 @@ > ;; - vsll.vv/vsra.vv/vsrl.vv > ;; ------------------------------------------------------------------------- > -(define_expand "v3" > - [(set (match_operand:VI 0 "register_operand") > +(define_insn_and_split "v3" > + [(set (match_operand:VI 0 "register_operand" "=vr,vr") > (any_shift:VI > - (match_operand:VI 1 "register_operand") > - (match_operand:VI 2 "vector_shift_operand")))] > + (match_operand:VI 1 "register_operand" " vr,vr") > + (match_operand:VI 2 "vector_shift_operand" " vr,vk")))] > "TARGET_VECTOR" > + "#" > + "&& can_create_pseudo_p ()" > + [(const_int 0)] > { > riscv_vector::emit_vlmax_insn (code_for_pred (, mode), > riscv_vector::RVV_BINOP, operands); > DONE; > -}) > +} > + [(set_attr "type" "vshift") > + (set_attr "mode" "")]) > ;; ------------------------------------------------------------------------- > ;; ---- [BOOL] Binary logical operations > @@ -395,16 +405,21 @@ > ;; ------------------------------------------------------------------------- > ;; - vncvt.x.x.w > ;; ------------------------------------------------------------------------- > -(define_expand "trunc2" > - [(set (match_operand: 0 "register_operand") > +(define_insn_and_split "trunc2" > + [(set (match_operand: 0 "register_operand" "=vr") > (truncate: > - (match_operand:VWEXTI 1 "register_operand")))] > + (match_operand:VWEXTI 1 "register_operand" " vr")))] > "TARGET_VECTOR" > + "#" > + "&& can_create_pseudo_p ()" > + [(const_int 0)] > { > insn_code icode = 
code_for_pred_trunc (mode); > riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands); > DONE; > -}) > +} > + [(set_attr "type" "vshift") > + (set_attr "mode" "")]) > ;; ------------------------------------------------------------------------- > ;; Truncation to a mode whose inner mode size is a quarter of mode's. > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c > new file mode 100644 > index 00000000000..3de8d85b52d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-1.c > @@ -0,0 +1,31 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ > + > +#include > + > +#define TEST_TYPE(TYPE1, TYPE2) \ > + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \ > + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 *__restrict b, int n) \ > + { \ > + for (int i = 0; i < n; i++) \ > + dst[i] = a[i] >> b[i]; \ > + } > + > +#define TEST_ALL() \ > + TEST_TYPE (int16_t, int8_t) \ > + TEST_TYPE (int16_t, uint8_t) \ > + TEST_TYPE (uint16_t, int8_t) \ > + TEST_TYPE (uint16_t, uint8_t) \ > + TEST_TYPE (int32_t, int16_t) \ > + TEST_TYPE (int32_t, uint16_t) \ > + TEST_TYPE (uint32_t, int16_t) \ > + TEST_TYPE (uint32_t, uint16_t) \ > + TEST_TYPE (int64_t, int32_t) \ > + TEST_TYPE (int64_t, uint32_t) \ > + TEST_TYPE (uint64_t, int32_t) \ > + TEST_TYPE (uint64_t, uint32_t) > + > +TEST_ALL () > + > +/* { dg-final { scan-assembler-times {\tvnsra\.wv} 6 } } */ > +/* { dg-final { scan-assembler-times {\tvnsrl\.wv} 5 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c > new file mode 100644 > index 00000000000..e5c2e37f5fa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-2.c > @@ -0,0 +1,32 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options 
"-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ > + > +#include > + > +#define TEST_TYPE(TYPE1, TYPE2) \ > + __attribute__ (( \ > + noipa)) void vnshift_##TYPE1##_##TYPE2 (TYPE2 *__restrict dst, \ > + TYPE1 *__restrict a, int n) \ > + { \ > + for (int i = 0; i < n; i++) \ > + dst[i] = a[i] >> 7; \ > + } > + > +#define TEST_ALL() \ > + TEST_TYPE (int16_t, int8_t) \ > + TEST_TYPE (int16_t, uint8_t) \ > + TEST_TYPE (uint16_t, int8_t) \ > + TEST_TYPE (uint16_t, uint8_t) \ > + TEST_TYPE (int32_t, int16_t) \ > + TEST_TYPE (int32_t, uint16_t) \ > + TEST_TYPE (uint32_t, int16_t) \ > + TEST_TYPE (uint32_t, uint16_t) \ > + TEST_TYPE (int64_t, int32_t) \ > + TEST_TYPE (int64_t, uint32_t) \ > + TEST_TYPE (uint64_t, int32_t) \ > + TEST_TYPE (uint64_t, uint32_t) > + > +TEST_ALL () > + > +/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */ > +/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c > new file mode 100644 > index 00000000000..3b288466394 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow-3.c > @@ -0,0 +1,31 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */ > + > +#include > + > +#define TEST_TYPE(TYPE1, TYPE2) \ > + __attribute__ ((noipa)) void vnshift_##TYPE1##_##TYPE2 ( \ > + TYPE2 *__restrict dst, TYPE1 *__restrict a, TYPE2 b, int n) \ > + { \ > + for (int i = 0; i < n; i++) \ > + dst[i] = a[i] >> b; \ > + } > + > +#define TEST_ALL() \ > + TEST_TYPE (int16_t, int8_t) \ > + TEST_TYPE (int16_t, uint8_t) \ > + TEST_TYPE (uint16_t, int8_t) \ > + TEST_TYPE (uint16_t, uint8_t) \ > + TEST_TYPE (int32_t, int16_t) \ > + TEST_TYPE (int32_t, uint16_t) \ > + TEST_TYPE (uint32_t, int16_t) \ > + TEST_TYPE (uint32_t, uint16_t) \ > + TEST_TYPE (int64_t, int32_t) \ > + TEST_TYPE (int64_t, 
uint32_t) \ > + TEST_TYPE (uint64_t, int32_t) \ > + TEST_TYPE (uint64_t, uint32_t) > + > +TEST_ALL () > + > +/* { dg-final { scan-assembler-times {\tvnsra\.wx} 4 } } */ > +/* { dg-final { scan-assembler-times {\tvnsrl\.wx} 4 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c > new file mode 100644 > index 00000000000..2a898104fa8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-1.c > @@ -0,0 +1,50 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ > + > +#include > +#include "narrow-1.c" > + > +#define RUN(TYPE1, TYPE2, SZ) \ > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \ > + TYPE2 b##TYPE1##_##TYPE2##_##SZ[SZ]; \ > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \ > + for (int i = 0; i < SZ; i++) \ > + { \ > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \ > + b##TYPE1##_##TYPE2##_##SZ[i] = i % (sizeof (TYPE2) * 3); \ > + } \ > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \ > + a##TYPE1##_##TYPE2##_##SZ, \ > + b##TYPE1##_##TYPE2##_##SZ, SZ); \ > + for (int i = 0; i < SZ; i++) \ > + { \ > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \ > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] \ > + >> b##TYPE1##_##TYPE2##_##SZ[i])); \ > + } > + > +#define RUN_ALL(SZ) \ > + RUN (int16_t, int8_t, SZ) \ > + RUN (int16_t, uint8_t, SZ) \ > + RUN (uint16_t, int8_t, SZ) \ > + RUN (uint16_t, uint8_t, SZ) \ > + RUN (int32_t, int16_t, SZ) \ > + RUN (int32_t, uint16_t, SZ) \ > + RUN (uint32_t, int16_t, SZ) \ > + RUN (uint32_t, uint16_t, SZ) \ > + RUN (int64_t, int32_t, SZ) \ > + RUN (int64_t, uint32_t, SZ) \ > + RUN (uint64_t, int32_t, SZ) \ > + RUN (uint64_t, uint32_t, SZ) > + > +int > +main () > +{ > + RUN_ALL (15) > + RUN_ALL (16) > + RUN_ALL (17) > + RUN_ALL (127) > + RUN_ALL (128) > + RUN_ALL (129) > + RUN_ALL (512) > +} > diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c > new file mode 100644 > index 00000000000..1630ba1a5f8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-2.c > @@ -0,0 +1,46 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ > + > +#include > +#include "narrow-2.c" > + > +#define RUN(TYPE1, TYPE2, SZ) \ > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \ > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \ > + for (int i = 0; i < SZ; i++) \ > + { \ > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \ > + } \ > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \ > + a##TYPE1##_##TYPE2##_##SZ, SZ); \ > + for (int i = 0; i < SZ; i++) \ > + { \ > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \ > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 7)); \ > + } > + > +#define RUN_ALL(SZ) \ > + RUN (int16_t, int8_t, SZ) \ > + RUN (int16_t, uint8_t, SZ) \ > + RUN (uint16_t, int8_t, SZ) \ > + RUN (uint16_t, uint8_t, SZ) \ > + RUN (int32_t, int16_t, SZ) \ > + RUN (int32_t, uint16_t, SZ) \ > + RUN (uint32_t, int16_t, SZ) \ > + RUN (uint32_t, uint16_t, SZ) \ > + RUN (int64_t, int32_t, SZ) \ > + RUN (int64_t, uint32_t, SZ) \ > + RUN (uint64_t, int32_t, SZ) \ > + RUN (uint64_t, uint32_t, SZ) > + > +int > +main () > +{ > + RUN_ALL (15) > + RUN_ALL (16) > + RUN_ALL (17) > + RUN_ALL (127) > + RUN_ALL (128) > + RUN_ALL (129) > + RUN_ALL (512) > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c > new file mode 100644 > index 00000000000..7638851e4fa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/narrow_run-3.c > @@ -0,0 +1,46 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */ > + > +#include > +#include 
"narrow-3.c" > + > +#define RUN(TYPE1, TYPE2, SZ) \ > + TYPE1 a##TYPE1##_##TYPE2##_##SZ[SZ]; \ > + TYPE2 dst##TYPE1##_##TYPE2##_##SZ[SZ]; \ > + for (int i = 0; i < SZ; i++) \ > + { \ > + a##TYPE1##_##TYPE2##_##SZ[i] = i % 8723; \ > + } \ > + vnshift_##TYPE1##_##TYPE2 (dst##TYPE1##_##TYPE2##_##SZ, \ > + a##TYPE1##_##TYPE2##_##SZ, 9, SZ); \ > + for (int i = 0; i < SZ; i++) \ > + { \ > + assert (dst##TYPE1##_##TYPE2##_##SZ[i] \ > + == (TYPE2) (a##TYPE1##_##TYPE2##_##SZ[i] >> 9)); \ > + } > + > +#define RUN_ALL(SZ) \ > + RUN (int16_t, int8_t, SZ) \ > + RUN (int16_t, uint8_t, SZ) \ > + RUN (uint16_t, int8_t, SZ) \ > + RUN (uint16_t, uint8_t, SZ) \ > + RUN (int32_t, int16_t, SZ) \ > + RUN (int32_t, uint16_t, SZ) \ > + RUN (uint32_t, int16_t, SZ) \ > + RUN (uint32_t, uint16_t, SZ) \ > + RUN (int64_t, int32_t, SZ) \ > + RUN (int64_t, uint32_t, SZ) \ > + RUN (uint64_t, int32_t, SZ) \ > + RUN (uint64_t, uint32_t, SZ) > + > +int > +main () > +{ > + RUN_ALL (15) > + RUN_ALL (16) > + RUN_ALL (17) > + RUN_ALL (127) > + RUN_ALL (128) > + RUN_ALL (129) > + RUN_ALL (512) > +} > -- > 2.36.3 > >