I have commented in commit log: before this patch: The mask is: .LC1: .byte 68 ====> 0b01000100 However, this is incorrect for RVV since RVV always uses 1-bit compact mask, now after this patch: .LC1: .byte 10 ====> 0b1010 juzhe.zhong@rivai.ai From: Kito Cheng Date: 2023-06-28 11:16 To: juzhe.zhong@rivai.ai CC: gcc-patches; kito.cheng; palmer; palmer; jeffreyalaw; Robin Dapp Subject: Re: [PATCH V2] RISC-V: Fix bug of pre-calculated const vector mask Do you mind giving some comments about what the difference between the two versions? On Wed, Jun 28, 2023 at 11:14 AM juzhe.zhong@rivai.ai wrote: > > This patch is the critical patch for following patches since it is a bug which I already address in rvv-next. > > ________________________________ > juzhe.zhong@rivai.ai > > > From: Juzhe-Zhong > Date: 2023-06-28 09:59 > To: gcc-patches > CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong > Subject: [PATCH V2] RISC-V: Fix bug of pre-calculated const vector mask > This bug blocks the following patches. > > GCC doesn't know RVV is using compact mask model. > Consider this following case: > > #define N 16 > > int > main () > { > int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > int8_t out[N] = {0}; > for (int8_t i = 0; i < N; ++i) > if (mask[i]) > out[i] = i; > for (int8_t i = 0; i < N; ++i) > { > if (mask[i]) > assert (out[i] == i); > else > assert (out[i] == 0); > } > } > > Before this patch, the pre-calculated mask in constant memory pool: > .LC1: > .byte 68 ====> 0b01000100 > > This is incorrect, such case failed in execution. > > After this patch: > .LC1: > .byte 10 ====> 0b1010 > > Pass on exection. > > gcc/ChangeLog: > > * config/riscv/riscv-v.cc (rvv_builder::get_compact_mask): New function. > (expand_const_vector): Ditto. > * config/riscv/riscv.cc (riscv_const_insns): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c: New test. > * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c: New test. > > --- > gcc/config/riscv/riscv-v.cc | 64 +++++++++++++++++-- > gcc/config/riscv/riscv.cc | 6 ++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-1.c | 23 +++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-2.c | 23 +++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-3.c | 23 +++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-4.c | 23 +++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-5.c | 25 ++++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-6.c | 27 ++++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-7.c | 30 +++++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-8.c | 30 +++++++++ > .../riscv/rvv/autovec/vls-vlmax/bitmask-9.c | 30 +++++++++ > 11 files changed, 299 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c > > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index adb8d7d36a5..5da0dc5e998 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -291,6 +291,7 @@ public: > bool single_step_npatterns_p () const; > bool npatterns_all_equal_p () const; > + rtx get_compact_mask () const; > machine_mode new_mode () const { return m_new_mode; } > scalar_mode inner_mode () const { return m_inner_mode; } > @@ -505,6 +506,47 @@ rvv_builder::npatterns_all_equal_p () const > return true; > } > +/* Generate the compact mask. > + > + E.g: mask = { 0, -1 }, mode = VNx2BI, bitsize = 128bits. > + > + GCC by default will generate the mask = 0b00000001xxxxx. > + > + However, it's not expected mask for RVV since RVV > + prefers the compact mask = 0b10xxxxx. > +*/ > +rtx > +rvv_builder::get_compact_mask () const > +{ > + /* If TARGET_MIN_VLEN == 32, the minimum LMUL = 1/4. > + Otherwise, the minimum LMUL = 1/8. */ > + unsigned min_lmul = TARGET_MIN_VLEN == 32 ? 4 : 8; > + unsigned min_container_size > + = BYTES_PER_RISCV_VECTOR.to_constant () / min_lmul; > + unsigned container_size = MAX (CEIL (npatterns (), 8), min_container_size); > + machine_mode container_mode > + = get_vector_mode (QImode, container_size).require (); > + > + unsigned nunits = GET_MODE_NUNITS (container_mode).to_constant (); > + rtvec v = rtvec_alloc (nunits); > + for (unsigned i = 0; i < nunits; i++) > + RTVEC_ELT (v, i) = const0_rtx; > + > + unsigned char b = 0; > + for (unsigned i = 0; i < npatterns (); i++) > + { > + if (INTVAL (elt (i))) > + b = b | (1 << (i % 8)); > + > + if ((i > 0 && (i % 8) == 7) || (i == (npatterns () - 1))) > + { > + RTVEC_ELT (v, ((i + 7) / 8) - 1) = gen_int_mode (b, QImode); > + b = 0; > + } > + } > + return gen_rtx_CONST_VECTOR (container_mode, v); > +} > + > static unsigned > get_sew (machine_mode mode) > { > @@ -1141,11 +1183,23 @@ expand_const_vector (rtx target, rtx src) > if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) > { > rtx elt; > - gcc_assert ( > - const_vec_duplicate_p (src, &elt) > - && (rtx_equal_p (elt, const0_rtx) || rtx_equal_p (elt, const1_rtx))); > - rtx ops[] = {target, src}; > - emit_vlmax_insn (code_for_pred_mov (mode), RVV_UNOP, ops); > + unsigned int nelts; > + if (const_vec_duplicate_p (src, &elt)) > + { > + rtx ops[] = {target, src}; > + emit_vlmax_insn (code_for_pred_mov (mode), RVV_UNOP, ops); > + } > + else if (GET_MODE_NUNITS (mode).is_constant (&nelts)) > + { > + rvv_builder builder (mode, nelts, 1); > + for (unsigned int i = 0; i < nelts; i++) > + builder.quick_push (CONST_VECTOR_ELT (src, i)); > + rtx mask = builder.get_compact_mask (); > + rtx mem = validize_mem (force_const_mem (GET_MODE (mask), mask)); > + emit_move_insn (target, gen_rtx_MEM (mode, XEXP (mem, 0))); > + } > + else > + gcc_unreachable (); > return; > } > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 280aa0b33b9..86c83f0906d 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -1323,6 +1323,12 @@ riscv_const_insns (rtx x) > return 1 + 4; /*vmv.v.x + memory access. */ > } > } > + > + /* GCC doesn't known RVV is using compact model of mask, > + we should by default handle mask in mov pattern. */ > + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL) > + /* TODO: We can adjust it according real cost model of vlm.v. */ > + return 1; > } > /* TODO: We may support more const vector in the future. */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c > new file mode 100644 > index 00000000000..81229fd62b9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c > @@ -0,0 +1,23 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ > + > +#include > +#include > +#define N 16 > + > +int > +main () > +{ > + int mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int64_t out[N] = {0}; > + for (int i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c > new file mode 100644 > index 00000000000..a23e47171bc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c > @@ -0,0 +1,23 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ > + > +#include > +#include > +#define N 16 > + > +int > +main () > +{ > + int mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int out[N] = {0}; > + for (int i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c > new file mode 100644 > index 00000000000..6ea8fdd89c0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c > @@ -0,0 +1,23 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ > + > +#include > +#include > +#define N 16 > + > +int > +main () > +{ > + int16_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int16_t out[N] = {0}; > + for (int16_t i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int16_t i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c > new file mode 100644 > index 00000000000..2d97c26abfd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c > @@ -0,0 +1,23 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ > + > +#include > +#include > +#define N 16 > + > +int > +main () > +{ > + int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int8_t out[N] = {0}; > + for (int8_t i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int8_t i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c > new file mode 100644 > index 00000000000..b89b70e99a6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c > @@ -0,0 +1,25 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -O3" } */ > + > +#include > +#include > + > +#define N 32 > + > +int > +main () > +{ > + int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int8_t out[N] = {0}; > + for (int8_t i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int8_t i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c > new file mode 100644 > index 00000000000..ac8d91e793b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4 -O3" } */ > + > +#include > +#include > + > +#define N 64 > + > +int > +main () > +{ > + int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int8_t out[N] = {0}; > + for (int8_t i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int8_t i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c > new file mode 100644 > index 00000000000..f538db23b1d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c > @@ -0,0 +1,30 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m8 -O3" } */ > + > +#include > +#include > + > +#define N 128 > + > +int > +main () > +{ > + uint8_t mask[N] > + = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + uint8_t out[N] = {0}; > + for (uint8_t i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (uint8_t i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c > new file mode 100644 > index 00000000000..5abb34c1686 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c > @@ -0,0 +1,30 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m8 -O3" } */ > + > +#include > +#include > + > +#define N 128 > + > +int > +main () > +{ > + int mask[N] > + = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int out[N] = {0}; > + for (int i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c > new file mode 100644 > index 00000000000..6fdaa516534 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c > @@ -0,0 +1,30 @@ > +/* { dg-do run { target { riscv_vector } } } */ > +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m8 -O3" } */ > + > +#include > +#include > + > +#define N 128 > + > +int > +main () > +{ > + int64_t mask[N] > + = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, > + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; > + int64_t out[N] = {0}; > + for (int i = 0; i < N; ++i) > + if (mask[i]) > + out[i] = i; > + for (int i = 0; i < N; ++i) > + { > + if (mask[i]) > + assert (out[i] == i); > + else > + assert (out[i] == 0); > + } > +} > -- > 2.36.1 >