This patch is the critical patch for following patches since it is a bug which I already address in rvv-next. juzhe.zhong@rivai.ai From: Juzhe-Zhong Date: 2023-06-28 09:59 To: gcc-patches CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; Juzhe-Zhong Subject: [PATCH V2] RISC-V: Fix bug of pre-calculated const vector mask This bug blocks the following patches. GCC doesn't know RVV is using compact mask model. Consider this following case: #define N 16 int main () { int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; int8_t out[N] = {0}; for (int8_t i = 0; i < N; ++i) if (mask[i]) out[i] = i; for (int8_t i = 0; i < N; ++i) { if (mask[i]) assert (out[i] == i); else assert (out[i] == 0); } } Before this patch, the pre-calculated mask in constant memory pool: .LC1: .byte 68 ====> 0b01000100 This is incorrect, such case failed in execution. After this patch: .LC1: .byte 10 ====> 0b1010 Pass on exection. gcc/ChangeLog: * config/riscv/riscv-v.cc (rvv_builder::get_compact_mask): New function. (expand_const_vector): Ditto. * config/riscv/riscv.cc (riscv_const_insns): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c: New test. --- gcc/config/riscv/riscv-v.cc | 64 +++++++++++++++++-- gcc/config/riscv/riscv.cc | 6 ++ .../riscv/rvv/autovec/vls-vlmax/bitmask-1.c | 23 +++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-2.c | 23 +++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-3.c | 23 +++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-4.c | 23 +++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-5.c | 25 ++++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-6.c | 27 ++++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-7.c | 30 +++++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-8.c | 30 +++++++++ .../riscv/rvv/autovec/vls-vlmax/bitmask-9.c | 30 +++++++++ 11 files changed, 299 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index adb8d7d36a5..5da0dc5e998 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -291,6 +291,7 @@ public: bool single_step_npatterns_p () const; bool npatterns_all_equal_p () const; + rtx get_compact_mask () const; machine_mode new_mode () const { return m_new_mode; } scalar_mode inner_mode () const { return m_inner_mode; } @@ -505,6 +506,47 @@ rvv_builder::npatterns_all_equal_p () const return true; } +/* Generate the compact mask. + + E.g: mask = { 0, -1 }, mode = VNx2BI, bitsize = 128bits. + + GCC by default will generate the mask = 0b00000001xxxxx. + + However, it's not expected mask for RVV since RVV + prefers the compact mask = 0b10xxxxx. +*/ +rtx +rvv_builder::get_compact_mask () const +{ + /* If TARGET_MIN_VLEN == 32, the minimum LMUL = 1/4. + Otherwise, the minimum LMUL = 1/8. */ + unsigned min_lmul = TARGET_MIN_VLEN == 32 ? 4 : 8; + unsigned min_container_size + = BYTES_PER_RISCV_VECTOR.to_constant () / min_lmul; + unsigned container_size = MAX (CEIL (npatterns (), 8), min_container_size); + machine_mode container_mode + = get_vector_mode (QImode, container_size).require (); + + unsigned nunits = GET_MODE_NUNITS (container_mode).to_constant (); + rtvec v = rtvec_alloc (nunits); + for (unsigned i = 0; i < nunits; i++) + RTVEC_ELT (v, i) = const0_rtx; + + unsigned char b = 0; + for (unsigned i = 0; i < npatterns (); i++) + { + if (INTVAL (elt (i))) + b = b | (1 << (i % 8)); + + if ((i > 0 && (i % 8) == 7) || (i == (npatterns () - 1))) + { + RTVEC_ELT (v, ((i + 7) / 8) - 1) = gen_int_mode (b, QImode); + b = 0; + } + } + return gen_rtx_CONST_VECTOR (container_mode, v); +} + static unsigned get_sew (machine_mode mode) { @@ -1141,11 +1183,23 @@ expand_const_vector (rtx target, rtx src) if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) { rtx elt; - gcc_assert ( - const_vec_duplicate_p (src, &elt) - && (rtx_equal_p (elt, const0_rtx) || rtx_equal_p (elt, const1_rtx))); - rtx ops[] = {target, src}; - emit_vlmax_insn (code_for_pred_mov (mode), RVV_UNOP, ops); + unsigned int nelts; + if (const_vec_duplicate_p (src, &elt)) + { + rtx ops[] = {target, src}; + emit_vlmax_insn (code_for_pred_mov (mode), RVV_UNOP, ops); + } + else if (GET_MODE_NUNITS (mode).is_constant (&nelts)) + { + rvv_builder builder (mode, nelts, 1); + for (unsigned int i = 0; i < nelts; i++) + builder.quick_push (CONST_VECTOR_ELT (src, i)); + rtx mask = builder.get_compact_mask (); + rtx mem = validize_mem (force_const_mem (GET_MODE (mask), mask)); + emit_move_insn (target, gen_rtx_MEM (mode, XEXP (mem, 0))); + } + else + gcc_unreachable (); return; } diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 280aa0b33b9..86c83f0906d 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1323,6 +1323,12 @@ riscv_const_insns (rtx x) return 1 + 4; /*vmv.v.x + memory access. */ } } + + /* GCC doesn't known RVV is using compact model of mask, + we should by default handle mask in mov pattern. */ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL) + /* TODO: We can adjust it according real cost model of vlm.v. */ + return 1; } /* TODO: We may support more const vector in the future. */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c new file mode 100644 index 00000000000..81229fd62b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-1.c @@ -0,0 +1,23 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ + +#include +#include +#define N 16 + +int +main () +{ + int mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int64_t out[N] = {0}; + for (int i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c new file mode 100644 index 00000000000..a23e47171bc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-2.c @@ -0,0 +1,23 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ + +#include +#include +#define N 16 + +int +main () +{ + int mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int out[N] = {0}; + for (int i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c new file mode 100644 index 00000000000..6ea8fdd89c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-3.c @@ -0,0 +1,23 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ + +#include +#include +#define N 16 + +int +main () +{ + int16_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int16_t out[N] = {0}; + for (int16_t i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int16_t i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c new file mode 100644 index 00000000000..2d97c26abfd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-4.c @@ -0,0 +1,23 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */ + +#include +#include +#define N 16 + +int +main () +{ + int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int8_t out[N] = {0}; + for (int8_t i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int8_t i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c new file mode 100644 index 00000000000..b89b70e99a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-5.c @@ -0,0 +1,25 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -O3" } */ + +#include +#include + +#define N 32 + +int +main () +{ + int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int8_t out[N] = {0}; + for (int8_t i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int8_t i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c new file mode 100644 index 00000000000..ac8d91e793b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-6.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m4 -O3" } */ + +#include +#include + +#define N 64 + +int +main () +{ + int8_t mask[N] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int8_t out[N] = {0}; + for (int8_t i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int8_t i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c new file mode 100644 index 00000000000..f538db23b1d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-7.c @@ -0,0 +1,30 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m8 -O3" } */ + +#include +#include + +#define N 128 + +int +main () +{ + uint8_t mask[N] + = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + uint8_t out[N] = {0}; + for (uint8_t i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (uint8_t i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c new file mode 100644 index 00000000000..5abb34c1686 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-8.c @@ -0,0 +1,30 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m8 -O3" } */ + +#include +#include + +#define N 128 + +int +main () +{ + int mask[N] + = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int out[N] = {0}; + for (int i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c new file mode 100644 index 00000000000..6fdaa516534 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/bitmask-9.c @@ -0,0 +1,30 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m8 -O3" } */ + +#include +#include + +#define N 128 + +int +main () +{ + int64_t mask[N] + = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + int64_t out[N] = {0}; + for (int i = 0; i < N; ++i) + if (mask[i]) + out[i] = i; + for (int i = 0; i < N; ++i) + { + if (mask[i]) + assert (out[i] == i); + else + assert (out[i] == 0); + } +} -- 2.36.1