FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test FAIL: gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c execution test juzhe.zhong@rivai.ai From: Robin Dapp Date: 2023-12-09 00:25 To: gcc-patches; palmer; kito.cheng; Jeff Law; 钟居哲 CC: rdapp.gcc Subject: Re: [PATCH] RISC-V: Add vectorized strcmp. Ah, I forgot to attach the current v2 that also enables strncmp. It was additionally tested with -minline-strncmp on rv64gcv. Regards Robin Subject: [PATCH v2] RISC-V: Add vectorized strcmp and strncmp. This patch adds vectorized strcmp and strncmp implementations and tests. Similar to strlen, expansion is still guarded by -minline-str(n)cmp. gcc/ChangeLog: PR target/112109 * config/riscv/riscv-protos.h (expand_strcmp): Declare. * config/riscv/riscv-string.cc (riscv_expand_strcmp): Add strategy handling and delegation to scalar and vector expanders. (expand_strcmp): Vectorized implementation. * config/riscv/riscv.md: Add TARGET_VECTOR to strcmp and strncmp expander. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strncmp.c: New test. 
--- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-string.cc | 161 +++++++++++++++++- gcc/config/riscv/riscv.md | 6 +- .../riscv/rvv/autovec/builtin/strcmp-run.c | 32 ++++ .../riscv/rvv/autovec/builtin/strcmp.c | 13 ++ .../riscv/rvv/autovec/builtin/strncmp-run.c | 136 +++++++++++++++ .../riscv/rvv/autovec/builtin/strncmp.c | 13 ++ 7 files changed, 357 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index c7b5789a4b3..20bbb5b859c 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *); void expand_cond_ternop (unsigned, rtx *); void expand_popcount (rtx *); void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false); +bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool); void emit_vec_extract (rtx, rtx, poly_int64); /* Rounding mode bitfield for fixed point VXRM. 
*/ diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 6cde1bf89a0..11c1f74d0b3 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2, return false; alignment = UINTVAL (align_rtx); - if (TARGET_ZBB || TARGET_XTHEADBB) + if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR) { - return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment, - ncompare); + bool ok = riscv_vector::expand_strcmp (result, src1, src2, + bytes_rtx, alignment, + ncompare); + if (ok) + return true; } + if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR) + return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment, + ncompare); + return false; } @@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle, } } +/* Implement cmpstr using vector instructions. The ALIGNMENT and + NCOMPARE parameters are unused for now. */ + +bool +expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes, + unsigned HOST_WIDE_INT, bool) +{ + gcc_assert (TARGET_VECTOR); + + /* We don't support big endian. */ + if (BYTES_BIG_ENDIAN) + return false; + + bool with_length = nbytes != NULL_RTX; + + if (with_length + && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes))) + return false; + + if (with_length && CONST_INT_P (nbytes)) + nbytes = force_reg (Pmode, nbytes); + + machine_mode mode = E_QImode; + unsigned int isize = GET_MODE_SIZE (mode).to_constant (); + int lmul = TARGET_MAX_LMUL; + poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); + + machine_mode vmode; + if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits) + .exists (&vmode)) + gcc_unreachable (); + + machine_mode mask_mode = riscv_vector::get_mask_mode (vmode); + + /* Prepare addresses. 
*/ + rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0)); + rtx vsrc1 = change_address (src1, vmode, src_addr1); + + rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0)); + rtx vsrc2 = change_address (src2, vmode, src_addr2); + + /* Set initial pointer bump to 0. */ + rtx cnt = gen_reg_rtx (Pmode); + emit_move_insn (cnt, CONST0_RTX (Pmode)); + + rtx sub = gen_reg_rtx (Pmode); + emit_move_insn (sub, CONST0_RTX (Pmode)); + + /* Create source vectors. */ + rtx vec1 = gen_reg_rtx (vmode); + rtx vec2 = gen_reg_rtx (vmode); + + rtx done = gen_label_rtx (); + rtx loop = gen_label_rtx (); + emit_label (loop); + + /* Bump the pointers. */ + emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt))); + emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt))); + + rtx vlops1[] = {vec1, vsrc1}; + rtx vlops2[] = {vec2, vsrc2}; + + if (!with_length) + { + emit_vlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops1); + + emit_vlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops2); + } + else + { + nbytes = gen_lowpart (Pmode, nbytes); + emit_nonvlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops1, nbytes); + + emit_nonvlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops2, nbytes); + } + + /* Read the vl for the next pointer bump. */ + if (Pmode == SImode) + emit_insn (gen_read_vlsi (cnt)); + else + emit_insn (gen_read_vldi_zero_extend (cnt)); + + if (with_length) + { + rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx); + emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done)); + emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt))); + } + + /* Look for a \0 in the first string. 
*/ + rtx mask0 = gen_reg_rtx (mask_mode); + rtx eq0 + = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)), + vec1); + rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)}; + emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode), + riscv_vector::COMPARE_OP, vmsops1, cnt); + + /* Look for vec1 != vec2 (includes vec2[i] == 0). */ + rtx maskne = gen_reg_rtx (mask_mode); + rtx ne = gen_rtx_NE (mask_mode, vec1, vec2); + rtx vmsops[] = {maskne, ne, vec1, vec2}; + emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP, + vmsops, cnt); + + /* Combine both masks into one. */ + rtx mask = gen_reg_rtx (mask_mode); + rtx vmorops[] = {mask, mask0, maskne}; + emit_nonvlmax_insn (code_for_pred (IOR, mask_mode), + riscv_vector::BINARY_MASK_OP, vmorops, cnt); + + /* Find the first bit in the mask (the first unequal element). */ + rtx found_at = gen_reg_rtx (Pmode); + rtx vfops[] = {found_at, mask}; + emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode), + riscv_vector::CPOP_OP, vfops, cnt); + + /* Emit the loop condition. */ + rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx); + emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop)); + + /* Walk up to the difference point. */ + emit_insn ( + gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at))); + emit_insn ( + gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at))); + + /* Load the respective byte and compute the difference. 
*/ + rtx c1 = gen_reg_rtx (Pmode); + rtx c2 = gen_reg_rtx (Pmode); + + do_load_from_addr (mode, c1, src_addr1, src1); + do_load_from_addr (mode, c2, src_addr2, src2); + + do_sub3 (sub, c1, c2); + + if (with_length) + emit_label (done); + + emit_insn (gen_movsi (result, gen_lowpart (SImode, sub))); + return true; +} + } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 6f9dec8c152..eed997116b0 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3702,7 +3702,8 @@ (define_expand "cmpstrnsi" (match_operand:BLK 2))) (use (match_operand:SI 3)) (use (match_operand:SI 4))])] - "riscv_inline_strncmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)" + "riscv_inline_strncmp && !optimize_size + && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)" { if (riscv_expand_strcmp (operands[0], operands[1], operands[2], operands[3], operands[4])) @@ -3722,7 +3723,8 @@ (define_expand "cmpstrsi" (compare:SI (match_operand:BLK 1) (match_operand:BLK 2))) (use (match_operand:SI 3))])] - "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)" + "riscv_inline_strcmp && !optimize_size + && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)" { if (riscv_expand_strcmp (operands[0], operands[1], operands[2], NULL_RTX, operands[3])) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c new file mode 100644 index 00000000000..6dec7da91c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +#include <string.h> + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strcmp (s, t); +} + +int +__attribute__ ((noipa, optimize ("0"))) +foo2 (const char *s, const char *t) +{ + return strcmp (s, t); +} + +#define SZ 10 + +int main () +{ + const char *s[SZ] + = {"", "asdf", "0", "\0",
"!@#$%***m1123fdnmoi43", + "a", "z", "1", "9", "12345678901234567889012345678901234567890"}; + + for (int i = 0; i < SZ; i++) + for (int j = 0; j < SZ; j++) + if (foo (s[i], s[j]) != foo2 (s[i], s[j])) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c new file mode 100644 index 00000000000..f9d33a74fc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { riscv_v } } } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strcmp (s, t); +} + +/* { dg-final { scan-assembler-times "vle8ff" 2 } } */ +/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */ +/* { dg-final { scan-assembler-times "vmor.m" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c new file mode 100644 index 00000000000..8d1471a3a13 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c @@ -0,0 +1,136 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +#include <string.h> + +int +__attribute__ ((noipa, optimize ("0"))) +foo2 (const char *s, const char *t, int n) +{ + return strncmp (s, t, n); +} + +#define SZ 11 + +#define TEST(I, J, N) \ + int res_##I_##J_##N = __builtin_strncmp (s[I], s[J], N); \ + int ref_##I_##J_##N = foo2 (s[I], s[J], N); \ + if (res_##I_##J_##N != ref_##I_##J_##N) \ + __builtin_abort (); + +int main () +{ + const char *s[SZ] + = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43", + "a", "z", "1", "9", "12345678901234567889012345678901234567890", + "ds0fi0349r0sdmfvi0sjf0c9fj034mrx903cw0efmc9jfsicn2390crrm0i90msdfi0sdf0"}; + + for (int i = 0; i < SZ; i++) + for (int j = 0; j < SZ; j++) + { + TEST(i, j, 0) + TEST(i, j, 1) + TEST(i, j, 2) + TEST(i, j, 3) +
TEST(i, j, 4) + TEST(i, j, 5) + TEST(i, j, 6) + TEST(i, j, 7) + TEST(i, j, 8) + TEST(i, j, 9) + TEST(i, j, 10) + TEST(i, j, 11) + TEST(i, j, 12) + TEST(i, j, 13) + TEST(i, j, 14) + TEST(i, j, 15) + TEST(i, j, 16) + TEST(i, j, 17) + TEST(i, j, 18) + TEST(i, j, 19) + TEST(i, j, 20) + TEST(i, j, 21) + TEST(i, j, 22) + TEST(i, j, 23) + TEST(i, j, 24) + TEST(i, j, 25) + TEST(i, j, 26) + TEST(i, j, 27) + TEST(i, j, 28) + TEST(i, j, 29) + TEST(i, j, 30) + TEST(i, j, 31) + TEST(i, j, 32) + TEST(i, j, 33) + TEST(i, j, 34) + TEST(i, j, 35) + TEST(i, j, 36) + TEST(i, j, 37) + TEST(i, j, 38) + TEST(i, j, 39) + TEST(i, j, 40) + TEST(i, j, 41) + TEST(i, j, 42) + TEST(i, j, 43) + TEST(i, j, 44) + TEST(i, j, 45) + TEST(i, j, 46) + TEST(i, j, 47) + TEST(i, j, 48) + TEST(i, j, 49) + TEST(i, j, 50) + TEST(i, j, 51) + TEST(i, j, 52) + TEST(i, j, 53) + TEST(i, j, 54) + TEST(i, j, 55) + TEST(i, j, 56) + TEST(i, j, 57) + TEST(i, j, 58) + TEST(i, j, 59) + TEST(i, j, 60) + TEST(i, j, 61) + TEST(i, j, 62) + TEST(i, j, 63) + TEST(i, j, 64) + TEST(i, j, 65) + TEST(i, j, 66) + TEST(i, j, 67) + TEST(i, j, 68) + TEST(i, j, 69) + TEST(i, j, 70) + TEST(i, j, 71) + TEST(i, j, 72) + TEST(i, j, 73) + TEST(i, j, 74) + TEST(i, j, 75) + TEST(i, j, 76) + TEST(i, j, 77) + TEST(i, j, 78) + TEST(i, j, 79) + TEST(i, j, 80) + TEST(i, j, 81) + TEST(i, j, 82) + TEST(i, j, 83) + TEST(i, j, 84) + TEST(i, j, 85) + TEST(i, j, 86) + TEST(i, j, 87) + TEST(i, j, 88) + TEST(i, j, 89) + TEST(i, j, 90) + TEST(i, j, 91) + TEST(i, j, 92) + TEST(i, j, 93) + TEST(i, j, 94) + TEST(i, j, 95) + TEST(i, j, 96) + TEST(i, j, 97) + TEST(i, j, 98) + TEST(i, j, 99) + TEST(i, j, 100) + TEST(i, j, 101) + TEST(i, j, 102) + TEST(i, j, 103) + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c new file mode 100644 index 00000000000..a89633ea9d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c @@ -0,0 +1,13 @@ +/* { 
dg-do compile { target { riscv_v } } } */ +/* { dg-additional-options "-O3 -minline-strncmp" } */ + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strncmp (s, t, 7); +} + +/* { dg-final { scan-assembler-times "vle8ff" 2 } } */ +/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */ +/* { dg-final { scan-assembler-times "vmor.m" 1 } } */ -- 2.43.0