lgtm juzhe.zhong@rivai.ai From: Robin Dapp Date: 2023-12-01 23:23 To: gcc-patches; palmer; Kito Cheng; jeffreyalaw; juzhe.zhong@rivai.ai CC: rdapp.gcc Subject: [PATCH] RISC-V: Add vectorized strcmp. Hi, this patch adds a vectorized strcmp implementation and tests. Similar to strlen, expansion is still guarded by -minline-strcmp. I just realized I forgot to make it a series but this one is actually dependent on the NFC patch and the rawmemchr fix before. Regards Robin gcc/ChangeLog: * config/riscv/riscv-protos.h (expand_strcmp): Declare. * config/riscv/riscv-string.cc (riscv_expand_strcmp): Add strategy handling and delegation to scalar and vector expanders. (expand_strcmp): Vectorized implementation. * config/riscv/riscv.md: Add TARGET_VECTOR to strcmp expander. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test. --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-string.cc | 161 +++++++++++++++++- gcc/config/riscv/riscv.md | 3 +- .../riscv/rvv/autovec/builtin/strcmp-run.c | 32 ++++ .../riscv/rvv/autovec/builtin/strcmp.c | 13 ++ 5 files changed, 206 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index c94c82a9973..5878a674413 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *); void expand_cond_ternop (unsigned, rtx *); void expand_popcount (rtx *); void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false); +bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool); void emit_vec_extract (rtx, rtx, poly_int64); /* Rounding mode bitfield for fixed point VXRM. 
*/ diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 6cde1bf89a0..11c1f74d0b3 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2, return false; alignment = UINTVAL (align_rtx); - if (TARGET_ZBB || TARGET_XTHEADBB) + if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR) { - return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment, - ncompare); + bool ok = riscv_vector::expand_strcmp (result, src1, src2, + bytes_rtx, alignment, + ncompare); + if (ok) + return true; } + if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR) + return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment, + ncompare); + return false; } @@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle, } } +/* Implement cmpstr using vector instructions. The ALIGNMENT and + NCOMPARE parameters are unused for now. */ + +bool +expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes, + unsigned HOST_WIDE_INT, bool) +{ + gcc_assert (TARGET_VECTOR); + + /* We don't support big endian. */ + if (BYTES_BIG_ENDIAN) + return false; + + bool with_length = nbytes != NULL_RTX; + + if (with_length + && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes))) + return false; + + if (with_length && CONST_INT_P (nbytes)) + nbytes = force_reg (Pmode, nbytes); + + machine_mode mode = E_QImode; + unsigned int isize = GET_MODE_SIZE (mode).to_constant (); + int lmul = TARGET_MAX_LMUL; + poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); + + machine_mode vmode; + if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits) + .exists (&vmode)) + gcc_unreachable (); + + machine_mode mask_mode = riscv_vector::get_mask_mode (vmode); + + /* Prepare addresses. 
*/ + rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0)); + rtx vsrc1 = change_address (src1, vmode, src_addr1); + + rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0)); + rtx vsrc2 = change_address (src2, vmode, src_addr2); + + /* Set initial pointer bump to 0. */ + rtx cnt = gen_reg_rtx (Pmode); + emit_move_insn (cnt, CONST0_RTX (Pmode)); + + rtx sub = gen_reg_rtx (Pmode); + emit_move_insn (sub, CONST0_RTX (Pmode)); + + /* Create source vectors. */ + rtx vec1 = gen_reg_rtx (vmode); + rtx vec2 = gen_reg_rtx (vmode); + + rtx done = gen_label_rtx (); + rtx loop = gen_label_rtx (); + emit_label (loop); + + /* Bump the pointers. */ + emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt))); + emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt))); + + rtx vlops1[] = {vec1, vsrc1}; + rtx vlops2[] = {vec2, vsrc2}; + + if (!with_length) + { + emit_vlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops1); + + emit_vlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops2); + } + else + { + nbytes = gen_lowpart (Pmode, nbytes); + emit_nonvlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops1, nbytes); + + emit_nonvlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops2, nbytes); + } + + /* Read the vl for the next pointer bump. */ + if (Pmode == SImode) + emit_insn (gen_read_vlsi (cnt)); + else + emit_insn (gen_read_vldi_zero_extend (cnt)); + + if (with_length) + { + rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx); + emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done)); + emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt))); + } + + /* Look for a \0 in the first string. 
*/ + rtx mask0 = gen_reg_rtx (mask_mode); + rtx eq0 + = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)), + vec1); + rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)}; + emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode), + riscv_vector::COMPARE_OP, vmsops1, cnt); + + /* Look for vec1 != vec2 (includes vec2[i] == 0). */ + rtx maskne = gen_reg_rtx (mask_mode); + rtx ne = gen_rtx_NE (mask_mode, vec1, vec2); + rtx vmsops[] = {maskne, ne, vec1, vec2}; + emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP, + vmsops, cnt); + + /* Combine both masks into one. */ + rtx mask = gen_reg_rtx (mask_mode); + rtx vmorops[] = {mask, mask0, maskne}; + emit_nonvlmax_insn (code_for_pred (IOR, mask_mode), + riscv_vector::BINARY_MASK_OP, vmorops, cnt); + + /* Find the first bit in the mask (the first unequal element). */ + rtx found_at = gen_reg_rtx (Pmode); + rtx vfops[] = {found_at, mask}; + emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode), + riscv_vector::CPOP_OP, vfops, cnt); + + /* Emit the loop condition. */ + rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx); + emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop)); + + /* Walk up to the difference point. */ + emit_insn ( + gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at))); + emit_insn ( + gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at))); + + /* Load the respective byte and compute the difference. 
*/ + rtx c1 = gen_reg_rtx (Pmode); + rtx c2 = gen_reg_rtx (Pmode); + + do_load_from_addr (mode, c1, src_addr1, src1); + do_load_from_addr (mode, c2, src_addr2, src2); + + do_sub3 (sub, c1, c2); + + if (with_length) + emit_label (done); + + emit_insn (gen_movsi (result, gen_lowpart (SImode, sub))); + return true; +} + } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 54015eed57c..b805b1723b8 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3726,7 +3726,8 @@ (define_expand "cmpstrsi" (compare:SI (match_operand:BLK 1) (match_operand:BLK 2))) (use (match_operand:SI 3))])] - "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)" + "riscv_inline_strcmp && !optimize_size + && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)" { if (riscv_expand_strcmp (operands[0], operands[1], operands[2], NULL_RTX, operands[3])) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c new file mode 100644 index 00000000000..6dec7da91c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +#include <string.h> + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strcmp (s, t); +} + +int +__attribute__ ((noipa, optimize ("0"))) +foo2 (const char *s, const char *t) +{ + return strcmp (s, t); +} + +#define SZ 10 + +int main () +{ + const char *s[SZ] + = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43", + "a", "z", "1", "9", "12345678901234567889012345678901234567890"}; + + for (int i = 0; i < SZ; i++) + for (int j = 0; j < SZ; j++) + if (foo (s[i], s[j]) != foo2 (s[i], s[j])) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c new file mode 100644 index 00000000000..f9d33a74fc5 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { riscv_v } } } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strcmp (s, t); +} + +/* { dg-final { scan-assembler-times "vle8ff" 2 } } */ +/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */ +/* { dg-final { scan-assembler-times "vmor.m" 1 } } */ -- 2.43.0