* [PATCH v2 2/3] RISC-V: setmem for RISCV with V extension
2023-12-19 9:53 [PATCH v2 0/3] RISC-V: vectorised memory operations Sergei Lewis
2023-12-19 9:53 ` [PATCH v2 1/3] RISC-V: movmem for RISCV with V extension Sergei Lewis
@ 2023-12-19 9:53 ` Sergei Lewis
2023-12-20 5:38 ` Jeff Law
2023-12-19 9:53 ` [PATCH v2 3/3] RISC-V: cmpmem " Sergei Lewis
2 siblings, 1 reply; 10+ messages in thread
From: Sergei Lewis @ 2023-12-19 9:53 UTC (permalink / raw)
To: gcc-patches
gcc/ChangeLog:
* config/riscv/riscv-protos.h (riscv_vector::expand_vec_setmem): New function
declaration.
* config/riscv/riscv-string.cc (riscv_vector::expand_vec_setmem): New
function: this generates an inline vectorised memory set, if and only if we
know the entire operation can be performed in a single vector store.
* config/riscv/riscv.md (setmemsi): Try riscv_vector::expand_vec_setmem
for constant lengths.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/setmem-1.c: New tests.
* gcc.target/riscv/rvv/base/setmem-2.c: New tests.
* gcc.target/riscv/rvv/base/setmem-3.c: New tests.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-string.cc | 90 +++++++++++++++
gcc/config/riscv/riscv.md | 14 +++
.../gcc.target/riscv/rvv/base/setmem-1.c | 103 ++++++++++++++++++
.../gcc.target/riscv/rvv/base/setmem-2.c | 51 +++++++++
.../gcc.target/riscv/rvv/base/setmem-3.c | 69 ++++++++++++
6 files changed, 328 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index eaee53ce94e..c4531589300 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -637,6 +637,7 @@ void expand_popcount (rtx *);
void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, rtx);
+bool expand_vec_setmem (rtx, rtx, rtx, rtx);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 11c1f74d0b3..e506b92a552 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1247,4 +1247,94 @@ expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
return true;
}
+/* Check we are permitted to vectorise a memory operation.
+ If so, return true and populate lmul_out.
+ Otherwise, return false and leave lmul_out unchanged. */
+static bool
+check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out)
+{
+ /* If we either can't or have been asked not to vectorise, respect this. */
+ if (!TARGET_VECTOR)
+ return false;
+ if (!(stringop_strategy & STRATEGY_VECTOR))
+ return false;
+
+ /* If we can't reason about the length, don't vectorise. */
+ if (!CONST_INT_P (length_in))
+ return false;
+
+ HOST_WIDE_INT length = INTVAL (length_in);
+
+ /* If it's tiny, default operation is likely better; maybe worth
+ considering fractional lmul in the future as well. */
+ if (length < (TARGET_MIN_VLEN / 8))
+ return false;
+
+ /* If we've been asked to use a specific LMUL,
+ check the operation fits and do that. */
+ if (riscv_autovec_lmul != RVV_DYNAMIC)
+ {
+ lmul_out = TARGET_MAX_LMUL;
+ return (length <= ((TARGET_MAX_LMUL * TARGET_MIN_VLEN) / 8));
+ }
+
+ /* Find smallest lmul large enough for entire op. */
+ HOST_WIDE_INT lmul = 1;
+ while ((lmul <= 8) && (length > ((lmul * TARGET_MIN_VLEN) / 8)))
+ {
+ lmul <<= 1;
+ }
+
+ if (lmul > 8)
+ return false;
+
+ lmul_out = lmul;
+ return true;
+}
+
+/* Used by setmemsi in riscv.md. */
+bool
+expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in,
+ rtx alignment_in)
+{
+ HOST_WIDE_INT lmul;
+ /* Check we are able and allowed to vectorise this operation;
+ bail if not. */
+ if (!check_vectorise_memory_operation (length_in, lmul))
+ return false;
+
+ machine_mode vmode
+ = riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul)
+ .require ();
+ rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0));
+ rtx dst = change_address (dst_in, vmode, dst_addr);
+
+ rtx fill_value = gen_reg_rtx (vmode);
+ rtx broadcast_ops[] = { fill_value, fill_value_in };
+
+ /* If the length is exactly vlmax for the selected mode, do that.
+ Otherwise, use a predicated store. */
+ if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in)))
+ {
+ emit_vlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
+ broadcast_ops);
+ emit_move_insn (dst, fill_value);
+ }
+ else
+ {
+ if (!satisfies_constraint_K (length_in))
+ length_in = force_reg (Pmode, length_in);
+ emit_nonvlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
+ broadcast_ops, length_in);
+ machine_mode mask_mode
+ = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (vmode))
+ .require ();
+ rtx mask = CONSTM1_RTX (mask_mode);
+ emit_insn (gen_pred_store (vmode, dst, mask, fill_value, length_in,
+ get_avl_type_rtx (riscv_vector::NONVLMAX)));
+ }
+
+ return true;
+}
+
}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1b3f66fd15c..dd34211ca80 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2387,6 +2387,20 @@
FAIL;
})
+(define_expand "setmemsi"
+ [(set (match_operand:BLK 0 "memory_operand") ;; Dest
+ (match_operand:QI 2 "nonmemory_operand")) ;; Value
+ (use (match_operand:SI 1 "const_int_operand")) ;; Length
+ (match_operand:SI 3 "const_int_operand")] ;; Align
+ "TARGET_VECTOR"
+{
+ if (riscv_vector::expand_vec_setmem (operands[0], operands[1], operands[2],
+ operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
;; Expand in-line code to clear the instruction cache between operand[0] and
;; operand[1].
(define_expand "clear_cache"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-1.c
new file mode 100644
index 00000000000..1c08be978a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-1.c
@@ -0,0 +1,103 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 --param=riscv-autovec-lmul=dynamic" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Tiny memsets should use scalar ops.
+** f1:
+** sb\s+a1,0\(a0\)
+** ret
+*/
+void *
+f1 (void *a, int const b)
+{
+ return __builtin_memset (a, b, 1);
+}
+
+/* Tiny memsets should use scalar ops.
+** f2:
+** sb\s+a1,0\(a0\)
+** sb\s+a1,1\(a0\)
+** ret
+*/
+void *
+f2 (void *a, int const b)
+{
+ return __builtin_memset (a, b, 2);
+}
+
+/* Tiny memsets should use scalar ops.
+** f3:
+** sb\s+a1,0\(a0\)
+** sb\s+a1,1\(a0\)
+** sb\s+a1,2\(a0\)
+** ret
+*/
+void *
+f3 (void *a, int const b)
+{
+ return __builtin_memset (a, b, 3);
+}
+
+/* Vectorise+inline minimum vector register width with LMUL=1
+** f4:
+** (
+** vsetivli\s+zero,\d+,e8,m1,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m1,ta,ma
+** )
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+void *
+f4 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES);
+}
+
+/* Vectorised code should use smallest lmul known to fit length
+** f5:
+** (
+** vsetivli\s+zero,\d+,e8,m2,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m2,ta,ma
+** )
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+void *
+f5 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES + 1);
+}
+
+/* Vectorise+inline up to LMUL=8
+** f6:
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m8,ta,ma
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+void *
+f6 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES * 8);
+}
+
+/* Don't vectorise if the move is too large for one operation.
+** f7:
+** li\s+a2,\d+
+** tail\s+memset
+*/
+void *
+f7 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES * 8 + 1);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
new file mode 100644
index 00000000000..82d181dff3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-2.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 --param riscv-autovec-lmul=m1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Small memsets shouldn't be vectorised.
+** f1:
+** (
+** sb\s+a1,0\(a0\)
+** ...
+** |
+** li\s+a2,\d+
+** tail\s+memset
+** )
+*/
+void *
+f1 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES - 1);
+}
+
+/* Vectorise+inline minimum vector register width using requested lmul.
+** f2:
+** (
+** vsetivli\s+zero,\d+,e8,m1,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m1,ta,ma
+** )
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+void *
+f2 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES);
+}
+
+/* Don't vectorise if the move is too large for requested lmul.
+** f3:
+** li\s+a2,\d+
+** tail\s+memset
+*/
+void *
+f3 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES + 1);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
new file mode 100644
index 00000000000..f043d9e0784
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Small memsets shouldn't be vectorised.
+** f1:
+** (
+** sb\s+a1,0\(a0\)
+** ...
+** |
+** li\s+a2,\d+
+** tail\s+memset
+** )
+*/
+void *
+f1 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES - 1);
+}
+
+/* Vectorise+inline minimum vector register width using requested lmul.
+** f2:
+** (
+** vsetivli\s+zero,\d+,e8,m8,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m8,ta,ma
+** )
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+void *
+f2 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES);
+}
+
+/* Vectorise+inline operations up to requested lmul.
+** f3:
+** (
+** vsetivli\s+zero,\d+,e8,m8,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m8,ta,ma
+** )
+** vmv\.v\.x\s+v\d+,a1
+** vse8\.v\s+v\d+,0\(a0\)
+** ret
+*/
+void *
+f3 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES * 8);
+}
+
+/* Don't vectorise if the move is too large for requested lmul.
+** f4:
+** li\s+a2,\d+
+** tail\s+memset
+*/
+void *
+f4 (void *a, int const b)
+{
+ return __builtin_memset (a, b, MIN_VECTOR_BYTES * 8 + 1);
+}
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 3/3] RISC-V: cmpmem for RISCV with V extension
2023-12-19 9:53 [PATCH v2 0/3] RISC-V: vectorised memory operations Sergei Lewis
2023-12-19 9:53 ` [PATCH v2 1/3] RISC-V: movmem for RISCV with V extension Sergei Lewis
2023-12-19 9:53 ` [PATCH v2 2/3] RISC-V: setmem " Sergei Lewis
@ 2023-12-19 9:53 ` Sergei Lewis
2 siblings, 0 replies; 10+ messages in thread
From: Sergei Lewis @ 2023-12-19 9:53 UTC (permalink / raw)
To: gcc-patches
gcc/ChangeLog:
* config/riscv/riscv-protos.h (riscv_vector::expand_vec_cmpmem): New function
declaration.
* config/riscv/riscv-string.cc (riscv_vector::expand_vec_cmpmem): New
function; this generates an inline vectorised memory compare, if and only if
we know the entire operation can be performed in a single vector load per
input.
* config/riscv/riscv.md (cmpmemsi): Try riscv_vector::expand_vec_cmpmem for
constant lengths.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/cmpmem-1.c: New codegen tests.
* gcc.target/riscv/rvv/base/cmpmem-2.c: New execution tests.
* gcc.target/riscv/rvv/base/cmpmem-3.c: New codegen tests.
* gcc.target/riscv/rvv/base/cmpmem-4.c: New codegen tests.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-string.cc | 100 ++++++++++++++++++
gcc/config/riscv/riscv.md | 15 +++
.../gcc.target/riscv/rvv/base/cmpmem-1.c | 88 +++++++++++++++
.../gcc.target/riscv/rvv/base/cmpmem-2.c | 74 +++++++++++++
.../gcc.target/riscv/rvv/base/cmpmem-3.c | 45 ++++++++
.../gcc.target/riscv/rvv/base/cmpmem-4.c | 62 +++++++++++
7 files changed, 385 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-4.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c4531589300..301aa9b8889 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -638,6 +638,7 @@ void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, rtx);
bool expand_vec_setmem (rtx, rtx, rtx, rtx);
+bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index e506b92a552..3b634851753 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1337,4 +1337,104 @@ expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in,
return true;
}
+/* Used by cmpmemsi in riscv.md. */
+
+bool
+expand_vec_cmpmem (rtx result_out, rtx blk_a_in, rtx blk_b_in, rtx length_in)
+{
+ HOST_WIDE_INT lmul;
+ /* Check we are able and allowed to vectorise this operation;
+ bail if not. */
+ if (!check_vectorise_memory_operation (length_in, lmul))
+ return false;
+
+ /* Strategy:
+ load entire blocks at a and b into vector regs
+ generate mask of bytes that differ
+ find first set bit in mask
+ find offset of first set bit in mask, use 0 if none set
+ result is ((char*)a[offset] - (char*)b[offset])
+ */
+
+ machine_mode vmode
+ = riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul)
+ .require ();
+ rtx blk_a_addr = copy_addr_to_reg (XEXP (blk_a_in, 0));
+ rtx blk_a = change_address (blk_a_in, vmode, blk_a_addr);
+ rtx blk_b_addr = copy_addr_to_reg (XEXP (blk_b_in, 0));
+ rtx blk_b = change_address (blk_b_in, vmode, blk_b_addr);
+
+ rtx vec_a = gen_reg_rtx (vmode);
+ rtx vec_b = gen_reg_rtx (vmode);
+
+ machine_mode mask_mode = get_mask_mode (vmode);
+ rtx mask = gen_reg_rtx (mask_mode);
+ rtx mismatch_ofs = gen_reg_rtx (Pmode);
+
+ rtx ne = gen_rtx_NE (mask_mode, vec_a, vec_b);
+ rtx vmsops[] = { mask, ne, vec_a, vec_b };
+ rtx vfops[] = { mismatch_ofs, mask };
+
+ /* If the length is exactly vlmax for the selected mode, do that.
+ Otherwise, use predicated loads and a predicated compare. */
+
+ if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in)))
+ {
+ emit_move_insn (vec_a, blk_a);
+ emit_move_insn (vec_b, blk_b);
+ emit_vlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP,
+ vmsops);
+
+ emit_vlmax_insn (code_for_pred_ffs (mask_mode, Pmode),
+ riscv_vector::CPOP_OP, vfops);
+ }
+ else
+ {
+ if (!satisfies_constraint_K (length_in))
+ length_in = force_reg (Pmode, length_in);
+
+ rtx memmask = CONSTM1_RTX (mask_mode);
+
+ rtx m_ops_a[] = { vec_a, memmask, blk_a };
+ rtx m_ops_b[] = { vec_b, memmask, blk_b };
+
+ emit_nonvlmax_insn (code_for_pred_mov (vmode),
+ riscv_vector::UNARY_OP_TAMA, m_ops_a, length_in);
+ emit_nonvlmax_insn (code_for_pred_mov (vmode),
+ riscv_vector::UNARY_OP_TAMA, m_ops_b, length_in);
+
+ emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP,
+ vmsops, length_in);
+
+ emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode),
+ riscv_vector::CPOP_OP, vfops, length_in);
+ }
+
+ /* Mismatch_ofs is -1 if blocks match, or the offset of
+ the first mismatch otherwise. */
+ rtx ltz = gen_reg_rtx (Xmode);
+ emit_insn (gen_slt_3 (LT, Xmode, Xmode, ltz, mismatch_ofs, const0_rtx));
+ /* mismatch_ofs += (mismatch_ofs < 0) ? 1 : 0. */
+ emit_insn (
+ gen_rtx_SET (mismatch_ofs, gen_rtx_PLUS (Pmode, mismatch_ofs, ltz)));
+
+ /* Unconditionally load the bytes at mismatch_ofs and subtract them
+ to get our result. */
+ emit_insn (gen_rtx_SET (blk_a_addr,
+ gen_rtx_PLUS (Pmode, mismatch_ofs, blk_a_addr)));
+ emit_insn (gen_rtx_SET (blk_b_addr,
+ gen_rtx_PLUS (Pmode, mismatch_ofs, blk_b_addr)));
+
+ blk_a = change_address (blk_a, QImode, blk_a_addr);
+ blk_b = change_address (blk_b, QImode, blk_b_addr);
+
+ rtx byte_a = gen_reg_rtx (SImode);
+ rtx byte_b = gen_reg_rtx (SImode);
+ do_zero_extendqi2 (byte_a, blk_a);
+ do_zero_extendqi2 (byte_b, blk_b);
+
+ emit_insn (gen_rtx_SET (result_out, gen_rtx_MINUS (SImode, byte_a, byte_b)));
+
+ return true;
+}
}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index dd34211ca80..08dd22ea733 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2401,6 +2401,21 @@
FAIL;
})
+(define_expand "cmpmemsi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (compare:SI (match_operand:BLK 1 "memory_operand" "")
+ (match_operand:BLK 2 "memory_operand" "")))
+ (use (match_operand:SI 3 "general_operand" ""))
+ (use (match_operand:SI 4 "" ""))]
+ "TARGET_VECTOR"
+{
+ if (riscv_vector::expand_vec_cmpmem (operands[0], operands[1],
+ operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
;; Expand in-line code to clear the instruction cache between operand[0] and
;; operand[1].
(define_expand "clear_cache"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-1.c
new file mode 100644
index 00000000000..d4c665dc791
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-1.c
@@ -0,0 +1,88 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 --param=riscv-autovec-lmul=dynamic" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Trivial memcmp should use inline scalar ops.
+** f1:
+** lbu\s+a\d+,0\(a0\)
+** lbu\s+a\d+,0\(a1\)
+** subw?\s+a0,a\d+,a\d+
+** ret
+*/
+int
+f1 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, 1);
+}
+
+/* Tiny __builtin_memcmp should use libc.
+** f2:
+** li\s+a\d,\d+
+** tail\s+memcmp
+*/
+int
+f2 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES - 1);
+}
+
+/* Vectorise+inline minimum vector register width with LMUL=1
+** f3:
+** (
+** vsetivli\s+zero,\d+,e8,m1,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m1,ta,ma
+** )
+** ...
+** ret
+*/
+int
+f3 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES);
+}
+
+/* Vectorised code should use smallest lmul known to fit length
+** f4:
+** (
+** vsetivli\s+zero,\d+,e8,m2,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m2,ta,ma
+** )
+** ...
+** ret
+*/
+int
+f4 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES + 1);
+}
+
+/* Vectorise+inline up to LMUL=8
+** f5:
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m8,ta,ma
+** ...
+** ret
+*/
+int
+f5 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES * 8);
+}
+
+/* Don't inline if the length is too large for one operation.
+** f6:
+** li\s+a2,\d+
+** tail\s+memcmp
+*/
+int
+f6 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES * 8 + 1);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-2.c
new file mode 100644
index 00000000000..81c8bdb33ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-add-options riscv_v } */
+/* { dg-options "-O2 --param=riscv-autovec-lmul=dynamic" } */
+
+#include <stdlib.h>
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+static inline __attribute__ ((always_inline)) void
+do_one_test (int const size, int const diff_offset, int const diff_dir)
+{
+ unsigned char A[size];
+ unsigned char B[size];
+ unsigned char const fill_value = 0x55;
+ __builtin_memset (A, fill_value, size);
+ __builtin_memset (B, fill_value, size);
+
+ if (diff_dir != 0)
+ {
+ if (diff_dir < 0)
+ {
+ A[diff_offset] = fill_value - 1;
+ }
+ else
+ {
+ A[diff_offset] = fill_value + 1;
+ }
+ }
+
+ if (__builtin_memcmp (A, B, size) != diff_dir)
+ {
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ do_one_test (0, 0, 0);
+
+ do_one_test (1, 0, -1);
+ do_one_test (1, 0, 0);
+ do_one_test (1, 0, 1);
+
+ do_one_test (MIN_VECTOR_BYTES - 1, 0, -1);
+ do_one_test (MIN_VECTOR_BYTES - 1, 0, 0);
+ do_one_test (MIN_VECTOR_BYTES - 1, 0, 1);
+ do_one_test (MIN_VECTOR_BYTES - 1, 1, -1);
+ do_one_test (MIN_VECTOR_BYTES - 1, 1, 0);
+ do_one_test (MIN_VECTOR_BYTES - 1, 1, 1);
+
+ do_one_test (MIN_VECTOR_BYTES, 0, -1);
+ do_one_test (MIN_VECTOR_BYTES, 0, 0);
+ do_one_test (MIN_VECTOR_BYTES, 0, 1);
+ do_one_test (MIN_VECTOR_BYTES, MIN_VECTOR_BYTES - 1, -1);
+ do_one_test (MIN_VECTOR_BYTES, MIN_VECTOR_BYTES - 1, 0);
+ do_one_test (MIN_VECTOR_BYTES, MIN_VECTOR_BYTES - 1, 1);
+
+ do_one_test (MIN_VECTOR_BYTES + 1, 0, -1);
+ do_one_test (MIN_VECTOR_BYTES + 1, 0, 0);
+ do_one_test (MIN_VECTOR_BYTES + 1, 0, 1);
+ do_one_test (MIN_VECTOR_BYTES + 1, MIN_VECTOR_BYTES, -1);
+ do_one_test (MIN_VECTOR_BYTES + 1, MIN_VECTOR_BYTES, 0);
+ do_one_test (MIN_VECTOR_BYTES + 1, MIN_VECTOR_BYTES, 1);
+
+ do_one_test (MIN_VECTOR_BYTES * 8, 0, -1);
+ do_one_test (MIN_VECTOR_BYTES * 8, 0, 0);
+ do_one_test (MIN_VECTOR_BYTES * 8, 0, 1);
+ do_one_test (MIN_VECTOR_BYTES * 8, MIN_VECTOR_BYTES * 8 - 1, -1);
+ do_one_test (MIN_VECTOR_BYTES * 8, MIN_VECTOR_BYTES * 8 - 1, 0);
+ do_one_test (MIN_VECTOR_BYTES * 8, MIN_VECTOR_BYTES * 8 - 1, 1);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-3.c
new file mode 100644
index 00000000000..dfad1b96c60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-3.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 --param riscv-autovec-lmul=m1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Tiny __builtin_memcmp should use libc.
+** f1:
+** li\s+a\d,\d+
+** tail\s+memcmp
+*/
+int
+f1 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES - 1);
+}
+
+/* Vectorise+inline minimum vector register width with LMUL=1
+** f2:
+** (
+** vsetivli\s+zero,\d+,e8,m1,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m1,ta,ma
+** )
+** ...
+** ret
+*/
+int
+f2 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES);
+}
+
+/* Don't inline if the length is too large for one operation.
+** f3:
+** li\s+a2,\d+
+** tail\s+memcmp
+*/
+int
+f3 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES + 1);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-4.c
new file mode 100644
index 00000000000..55a61eae029
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/cmpmem-4.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 --param riscv-autovec-lmul=m8" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Tiny __builtin_memcmp should use libc.
+** f1:
+** li\s+a\d,\d+
+** tail\s+memcmp
+*/
+int
+f1 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES - 1);
+}
+
+/* Vectorise+inline minimum vector register width with LMUL=8 as requested
+** f2:
+** (
+** vsetivli\s+zero,\d+,e8,m8,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m8,ta,ma
+** )
+** ...
+** ret
+*/
+int
+f2 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES);
+}
+
+/* Vectorise+inline anything that fits
+** f3:
+** (
+** vsetivli\s+zero,\d+,e8,m8,ta,ma
+** |
+** li\s+a\d+,\d+
+** vsetvli\s+zero,a\d+,e8,m8,ta,ma
+** )
+** ...
+** ret
+*/
+int
+f3 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES * 8);
+}
+
+/* Don't inline if the length is too large for one operation.
+** f4:
+** li\s+a2,\d+
+** tail\s+memcmp
+*/
+int
+f4 (void *a, void *b)
+{
+ return __builtin_memcmp (a, b, MIN_VECTOR_BYTES * 8 + 1);
+}
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread