public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Robin Dapp <rdapp.gcc@gmail.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>,
	palmer <palmer@dabbelt.com>, "kito.cheng" <kito.cheng@gmail.com>,
	"Jeff Law" <jeffreyalaw@gmail.com>, 钟居哲 <juzhe.zhong@rivai.ai>
Cc: rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Add vectorized strcmp.
Date: Fri, 8 Dec 2023 17:25:31 +0100	[thread overview]
Message-ID: <6b17380b-5bd8-4399-bc5b-f90742478a8e@gmail.com> (raw)
In-Reply-To: <a02ff7fd-176a-4ea2-ab97-a731a1f69407@gmail.com>

Ah, I forgot to attach the current v2 that also enables strncmp.
It was additionally tested with -minline-strncmp on rv64gcv.

Regards
 Robin

Subject: [PATCH v2] RISC-V: Add vectorized strcmp and strncmp.

This patch adds vectorized strcmp and strncmp implementations and
tests.  Similar to strlen, expansion is still guarded by
-minline-str(n)cmp.

gcc/ChangeLog:

	PR target/112109

	* config/riscv/riscv-protos.h (expand_strcmp): Declare.
	* config/riscv/riscv-string.cc (riscv_expand_strcmp): Add
	strategy handling and delegation to scalar and vector expanders.
	(expand_strcmp): Vectorized implementation.
	* config/riscv/riscv.md (cmpstrnsi, cmpstrsi): Add TARGET_VECTOR
	to the expander conditions.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test.
	* gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test.
	* gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: New test.
	* gcc.target/riscv/rvv/autovec/builtin/strncmp.c: New test.
---
 gcc/config/riscv/riscv-protos.h               |   1 +
 gcc/config/riscv/riscv-string.cc              | 161 +++++++++++++++++-
 gcc/config/riscv/riscv.md                     |   6 +-
 .../riscv/rvv/autovec/builtin/strcmp-run.c    |  32 ++++
 .../riscv/rvv/autovec/builtin/strcmp.c        |  13 ++
 .../riscv/rvv/autovec/builtin/strncmp-run.c   | 136 +++++++++++++++
 .../riscv/rvv/autovec/builtin/strncmp.c       |  13 ++
 7 files changed, 357 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c7b5789a4b3..20bbb5b859c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *);
 void expand_cond_ternop (unsigned, rtx *);
 void expand_popcount (rtx *);
 void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
+bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
 void emit_vec_extract (rtx, rtx, poly_int64);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 6cde1bf89a0..11c1f74d0b3 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2,
     return false;
   alignment = UINTVAL (align_rtx);
 
-  if (TARGET_ZBB || TARGET_XTHEADBB)
+  if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR)
     {
-      return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
-					 ncompare);
+      bool ok = riscv_vector::expand_strcmp (result, src1, src2,
+					     bytes_rtx, alignment,
+					     ncompare);
+      if (ok)
+	return true;
     }
 
+  if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR)
+    return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment,
+				       ncompare);
+
   return false;
 }
 
@@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
     }
 }
 
+/* Implement cmpstr<mode> and, when NBYTES is non-NULL, cmpstrn<mode> using
+   vector instructions.  ALIGNMENT and NCOMPARE are unused for now.  */
+
+bool
+expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
+	       unsigned HOST_WIDE_INT, bool)
+{
+  gcc_assert (TARGET_VECTOR);
+
+  /* We don't support big endian.  */
+  if (BYTES_BIG_ENDIAN)
+    return false;
+
+  bool with_length = nbytes != NULL_RTX;
+
+  if (with_length
+      && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes)))
+    return false;
+
+  if (with_length && CONST_INT_P (nbytes))
+    nbytes = force_reg (Pmode, nbytes);
+
+  machine_mode mode = E_QImode;
+  unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
+  int lmul = TARGET_MAX_LMUL;
+  poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
+
+  machine_mode vmode;
+  if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits)
+	 .exists (&vmode))
+    gcc_unreachable ();
+
+  machine_mode mask_mode = riscv_vector::get_mask_mode (vmode);
+
+  /* Copy both string addresses into registers we can bump in the loop.  */
+  rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0));
+  rtx vsrc1 = change_address (src1, vmode, src_addr1);
+
+  rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0));
+  rtx vsrc2 = change_address (src2, vmode, src_addr2);
+
+  /* Set initial pointer bump to 0.  SUB below is the result, initially 0.  */
+  rtx cnt = gen_reg_rtx (Pmode);
+  emit_move_insn (cnt, CONST0_RTX (Pmode));
+
+  rtx sub = gen_reg_rtx (Pmode);
+  emit_move_insn (sub, CONST0_RTX (Pmode));
+
+  /* Create source vectors.  */
+  rtx vec1 = gen_reg_rtx (vmode);
+  rtx vec2 = gen_reg_rtx (vmode);
+
+  rtx done = gen_label_rtx ();
+  rtx loop = gen_label_rtx ();
+  emit_label (loop);
+
+  /* Bump the pointers by CNT: 0 on entry, afterwards the previous vl.  */
+  emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt)));
+  emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt)));
+
+  rtx vlops1[] = {vec1, vsrc1};
+  rtx vlops2[] = {vec2, vsrc2};
+
+  if (!with_length)
+    {
+      emit_vlmax_insn (code_for_pred_fault_load (vmode),
+		       riscv_vector::UNARY_OP, vlops1);
+
+      emit_vlmax_insn (code_for_pred_fault_load (vmode),
+		       riscv_vector::UNARY_OP, vlops2);
+    }
+  else
+    {
+      nbytes = gen_lowpart (Pmode, nbytes);
+      emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+			  riscv_vector::UNARY_OP, vlops1, nbytes);
+
+      emit_nonvlmax_insn (code_for_pred_fault_load (vmode),
+			  riscv_vector::UNARY_OP, vlops2, nbytes);
+    }
+
+  /* Read the vl for the next pointer bump.  */
+  if (Pmode == SImode)
+    emit_insn (gen_read_vlsi (cnt));
+  else
+    emit_insn (gen_read_vldi_zero_extend (cnt));
+
+  if (with_length)
+    {
+      rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx);
+      emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done));
+      emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt)));
+    }
+
+  /* Look for a \0 in the first string.  */
+  rtx mask0 = gen_reg_rtx (mask_mode);
+  rtx eq0
+    = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)),
+		  vec1);
+  rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)};
+  emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode),
+		      riscv_vector::COMPARE_OP, vmsops1, cnt);
+
+  /* Look for vec1 != vec2 (includes vec2[i] == 0).  */
+  rtx maskne = gen_reg_rtx (mask_mode);
+  rtx ne = gen_rtx_NE (mask_mode, vec1, vec2);
+  rtx vmsops[] = {maskne, ne, vec1, vec2};
+  emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP,
+		      vmsops, cnt);
+
+  /* Combine both masks into one.  */
+  rtx mask = gen_reg_rtx (mask_mode);
+  rtx vmorops[] = {mask, mask0, maskne};
+  emit_nonvlmax_insn (code_for_pred (IOR, mask_mode),
+		      riscv_vector::BINARY_MASK_OP, vmorops, cnt);
+
+  /* Find the first set bit in the mask (first \0 or unequal element).  */
+  rtx found_at = gen_reg_rtx (Pmode);
+  rtx vfops[] = {found_at, mask};
+  emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode),
+		      riscv_vector::CPOP_OP, vfops, cnt);
+
+  /* Loop again as long as no mask bit was found (FOUND_AT < 0).  */
+  rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx);
+  emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop));
+
+  /* Walk up to the difference point.  */
+  emit_insn (
+    gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at)));
+  emit_insn (
+    gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at)));
+
+  /* Load the respective byte and compute the difference.  */
+  rtx c1 = gen_reg_rtx (Pmode);
+  rtx c2 = gen_reg_rtx (Pmode);
+
+  do_load_from_addr (mode, c1, src_addr1, src1);
+  do_load_from_addr (mode, c2, src_addr2, src2);
+
+  do_sub3 (sub, c1, c2);
+
+  if (with_length)
+    emit_label (done);
+
+  emit_insn (gen_movsi (result, gen_lowpart (SImode, sub)));
+  return true;
+}
+
 }
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6f9dec8c152..eed997116b0 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3702,7 +3702,8 @@ (define_expand "cmpstrnsi"
 			  (match_operand:BLK 2)))
 	      (use (match_operand:SI 3))
 	      (use (match_operand:SI 4))])]
-  "riscv_inline_strncmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+  "riscv_inline_strncmp && !optimize_size
+    && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
 {
   if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
                            operands[3], operands[4]))
@@ -3722,7 +3723,8 @@ (define_expand "cmpstrsi"
 	      (compare:SI (match_operand:BLK 1)
 			  (match_operand:BLK 2)))
 	      (use (match_operand:SI 3))])]
-  "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+  "riscv_inline_strcmp && !optimize_size
+    && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
 {
   if (riscv_expand_strcmp (operands[0], operands[1], operands[2],
                            NULL_RTX, operands[3]))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
new file mode 100644
index 00000000000..6dec7da91c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+  return __builtin_strcmp (s, t);
+}
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t)
+{
+  return strcmp (s, t);
+}
+
+#define SZ 10
+
+int main ()
+{
+  const char *s[SZ]
+    = {"",  "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+       "a", "z",    "1", "9",  "12345678901234567889012345678901234567890"};
+
+  for (int i = 0; i < SZ; i++)
+    for (int j = 0; j < SZ; j++)
+      if (foo (s[i], s[j]) != foo2 (s[i], s[j]))
+        __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
new file mode 100644
index 00000000000..f9d33a74fc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strcmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+  return __builtin_strcmp (s, t);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
new file mode 100644
index 00000000000..8d1471a3a13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c
@@ -0,0 +1,136 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strncmp" } */
+
+#include <string.h>
+
+int
+__attribute__ ((noipa, optimize ("0")))
+foo2 (const char *s, const char *t, int n)
+{
+  return strncmp (s, t, n);
+}
+
+#define SZ 11
+
+#define TEST(I, J, N)                                                          \
+  int res_##I_##J_##N = __builtin_strncmp (s[I], s[J], N);                     \
+  int ref_##I_##J_##N = foo2 (s[I], s[J], N);                                  \
+  if (res_##I_##J_##N != ref_##I_##J_##N)                                      \
+    __builtin_abort ();
+
+int main ()
+{
+  const char *s[SZ]
+    = {"",  "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+       "a", "z",    "1", "9",  "12345678901234567889012345678901234567890",
+       "ds0fi0349r0sdmfvi0sjf0c9fj034mrx903cw0efmc9jfsicn2390crrm0i90msdfi0sdf0"};
+
+  for (int i = 0; i < SZ; i++)
+    for (int j = 0; j < SZ; j++)
+      {
+        TEST(i, j, 0)
+        TEST(i, j, 1)
+        TEST(i, j, 2)
+        TEST(i, j, 3)
+        TEST(i, j, 4)
+        TEST(i, j, 5)
+        TEST(i, j, 6)
+        TEST(i, j, 7)
+        TEST(i, j, 8)
+        TEST(i, j, 9)
+        TEST(i, j, 10)
+        TEST(i, j, 11)
+        TEST(i, j, 12)
+        TEST(i, j, 13)
+        TEST(i, j, 14)
+        TEST(i, j, 15)
+        TEST(i, j, 16)
+        TEST(i, j, 17)
+        TEST(i, j, 18)
+        TEST(i, j, 19)
+        TEST(i, j, 20)
+        TEST(i, j, 21)
+        TEST(i, j, 22)
+        TEST(i, j, 23)
+        TEST(i, j, 24)
+        TEST(i, j, 25)
+        TEST(i, j, 26)
+        TEST(i, j, 27)
+        TEST(i, j, 28)
+        TEST(i, j, 29)
+        TEST(i, j, 30)
+        TEST(i, j, 31)
+        TEST(i, j, 32)
+        TEST(i, j, 33)
+        TEST(i, j, 34)
+        TEST(i, j, 35)
+        TEST(i, j, 36)
+        TEST(i, j, 37)
+        TEST(i, j, 38)
+        TEST(i, j, 39)
+        TEST(i, j, 40)
+        TEST(i, j, 41)
+        TEST(i, j, 42)
+        TEST(i, j, 43)
+        TEST(i, j, 44)
+        TEST(i, j, 45)
+        TEST(i, j, 46)
+        TEST(i, j, 47)
+        TEST(i, j, 48)
+        TEST(i, j, 49)
+        TEST(i, j, 50)
+        TEST(i, j, 51)
+        TEST(i, j, 52)
+        TEST(i, j, 53)
+        TEST(i, j, 54)
+        TEST(i, j, 55)
+        TEST(i, j, 56)
+        TEST(i, j, 57)
+        TEST(i, j, 58)
+        TEST(i, j, 59)
+        TEST(i, j, 60)
+        TEST(i, j, 61)
+        TEST(i, j, 62)
+        TEST(i, j, 63)
+        TEST(i, j, 64)
+        TEST(i, j, 65)
+        TEST(i, j, 66)
+        TEST(i, j, 67)
+        TEST(i, j, 68)
+        TEST(i, j, 69)
+        TEST(i, j, 70)
+        TEST(i, j, 71)
+        TEST(i, j, 72)
+        TEST(i, j, 73)
+        TEST(i, j, 74)
+        TEST(i, j, 75)
+        TEST(i, j, 76)
+        TEST(i, j, 77)
+        TEST(i, j, 78)
+        TEST(i, j, 79)
+        TEST(i, j, 80)
+        TEST(i, j, 81)
+        TEST(i, j, 82)
+        TEST(i, j, 83)
+        TEST(i, j, 84)
+        TEST(i, j, 85)
+        TEST(i, j, 86)
+        TEST(i, j, 87)
+        TEST(i, j, 88)
+        TEST(i, j, 89)
+        TEST(i, j, 90)
+        TEST(i, j, 91)
+        TEST(i, j, 92)
+        TEST(i, j, 93)
+        TEST(i, j, 94)
+        TEST(i, j, 95)
+        TEST(i, j, 96)
+        TEST(i, j, 97)
+        TEST(i, j, 98)
+        TEST(i, j, 99)
+        TEST(i, j, 100)
+        TEST(i, j, 101)
+        TEST(i, j, 102)
+        TEST(i, j, 103)
+      }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
new file mode 100644
index 00000000000..a89633ea9d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strncmp" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s, const char *t)
+{
+  return __builtin_strncmp (s, t, 7);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 2 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
+/* { dg-final { scan-assembler-times "vmor.m" 1 } } */
-- 
2.43.0



  reply	other threads:[~2023-12-08 16:25 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-01 15:23 Robin Dapp
2023-12-01 22:58 ` 钟居哲
2023-12-08 13:20   ` Robin Dapp
2023-12-08 16:25     ` Robin Dapp [this message]
2023-12-09  1:56       ` 钟居哲
2023-12-09 13:51         ` Robin Dapp
2023-12-09 14:03           ` 钟居哲
2023-12-09 14:07             ` Robin Dapp
2023-12-09 14:17               ` 钟居哲
2023-12-11  1:44                 ` Li, Pan2
2023-12-11  8:34                   ` Robin Dapp
2023-12-11 11:33                     ` Li, Pan2
2023-12-11 13:14                       ` Robin Dapp
2023-12-11 13:36                         ` Li, Pan2
2023-12-11 13:41                           ` Robin Dapp

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6b17380b-5bd8-4399-bc5b-f90742478a8e@gmail.com \
    --to=rdapp.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jeffreyalaw@gmail.com \
    --cc=juzhe.zhong@rivai.ai \
    --cc=kito.cheng@gmail.com \
    --cc=palmer@dabbelt.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).