public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for str(n)cmp inline expansion
@ 2022-12-01 13:24 Philipp Tomsich
  0 siblings, 0 replies; 2+ messages in thread
From: Philipp Tomsich @ 2022-12-01 13:24 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:64014fe4975b7950b5d27a882bc0254838ab7266

commit 64014fe4975b7950b5d27a882bc0254838ab7266
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:18 2022 +0200

    riscv: Add support for str(n)cmp inline expansion
    
    This patch implements expansions for the cmpstrsi and the cmpstrnsi
    builtins using Zbb instructions (if available).
    This allows calls to strcmp() and strncmp() to be inlined.
    
    The expansion basically emits a peeled comparison sequence (i.e. a peeled
    comparison loop) which compares XLEN bits per step if possible.
    
    The emitted sequence can be controlled by setting the maximum number
    of compared bytes (-mstring-compare-inline-limit).
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strn_compare): New
              prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER3): New helper
              macros.
            (GEN_EMIT_HELPER2): New helper macros.
            (expand_strncmp_zbb_sequence): New function.
            (riscv_emit_str_compare_zbb): New function.
            (riscv_expand_strn_compare): New function.
            * config/riscv/riscv.md (cmpstrnsi): Invoke expansion functions
              for strn_compare.
            (cmpstrsi): Invoke expansion functions for strn_compare.
            * config/riscv/riscv.opt: Add new parameter
              '-mstring-compare-inline-limit'.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 333 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  48 +++
 gcc/config/riscv/riscv.opt                         |   5 +
 gcc/doc/invoke.texi                                |   8 +
 .../gcc.target/riscv/zbb-strcmp-unaligned.c        |  36 +++
 gcc/testsuite/gcc.target/riscv/zbb-strcmp.c        |  55 ++++
 7 files changed, 486 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a08b6aa5ba6..e903a6b2fe6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -98,6 +98,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
+extern bool riscv_expand_strn_compare (rtx, rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index bfb819b35f5..02719ecc048 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -74,6 +74,11 @@ GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
 GEN_EMIT_HELPER2(clz) /* do_clz2  */
 GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
 GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+GEN_EMIT_HELPER3(xor) /* do_xor3  */
+GEN_EMIT_HELPER3(ashl) /* do_ashl3  */
+GEN_EMIT_HELPER2(bswap) /* do_bswap2  */
+GEN_EMIT_HELPER3(riscv_ior_not) /* do_riscv_ior_not3  */
+GEN_EMIT_HELPER3(riscv_and_not) /* do_riscv_and_not3  */
 
 #undef GEN_EMIT_HELPER2
 #undef GEN_EMIT_HELPER3
@@ -261,6 +266,334 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
   return false;
 }
 
+/* Generate the sequence of compares for strcmp/strncmp using zbb instructions.
+   BYTES_TO_COMPARE is the number of bytes to be compared.
+   SRC1 and SRC2 are the unmodified rtx for the first and second string.
+   DATA1 and DATA2 are the registers for loading the first and second string.
+   TARGET is the rtx for the result register (SImode)
+   ORC is the register receiving the orc.b result of DATA1.
+   EQUALITY_COMPARE_REST if set, then we hand over to libc if string matches.
+   END_LABEL is the location before the calculation of the result value.
+   FINAL_LABEL is the location after the calculation of the result value.
+   NOTE(review): a trailing partial word (2 or more bytes) always jumps to
+   END_LABEL, even if EQUALITY_COMPARE_REST is set; callers must not rely
+   on the libc hand-over in that case.  */
+
+static void
+expand_strncmp_zbb_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+			     rtx src1, rtx src2, rtx data1, rtx data2,
+			     rtx target, rtx orc, bool equality_compare_rest,
+			     rtx end_label, rtx final_label)
+{
+  const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode);
+  rtx src1_addr = force_reg (Pmode, XEXP (src1, 0));
+  rtx src2_addr = force_reg (Pmode, XEXP (src2, 0));
+  unsigned HOST_WIDE_INT offset = 0;
+
+  rtx m1 = gen_reg_rtx (Pmode);
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+
+  /* Generate a compare sequence.  */
+  while (bytes_to_compare > 0)
+    {
+      machine_mode load_mode = QImode;
+      unsigned HOST_WIDE_INT load_mode_size = 1;
+      if (bytes_to_compare > 1)
+	{
+	  load_mode = Pmode;
+	  load_mode_size = p_mode_size;
+	}
+      unsigned HOST_WIDE_INT cmp_bytes = 0;
+
+      if (bytes_to_compare >= load_mode_size)
+	cmp_bytes = load_mode_size;
+      else
+	cmp_bytes = bytes_to_compare;
+
+      unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+      /* load_mode_size...bytes we will read
+	 cmp_bytes...bytes we will compare (might be less than load_mode_size)
+	 bytes_to_compare...bytes we will compare (incl. cmp_bytes)
+	 remain...bytes left to compare (excl. cmp_bytes)  */
+
+      rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+      rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+
+      do_load_from_addr (load_mode, data1, addr1, src1);
+      do_load_from_addr (load_mode, data2, addr2, src2);
+
+      if (load_mode_size == 1)
+	{
+	  /* Special case for comparing just single (last) byte.  */
+	  gcc_assert (remain == 0);
+
+	  if (!equality_compare_rest)
+	    {
+	      /* Calculate difference and jump to final_label.  */
+	      rtx result = gen_reg_rtx (Pmode);
+	      do_sub3 (result, data1, data2);
+	      emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+	      emit_jump_insn (gen_jump (final_label));
+	    }
+	  else
+	    {
+	      /* Compare both bytes and jump to final_label if not equal.  */
+	      rtx result = gen_reg_rtx (Pmode);
+	      do_sub3 (result, data1, data2);
+	      emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+	      /* Check if str1[i] is NUL.  */
+	      rtx cond1 = gen_rtx_EQ (VOIDmode, data1, const0_rtx);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond1,
+				       data1, const0_rtx, final_label));
+	      /* Check if str1[i] != str2[i].  */
+	      rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond2,
+				       data1, data2, final_label));
+	      /* Processing will fall through to libc calls.  */
+	    }
+	}
+      else
+	{
+	  /* Eliminate irrelevant data (behind the N-th character).  */
+	  if (bytes_to_compare < p_mode_size)
+	    {
+	      gcc_assert (remain == 0);
+	      /* Set a NUL-byte after the relevant data (behind the string).  */
+	      unsigned long im = 0xffUL;
+	      rtx imask = gen_rtx_CONST_INT (Pmode, im);
+	      rtx m_reg = gen_reg_rtx (Pmode);
+	      emit_insn (gen_rtx_SET (m_reg, imask));
+	      do_ashl3 (m_reg, m_reg, GEN_INT (cmp_bytes * BITS_PER_UNIT));
+	      do_riscv_and_not3 (data1, m_reg, data1);
+	      do_riscv_and_not3 (data2, m_reg, data2);
+	      do_orcb2 (orc, data1);
+	      emit_jump_insn (gen_jump (end_label));
+	    }
+	  else
+	    {
+	      /* Check if data1 contains a NUL character.  */
+	      do_orcb2 (orc, data1);
+	      rtx cond1 = gen_rtx_NE (VOIDmode, orc, m1);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond1, orc, m1,
+						     end_label));
+
+	      /* Break out if data1 != data2.  */
+	      rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond2, data1,
+						     data2, end_label));
+
+	      /* Fast-exit for complete and equal strings.  */
+	      if (remain == 0 && !equality_compare_rest)
+		{
+		  /* All compared and everything was equal.  */
+		  emit_insn (gen_rtx_SET (target, gen_rtx_CONST_INT (SImode, 0)));
+		  emit_jump_insn (gen_jump (final_label));
+		}
+	    }
+	}
+
+      offset += cmp_bytes;
+      bytes_to_compare -= cmp_bytes;
+    }
+  /* Processing will fall through to libc calls.  */
+}
+
+/* Emit a string comparison sequence using Zbb instructions.
+
+   TARGET is the result register (SImode).
+   SRC1 and SRC2 are the memory references of the two strings.
+   LENGTH is the constant strncmp length, or zero for strcmp.
+   BYTES_TO_COMPARE is the maximum number of bytes to compare inline.
+   EQUALITY_COMPARE_REST defines if str(n)cmp should be called on equality.
+   ALIGNMENT is the known alignment of the strings in bytes.
+
+   Return true if a sequence was emitted, or false otherwise.  */
+
+static bool
+riscv_emit_str_compare_zbb (rtx target, rtx src1, rtx src2,
+			    unsigned HOST_WIDE_INT length,
+			    unsigned HOST_WIDE_INT bytes_to_compare,
+			    bool equality_compare_rest,
+			    unsigned HOST_WIDE_INT alignment)
+{
+  const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode);
+
+  gcc_assert (TARGET_ZBB);
+
+  /* A trailing partial word of 2 or more bytes is compared as if the strings
+     ended there, so drop it if libc has to continue the comparison.  */
+  if (equality_compare_rest && bytes_to_compare % p_mode_size > 1)
+    bytes_to_compare -= bytes_to_compare % p_mode_size;
+
+  /* Enable only if we can access at least one XLEN-register.  */
+  if (bytes_to_compare < p_mode_size)
+    return false;
+
+  /* Limit to 12-bits (maximum load-offset).  */
+  if (bytes_to_compare > IMM_REACH)
+    return false;
+
+  /* We don't support big endian.  */
+  if (BYTES_BIG_ENDIAN)
+    return false;
+
+  /* We need aligned strings.  */
+  if (alignment < p_mode_size)
+    return false;
+
+  rtx data1 = gen_reg_rtx (Pmode);
+  rtx data2 = gen_reg_rtx (Pmode);
+  rtx orc = gen_reg_rtx (Pmode);
+  rtx end_label = gen_label_rtx ();
+  rtx final_label = gen_label_rtx ();
+
+  /* Generate the zbb compare sequence for BYTES_TO_COMPARE bytes.  */
+  expand_strncmp_zbb_sequence (bytes_to_compare, src1, src2, data1, data2,
+			       target, orc, equality_compare_rest,
+			       end_label, final_label);
+
+  if (equality_compare_rest)
+    {
+      /* Update pointers past what has been compared already.  */
+      rtx src1_addr = force_reg (Pmode, XEXP (src1, 0));
+      rtx src2_addr = force_reg (Pmode, XEXP (src2, 0));
+      unsigned HOST_WIDE_INT offset = bytes_to_compare;
+      rtx ptr1 = force_reg (Pmode,
+			    gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
+      rtx ptr2 = force_reg (Pmode,
+			    gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
+
+      /* Construct call to strcmp/strncmp to compare the rest of the string.  */
+      if (length == 0)
+	{
+	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
+	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+				   target, LCT_NORMAL, GET_MODE (target),
+				   ptr1, Pmode, ptr2, Pmode);
+	}
+      else
+	{
+	  unsigned HOST_WIDE_INT delta = length - bytes_to_compare;
+	  gcc_assert (delta > 0);
+	  rtx len_rtx = gen_reg_rtx (Pmode);
+	  emit_move_insn (len_rtx, gen_int_mode (delta, Pmode));
+	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
+	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+				   target, LCT_NORMAL, GET_MODE (target),
+				   ptr1, Pmode, ptr2, Pmode, len_rtx, Pmode);
+	}
+
+      emit_jump_insn (gen_jump (final_label));
+    }
+
+  emit_barrier (); /* No fall-through.  */
+
+  emit_label (end_label);
+
+  /* Convert non-equal bytes into non-NUL bytes.  */
+  rtx diff = gen_reg_rtx (Pmode);
+  do_xor3 (diff, data1, data2);
+  do_orcb2 (diff, diff);
+
+  /* Convert non-equal or NUL-bytes into non-NUL bytes.  */
+  rtx syndrome = gen_reg_rtx (Pmode);
+  do_riscv_ior_not3 (syndrome, orc, diff);
+
+  /* Count the number of equal bits from the beginning of the word.  */
+  rtx shift = gen_reg_rtx (Pmode);
+  do_ctz2 (shift, syndrome);
+
+  do_bswap2 (data1, data1);
+  do_bswap2 (data2, data2);
+
+  /* SHIFT is the bit offset of the first byte that differs or is NUL.
+     After the byte swap, shifting left by SHIFT moves that byte into
+     the top bits.  */
+  do_ashl3 (data1, data1, gen_lowpart (QImode, shift));
+  do_ashl3 (data2, data2, gen_lowpart (QImode, shift));
+
+  /* But we need to zero-extend (char is unsigned) the value and then
+     perform a signed 32-bit subtraction.  */
+  unsigned int shiftr = p_mode_size * BITS_PER_UNIT - 8;
+  do_lshr3 (data1, data1, GEN_INT (shiftr));
+  do_lshr3 (data2, data2, GEN_INT (shiftr));
+
+  rtx result = gen_reg_rtx (Pmode);
+  do_sub3 (result, data1, data2);
+  emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+
+  /* And we are done.  */
+  emit_label (final_label);
+  return true;
+}
+
+/* Expand a string compare operation (strcmp or strncmp).
+
+   TARGET receives the result of the comparison.
+   SRC1 and SRC2 reference the two strings.
+   BYTES_RTX holds the number of characters to compare, or is NULL_RTX
+   if there is no length limit.
+   ALIGN_RTX holds the alignment.
+
+   At most -mstring-compare-inline-limit bytes are compared inline; if
+   more bytes may have to be compared, the emitted code hands over to
+   libc.
+
+   Return true if the expansion was successful, or false otherwise.  */
+
+bool
+riscv_expand_strn_compare (rtx target, rtx src1, rtx src2,
+			   rtx bytes_rtx, rtx align_rtx)
+{
+  const unsigned HOST_WIDE_INT limit = riscv_string_compare_inline_limit;
+
+  /* A limit of zero disables the inline expansion.  */
+  if (limit == 0)
+    return false;
+
+  /* If we have a length, it must be constant.  A missing length (strcmp)
+     is represented as zero.  */
+  unsigned HOST_WIDE_INT length = 0;
+  if (bytes_rtx != NULL_RTX)
+    {
+      if (!CONST_INT_P (bytes_rtx))
+	return false;
+      length = UINTVAL (bytes_rtx);
+    }
+
+  /* The alignment must be constant as well.  */
+  if (!CONST_INT_P (align_rtx))
+    return false;
+  const unsigned HOST_WIDE_INT alignment = UINTVAL (align_rtx);
+
+  /* Decide how many bytes to compare inline and what to do if there is
+     no difference detected at the end of the compared bytes.  Only a
+     length that fits into the limit is compared completely inline;
+     otherwise libc continues the comparison.  */
+  unsigned HOST_WIDE_INT bytes_to_compare;
+  bool equality_compare_rest;
+  if (bytes_rtx != NULL_RTX && length <= limit)
+    {
+      bytes_to_compare = length;
+      equality_compare_rest = false;
+    }
+  else
+    {
+      bytes_to_compare = limit;
+      equality_compare_rest = true;
+    }
+
+  /* Only the Zbb expansion is implemented.  */
+  if (!TARGET_ZBB)
+    return false;
+
+  return riscv_emit_str_compare_zbb (target, src1, src2,
+				     length, bytes_to_compare,
+				     equality_compare_rest,
+				     alignment);
+}
+
 /* If the provided string is aligned, then read XLEN bytes
    in a loop and use orc.b to find NUL-bytes.  */
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 15581c130ba..513ca8ee6f3 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3231,6 +3231,54 @@
     FAIL;
 })
 
+;; String compare N insn.
+;; Argument 0 is the target (result)
+;; Argument 1 is the source1
+;; Argument 2 is the source2
+;; Argument 3 is the length
+;; Argument 4 is the alignment
+
+(define_expand "cmpstrnsi"
+  [(parallel [(set (match_operand:SI 0)
+	      (compare:SI (match_operand:BLK 1)
+			  (match_operand:BLK 2)))
+	      (use (match_operand:SI 3))
+	      (use (match_operand:SI 4))])]
+  ""
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  if (riscv_expand_strn_compare (operands[0], operands[1], operands[2],
+				 operands[3], operands[4]))
+    DONE;
+  else
+    FAIL;
+})
+
+;; String compare insn.
+;; Argument 0 is the target (result)
+;; Argument 1 is the source1
+;; Argument 2 is the source2
+;; Argument 3 is the alignment
+
+(define_expand "cmpstrsi"
+  [(parallel [(set (match_operand:SI 0)
+	      (compare:SI (match_operand:BLK 1)
+			  (match_operand:BLK 2)))
+	      (use (match_operand:SI 3))])]
+  ""
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  if (riscv_expand_strn_compare (operands[0], operands[1], operands[2],
+				 NULL_RTX, operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 ;; Search character in string (generalization of strlen).
 ;; Argument 0 is the resulting offset
 ;; Argument 1 is the string
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 9595078bdd4..43dff64fb3a 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -252,3 +252,8 @@ Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213)
 misa-spec=
 Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC)
 Set the version of RISC-V ISA spec.
+
+mstring-compare-inline-limit=
+Target Var(riscv_string_compare_inline_limit) Init(64) RejectNegative Joined UInteger Save
+Max number of bytes to compare.
+
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 88a5f7aa069..a4695454158 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1213,6 +1213,7 @@ See RS/6000 and PowerPC Options.
 -msave-restore  -mno-save-restore @gol
 -mshorten-memrefs  -mno-shorten-memrefs @gol
 -mstrict-align  -mno-strict-align @gol
+-mstring-compare-inline-limit=@var{num} @gol
 -mcmodel=medlow  -mcmodel=medany @gol
 -mexplicit-relocs  -mno-explicit-relocs @gol
 -mrelax  -mno-relax @gol
@@ -28653,6 +28654,13 @@ Do not or do generate unaligned memory accesses.  The default is set depending
 on whether the processor we are optimizing for supports fast unaligned access
 or not.
 
+@item -mstring-compare-inline-limit=@var{num}
+@opindex mstring-compare-inline-limit
+Compare at most @var{num} string bytes with inline code.
+If neither a difference nor the end of a string is found by the end of the
+inline comparison, a call to @code{strcmp} or @code{strncmp} will take care
+of the rest of the comparison.  The default is 64 bytes.
+
 @item -mcmodel=medlow
 @opindex mcmodel=medlow
 Generate code for the medium-low code model. The program and its statically
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c
new file mode 100644
index 00000000000..2126c849e0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -mstring-compare-inline-limit=64" } */
+
+typedef long unsigned int size_t;
+
+int
+my_str_cmp (const char *s1, const char *s2)
+{
+  return __builtin_strcmp (s1, s2);
+}
+
+int
+my_str_cmp_const (const char *s1)
+{
+  return __builtin_strcmp (s1, "foo");
+}
+
+int
+my_strn_cmp (const char *s1, const char *s2, size_t n)
+{
+  return __builtin_strncmp (s1, s2, n);
+}
+
+int
+my_strn_cmp_const (const char *s1, size_t n)
+{
+  return __builtin_strncmp (s1, "foo", n);
+}
+
+int
+my_strn_cmp_bounded (const char *s1, const char *s2)
+{
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c
new file mode 100644
index 00000000000..3465e7ffee3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -mstring-compare-inline-limit=64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+typedef long unsigned int size_t;
+
+/* Emits 8+1 orc.b instructions.  */
+
+int
+my_str_cmp (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strcmp (s1, s2);
+}
+
+/* 8+1 because the backend does not know the size of "foo".  */
+
+int
+my_str_cmp_const (const char *s1)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  return __builtin_strcmp (s1, "foo");
+}
+
+/* Emits 6+1 orc.b instructions.  */
+
+int
+my_strn_cmp (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* Not expanded because the backend does not know the size of "foo".  */
+
+int
+my_strn_cmp_const (const char *s1, size_t n)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  return __builtin_strncmp (s1, "foo", n);
+}
+
+/* Emits 6+1 orc.b instructions.  */
+
+int
+my_strn_cmp_bounded (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* { dg-final { scan-assembler-times "orc.b\t" 32 } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for str(n)cmp inline expansion
@ 2022-11-18 23:19 Philipp Tomsich
  0 siblings, 0 replies; 2+ messages in thread
From: Philipp Tomsich @ 2022-11-18 23:19 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c4548b8e64ce546a56f1dae6bc7c61f9e1e86e19

commit c4548b8e64ce546a56f1dae6bc7c61f9e1e86e19
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:18 2022 +0200

    riscv: Add support for str(n)cmp inline expansion
    
    This patch implements expansions for the cmpstrsi and the cmpstrnsi
    builtins using Zbb instructions (if available).
    This allows calls to strcmp() and strncmp() to be inlined.
    
    The expansion basically emits a peeled comparison sequence (i.e. a peeled
    comparison loop) which compares XLEN bits per step if possible.
    
    The emitted sequence can be controlled by setting the maximum number
    of compared bytes (-mstring-compare-inline-limit).
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strn_compare): New
              prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER3): New helper
              macros.
            (GEN_EMIT_HELPER2): New helper macros.
            (expand_strncmp_zbb_sequence): New function.
            (riscv_emit_str_compare_zbb): New function.
            (riscv_expand_strn_compare): New function.
            * config/riscv/riscv.md (cmpstrnsi): Invoke expansion functions
              for strn_compare.
            (cmpstrsi): Invoke expansion functions for strn_compare.
            * config/riscv/riscv.opt: Add new parameter
              '-mstring-compare-inline-limit'.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 333 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  48 +++
 gcc/config/riscv/riscv.opt                         |   5 +
 gcc/doc/invoke.texi                                |   8 +
 .../gcc.target/riscv/zbb-strcmp-unaligned.c        |  36 +++
 gcc/testsuite/gcc.target/riscv/zbb-strcmp.c        |  55 ++++
 7 files changed, 486 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a08b6aa5ba6..e903a6b2fe6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -98,6 +98,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
+extern bool riscv_expand_strn_compare (rtx, rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index bfb819b35f5..02719ecc048 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -74,6 +74,11 @@ GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
 GEN_EMIT_HELPER2(clz) /* do_clz2  */
 GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
 GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+GEN_EMIT_HELPER3(xor) /* do_xor3  */
+GEN_EMIT_HELPER3(ashl) /* do_ashl3  */
+GEN_EMIT_HELPER2(bswap) /* do_bswap2  */
+GEN_EMIT_HELPER3(riscv_ior_not) /* do_riscv_ior_not3  */
+GEN_EMIT_HELPER3(riscv_and_not) /* do_riscv_and_not3  */
 
 #undef GEN_EMIT_HELPER2
 #undef GEN_EMIT_HELPER3
@@ -261,6 +266,334 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
   return false;
 }
 
+/* Generate the sequence of compares for strcmp/strncmp using zbb instructions.
+   BYTES_TO_COMPARE is the number of bytes to be compared.
+   SRC1 and SRC2 are the unmodified rtx for the first and second string.
+   DATA1 and DATA2 are the registers for loading the first and second string.
+   TARGET is the rtx for the result register (SImode)
+   ORC is the register receiving the orc.b result of DATA1.
+   EQUALITY_COMPARE_REST if set, then we hand over to libc if string matches.
+   END_LABEL is the location before the calculation of the result value.
+   FINAL_LABEL is the location after the calculation of the result value.
+   NOTE(review): a trailing partial word (2 or more bytes) always jumps to
+   END_LABEL, even if EQUALITY_COMPARE_REST is set; callers must not rely
+   on the libc hand-over in that case.  */
+
+static void
+expand_strncmp_zbb_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
+			     rtx src1, rtx src2, rtx data1, rtx data2,
+			     rtx target, rtx orc, bool equality_compare_rest,
+			     rtx end_label, rtx final_label)
+{
+  const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode);
+  rtx src1_addr = force_reg (Pmode, XEXP (src1, 0));
+  rtx src2_addr = force_reg (Pmode, XEXP (src2, 0));
+  unsigned HOST_WIDE_INT offset = 0;
+
+  rtx m1 = gen_reg_rtx (Pmode);
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+
+  /* Generate a compare sequence.  */
+  while (bytes_to_compare > 0)
+    {
+      machine_mode load_mode = QImode;
+      unsigned HOST_WIDE_INT load_mode_size = 1;
+      if (bytes_to_compare > 1)
+	{
+	  load_mode = Pmode;
+	  load_mode_size = p_mode_size;
+	}
+      unsigned HOST_WIDE_INT cmp_bytes = 0;
+
+      if (bytes_to_compare >= load_mode_size)
+	cmp_bytes = load_mode_size;
+      else
+	cmp_bytes = bytes_to_compare;
+
+      unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+      /* load_mode_size...bytes we will read
+	 cmp_bytes...bytes we will compare (might be less than load_mode_size)
+	 bytes_to_compare...bytes we will compare (incl. cmp_bytes)
+	 remain...bytes left to compare (excl. cmp_bytes)  */
+
+      rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+      rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+
+      do_load_from_addr (load_mode, data1, addr1, src1);
+      do_load_from_addr (load_mode, data2, addr2, src2);
+
+      if (load_mode_size == 1)
+	{
+	  /* Special case for comparing just single (last) byte.  */
+	  gcc_assert (remain == 0);
+
+	  if (!equality_compare_rest)
+	    {
+	      /* Calculate difference and jump to final_label.  */
+	      rtx result = gen_reg_rtx (Pmode);
+	      do_sub3 (result, data1, data2);
+	      emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+	      emit_jump_insn (gen_jump (final_label));
+	    }
+	  else
+	    {
+	      /* Compare both bytes and jump to final_label if not equal.  */
+	      rtx result = gen_reg_rtx (Pmode);
+	      do_sub3 (result, data1, data2);
+	      emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+	      /* Check if str1[i] is NUL.  */
+	      rtx cond1 = gen_rtx_EQ (VOIDmode, data1, const0_rtx);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond1,
+				       data1, const0_rtx, final_label));
+	      /* Check if str1[i] != str2[i].  */
+	      rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond2,
+				       data1, data2, final_label));
+	      /* Processing will fall through to libc calls.  */
+	    }
+	}
+      else
+	{
+	  /* Eliminate irrelevant data (behind the N-th character).  */
+	  if (bytes_to_compare < p_mode_size)
+	    {
+	      gcc_assert (remain == 0);
+	      /* Set a NUL-byte after the relevant data (behind the string).  */
+	      unsigned long im = 0xffUL;
+	      rtx imask = gen_rtx_CONST_INT (Pmode, im);
+	      rtx m_reg = gen_reg_rtx (Pmode);
+	      emit_insn (gen_rtx_SET (m_reg, imask));
+	      do_ashl3 (m_reg, m_reg, GEN_INT (cmp_bytes * BITS_PER_UNIT));
+	      do_riscv_and_not3 (data1, m_reg, data1);
+	      do_riscv_and_not3 (data2, m_reg, data2);
+	      do_orcb2 (orc, data1);
+	      emit_jump_insn (gen_jump (end_label));
+	    }
+	  else
+	    {
+	      /* Check if data1 contains a NUL character.  */
+	      do_orcb2 (orc, data1);
+	      rtx cond1 = gen_rtx_NE (VOIDmode, orc, m1);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond1, orc, m1,
+						     end_label));
+
+	      /* Break out if data1 != data2.  */
+	      rtx cond2 = gen_rtx_NE (VOIDmode, data1, data2);
+	      emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond2, data1,
+						     data2, end_label));
+
+	      /* Fast-exit for complete and equal strings.  */
+	      if (remain == 0 && !equality_compare_rest)
+		{
+		  /* All compared and everything was equal.  */
+		  emit_insn (gen_rtx_SET (target, gen_rtx_CONST_INT (SImode, 0)));
+		  emit_jump_insn (gen_jump (final_label));
+		}
+	    }
+	}
+
+      offset += cmp_bytes;
+      bytes_to_compare -= cmp_bytes;
+    }
+  /* Processing will fall through to libc calls.  */
+}
+
+/* Emit a string comparison sequence using Zbb instructions.
+
+   TARGET is the result register (SImode).
+   SRC1 and SRC2 are the memory references of the two strings.
+   LENGTH is the constant strncmp length, or zero for strcmp.
+   BYTES_TO_COMPARE is the maximum number of bytes to compare inline.
+   EQUALITY_COMPARE_REST defines if str(n)cmp should be called on equality.
+   ALIGNMENT is the known alignment of the strings in bytes.
+
+   Return true if a sequence was emitted, or false otherwise.  */
+
+static bool
+riscv_emit_str_compare_zbb (rtx target, rtx src1, rtx src2,
+			    unsigned HOST_WIDE_INT length,
+			    unsigned HOST_WIDE_INT bytes_to_compare,
+			    bool equality_compare_rest,
+			    unsigned HOST_WIDE_INT alignment)
+{
+  const unsigned HOST_WIDE_INT p_mode_size = GET_MODE_SIZE (Pmode);
+
+  gcc_assert (TARGET_ZBB);
+
+  /* A trailing partial word of 2 or more bytes is compared as if the strings
+     ended there, so drop it if libc has to continue the comparison.  */
+  if (equality_compare_rest && bytes_to_compare % p_mode_size > 1)
+    bytes_to_compare -= bytes_to_compare % p_mode_size;
+
+  /* Enable only if we can access at least one XLEN-register.  */
+  if (bytes_to_compare < p_mode_size)
+    return false;
+
+  /* Limit to 12-bits (maximum load-offset).  */
+  if (bytes_to_compare > IMM_REACH)
+    return false;
+
+  /* We don't support big endian.  */
+  if (BYTES_BIG_ENDIAN)
+    return false;
+
+  /* We need aligned strings.  */
+  if (alignment < p_mode_size)
+    return false;
+
+  rtx data1 = gen_reg_rtx (Pmode);
+  rtx data2 = gen_reg_rtx (Pmode);
+  rtx orc = gen_reg_rtx (Pmode);
+  rtx end_label = gen_label_rtx ();
+  rtx final_label = gen_label_rtx ();
+
+  /* Generate the zbb compare sequence for BYTES_TO_COMPARE bytes.  */
+  expand_strncmp_zbb_sequence (bytes_to_compare, src1, src2, data1, data2,
+			       target, orc, equality_compare_rest,
+			       end_label, final_label);
+
+  if (equality_compare_rest)
+    {
+      /* Update pointers past what has been compared already.  */
+      rtx src1_addr = force_reg (Pmode, XEXP (src1, 0));
+      rtx src2_addr = force_reg (Pmode, XEXP (src2, 0));
+      unsigned HOST_WIDE_INT offset = bytes_to_compare;
+      rtx ptr1 = force_reg (Pmode,
+			    gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
+      rtx ptr2 = force_reg (Pmode,
+			    gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
+
+      /* Construct call to strcmp/strncmp to compare the rest of the string.  */
+      if (length == 0)
+	{
+	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
+	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+				   target, LCT_NORMAL, GET_MODE (target),
+				   ptr1, Pmode, ptr2, Pmode);
+	}
+      else
+	{
+	  unsigned HOST_WIDE_INT delta = length - bytes_to_compare;
+	  gcc_assert (delta > 0);
+	  rtx len_rtx = gen_reg_rtx (Pmode);
+	  emit_move_insn (len_rtx, gen_int_mode (delta, Pmode));
+	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
+	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
+				   target, LCT_NORMAL, GET_MODE (target),
+				   ptr1, Pmode, ptr2, Pmode, len_rtx, Pmode);
+	}
+
+      emit_jump_insn (gen_jump (final_label));
+    }
+
+  emit_barrier (); /* No fall-through.  */
+
+  emit_label (end_label);
+
+  /* Convert non-equal bytes into non-NUL bytes.  */
+  rtx diff = gen_reg_rtx (Pmode);
+  do_xor3 (diff, data1, data2);
+  do_orcb2 (diff, diff);
+
+  /* Convert non-equal or NUL-bytes into non-NUL bytes.  */
+  rtx syndrome = gen_reg_rtx (Pmode);
+  do_riscv_ior_not3 (syndrome, orc, diff);
+
+  /* Count the number of equal bits from the beginning of the word.  */
+  rtx shift = gen_reg_rtx (Pmode);
+  do_ctz2 (shift, syndrome);
+
+  do_bswap2 (data1, data1);
+  do_bswap2 (data2, data2);
+
+  /* SHIFT is the bit offset of the first byte that differs or is NUL.
+     After the byte swap, shifting left by SHIFT moves that byte into
+     the top bits.  */
+  do_ashl3 (data1, data1, gen_lowpart (QImode, shift));
+  do_ashl3 (data2, data2, gen_lowpart (QImode, shift));
+
+  /* But we need to zero-extend (char is unsigned) the value and then
+     perform a signed 32-bit subtraction.  */
+  unsigned int shiftr = p_mode_size * BITS_PER_UNIT - 8;
+  do_lshr3 (data1, data1, GEN_INT (shiftr));
+  do_lshr3 (data2, data2, GEN_INT (shiftr));
+
+  rtx result = gen_reg_rtx (Pmode);
+  do_sub3 (result, data1, data2);
+  emit_insn (gen_movsi (target, gen_lowpart (SImode, result)));
+
+  /* And we are done.  */
+  emit_label (final_label);
+  return true;
+}
+
+/* Expand a str(n)cmp operation inline.
+
+   TARGET receives the result of the comparison.
+   SRC1 and SRC2 reference the two strings.
+   BYTES_RTX holds the number of characters to compare (strncmp), or is
+   NULL_RTX if there is no such bound (strcmp).
+   ALIGN_RTX holds the alignment.
+   Non-constant BYTES_RTX or ALIGN_RTX operands are rejected.
+
+   At most riscv_string_compare_inline_limit bytes are compared inline
+   (this limit is set by -mstring-compare-inline-limit).
+   If that does not cover the whole comparison, the expansion is asked
+   to continue with a call to str(n)cmp.
+
+   Return true if the expansion was successful, or false otherwise.  */
+
+bool
+riscv_expand_strn_compare (rtx target, rtx src1, rtx src2,
+			   rtx bytes_rtx, rtx align_rtx)
+{
+  const unsigned HOST_WIDE_INT inline_limit
+    = riscv_string_compare_inline_limit;
+
+  /* A limit of zero turns the inline expansion off.  */
+  if (inline_limit == 0)
+    return false;
+
+  /* Start with the settings for a comparison without a length bound:
+     LENGTH stays 0, INLINE_LIMIT bytes are compared inline, and libc
+     might be called to continue if no difference was detected.  */
+  unsigned HOST_WIDE_INT length = 0;
+  unsigned HOST_WIDE_INT bytes_to_compare = inline_limit;
+  bool equality_compare_rest = true;
+
+  if (bytes_rtx != NULL_RTX)
+    {
+      /* A length bound is only supported if it is constant.  */
+      if (!CONST_INT_P (bytes_rtx))
+	return false;
+      length = UINTVAL (bytes_rtx);
+
+      /* A bound within the limit is handled completely inline.
+	 A larger bound keeps the settings from above.  */
+      if (length <= inline_limit)
+	{
+	  bytes_to_compare = length;
+	  equality_compare_rest = false;
+	}
+    }
+
+  /* The alignment has to be constant as well.  */
+  if (!CONST_INT_P (align_rtx))
+    return false;
+  const unsigned HOST_WIDE_INT alignment = UINTVAL (align_rtx);
+
+  /* Zbb is required for the expansion.  */
+  if (!TARGET_ZBB)
+    return false;
+
+  /* The Zbb expansion applies further restrictions of its own.  */
+  return riscv_emit_str_compare_zbb (target, src1, src2,
+				     length, bytes_to_compare,
+				     equality_compare_rest,
+				     alignment);
+}
+
 /* If the provided string is aligned, then read XLEN bytes
    in a loop and use orc.b to find NUL-bytes.  */
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 15581c130ba..513ca8ee6f3 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3231,6 +3231,54 @@
     FAIL;
 })
 
+;; Expander for a string comparison with a length bound (cmpstrnsi).
+;; Operand 0 is the target (result)
+;; Operand 1 is the first string
+;; Operand 2 is the second string
+;; Operand 3 is the length
+;; Operand 4 is the alignment
+
+(define_expand "cmpstrnsi"
+  [(parallel [(set (match_operand:SI 0)
+	      (compare:SI (match_operand:BLK 1)
+			  (match_operand:BLK 2)))
+	      (use (match_operand:SI 3))
+	      (use (match_operand:SI 4))])]
+  ""
+{
+  /* No inline expansion when optimizing for size, or when the backend
+     declines to expand this comparison.  */
+  if (optimize_insn_for_size_p ()
+      || !riscv_expand_strn_compare (operands[0], operands[1], operands[2],
+				     operands[3], operands[4]))
+    FAIL;
+
+  DONE;
+})
+
+;; Expander for a string comparison without a length bound (cmpstrsi).
+;; Operand 0 is the target (result)
+;; Operand 1 is the first string
+;; Operand 2 is the second string
+;; Operand 3 is the alignment
+
+(define_expand "cmpstrsi"
+  [(parallel [(set (match_operand:SI 0)
+	      (compare:SI (match_operand:BLK 1)
+			  (match_operand:BLK 2)))
+	      (use (match_operand:SI 3))])]
+  ""
+{
+  /* No inline expansion when optimizing for size, or when the backend
+     declines to expand this comparison.  */
+  if (optimize_insn_for_size_p ()
+      || !riscv_expand_strn_compare (operands[0], operands[1], operands[2],
+				     NULL_RTX, operands[3]))
+    FAIL;
+
+  DONE;
+})
+
 ;; Search character in string (generalization of strlen).
 ;; Argument 0 is the resulting offset
 ;; Argument 1 is the string
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 9595078bdd4..43dff64fb3a 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -252,3 +252,8 @@ Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213)
 misa-spec=
 Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC)
 Set the version of RISC-V ISA spec.
+
+mstring-compare-inline-limit=
+Target Var(riscv_string_compare_inline_limit) Init(64) RejectNegative Joined UInteger Save
+Max number of bytes to compare as part of inlined strcmp/strncmp routines (default: 64).
+
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 88a5f7aa069..a4695454158 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1213,6 +1213,7 @@ See RS/6000 and PowerPC Options.
 -msave-restore  -mno-save-restore @gol
 -mshorten-memrefs  -mno-shorten-memrefs @gol
 -mstrict-align  -mno-strict-align @gol
+-mstring-compare-inline-limit=@var{num} @gol
 -mcmodel=medlow  -mcmodel=medany @gol
 -mexplicit-relocs  -mno-explicit-relocs @gol
 -mrelax  -mno-relax @gol
@@ -28653,6 +28654,13 @@ Do not or do generate unaligned memory accesses.  The default is set depending
 on whether the processor we are optimizing for supports fast unaligned access
 or not.
 
+@item -mstring-compare-inline-limit=@var{num}
+@opindex mstring-compare-inline-limit
+Compare at most @var{num} string bytes with inline code.
+If neither a difference nor the end of the string is found within the
+inline comparison, a call to @code{strcmp} or @code{strncmp} takes care
+of the rest of the comparison.  The default is 64 bytes.
+
 @item -mcmodel=medlow
 @opindex mcmodel=medlow
 Generate code for the medium-low code model. The program and its statically
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c
new file mode 100644
index 00000000000..2126c849e0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp-unaligned.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -mstring-compare-inline-limit=64" } */
+
+typedef long unsigned int size_t;
+
+int
+my_str_cmp (const char *s1, const char *s2)
+{
+  return __builtin_strcmp (s1, s2);
+}
+
+int
+my_str_cmp_const (const char *s1)
+{
+  return __builtin_strcmp (s1, "foo");
+}
+
+int
+my_strn_cmp (const char *s1, const char *s2, size_t n)
+{
+  return __builtin_strncmp (s1, s2, n);
+}
+
+int
+my_strn_cmp_const (const char *s1, size_t n)
+{
+  return __builtin_strncmp (s1, "foo", n);
+}
+
+int
+my_strn_cmp_bounded (const char *s1, const char *s2)
+{
+  return __builtin_strncmp (s1, s2, 42);
+}
+/* No pointer above has a known alignment, so orc.b must not be emitted.  */
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c
new file mode 100644
index 00000000000..3465e7ffee3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strcmp.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -mstring-compare-inline-limit=64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Oz" "-Og" } } */
+
+typedef long unsigned int size_t;
+
+/* Emits 8+1 orc.b instructions: 8 words (64 bytes), 1 for the result.  */
+
+int
+my_str_cmp (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strcmp (s1, s2);
+}
+
+/* Also 8+1 orc.b, because the backend does not know the size of "foo".  */
+
+int
+my_str_cmp_const (const char *s1)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  return __builtin_strcmp (s1, "foo");
+}
+
+/* Emits 6+1 orc.b instructions: 6 words cover 42 bytes, 1 for the result.  */
+
+int
+my_strn_cmp (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* Not expanded because the backend does not get a constant length here.  */
+
+int
+my_strn_cmp_const (const char *s1, size_t n)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  return __builtin_strncmp (s1, "foo", n);
+}
+
+/* Emits 6+1 orc.b instructions: 6 words cover 42 bytes, 1 for the result.  */
+
+int
+my_strn_cmp_bounded (const char *s1, const char *s2)
+{
+  s1 = __builtin_assume_aligned (s1, 4096);
+  s2 = __builtin_assume_aligned (s2, 4096);
+  return __builtin_strncmp (s1, s2, 42);
+}
+
+/* { dg-final { scan-assembler-times "orc.b\t" 32 } } */

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-12-01 13:24 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-01 13:24 [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for str(n)cmp inline expansion Philipp Tomsich
  -- strict thread matches above, loose matches on Subject: below --
2022-11-18 23:19 Philipp Tomsich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).