public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion
@ 2022-11-15 14:03 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-15 14:03 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d97e9be78f7a21e17250a8e682f8f6a810c7d442

commit d97e9be78f7a21e17250a8e682f8f6a810c7d442
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:13 2022 +0200

    riscv: Add support for strlen inline expansion
    
    This patch implements the expansion of the strlen builtin
    using Zbb instructions (if available) for aligned strings
    using the following sequence:
    
          li      a3,-1
          addi    a4,a0,8
    .L2:  ld      a5,0(a0)
          addi    a0,a0,8
          orc.b   a5,a5
          beq     a5,a3,6 <.L2>
          not     a5,a5
          ctz     a5,a5
          srli    a5,a5,0x3
          add     a0,a0,a5
          sub     a0,a0,a4
    
    This allows calls to strlen() to be inlined, with optimized code for
    determining the length of a string.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strlen): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            macro.
            (GEN_EMIT_HELPER3): New helper macro.
            (do_load_from_addr): New helper function.
            (riscv_expand_strlen_zbb): New function.
            (riscv_expand_strlen): New function.
            * config/riscv/riscv.md (strlen<mode>): Invoke expansion
            function for strlen.
            * emit-rtl.cc (emit_likely_jump_insn): New function.
            (emit_unlikely_jump_insn): New function.
            * rtl.h (emit_likely_jump_insn): New prototype.
            (emit_unlikely_jump_insn): New prototype.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/zbb-strlen-unaligned.c: New test.
            * gcc.target/riscv/zbb-strlen.c: New test.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 131 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  28 +++++
 gcc/emit-rtl.cc                                    |  24 ++++
 gcc/rtl.h                                          |   2 +
 .../gcc.target/riscv/zbb-strlen-unaligned.c        |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c        |  18 +++
 7 files changed, 217 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3ca8733fd54..a08b6aa5ba6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bfb819b35f5 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,74 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER2(name)				\
+static rtx_insn *					\
+do_## name ## 2(rtx dest, rtx src)			\
+{							\
+  rtx_insn *insn;					\
+  if (GET_MODE (dest) == DImode)			\
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));	\
+  else							\
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));	\
+  return insn;						\
+}
+
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER3(name)					\
+static rtx_insn *						\
+do_## name ## 3(rtx dest, rtx src1, rtx src2)			\
+{								\
+  rtx_insn *insn;						\
+  if (GET_MODE (dest) == DImode)				\
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2));	\
+  else								\
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2));	\
+  return insn;							\
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Helper function to load a byte or a Pmode-sized word from memory.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the rtx whose memory attributes are copied to the new MEM.
+
+   A QImode load is zero-extended into DEST.  The function returns
+   ADDR_REG unchanged.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +260,66 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* If the provided string is aligned, then read XLEN bytes
+   in a loop and use orc.b to find NUL-bytes.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* The alignment needs to be known and big enough.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false;
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word and use orc.b to find a zero-byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, word, m1, loop_label));
+
+  /* Calculate the return value by counting zero-bits.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz);
+
+  return true;
+}
+
+/* Expand a strlen operation and return true if successful.
+   Return false if we should let the compiler generate normal
+   code, probably a strlen call.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1abc0ce3b60..15581c130ba 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3228,6 +3231,31 @@
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).
+;; Argument 0 is the resulting offset
+;; Argument 1 is the string
+;; Argument 2 is the search character
+;; Argument 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+	(unspec:X [(match_operand:BLK 1 "general_operand")
+		     (match_operand:SI 2 "const_int_operand")
+		     (match_operand:SI 3 "const_int_operand")]
+		  UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f25fb70ab97..b85ad2e7ce5 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -5167,6 +5167,30 @@ emit_jump_insn (rtx x)
   return last;
 }
 
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very likely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_likely ());
+  return jump;
+}
+
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very unlikely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
 /* Make an insn of code CALL_INSN with pattern X
    and add it to the end of the doubly-linked list.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a8c4709257..87d40c42545 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *);
 extern rtx_insn *emit_insn (rtx);
 extern rtx_insn *emit_debug_insn (rtx);
 extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
 extern rtx_insn *emit_call_insn (rtx);
 extern rtx_code_label *emit_label (rtx);
 extern rtx_jump_table_data *emit_jump_table_data (rtx);
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096);
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion
@ 2022-12-01 13:24 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-12-01 13:24 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:630b7f64c0bc850069d230b3fc2ecd25b29eb6ac

commit 630b7f64c0bc850069d230b3fc2ecd25b29eb6ac
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:13 2022 +0200

    riscv: Add support for strlen inline expansion
    
    This patch implements the expansion of the strlen builtin
    using Zbb instructions (if available) for aligned strings
    using the following sequence:
    
          li      a3,-1
          addi    a4,a0,8
    .L2:  ld      a5,0(a0)
          addi    a0,a0,8
          orc.b   a5,a5
          beq     a5,a3,6 <.L2>
          not     a5,a5
          ctz     a5,a5
          srli    a5,a5,0x3
          add     a0,a0,a5
          sub     a0,a0,a4
    
    This allows calls to strlen() to be inlined, with optimized code for
    determining the length of a string.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strlen): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            macro.
            (GEN_EMIT_HELPER3): New helper macro.
            (do_load_from_addr): New helper function.
            (riscv_expand_strlen_zbb): New function.
            (riscv_expand_strlen): New function.
            * config/riscv/riscv.md (strlen<mode>): Invoke expansion
            function for strlen.
            * emit-rtl.cc (emit_likely_jump_insn): New function.
            (emit_unlikely_jump_insn): New function.
            * rtl.h (emit_likely_jump_insn): New prototype.
            (emit_unlikely_jump_insn): New prototype.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/zbb-strlen-unaligned.c: New test.
            * gcc.target/riscv/zbb-strlen.c: New test.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 131 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  28 +++++
 gcc/emit-rtl.cc                                    |  24 ++++
 gcc/rtl.h                                          |   2 +
 .../gcc.target/riscv/zbb-strlen-unaligned.c        |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c        |  18 +++
 7 files changed, 217 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3ca8733fd54..a08b6aa5ba6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bfb819b35f5 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,74 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER2(name)				\
+static rtx_insn *					\
+do_## name ## 2(rtx dest, rtx src)			\
+{							\
+  rtx_insn *insn;					\
+  if (GET_MODE (dest) == DImode)			\
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));	\
+  else							\
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));	\
+  return insn;						\
+}
+
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER3(name)					\
+static rtx_insn *						\
+do_## name ## 3(rtx dest, rtx src1, rtx src2)			\
+{								\
+  rtx_insn *insn;						\
+  if (GET_MODE (dest) == DImode)				\
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2));	\
+  else								\
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2));	\
+  return insn;							\
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Helper function to load a byte or a Pmode-sized word from memory.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the rtx whose memory attributes are copied to the new MEM.
+
+   A QImode load is zero-extended into DEST.  The function returns
+   ADDR_REG unchanged.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +260,66 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* If the provided string is aligned, then read XLEN bytes
+   in a loop and use orc.b to find NUL-bytes.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* The alignment needs to be known and big enough.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false;
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word and use orc.b to find a zero-byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, word, m1, loop_label));
+
+  /* Calculate the return value by counting zero-bits.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz);
+
+  return true;
+}
+
+/* Expand a strlen operation and return true if successful.
+   Return false if we should let the compiler generate normal
+   code, probably a strlen call.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1abc0ce3b60..15581c130ba 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3228,6 +3231,31 @@
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).
+;; Argument 0 is the resulting offset
+;; Argument 1 is the string
+;; Argument 2 is the search character
+;; Argument 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+	(unspec:X [(match_operand:BLK 1 "general_operand")
+		     (match_operand:SI 2 "const_int_operand")
+		     (match_operand:SI 3 "const_int_operand")]
+		  UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f25fb70ab97..b85ad2e7ce5 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -5167,6 +5167,30 @@ emit_jump_insn (rtx x)
   return last;
 }
 
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very likely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_likely ());
+  return jump;
+}
+
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very unlikely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
 /* Make an insn of code CALL_INSN with pattern X
    and add it to the end of the doubly-linked list.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a8c4709257..87d40c42545 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *);
 extern rtx_insn *emit_insn (rtx);
 extern rtx_insn *emit_debug_insn (rtx);
 extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
 extern rtx_insn *emit_call_insn (rtx);
 extern rtx_code_label *emit_label (rtx);
 extern rtx_jump_table_data *emit_jump_table_data (rtx);
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096);
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion
@ 2022-11-18 20:27 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-18 20:27 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3e7bc4c007c752f3c9739bfb4b4aa3b9c542e0b4

commit 3e7bc4c007c752f3c9739bfb4b4aa3b9c542e0b4
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:13 2022 +0200

    riscv: Add support for strlen inline expansion
    
    This patch implements the expansion of the strlen builtin
    using Zbb instructions (if available) for aligned strings
    using the following sequence:
    
          li      a3,-1
          addi    a4,a0,8
    .L2:  ld      a5,0(a0)
          addi    a0,a0,8
          orc.b   a5,a5
          beq     a5,a3,6 <.L2>
          not     a5,a5
          ctz     a5,a5
          srli    a5,a5,0x3
          add     a0,a0,a5
          sub     a0,a0,a4
    
    This allows calls to strlen() to be inlined, with optimized code for
    determining the length of a string.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strlen): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            macro.
            (GEN_EMIT_HELPER3): New helper macro.
            (do_load_from_addr): New helper function.
            (riscv_expand_strlen_zbb): New function.
            (riscv_expand_strlen): New function.
            * config/riscv/riscv.md (strlen<mode>): Invoke expansion
            function for strlen.
            * emit-rtl.cc (emit_likely_jump_insn): New function.
            (emit_unlikely_jump_insn): New function.
            * rtl.h (emit_likely_jump_insn): New prototype.
            (emit_unlikely_jump_insn): New prototype.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/zbb-strlen-unaligned.c: New test.
            * gcc.target/riscv/zbb-strlen.c: New test.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 131 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  28 +++++
 gcc/emit-rtl.cc                                    |  24 ++++
 gcc/rtl.h                                          |   2 +
 .../gcc.target/riscv/zbb-strlen-unaligned.c        |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c        |  18 +++
 7 files changed, 217 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3ca8733fd54..a08b6aa5ba6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bfb819b35f5 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,74 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER2(name)				\
+static rtx_insn *					\
+do_## name ## 2(rtx dest, rtx src)			\
+{							\
+  rtx_insn *insn;					\
+  if (GET_MODE (dest) == DImode)			\
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));	\
+  else							\
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));	\
+  return insn;						\
+}
+
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER3(name)					\
+static rtx_insn *						\
+do_## name ## 3(rtx dest, rtx src1, rtx src2)			\
+{								\
+  rtx_insn *insn;						\
+  if (GET_MODE (dest) == DImode)				\
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2));	\
+  else								\
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2));	\
+  return insn;							\
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Helper function to load a byte or a Pmode-sized word from memory.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the rtx whose memory attributes are copied to the new MEM.
+
+   A QImode load is zero-extended into DEST.  The function returns
+   ADDR_REG unchanged.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +260,66 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* If the provided string is aligned, then read XLEN bytes
+   in a loop and use orc.b to find NUL-bytes.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* The alignment needs to be known and big enough.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false;
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word and use orc.b to find a zero-byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, word, m1, loop_label));
+
+  /* Calculate the return value by counting zero-bits.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz);
+
+  return true;
+}
+
+/* Expand a strlen operation and return true if successful.
+   Return false if we should let the compiler generate normal
+   code, probably a strlen call.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1abc0ce3b60..15581c130ba 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3228,6 +3231,31 @@
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).
+;; Argument 0 is the resulting offset
+;; Argument 1 is the string
+;; Argument 2 is the search character
+;; Argument 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+	(unspec:X [(match_operand:BLK 1 "general_operand")
+		     (match_operand:SI 2 "const_int_operand")
+		     (match_operand:SI 3 "const_int_operand")]
+		  UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f25fb70ab97..b85ad2e7ce5 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -5167,6 +5167,30 @@ emit_jump_insn (rtx x)
   return last;
 }
 
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very likely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_likely ());
+  return jump;
+}
+
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very unlikely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
 /* Make an insn of code CALL_INSN with pattern X
    and add it to the end of the doubly-linked list.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a8c4709257..87d40c42545 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *);
 extern rtx_insn *emit_insn (rtx);
 extern rtx_insn *emit_debug_insn (rtx);
 extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
 extern rtx_insn *emit_call_insn (rtx);
 extern rtx_code_label *emit_label (rtx);
 extern rtx_jump_table_data *emit_jump_table_data (rtx);
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096);
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion
@ 2022-11-18 20:23 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-18 20:23 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1b1c78c31579bef64dfa44c459db2cd1eca4926c

commit 1b1c78c31579bef64dfa44c459db2cd1eca4926c
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:13 2022 +0200

    riscv: Add support for strlen inline expansion
    
    This patch implements the expansion of the strlen builtin
    using Zbb instructions (if available) for aligned strings
    using the following sequence:
    
          li      a3,-1
          addi    a4,a0,8
    .L2:  ld      a5,0(a0)
          addi    a0,a0,8
          orc.b   a5,a5
          beq     a5,a3,6 <.L2>
          not     a5,a5
          ctz     a5,a5
          srli    a5,a5,0x3
          add     a0,a0,a5
          sub     a0,a0,a4
    
    This allows calls to strlen() to be inlined, with optimized code for
    determining the length of a string.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strlen): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            macro.
            (GEN_EMIT_HELPER3): New helper macro.
            (do_load_from_addr): New helper function.
            (riscv_expand_strlen_zbb): New function.
            (riscv_expand_strlen): New function.
            * config/riscv/riscv.md (strlen<mode>): Invoke expansion
            function for strlen.
            * emit-rtl.cc (emit_likely_jump_insn): New function.
            (emit_unlikely_jump_insn): New function.
            * rtl.h (emit_likely_jump_insn): New prototype.
            (emit_unlikely_jump_insn): New prototype.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/zbb-strlen-unaligned.c: New test.
            * gcc.target/riscv/zbb-strlen.c: New test.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 131 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  28 +++++
 gcc/emit-rtl.cc                                    |  24 ++++
 gcc/rtl.h                                          |   2 +
 .../gcc.target/riscv/zbb-strlen-unaligned.c        |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c        |  18 +++
 7 files changed, 217 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3ca8733fd54..a08b6aa5ba6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bfb819b35f5 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,74 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER2(name)				\
+static rtx_insn *					\
+do_## name ## 2(rtx dest, rtx src)			\
+{							\
+  rtx_insn *insn;					\
+  if (GET_MODE (dest) == DImode)			\
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));	\
+  else							\
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));	\
+  return insn;						\
+}
+
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER3(name)					\
+static rtx_insn *						\
+do_## name ## 3(rtx dest, rtx src1, rtx src2)			\
+{								\
+  rtx_insn *insn;						\
+  if (GET_MODE (dest) == DImode)				\
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2));	\
+  else								\
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2));	\
+  return insn;							\
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Helper function to load a byte or a Pmode-sized word from memory.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the original MEM whose attributes are copied to the new MEM.
+
+   A QImode load is zero-extended into DEST.
+   This function returns ADDR_REG.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +260,66 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* If the provided string is sufficiently aligned, then read one
+   XLEN-bit word per loop iteration and use orc.b to find NUL-bytes.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* The alignment needs to be known and big enough.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false;
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word and use orc.b to find a zero-byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, word, m1, loop_label));
+
+  /* Calculate the return value by counting zero-bits.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz);
+
+  return true;
+}
+
+/* Expand a strlen operation and return true if successful.
+   Return false if we should let the compiler generate normal
+   code, probably a strlen call.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1abc0ce3b60..15581c130ba 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3228,6 +3231,31 @@
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).
+;; Argument 0 is the resulting offset
+;; Argument 1 is the string
+;; Argument 2 is the search character
+;; Argument 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+	(unspec:X [(match_operand:BLK 1 "general_operand")
+		     (match_operand:SI 2 "const_int_operand")
+		     (match_operand:SI 3 "const_int_operand")]
+		  UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f25fb70ab97..b85ad2e7ce5 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -5167,6 +5167,30 @@ emit_jump_insn (rtx x)
   return last;
 }
 
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very likely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_likely ());
+  return jump;
+}
+
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very unlikely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
 /* Make an insn of code CALL_INSN with pattern X
    and add it to the end of the doubly-linked list.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a8c4709257..87d40c42545 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *);
 extern rtx_insn *emit_insn (rtx);
 extern rtx_insn *emit_debug_insn (rtx);
 extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
 extern rtx_insn *emit_call_insn (rtx);
 extern rtx_code_label *emit_label (rtx);
 extern rtx_jump_table_data *emit_jump_table_data (rtx);
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096);
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion
@ 2022-11-18 11:36 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-18 11:36 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:700a8672cef67a5ced0e091c39e61eec79b899e0

commit 700a8672cef67a5ced0e091c39e61eec79b899e0
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:13 2022 +0200

    riscv: Add support for strlen inline expansion
    
    This patch implements the expansion of the strlen builtin
    using Zbb instructions (if available) for aligned strings
    using the following sequence:
    
          li      a3,-1
          addi    a4,a0,8
    .L2:  ld      a5,0(a0)
          addi    a0,a0,8
          orc.b   a5,a5
          beq     a5,a3,6 <.L2>
          not     a5,a5
          ctz     a5,a5
          srli    a5,a5,0x3
          add     a0,a0,a5
          sub     a0,a0,a4
    
    This allows calls to strlen() to be inlined, with optimized code for
    determining the length of a string.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strlen): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            macro.
            (GEN_EMIT_HELPER3): New helper macro.
            (do_load_from_addr): New helper function.
            (riscv_expand_strlen_zbb): New function.
            (riscv_expand_strlen): New function.
            * config/riscv/riscv.md (strlen<mode>): Invoke expansion
            function for strlen.
            * emit-rtl.cc (emit_likely_jump_insn): New function.
            (emit_unlikely_jump_insn): New function.
            * rtl.h (emit_likely_jump_insn): New prototype.
            (emit_unlikely_jump_insn): New prototype.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/zbb-strlen-unaligned.c: New test.
            * gcc.target/riscv/zbb-strlen.c: New test.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 131 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  28 +++++
 gcc/emit-rtl.cc                                    |  24 ++++
 gcc/rtl.h                                          |   2 +
 .../gcc.target/riscv/zbb-strlen-unaligned.c        |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c        |  18 +++
 7 files changed, 217 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3ca8733fd54..a08b6aa5ba6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bfb819b35f5 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,74 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER2(name)				\
+static rtx_insn *					\
+do_## name ## 2(rtx dest, rtx src)			\
+{							\
+  rtx_insn *insn;					\
+  if (GET_MODE (dest) == DImode)			\
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));	\
+  else							\
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));	\
+  return insn;						\
+}
+
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER3(name)					\
+static rtx_insn *						\
+do_## name ## 3(rtx dest, rtx src1, rtx src2)			\
+{								\
+  rtx_insn *insn;						\
+  if (GET_MODE (dest) == DImode)				\
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2));	\
+  else								\
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2));	\
+  return insn;							\
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Helper function to load a byte or a Pmode-sized word from memory.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the original MEM whose attributes are copied to the new MEM.
+
+   A QImode load is zero-extended into DEST.
+   This function returns ADDR_REG.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +260,66 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* If the provided string is sufficiently aligned, then read one
+   XLEN-bit word per loop iteration and use orc.b to find NUL-bytes.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* The alignment needs to be known and big enough.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false;
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word and use orc.b to find a zero-byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, word, m1, loop_label));
+
+  /* Calculate the return value by counting zero-bits.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz);
+
+  return true;
+}
+
+/* Expand a strlen operation and return true if successful.
+   Return false if we should let the compiler generate normal
+   code, probably a strlen call.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1abc0ce3b60..15581c130ba 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3228,6 +3231,31 @@
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).
+;; Argument 0 is the resulting offset
+;; Argument 1 is the string
+;; Argument 2 is the search character
+;; Argument 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+	(unspec:X [(match_operand:BLK 1 "general_operand")
+		     (match_operand:SI 2 "const_int_operand")
+		     (match_operand:SI 3 "const_int_operand")]
+		  UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f25fb70ab97..b85ad2e7ce5 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -5167,6 +5167,30 @@ emit_jump_insn (rtx x)
   return last;
 }
 
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very likely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_likely ());
+  return jump;
+}
+
+/* Make an insn of code JUMP_INSN with pattern X,
+   add a REG_BR_PROB note that indicates very unlikely probability,
+   and add it to the end of the doubly-linked list.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
 /* Make an insn of code CALL_INSN with pattern X
    and add it to the end of the doubly-linked list.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a8c4709257..87d40c42545 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *);
 extern rtx_insn *emit_insn (rtx);
 extern rtx_insn *emit_debug_insn (rtx);
 extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
 extern rtx_insn *emit_call_insn (rtx);
 extern rtx_code_label *emit_label (rtx);
 extern rtx_jump_table_data *emit_jump_table_data (rtx);
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096);
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion
@ 2022-11-17 22:27 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-17 22:27 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4c96c29290b56132dbc5ce320d50d59946dd128b

commit 4c96c29290b56132dbc5ce320d50d59946dd128b
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:13 2022 +0200

    riscv: Add support for strlen inline expansion
    
    This patch implements the expansion of the strlen builtin
    using Zbb instructions (if available) for aligned strings
    using the following sequence:
    
          li      a3,-1
          addi    a4,a0,8
    .L2:  ld      a5,0(a0)
          addi    a0,a0,8
          orc.b   a5,a5
          beq     a5,a3,6 <.L2>
          not     a5,a5
          ctz     a5,a5
          srli    a5,a5,0x3
          add     a0,a0,a5
          sub     a0,a0,a4
    
    This allows calls to strlen() to be inlined, with optimized code for
    determining the length of a string.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strlen): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            macro.
            (GEN_EMIT_HELPER3): New helper macro.
            (do_load_from_addr): New helper function.
            (riscv_expand_strlen_zbb): New function.
            (riscv_expand_strlen): New function.
            * config/riscv/riscv.md (strlen<mode>): Invoke expansion
            function for strlen.
            * emit-rtl.cc (emit_likely_jump_insn): New function.
            (emit_unlikely_jump_insn): New function.
            * rtl.h (emit_likely_jump_insn): New prototype.
            (emit_unlikely_jump_insn): New prototype.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/zbb-strlen-unaligned.c: New test.
            * gcc.target/riscv/zbb-strlen.c: New test.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 131 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  28 +++++
 gcc/emit-rtl.cc                                    |  24 ++++
 gcc/rtl.h                                          |   2 +
 .../gcc.target/riscv/zbb-strlen-unaligned.c        |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c        |  18 +++
 7 files changed, 217 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3ca8733fd54..a08b6aa5ba6 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bfb819b35f5 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,74 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER2(name)				\
+static rtx_insn *					\
+do_## name ## 2(rtx dest, rtx src)			\
+{							\
+  rtx_insn *insn;					\
+  if (GET_MODE (dest) == DImode)			\
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));	\
+  else							\
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));	\
+  return insn;						\
+}
+
+/* Emit proper instruction depending on mode of dest.  */
+
+#define GEN_EMIT_HELPER3(name)					\
+static rtx_insn *						\
+do_## name ## 3(rtx dest, rtx src1, rtx src2)			\
+{								\
+  rtx_insn *insn;						\
+  if (GET_MODE (dest) == DImode)				\
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2));	\
+  else								\
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2));	\
+  return insn;							\
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Helper function to load a byte or a Pmode-sized word from memory.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register for the data.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the original MEM whose attributes are copied to the new MEM.
+
+   A QImode load is zero-extended into DEST.
+   This function returns ADDR_REG.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +260,66 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* If the provided string is sufficiently aligned, then read one
+   XLEN-bit word per loop iteration and use orc.b to find NUL-bytes.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* The alignment needs to be known and big enough.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false;
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx));
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word and use orc.b to find a zero-byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, word, m1, loop_label));
+
+  /* Calculate the return value by counting zero-bits.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz);
+
+  return true;
+}
+
+/* Expand a strlen operation and return true if successful.
+   Return false if we should let the compiler generate normal
+   code, probably a strlen call.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1abc0ce3b60..15581c130ba 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3228,6 +3231,31 @@
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).  Only a search
+;; for NUL is expanded inline, and only when not optimizing for size.
+;; Operand 0 is the resulting offset (the string length).
+;; Operand 1 is the string (BLK-mode memory).
+;; Operand 2 is the search character; operand 3 is the known alignment.
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+	(unspec:X [(match_operand:BLK 1 "general_operand")
+		     (match_operand:SI 2 "const_int_operand")
+		     (match_operand:SI 3 "const_int_operand")]
+		  UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f25fb70ab97..b85ad2e7ce5 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -5167,6 +5167,30 @@ emit_jump_insn (rtx x)
   return last;
 }
 
+/* Make an insn of code JUMP_INSN with pattern X, add it to the end of
+   the doubly-linked list, and attach a REG_BR_PROB note to it that
+   indicates a very likely probability.  Return the emitted insn.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_likely ());
+  return jump;
+}
+
+/* Make an insn of code JUMP_INSN with pattern X, add it to the end of
+   the doubly-linked list, and attach a REG_BR_PROB note to it that
+   indicates a very unlikely probability.  Return the emitted insn.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
 /* Make an insn of code CALL_INSN with pattern X
    and add it to the end of the doubly-linked list.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a8c4709257..87d40c42545 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *);
 extern rtx_insn *emit_insn (rtx);
 extern rtx_insn *emit_debug_insn (rtx);
 extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
 extern rtx_insn *emit_call_insn (rtx);
 extern rtx_code_label *emit_label (rtx);
 extern rtx_jump_table_data *emit_jump_table_data (rtx);
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s); /* Unknown alignment: no inline expansion.  */
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096); /* Makes the alignment known.  */
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion
@ 2022-11-15 15:01 Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2022-11-15 15:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:50881951576a7e3e048bfb84eac5b47966cf349e

commit 50881951576a7e3e048bfb84eac5b47966cf349e
Author: Christoph Müllner <christoph.muellner@vrull.eu>
Date:   Wed Sep 28 11:19:13 2022 +0200

    riscv: Add support for strlen inline expansion
    
    This patch implements the expansion of the strlen builtin
    using Zbb instructions (if available) for aligned strings
    using the following sequence:
    
          li      a3,-1
          addi    a4,a0,8
    .L2:  ld      a5,0(a0)
          addi    a0,a0,8
          orc.b   a5,a5
          beq     a5,a3,.L2
          not     a5,a5
          ctz     a5,a5
          srli    a5,a5,0x3
          add     a0,a0,a5
          sub     a0,a0,a4
    
    This allows calls to strlen() to be inlined, with optimized code
    for determining the length of a string.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-protos.h (riscv_expand_strlen): New
            prototype.
            * config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
            macro.
            (GEN_EMIT_HELPER3): New helper macro.
            (do_load_from_addr): New helper function.
            (riscv_expand_strlen_zbb): New function.
            (riscv_expand_strlen): New function.
            * config/riscv/riscv.md (strlen<mode>): Invoke expansion
            function for strlen.
            * emit-rtl.cc (emit_likely_jump_insn): New function.
            (emit_unlikely_jump_insn): New function.
            * rtl.h (emit_likely_jump_insn): New prototype.
            (emit_unlikely_jump_insn): New prototype.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/zbb-strlen-unaligned.c: New test.
            * gcc.target/riscv/zbb-strlen.c: New test.
    
    Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   1 +
 gcc/config/riscv/riscv-string.cc                   | 131 +++++++++++++++++++++
 gcc/config/riscv/riscv.md                          |  28 +++++
 gcc/emit-rtl.cc                                    |  24 ++++
 gcc/rtl.h                                          |   2 +
 .../gcc.target/riscv/zbb-strlen-unaligned.c        |  13 ++
 gcc/testsuite/gcc.target/riscv/zbb-strlen.c        |  18 +++
 7 files changed, 217 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3ca8733fd547..a08b6aa5ba67 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -97,6 +97,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx, rtx, rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be1..bfb819b35f54 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,74 @@
 #include "predict.h"
 #include "optabs.h"
 
+/* Define do_NAME2: emit NAMEdi2 if DEST is DImode, otherwise NAMEsi2.  */
+
+#define GEN_EMIT_HELPER2(name)				\
+static rtx_insn *					\
+do_## name ## 2(rtx dest, rtx src)			\
+{							\
+  rtx_insn *insn;					\
+  if (GET_MODE (dest) == DImode)			\
+    insn = emit_insn (gen_ ## name ## di2 (dest, src));	\
+  else							\
+    insn = emit_insn (gen_ ## name ## si2 (dest, src));	\
+  return insn;						\
+}
+
+/* Define do_NAME3: emit NAMEdi3 if DEST is DImode, otherwise NAMEsi3.  */
+
+#define GEN_EMIT_HELPER3(name)					\
+static rtx_insn *						\
+do_## name ## 3(rtx dest, rtx src1, rtx src2)			\
+{								\
+  rtx_insn *insn;						\
+  if (GET_MODE (dest) == DImode)				\
+    insn = emit_insn (gen_ ## name ## di3 (dest, src1, src2));	\
+  else								\
+    insn = emit_insn (gen_ ## name ## si3 (dest, src1, src2));	\
+  return insn;							\
+}
+
+GEN_EMIT_HELPER3(add) /* do_add3  */
+GEN_EMIT_HELPER3(sub) /* do_sub3  */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3  */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2  */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2  */
+GEN_EMIT_HELPER2(clz) /* do_clz2  */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2  */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2  */
+
+#undef GEN_EMIT_HELPER2
+#undef GEN_EMIT_HELPER3
+
+/* Helper function to load a byte or a Pmode-sized word from memory.
+
+   MODE is the mode to use for the load (QImode or Pmode).
+   DEST is the destination register; a QImode load is zero-extended.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the MEM whose attributes are copied to the new MEM.
+
+   This function aborts (gcc_unreachable) for any other MODE and
+   returns ADDR_REG unchanged.  */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (mem, addr);
+  set_mem_size (mem, GET_MODE_SIZE (mode));
+
+  if (mode == QImode)
+    do_zero_extendqi2 (dest, mem);
+  else if (mode == Pmode)
+    emit_move_insn (dest, mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
 
@@ -192,3 +260,66 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
     }
   return false;
 }
+
+/* Inline strlen for an aligned string using Zbb: load Pmode words in a loop
+   and find the NUL with orc.b.  Return false unless ALIGN is sufficient.  */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+  rtx m1, addr, addr_plus_regsz, word, zeros;
+  rtx loop_label, cond;
+
+  gcc_assert (TARGET_ZBB);
+
+  /* ALIGN must be a constant of at least the size of a Pmode register.  */
+  if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+    return false;
+
+  m1 = gen_reg_rtx (Pmode);
+  addr = copy_addr_to_reg (XEXP (src, 0));
+  addr_plus_regsz = gen_reg_rtx (Pmode);
+  word = gen_reg_rtx (Pmode);
+  zeros = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_SET (m1, constm1_rtx)); /* orc.b result if no NUL.  */
+  do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+  loop_label = gen_label_rtx ();
+  emit_label (loop_label);
+
+  /* Load a word, advance ADDR and loop while orc.b finds no zero byte.  */
+  do_load_from_addr (Pmode, word, addr, src);
+  do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+  do_orcb2 (word, word);
+  cond = gen_rtx_EQ (VOIDmode, word, m1);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, word, m1, loop_label));
+
+  /* Invert: NUL bytes become 0xff; count the bits preceding the first.  */
+  do_one_cmpl2 (word, word);
+  if (TARGET_BIG_ENDIAN)
+    do_clz2 (zeros, word);
+  else
+    do_ctz2 (zeros, word);
+  /* Convert bits to bytes; ADDR is already one word past the NUL's word.  */
+  do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+  do_add3 (addr, addr, zeros);
+  do_sub3 (result, addr, addr_plus_regsz); /* Hence minus (start + word).  */
+
+  return true;
+}
+
+/* Expand strlen of SRC into RESULT; SEARCH_CHAR must be const0_rtx.
+   Only a Zbb expansion exists: return its outcome if TARGET_ZBB, else
+   return false so that generic code (probably a strlen call) is used.  */
+
+bool
+riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
+{
+  gcc_assert (search_char == const0_rtx);
+
+  if (TARGET_ZBB)
+    return riscv_expand_strlen_zbb (result, src, align);
+
+  return false;
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 1abc0ce3b606..15581c130ba4 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
+
+  ;; ZBB STRLEN
+  UNSPEC_STRLEN
 ])
 
 (define_c_enum "unspecv" [
@@ -3228,6 +3231,31 @@
     FAIL;
 })
 
+;; Search character in string (generalization of strlen).  Only a search
+;; for NUL is expanded inline, and only when not optimizing for size.
+;; Operand 0 is the resulting offset (the string length).
+;; Operand 1 is the string (BLK-mode memory).
+;; Operand 2 is the search character; operand 3 is the known alignment.
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+	(unspec:X [(match_operand:BLK 1 "general_operand")
+		     (match_operand:SI 2 "const_int_operand")
+		     (match_operand:SI 3 "const_int_operand")]
+		  UNSPEC_STRLEN))]
+  ""
+{
+  rtx search_char = operands[2];
+
+  if (optimize_insn_for_size_p () || search_char != const0_rtx)
+    FAIL;
+
+  if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index f25fb70ab974..b85ad2e7ce59 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -5167,6 +5167,30 @@ emit_jump_insn (rtx x)
   return last;
 }
 
+/* Make an insn of code JUMP_INSN with pattern X, add it to the end of
+   the doubly-linked list, and attach a REG_BR_PROB note to it that
+   indicates a very likely probability.  Return the emitted insn.  */
+
+rtx_insn *
+emit_likely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_likely ());
+  return jump;
+}
+
+/* Make an insn of code JUMP_INSN with pattern X, add it to the end of
+   the doubly-linked list, and attach a REG_BR_PROB note to it that
+   indicates a very unlikely probability.  Return the emitted insn.  */
+
+rtx_insn *
+emit_unlikely_jump_insn (rtx x)
+{
+  rtx_insn *jump = emit_jump_insn (x);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+  return jump;
+}
+
 /* Make an insn of code CALL_INSN with pattern X
    and add it to the end of the doubly-linked list.  */
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a8c4709257d..87d40c42545e 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *);
 extern rtx_insn *emit_insn (rtx);
 extern rtx_insn *emit_debug_insn (rtx);
 extern rtx_insn *emit_jump_insn (rtx);
+extern rtx_insn *emit_likely_jump_insn (rtx);
+extern rtx_insn *emit_unlikely_jump_insn (rtx);
 extern rtx_insn *emit_call_insn (rtx);
 extern rtx_code_label *emit_label (rtx);
 extern rtx_jump_table_data *emit_jump_table_data (rtx);
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 000000000000..39da70a5021f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  return __builtin_strlen (s); /* Unknown alignment: no inline expansion.  */
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 000000000000..d01b7fc552d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+size_t
+my_str_len (const char *s)
+{
+  s = __builtin_assume_aligned (s, 4096); /* Makes the alignment known.  */
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-12-01 13:24 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-15 14:03 [gcc(refs/vendors/vrull/heads/for-upstream)] riscv: Add support for strlen inline expansion Philipp Tomsich
2022-11-15 15:01 Philipp Tomsich
2022-11-17 22:27 Philipp Tomsich
2022-11-18 11:36 Philipp Tomsich
2022-11-18 20:23 Philipp Tomsich
2022-11-18 20:27 Philipp Tomsich
2022-12-01 13:24 Philipp Tomsich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).