From: Christoph Muellner <christoph.muellner@vrull.eu>
To: gcc-patches@gcc.gnu.org, Kito Cheng <kito.cheng@sifive.com>,
Jim Wilson <jim.wilson.gcc@gmail.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Andrew Waterman <andrew@sifive.com>,
Philipp Tomsich <philipp.tomsich@vrull.eu>,
Jeff Law <jeffreyalaw@gmail.com>,
Vineet Gupta <vineetg@rivosinc.com>
Cc: "Christoph Müllner" <christoph.muellner@vrull.eu>
Subject: [PATCH 6/7] riscv: Add support for strlen inline expansion
Date: Mon, 14 Nov 2022 00:05:20 +0100 [thread overview]
Message-ID: <20221113230521.712693-7-christoph.muellner@vrull.eu> (raw)
In-Reply-To: <20221113230521.712693-1-christoph.muellner@vrull.eu>
From: Christoph Müllner <christoph.muellner@vrull.eu>
This patch implements inline expansion of the strlen builtin
for aligned strings using Zbb instructions (if available).
The following sequence is generated:
li a3,-1
addi a4,a0,8
.L2: ld a5,0(a0)
addi a0,a0,8
orc.b a5,a5
beq a5,a3,6 <.L2>
not a5,a5
ctz a5,a5
srli a5,a5,0x3
add a0,a0,a5
sub a0,a0,a4
This allows calls to strlen() to be inlined, with optimized code for
determining the length of a string.
gcc/ChangeLog:
* config/riscv/riscv-protos.h (riscv_expand_strlen): New
prototype.
* config/riscv/riscv-string.cc (riscv_emit_unlikely_jump): New
function.
(GEN_EMIT_HELPER2): New helper macro.
(GEN_EMIT_HELPER3): New helper macro.
(do_load_from_addr): New helper function.
(riscv_expand_strlen_zbb): New function.
(riscv_expand_strlen): New function.
* config/riscv/riscv.md (strlen<mode>): Invoke expansion
functions for strlen.
Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-string.cc | 149 ++++++++++++++++++
gcc/config/riscv/riscv.md | 28 ++++
.../gcc.target/riscv/zbb-strlen-unaligned.c | 13 ++
gcc/testsuite/gcc.target/riscv/zbb-strlen.c | 18 +++
5 files changed, 209 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-strlen.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 344515dbaf4..18187e3bd78 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -96,6 +96,7 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
/* Routines implemented in riscv-string.c. */
extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_strlen (rtx[]);
/* Information about one CPU we know about. */
struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 1137df475be..bf96522b608 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -38,6 +38,81 @@
#include "predict.h"
#include "optabs.h"
+/* Emit INSN as a jump instruction and attach a branch probability
+   note to it that marks the branch as very unlikely.
+   Return the emitted jump instruction. */
+
+static rtx_insn *
+riscv_emit_unlikely_jump (rtx insn)
+{
+  const profile_probability prob = profile_probability::very_unlikely ();
+  rtx_insn *emitted = emit_jump_insn (insn);
+
+  add_reg_br_prob_note (emitted, prob);
+  return emitted;
+}
+
+/* Define a static helper do_<NAME>2 (DEST, SRC) for the two-operand
+   pattern NAME.  The helper emits the DImode variant (gen_<NAME>di2)
+   if DEST has mode DImode and the SImode variant (gen_<NAME>si2)
+   for every other mode of DEST.  It returns the emitted insn. */
+
+#define GEN_EMIT_HELPER2(name) \
+static rtx_insn * \
+do_## name ## 2(rtx dest, rtx src) \
+{ \
+  if (GET_MODE (dest) == DImode) \
+    return emit_insn (gen_ ## name ## di2 (dest, src)); \
+  return emit_insn (gen_ ## name ## si2 (dest, src)); \
+}
+
+/* Define a static helper do_<NAME>3 (DEST, SRC1, SRC2) for the
+   three-operand pattern NAME.  The helper emits the DImode variant
+   (gen_<NAME>di3) if DEST has mode DImode and the SImode variant
+   (gen_<NAME>si3) for every other mode of DEST.  It returns the
+   emitted insn. */
+
+#define GEN_EMIT_HELPER3(name) \
+static rtx_insn * \
+do_## name ## 3(rtx dest, rtx src1, rtx src2) \
+{ \
+  if (GET_MODE (dest) == DImode) \
+    return emit_insn (gen_ ## name ## di3 (dest, src1, src2)); \
+  return emit_insn (gen_ ## name ## si3 (dest, src1, src2)); \
+}
+
+/* Instantiate the emit helpers.  Each line defines one static
+   function; its name is given in the trailing comment. */
+GEN_EMIT_HELPER3(add) /* do_add3 */
+GEN_EMIT_HELPER3(sub) /* do_sub3 */
+GEN_EMIT_HELPER3(lshr) /* do_lshr3 */
+GEN_EMIT_HELPER2(orcb) /* do_orcb2 */
+GEN_EMIT_HELPER2(one_cmpl) /* do_one_cmpl2 */
+GEN_EMIT_HELPER2(clz) /* do_clz2 */
+GEN_EMIT_HELPER2(ctz) /* do_ctz2 */
+GEN_EMIT_HELPER2(zero_extendqi) /* do_zero_extendqi2 */
+
+/* Emit a load of a byte or of a Pmode value into a register.
+
+   MODE is the mode of the memory access (QImode or Pmode).
+   DEST is the register that receives the loaded data; a QImode
+   load is zero-extended into DEST.
+   ADDR_REG is the register that holds the address to load from.
+   ADDR is the rtx whose memory attributes are copied to the
+   memory reference created for the load.
+
+   Any MODE other than QImode or Pmode is treated as unreachable.
+   This function returns ADDR_REG. */
+
+static rtx
+do_load_from_addr (machine_mode mode, rtx dest, rtx addr_reg, rtx addr)
+{
+  rtx src_mem = gen_rtx_MEM (mode, addr_reg);
+  MEM_COPY_ATTRIBUTES (src_mem, addr);
+  set_mem_size (src_mem, GET_MODE_SIZE (mode));
+
+  if (mode == Pmode)
+    emit_move_insn (dest, src_mem);
+  else if (mode == QImode)
+    do_zero_extendqi2 (dest, src_mem);
+  else
+    gcc_unreachable ();
+
+  return addr_reg;
+}
+
+
/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
Assume that the areas do not overlap. */
@@ -192,3 +267,77 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
}
return false;
}
+
+/* Expand strlen inline using the Zbb extension.
+
+   If the provided string is aligned, then read XLEN bytes
+ in a loop and use orc.b to find NUL-bytes.
+
+   RESULT is the register that receives the string length.
+   SRC is the memory reference of the string; its address is
+   taken from XEXP (SRC, 0).
+   ALIGN is the alignment of the string.
+
+   Returns true if the expansion was emitted and false (without
+   emitting anything) if ALIGN is not a constant or is smaller
+   than the size of Pmode. */
+
+static bool
+riscv_expand_strlen_zbb (rtx result, rtx src, rtx align)
+{
+ rtx m1, addr, addr_plus_regsz, word, zeros;
+ rtx loop_label, cond;
+
+ gcc_assert (TARGET_ZBB);
+
+ /* The alignment needs to be known and big enough. */
+ /* NOTE(review): the check uses GET_MODE_SIZE (Pmode) while the
+    stride below uses UNITS_PER_WORD; presumably both are equal
+    on this target -- confirm. */
+ if (!CONST_INT_P (align) || UINTVAL (align) < GET_MODE_SIZE (Pmode))
+ return false;
+
+ m1 = gen_reg_rtx (Pmode);
+ addr = copy_addr_to_reg (XEXP (src, 0));
+ addr_plus_regsz = gen_reg_rtx (Pmode);
+ word = gen_reg_rtx (Pmode);
+ zeros = gen_reg_rtx (Pmode);
+
+ /* M1 holds the all-ones pattern that WORD is compared against.
+    ADDR_PLUS_REGSZ is the start address plus one word; it is
+    subtracted at the end because ADDR has already been advanced
+    past the word that ended the loop. */
+ emit_insn (gen_rtx_SET (m1, constm1_rtx));
+ do_add3 (addr_plus_regsz, addr, GEN_INT (UNITS_PER_WORD));
+
+ loop_label = gen_label_rtx ();
+ emit_label (loop_label);
+
+ /* Load a word and use orc.b to find a zero-byte. */
+ /* Per the Zbb specification orc.b turns every non-zero byte into
+    0xff and every zero byte into 0x00, so WORD == M1 means that the
+    word contains no NUL byte and the loop continues.
+    NOTE(review): the loop-back branch is emitted via
+    riscv_emit_unlikely_jump and is thus marked very unlikely;
+    confirm this is the intended prediction. */
+ do_load_from_addr (Pmode, word, addr, src);
+ do_add3 (addr, addr, GEN_INT (UNITS_PER_WORD));
+ do_orcb2 (word, word);
+ cond = gen_rtx_EQ (VOIDmode, word, m1);
+ riscv_emit_unlikely_jump (gen_cbranch4 (Pmode, cond,
+ word, m1, loop_label));
+
+ /* Calculate the return value by counting zero-bits. */
+ /* After the complement, the bits of the NUL bytes are the set ones.
+    The count is taken from the leading end on big-endian targets
+    and from the trailing end otherwise. */
+ do_one_cmpl2 (word, word);
+ if (TARGET_BIG_ENDIAN)
+ do_clz2 (zeros, word);
+ else
+ do_ctz2 (zeros, word);
+
+ /* Convert the bit count into a byte count and compute
+    RESULT = (ADDR + byte count) - (start address + word size). */
+ do_lshr3 (zeros, zeros, GEN_INT (exact_log2 (BITS_PER_UNIT)));
+ do_add3 (addr, addr, zeros);
+ do_sub3 (result, addr, addr_plus_regsz);
+
+ return true;
+}
+
+/* Try to expand a strlen operation inline.
+
+   OPERANDS[0] is the target (result).
+   OPERANDS[1] is the source.
+   OPERANDS[2] is the search byte (must be 0)
+   OPERANDS[3] is the alignment in bytes.
+
+   Returns true if inline code was emitted.  Returns false if the
+   compiler should generate normal code instead, probably a call
+   to strlen. */
+
+bool
+riscv_expand_strlen (rtx operands[])
+{
+  /* Only searching for the terminating NUL byte is supported. */
+  gcc_assert (operands[2] == const0_rtx);
+
+  /* The only available expansion relies on Zbb. */
+  if (!TARGET_ZBB)
+    return false;
+
+  return riscv_expand_strlen_zbb (operands[0], operands[1], operands[3]);
+}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 43b97f1181e..f05c764c3d4 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,6 +65,9 @@ (define_c_enum "unspec" [
;; OR-COMBINE
UNSPEC_ORC_B
+
+ ;; ZBB STRLEN
+ UNSPEC_STRLEN
])
(define_c_enum "unspecv" [
@@ -3007,6 +3010,31 @@ (define_expand "cpymemsi"
FAIL;
})
+;; Search for a character in a string (generalization of strlen).
+;; Only the search for the NUL character is expanded inline.
+;; Operand 0 is the resulting offset
+;; Operand 1 is the string
+;; Operand 2 is the search character
+;; Operand 3 is the alignment
+
+(define_expand "strlen<mode>"
+  [(set (match_operand:X 0 "register_operand")
+        (unspec:X [(match_operand:BLK 1 "general_operand")
+                   (match_operand:SI 2 "const_int_operand")
+                   (match_operand:SI 3 "const_int_operand")]
+                  UNSPEC_STRLEN))]
+  ""
+{
+  /* No inline expansion when optimizing for size or when searching
+     for anything other than NUL.  */
+  if (optimize_insn_for_size_p () || operands[2] != const0_rtx)
+    FAIL;
+
+  if (!riscv_expand_strlen (operands))
+    FAIL;
+
+  DONE;
+})
+
(include "bitmanip.md")
(include "sync.md")
(include "peephole.md")
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
new file mode 100644
index 00000000000..39da70a5021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-unaligned.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+/* strlen on a pointer without any known alignment.  The dg-final
+   directive of this test expects that no orc.b instruction is
+   emitted for this function.  */
+size_t
+my_str_len (const char *s)
+{
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-not "orc.b\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen.c b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
new file mode 100644
index 00000000000..d01b7fc552d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-Os" } } */
+
+typedef long unsigned int size_t;
+
+/* strlen on a pointer that is declared to be 4096-byte aligned.
+   The dg-final directives of this test expect an orc.b instruction
+   and no jalr/call/jr/tail in the generated assembly.  */
+size_t
+my_str_len (const char *s)
+{
+ s = __builtin_assume_aligned (s, 4096);
+ return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler "orc.b\t" } } */
+/* { dg-final { scan-assembler-not "jalr" } } */
+/* { dg-final { scan-assembler-not "call" } } */
+/* { dg-final { scan-assembler-not "jr" } } */
+/* { dg-final { scan-assembler-not "tail" } } */
--
2.38.1
next prev parent reply other threads:[~2022-11-13 23:05 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-13 23:05 [PATCH 0/7] riscv: Improve builtins expansion Christoph Muellner
2022-11-13 23:05 ` [PATCH 1/7] riscv: bitmanip: add orc.b as an unspec Christoph Muellner
2022-11-14 16:51 ` Jeff Law
2022-11-14 17:53 ` Jeff Law
2022-11-14 19:05 ` Philipp Tomsich
2022-11-13 23:05 ` [PATCH 2/7] riscv: bitmanip/zbb: Add prefix/postfix and enable visiblity Christoph Muellner
2022-11-14 16:55 ` Jeff Law
2022-11-13 23:05 ` [PATCH 3/7] riscv: Enable overlap-by-pieces via tune param Christoph Muellner
2022-11-14 2:48 ` Vineet Gupta
2022-11-14 7:59 ` Philipp Tomsich
2022-11-14 8:29 ` Christoph Müllner
2022-11-14 19:04 ` Jeff Law
2022-11-14 19:07 ` Christoph Müllner
2022-11-13 23:05 ` [PATCH 4/7] riscv: Move riscv_block_move_loop to separate file Christoph Muellner
2022-11-14 16:56 ` Jeff Law
2022-11-13 23:05 ` [PATCH 5/7] riscv: Use by-pieces to do overlapping accesses in block_move_straight Christoph Muellner
2022-11-14 17:16 ` Jeff Law
2022-11-14 19:01 ` Christoph Müllner
2022-11-14 19:05 ` Jeff Law
2022-11-13 23:05 ` Christoph Muellner [this message]
2022-11-14 18:17 ` [PATCH 6/7] riscv: Add support for strlen inline expansion Jeff Law
2022-11-14 21:07 ` Christoph Müllner
2022-11-13 23:05 ` [PATCH 7/7] riscv: Add support for str(n)cmp " Christoph Muellner
2022-11-14 19:28 ` Jeff Law
2022-11-14 21:49 ` Christoph Müllner
2022-11-15 0:22 ` Jeff Law
2022-11-15 0:46 ` Kito Cheng
2022-11-15 0:53 ` Palmer Dabbelt
2022-11-15 1:55 ` Kito Cheng
2022-11-15 3:41 ` Jeff Law
2022-11-15 22:22 ` Christoph Müllner
2022-11-16 0:15 ` Philipp Tomsich
2022-11-21 3:24 ` Kito Cheng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221113230521.712693-7-christoph.muellner@vrull.eu \
--to=christoph.muellner@vrull.eu \
--cc=andrew@sifive.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jeffreyalaw@gmail.com \
--cc=jim.wilson.gcc@gmail.com \
--cc=kito.cheng@sifive.com \
--cc=palmer@dabbelt.com \
--cc=philipp.tomsich@vrull.eu \
--cc=vineetg@rivosinc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).