From: Stefan Schulze Frielinghaus <stefansf@linux.ibm.com>
To: krebbel@linux.ibm.com, gcc-patches@gcc.gnu.org
Cc: Stefan Schulze Frielinghaus <stefansf@linux.ibm.com>
Subject: [PATCH 1/3] s390: Refactor block operation cpymem
Date: Mon, 15 May 2023 09:17:36 +0200 [thread overview]
Message-ID: <20230515071738.563660-2-stefansf@linux.ibm.com> (raw)
In-Reply-To: <20230515071738.563660-1-stefansf@linux.ibm.com>
Do not perform a libc function call into memcpy in case the size is not
a compile-time constant but bounded and the upper bound is less than or
equal to 256 bytes.
gcc/ChangeLog:
* config/s390/s390-protos.h (s390_expand_cpymem): Change
function signature.
* config/s390/s390.cc (s390_expand_cpymem): For memcpys of less
than or equal to 256 bytes do not perform a libc call.
(s390_expand_insv): Adapt to new function signature of
s390_expand_cpymem.
* config/s390/s390.md (cpymem<mode>): Change expander into a
version which takes 8 operands.
---
gcc/config/s390/s390-protos.h | 2 +-
gcc/config/s390/s390.cc | 84 +++++++++++++++++++++++++++--------
gcc/config/s390/s390.md | 10 +++--
3 files changed, 74 insertions(+), 22 deletions(-)
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 67fe09e732d..2c7495ca247 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -107,7 +107,7 @@ extern void s390_reload_symref_address (rtx , rtx , rtx , bool);
extern void s390_expand_plus_operand (rtx, rtx, rtx);
extern void emit_symbolic_move (rtx *);
extern void s390_load_address (rtx, rtx);
-extern bool s390_expand_cpymem (rtx, rtx, rtx);
+extern bool s390_expand_cpymem (rtx, rtx, rtx, rtx, rtx);
extern void s390_expand_setmem (rtx, rtx, rtx);
extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
extern void s390_expand_vec_strlen (rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 505de995da8..95ea5e8d009 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -5650,27 +5650,27 @@ legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
return NULL_RTX;
}
-/* Emit code to move LEN bytes from DST to SRC. */
+/* Emit code to move LEN bytes from SRC to DST. */
bool
-s390_expand_cpymem (rtx dst, rtx src, rtx len)
+s390_expand_cpymem (rtx dst, rtx src, rtx len, rtx min_len_rtx, rtx max_len_rtx)
{
- /* When tuning for z10 or higher we rely on the Glibc functions to
- do the right thing. Only for constant lengths below 64k we will
- generate inline code. */
- if (s390_tune >= PROCESSOR_2097_Z10
- && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
- return false;
+ /* Exit early in case nothing has to be done. */
+ if (CONST_INT_P (len) && UINTVAL (len) == 0)
+ return true;
+
+ unsigned HOST_WIDE_INT min_len = UINTVAL (min_len_rtx);
+ unsigned HOST_WIDE_INT max_len
+ = max_len_rtx ? UINTVAL (max_len_rtx) : HOST_WIDE_INT_M1U;
/* Expand memcpy for constant length operands without a loop if it
is shorter that way.
With a constant length argument a
memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
- if (GET_CODE (len) == CONST_INT
- && INTVAL (len) >= 0
- && INTVAL (len) <= 256 * 6
- && (!TARGET_MVCLE || INTVAL (len) <= 256))
+ if (CONST_INT_P (len)
+ && UINTVAL (len) <= 6 * 256
+ && (!TARGET_MVCLE || UINTVAL (len) <= 256))
{
HOST_WIDE_INT o, l;
@@ -5681,14 +5681,57 @@ s390_expand_cpymem (rtx dst, rtx src, rtx len)
emit_insn (gen_cpymem_short (newdst, newsrc,
GEN_INT (l > 256 ? 255 : l - 1)));
}
+
+ return true;
}
- else if (TARGET_MVCLE)
+ else if (TARGET_MVCLE
+ && (s390_tune < PROCESSOR_2097_Z10
+ || (CONST_INT_P (len) && UINTVAL (len) <= (1 << 16))))
{
emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
+ return true;
}
- else
+ /* Non-constant length and no loop required. */
+ else if (!CONST_INT_P (len) && max_len <= 256)
+ {
+ rtx_code_label *end_label;
+
+ if (min_len == 0)
+ {
+ end_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (len, const0_rtx, EQ, NULL_RTX,
+ GET_MODE (len), 1, end_label,
+ profile_probability::very_unlikely ());
+ }
+
+ rtx lenm1 = expand_binop (GET_MODE (len), add_optab, len, constm1_rtx,
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ /* Prefer a vectorized implementation over one which makes use of an
+ execute instruction since it is faster (although it increases register
+ pressure). */
+ if (max_len <= 16 && TARGET_VX)
+ {
+ rtx tmp = gen_reg_rtx (V16QImode);
+ lenm1 = convert_to_mode (SImode, lenm1, 1);
+ emit_insn (gen_vllv16qi (tmp, lenm1, src));
+ emit_insn (gen_vstlv16qi (tmp, lenm1, dst));
+ }
+ else if (TARGET_Z15)
+ emit_insn (gen_mvcrl (dst, src, convert_to_mode (SImode, lenm1, 1)));
+ else
+ emit_insn (
+ gen_cpymem_short (dst, src, convert_to_mode (Pmode, lenm1, 1)));
+
+ if (min_len == 0)
+ emit_label (end_label);
+
+ return true;
+ }
+
+ else if (s390_tune < PROCESSOR_2097_Z10 || (CONST_INT_P (len) && UINTVAL (len) <= (1 << 16)))
{
rtx dst_addr, src_addr, count, blocks, temp;
rtx_code_label *loop_start_label = gen_label_rtx ();
@@ -5706,8 +5749,9 @@ s390_expand_cpymem (rtx dst, rtx src, rtx len)
blocks = gen_reg_rtx (mode);
convert_move (count, len, 1);
- emit_cmp_and_jump_insns (count, const0_rtx,
- EQ, NULL_RTX, mode, 1, end_label);
+ if (min_len == 0)
+ emit_cmp_and_jump_insns (count, const0_rtx, EQ, NULL_RTX, mode, 1,
+ end_label);
emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
@@ -5767,8 +5811,11 @@ s390_expand_cpymem (rtx dst, rtx src, rtx len)
emit_insn (gen_cpymem_short (dst, src,
convert_to_mode (Pmode, count, 1)));
emit_label (end_label);
+
+ return true;
}
- return true;
+
+ return false;
}
/* Emit code to set LEN bytes at DST to VAL.
@@ -6599,7 +6646,8 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
dest = adjust_address (dest, BLKmode, 0);
set_mem_size (dest, size);
- s390_expand_cpymem (dest, src_mem, GEN_INT (size));
+ rtx size_rtx = GEN_INT (size);
+ s390_expand_cpymem (dest, src_mem, size_rtx, size_rtx, size_rtx);
return true;
}
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 00d39608e1d..d9ce287ab85 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -3341,11 +3341,15 @@
(define_expand "cpymem<mode>"
[(set (match_operand:BLK 0 "memory_operand" "") ; destination
(match_operand:BLK 1 "memory_operand" "")) ; source
- (use (match_operand:GPR 2 "general_operand" "")) ; count
- (match_operand 3 "" "")]
+ (use (match_operand:GPR 2 "general_operand" "")) ; size
+ (match_operand 3 "") ; align
+ (match_operand 4 "") ; expected align
+ (match_operand 5 "") ; expected size
+ (match_operand 6 "") ; minimal size
+ (match_operand 7 "")] ; maximal size
""
{
- if (s390_expand_cpymem (operands[0], operands[1], operands[2]))
+ if (s390_expand_cpymem (operands[0], operands[1], operands[2], operands[6], operands[7]))
DONE;
else
FAIL;
--
2.39.2
next prev parent reply other threads:[~2023-05-15 7:18 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-15 7:17 [PATCH 0/3] Refactor memory block operations Stefan Schulze Frielinghaus
2023-05-15 7:17 ` Stefan Schulze Frielinghaus [this message]
2023-05-15 7:17 ` [PATCH 2/3] s390: Add block operation movmem Stefan Schulze Frielinghaus
2023-05-15 7:17 ` [PATCH 3/3] s390: Refactor block operation setmem Stefan Schulze Frielinghaus
2023-05-15 20:18 ` [PATCH 0/3] Refactor memory block operations Andreas Krebbel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230515071738.563660-2-stefansf@linux.ibm.com \
--to=stefansf@linux.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=krebbel@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).