public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Wilco Dijkstra <wilco@sourceware.org>
To: glibc-cvs@sourceware.org
Subject: [glibc/release/2.35/master] AArch64: Optimize strcpy
Date: Wed, 10 Apr 2024 15:07:11 +0000 (GMT) [thread overview]
Message-ID: <20240410150711.A00C9386101D@sourceware.org> (raw)
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=6f2ca6aab9c60f8f74aba8bf8585fb4932863184
commit 6f2ca6aab9c60f8f74aba8bf8585fb4932863184
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Wed Jan 11 13:52:39 2023 +0000
AArch64: Optimize strcpy
Unroll the main loop. Large strings are around 20% faster on modern CPUs.
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
(cherry picked from commit 349e48c01e85bd96006860084e76d322e6ca02f1)
Diff:
---
sysdeps/aarch64/strcpy.S | 36 +++++++++++++++++++-----------------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 78d27b4aa6..6eeda12df6 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -30,7 +30,6 @@
* MTE compatible.
*/
-/* Arguments and results. */
#define dstin x0
#define srcin x1
#define result x0
@@ -76,14 +75,14 @@ ENTRY (STRCPY)
ld1 {vdata.16b}, [src]
cmeq vhas_nul.16b, vdata.16b, 0
lsl shift, srcin, 2
- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
+ shrn vend.8b, vhas_nul.8h, 4
fmov synd, dend
lsr synd, synd, shift
cbnz synd, L(tail)
ldr dataq, [src, 16]!
cmeq vhas_nul.16b, vdata.16b, 0
- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
+ shrn vend.8b, vhas_nul.8h, 4
fmov synd, dend
cbz synd, L(start_loop)
@@ -102,13 +101,10 @@ ENTRY (STRCPY)
IFSTPCPY (add result, dstin, len)
ret
- .p2align 4,,8
L(tail):
rbit synd, synd
clz len, synd
lsr len, len, 2
-
- .p2align 4
L(less16):
tbz len, 3, L(less8)
sub tmp, len, 7
@@ -141,31 +137,37 @@ L(zerobyte):
.p2align 4
L(start_loop):
- sub len, src, srcin
+ sub tmp, srcin, dstin
ldr dataq2, [srcin]
- add dst, dstin, len
+ sub dst, src, tmp
str dataq2, [dstin]
-
- .p2align 5
L(loop):
- str dataq, [dst], 16
- ldr dataq, [src, 16]!
+ str dataq, [dst], 32
+ ldr dataq, [src, 16]
+ cmeq vhas_nul.16b, vdata.16b, 0
+ umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
+ fmov synd, dend
+ cbnz synd, L(loopend)
+ str dataq, [dst, -16]
+ ldr dataq, [src, 32]!
cmeq vhas_nul.16b, vdata.16b, 0
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
fmov synd, dend
cbz synd, L(loop)
-
+ add dst, dst, 16
+L(loopend):
shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov synd, dend
+ sub dst, dst, 31
#ifndef __AARCH64EB__
rbit synd, synd
#endif
clz len, synd
lsr len, len, 2
- sub tmp, len, 15
- ldr dataq, [src, tmp]
- str dataq, [dst, tmp]
- IFSTPCPY (add result, dst, len)
+ add dst, dst, len
+ ldr dataq, [dst, tmp]
+ str dataq, [dst]
+ IFSTPCPY (add result, dst, 15)
ret
END (STRCPY)
reply other threads:[~2024-04-10 15:07 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240410150711.A00C9386101D@sourceware.org \
--to=wilco@sourceware.org \
--cc=glibc-cvs@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).