public inbox for glibc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug string/28214] New: [suggestion] using stnp instead of stp in memset of aarch64
@ 2021-08-10 2:10 wangxuszcn at foxmail dot com
2022-04-21 14:41 ` [Bug string/28214] " wdijkstr at arm dot com
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: wangxuszcn at foxmail dot com @ 2021-08-10 2:10 UTC (permalink / raw)
To: glibc-bugs
https://sourceware.org/bugzilla/show_bug.cgi?id=28214
Bug ID: 28214
Summary: [suggestion] using stnp instead of stp in memset of
aarch64
Product: glibc
Version: unspecified
Status: UNCONFIRMED
Severity: enhancement
Priority: P2
Component: string
Assignee: unassigned at sourceware dot org
Reporter: wangxuszcn at foxmail dot com
Target Milestone: ---
Created attachment 13608
--> https://sourceware.org/bugzilla/attachment.cgi?id=13608&action=edit
memset_change_stp_to_stnp in aarch64
Generally, after memset is called to perform initialization, the destination
memory is not accessed again immediately. I therefore suggest using stnp
instead of stp in the aarch64 memset.
Background Knowledge:
The Armv8-A architecture provides load/store non-temporal pair instructions
(LDNP/STNP), which hint to the memory system that an access is non-temporal
or streaming and is unlikely to be repeated in the near future.
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 9067ea2..83bae2f 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -74,8 +74,8 @@ L(set_medium):
32 bytes from the end. */
L(set96):
str q0, [dstin, 16]
- stp q0, q0, [dstin, 32]
- stp q0, q0, [dstend, -32]
+ stnp q0, q0, [dstin, 32]
+ stnp q0, q0, [dstend, -32]
ret
.p2align 3
@@ -91,13 +91,13 @@ L(no_zva):
sub count, dstend, dst /* Count is 16 too large. */
sub dst, dst, 16 /* Dst is biased by -32. */
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
-1: stp q0, q0, [dst, 32]
- stp q0, q0, [dst, 64]!
+1: stnp q0, q0, [dst, 32]
+ stnp q0, q0, [dst, 64]!
L(tail64):
subs count, count, 64
b.hi 1b
-2: stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
+2: stnp q0, q0, [dstend, -64]
+ stnp q0, q0, [dstend, -32]
ret
L(try_zva):
@@ -116,10 +116,10 @@ L(try_zva):
*/
L(zva_64):
str q0, [dst, 16]
- stp q0, q0, [dst, 32]
+ stnp q0, q0, [dst, 32]
bic dst, dst, 63
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
+ stnp q0, q0, [dst, 64]
+ stnp q0, q0, [dst, 96]
sub count, dstend, dst /* Count is now 128 too large. */
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
add dst, dst, 128
@@ -128,10 +128,10 @@ L(zva_64):
add dst, dst, 64
subs count, count, 64
b.hi 1b
- stp q0, q0, [dst, 0]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
+ stnp q0, q0, [dst, 0]
+ stnp q0, q0, [dst, 32]
+ stnp q0, q0, [dstend, -64]
+ stnp q0, q0, [dstend, -32]
ret
.p2align 3
@@ -140,9 +140,9 @@ L(zva_128):
b.ne L(zva_other)
str q0, [dst, 16]
- stp q0, q0, [dst, 32]
- stp q0, q0, [dst, 64]
- stp q0, q0, [dst, 96]
+ stnp q0, q0, [dst, 32]
+ stnp q0, q0, [dst, 64]
+ stnp q0, q0, [dst, 96]
bic dst, dst, 127
sub count, dstend, dst /* Count is now 128 too large. */
sub count, count, 128+128 /* Adjust count and bias for loop. */
@@ -151,10 +151,10 @@ L(zva_128):
add dst, dst, 128
subs count, count, 128
b.hi 1b
- stp q0, q0, [dstend, -128]
- stp q0, q0, [dstend, -96]
- stp q0, q0, [dstend, -64]
- stp q0, q0, [dstend, -32]
+ stnp q0, q0, [dstend, -128]
+ stnp q0, q0, [dstend, -96]
+ stnp q0, q0, [dstend, -64]
+ stnp q0, q0, [dstend, -32]
ret
L(zva_other):
@@ -170,8 +170,8 @@ L(zva_other):
subs count, tmp1, dst /* Actual alignment bytes to write. */
bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
beq 2f
-1: stp q0, q0, [dst], 64
- stp q0, q0, [dst, -32]
+1: stnp q0, q0, [dst], 64
+ stnp q0, q0, [dst, -32]
subs count, count, 64
b.hi 1b
2: mov dst, tmp1
--
You are receiving this mail because:
You are on the CC list for the bug.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-02-01 20:12 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-10 2:10 [Bug string/28214] New: [suggestion] using stnp instead of stp in memset of aarch64 wangxuszcn at foxmail dot com
2022-04-21 14:41 ` [Bug string/28214] " wdijkstr at arm dot com
2022-04-21 14:44 ` wangxuszcn at foxmail dot com
2024-02-01 20:12 ` pinskia at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).