public inbox for libc-stable@sourceware.org
 help / color / mirror / Atom feed
* [2.32 COMMITTED] AArch64: Backport memcpy improvements
@ 2020-10-14 14:56 Wilco Dijkstra
  0 siblings, 0 replies; only message in thread
From: Wilco Dijkstra @ 2020-10-14 14:56 UTC (permalink / raw)
  To: libc-stable; +Cc: nd

commit 81c5484d93a7768a8acc4cfdc228d925d60cd906
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Wed Oct 14 13:56:21 2020 +0100

    AArch64: Use __memcpy_simd on Neoverse N2/V1

    Add CPU detection of Neoverse N2 and Neoverse V1, and select __memcpy_simd and
    __memmove_simd as the memcpy and memmove ifunc implementations on these CPUs.

    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
    (cherry picked from commit e11ed9d2b4558eeacff81557dc9557001af42a6b)

commit 0f8f0ed25c196cfb93edf461aefdad15314ae05c
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Fri Aug 28 17:51:40 2020 +0100

    AArch64: Improve backwards memmove performance

    On some microarchitectures, performance of the backwards memmove improves if
    the stores use STR with decreasing addresses.  So change the backwards copy
    loop in memcpy_advsimd.S to use 2x STR rather than STP, with the last STR
    using pre-index writeback to update dstend in place of the separate SUB.

    Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
    (cherry picked from commit bd394d131c10c9ec22c6424197b79410042eed99)

diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 7cf5f03..799d60c 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -41,7 +41,8 @@ libc_ifunc (__libc_memcpy,
                ? __memcpy_falkor
                : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
                  ? __memcpy_thunderx2
-                 : (IS_NEOVERSE_N1 (midr)
+                 : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
+                    || IS_NEOVERSE_V1 (midr)
                     ? __memcpy_simd
                     : __memcpy_generic)))));

diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
index d4ba747..48bb6d7 100644
--- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S
+++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
@@ -223,12 +223,13 @@ L(copy_long_backwards):
        b.ls    L(copy64_from_start)

 L(loop64_backwards):
-       stp     A_q, B_q, [dstend, -32]
+       str     B_q, [dstend, -16]
+       str     A_q, [dstend, -32]
        ldp     A_q, B_q, [srcend, -96]
-       stp     C_q, D_q, [dstend, -64]
+       str     D_q, [dstend, -48]
+       str     C_q, [dstend, -64]!
        ldp     C_q, D_q, [srcend, -128]
        sub     srcend, srcend, 64
-       sub     dstend, dstend, 64
        subs    count, count, 64
        b.hi    L(loop64_backwards)

diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index ad10aa8..46a4cb3 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -41,7 +41,8 @@ libc_ifunc (__libc_memmove,
                ? __memmove_falkor
                : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
                  ? __memmove_thunderx2
-                 : (IS_NEOVERSE_N1 (midr)
+                 : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
+                    || IS_NEOVERSE_V1 (midr)
                     ? __memmove_simd
                     : __memmove_generic)))));

diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index fc68845..00a4d0c 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -54,6 +54,10 @@
                         && MIDR_PARTNUM(midr) == 0x000)
 #define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A'                  \
                              && MIDR_PARTNUM(midr) == 0xd0c)
+#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A'                  \
+                             && MIDR_PARTNUM(midr) == 0xd49)
+#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A'                  \
+                             && MIDR_PARTNUM(midr) == 0xd40)

 #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P'                         \
                        && MIDR_PARTNUM(midr) == 0x000)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-10-14 14:56 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-14 14:56 [2.32 COMMITTED] AArch64: Backport memcpy improvements Wilco Dijkstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).