From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1895) id 32C9C386F442; Wed, 10 Apr 2024 16:10:32 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 32C9C386F442 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1712765432; bh=OMNTqWMcF6F+JwRtBVYD2ejQKBAa5A36Pfjg8buc+CI=; h=From:To:Subject:Date:From; b=HCHsyI3sZWXfpvzcNpZViYlXcUqz2XxQCu/RnVDyw53nIHNHnIcIOfp/kdsSDrvDM 6dp17O45CgnHGLcIEKUFu7JvDoSe5LYL1F6b/R7ioYy7zLbuzcMKbNBTFYq87sFrBf 1PDaN9Eg6O0Ed6jqUj4uSqozBVZKGZxQISTKU3xw= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Wilco Dijkstra To: glibc-cvs@sourceware.org Subject: [glibc/release/2.34/master] AArch64: Add memset_zva64 X-Act-Checkin: glibc X-Git-Author: Wilco Dijkstra X-Git-Refname: refs/heads/release/2.34/master X-Git-Oldrev: 7e999181c2b1e1ad671b82b41ca7ab36d6282507 X-Git-Newrev: ff17116c1e111c809f1c3b3d3097f039d7d157f6 Message-Id: <20240410161032.32C9C386F442@sourceware.org> Date: Wed, 10 Apr 2024 16:10:32 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=ff17116c1e111c809f1c3b3d3097f039d7d157f6 commit ff17116c1e111c809f1c3b3d3097f039d7d157f6 Author: Wilco Dijkstra Date: Thu Oct 26 17:07:21 2023 +0100 AArch64: Add memset_zva64 Add a specialized memset for the common ZVA size of 64 to avoid the overhead of reading the ZVA size. Since the code is identical to __memset_falkor, remove the latter. Reviewed-by: Adhemerval Zanella (cherry picked from commit 3d7090f14b13312320e425b27dcf0fe72de026fd) Diff: --- sysdeps/aarch64/memset.S | 10 +++--- sysdeps/aarch64/multiarch/Makefile | 2 +- sysdeps/aarch64/multiarch/ifunc-impl-list.c | 4 +-- sysdeps/aarch64/multiarch/memset.c | 9 +++-- sysdeps/aarch64/multiarch/memset_falkor.S | 54 ----------------------------- sysdeps/aarch64/multiarch/memset_zva64.S | 27 +++++++++++++++ 6 files changed, 38 insertions(+), 68 deletions(-) diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index d86390531d..84b0f7c03f 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -101,19 +101,19 @@ L(tail64): ret L(try_zva): -#ifdef ZVA_MACRO - zva_macro -#else +#ifndef ZVA64_ONLY .p2align 3 mrs tmp1, dczid_el0 tbnz tmp1w, 4, L(no_zva) and tmp1w, tmp1w, 15 cmp tmp1w, 4 /* ZVA size is 64 bytes. */ b.ne L(zva_128) - + nop +#endif /* Write the first and last 64 byte aligned block using stp rather than using DC ZVA. This is faster on some cores. */ + .p2align 4 L(zva_64): str q0, [dst, 16] stp q0, q0, [dst, 32] @@ -123,7 +123,6 @@ L(zva_64): sub count, dstend, dst /* Count is now 128 too large. */ sub count, count, 128+64+64 /* Adjust count and bias for loop. */ add dst, dst, 128 - nop 1: dc zva, dst add dst, dst, 64 subs count, count, 64 @@ -134,6 +133,7 @@ L(zva_64): stp q0, q0, [dstend, -32] ret +#ifndef ZVA64_ONLY .p2align 3 L(zva_128): cmp tmp1w, 5 /* ZVA size is 128 bytes. */ diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index a1a4de3cd9..171ca5e4cf 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -12,10 +12,10 @@ sysdep_routines += \ memmove_mops \ memset_a64fx \ memset_emag \ - memset_falkor \ memset_generic \ memset_kunpeng \ memset_mops \ + memset_zva64 \ strlen_asimd \ strlen_generic \ # sysdep_routines diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index 31f5a3d0b1..bc8d362ed5 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -59,9 +59,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) IFUNC_IMPL (i, name, memset, - /* Enable this on non-falkor processors too so that other cores - can do a comparative analysis with __memset_generic. */ - IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor) + IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag) IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) #if HAVE_AARCH64_SVE_ASM diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c index 9a69a99d53..918db12c65 100644 --- a/sysdeps/aarch64/multiarch/memset.c +++ b/sysdeps/aarch64/multiarch/memset.c @@ -28,7 +28,7 @@ extern __typeof (__redirect_memset) __libc_memset; -extern __typeof (__redirect_memset) __memset_falkor attribute_hidden; +extern __typeof (__redirect_memset) __memset_zva64 attribute_hidden; extern __typeof (__redirect_memset) __memset_emag attribute_hidden; extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden; extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; @@ -47,18 +47,17 @@ select_memset_ifunc (void) { if (IS_A64FX (midr) && zva_size == 256) return __memset_a64fx; - return __memset_generic; } if (IS_KUNPENG920 (midr)) return __memset_kunpeng; - if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64) - return __memset_falkor; - if (IS_EMAG (midr)) return __memset_emag; + if (zva_size == 64) + return __memset_zva64; + return __memset_generic; } diff --git a/sysdeps/aarch64/multiarch/memset_falkor.S b/sysdeps/aarch64/multiarch/memset_falkor.S deleted file mode 100644 index 70d7d8b07d..0000000000 --- a/sysdeps/aarch64/multiarch/memset_falkor.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Memset for falkor. - Copyright (C) 2017-2021 Free Software Foundation, Inc. - - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - . */ - -#include -#include - -/* Reading dczid_el0 is expensive on falkor so move it into the ifunc - resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to - use this function only when ZVA is enabled. */ - -#if IS_IN (libc) -.macro zva_macro - .p2align 4 - /* Write the first and last 64 byte aligned block using stp rather - than using DC ZVA. This is faster on some cores. */ - str q0, [dst, 16] - stp q0, q0, [dst, 32] - bic dst, dst, 63 - stp q0, q0, [dst, 64] - stp q0, q0, [dst, 96] - sub count, dstend, dst /* Count is now 128 too large. */ - sub count, count, 128+64+64 /* Adjust count and bias for loop. */ - add dst, dst, 128 -1: dc zva, dst - add dst, dst, 64 - subs count, count, 64 - b.hi 1b - stp q0, q0, [dst, 0] - stp q0, q0, [dst, 32] - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret -.endm - -# define ZVA_MACRO zva_macro -# define MEMSET __memset_falkor -# include -#endif diff --git a/sysdeps/aarch64/multiarch/memset_zva64.S b/sysdeps/aarch64/multiarch/memset_zva64.S new file mode 100644 index 0000000000..13f45fd3d8 --- /dev/null +++ b/sysdeps/aarch64/multiarch/memset_zva64.S @@ -0,0 +1,27 @@ +/* Optimized memset for zva size = 64. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +#define ZVA64_ONLY 1 +#define MEMSET __memset_zva64 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(X) + +#include "../memset.S"