From: Wilco Dijkstra
To: glibc-cvs@sourceware.org
Subject: [glibc/release/2.34/master] AArch64: Cleanup ifuncs
X-Act-Checkin: glibc
X-Git-Author: Wilco Dijkstra
X-Git-Refname: refs/heads/release/2.34/master
X-Git-Oldrev: 3b79e57c1cba718af6b76d8e76749d48006acf76
X-Git-Newrev: bfca39cce7904fbe07541dbe32e3e24f21aac94d
Message-Id: <20240410161021.E37CD386F45F@sourceware.org>
Date: Wed, 10 Apr 2024 16:10:21 +0000 (GMT)

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=bfca39cce7904fbe07541dbe32e3e24f21aac94d

commit bfca39cce7904fbe07541dbe32e3e24f21aac94d
Author: Wilco Dijkstra
Date:   Tue Oct 24 13:51:07 2023 +0100

    AArch64: Cleanup ifuncs

    Cleanup ifuncs.  Remove uses of libc_hidden_builtin_def, use ENTRY rather
    than ENTRY_ALIGN, remove unnecessary defines and conditional compilation.
    Rename strlen_mte to strlen_generic.  Remove rtld-memset.

    Reviewed-by: Szabolcs Nagy
    (cherry picked from commit 9fd3409842b3e2d31cff5dbd6f96066c430f0aa2)

Diff:
---
 sysdeps/aarch64/memset.S                           |  2 +-
 sysdeps/aarch64/multiarch/Makefile                 |  2 +-
 sysdeps/aarch64/multiarch/ifunc-impl-list.c        |  2 +-
 sysdeps/aarch64/multiarch/memchr_nosimd.S          |  9 ++-----
 sysdeps/aarch64/multiarch/memcpy_a64fx.S           | 14 ++++-------
 sysdeps/aarch64/multiarch/memcpy_falkor.S          |  6 ++---
 sysdeps/aarch64/multiarch/memcpy_sve.S             |  2 --
 sysdeps/aarch64/multiarch/memcpy_thunderx.S        | 27 ++++-----------------
 sysdeps/aarch64/multiarch/memcpy_thunderx2.S       | 28 ++++------------------
 sysdeps/aarch64/multiarch/memset_a64fx.S           |  8 ++-----
 sysdeps/aarch64/multiarch/memset_base64.S          |  3 +--
 sysdeps/aarch64/multiarch/memset_emag.S            |  8 +++----
 sysdeps/aarch64/multiarch/memset_generic.S         |  8 ++++++-
 sysdeps/aarch64/multiarch/memset_kunpeng.S         |  9 ++-----
 sysdeps/aarch64/multiarch/rtld-memset.S            | 25 -------------------
 sysdeps/aarch64/multiarch/strlen.c                 |  4 ++--
 sysdeps/aarch64/multiarch/strlen_asimd.S           |  1 -
 .../multiarch/{strlen_mte.S => strlen_generic.S}   |  8 +++----
 18 files changed, 41 insertions(+), 125 deletions(-)
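A primer before the per-file hunks: each memcpy/memset/strlen variant touched below is selected once at load time through an ifunc resolver (see the strlen.c hunk near the end of the patch). Here is a minimal standalone sketch of that dispatch mechanism using the GNU ifunc attribute; the demo_* names and the stubbed MTE probe are hypothetical stand-ins, not glibc's libc_ifunc machinery (the real check keys off the MTE hardware capability), and it needs GCC or Clang on an ELF target.

#include <stdio.h>
#include <string.h>

/* Two stand-in implementations, playing the roles of __strlen_generic
   and __strlen_asimd from the patch.  */
static size_t demo_strlen_generic (const char *s) { return strlen (s); }
static size_t demo_strlen_asimd (const char *s) { return strlen (s); }

/* Hypothetical stub; glibc really probes the MTE hardware capability.  */
static int demo_mte_enabled (void) { return 0; }

/* The resolver runs once, during relocation, and its result is patched
   into the GOT entry for demo_strlen.  */
static size_t (*demo_strlen_resolver (void)) (const char *)
{
  return demo_mte_enabled () ? demo_strlen_generic : demo_strlen_asimd;
}

/* All calls to demo_strlen bind to whichever implementation the
   resolver picked.  */
size_t demo_strlen (const char *s)
  __attribute__ ((ifunc ("demo_strlen_resolver")));

int
main (void)
{
  printf ("%zu\n", demo_strlen ("ifunc"));
  return 0;
}

glibc wraps this pattern in its libc_ifunc macro, so each port only supplies the selection expression, here (mte ? __strlen_generic : __strlen_asimd).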
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 9067ea23f9..d86390531d 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -29,7 +29,7 @@
  *
  */
 
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)
 
         PTR_ARG (0)
         SIZE_ARG (2)
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index e6099548b9..a1a4de3cd9 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -17,6 +17,6 @@ sysdep_routines += \
   memset_kunpeng \
   memset_mops \
   strlen_asimd \
-  strlen_mte \
+  strlen_generic \
 # sysdep_routines
 endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index 919d578c3d..1d14548bec 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -75,7 +75,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   IFUNC_IMPL (i, name, strlen,
               IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd)
-              IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_mte))
+              IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic))
 
   return i;
 }
diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S
index 0080f53931..22628f9adc 100644
--- a/sysdeps/aarch64/multiarch/memchr_nosimd.S
+++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S
@@ -26,10 +26,6 @@
  * Use base integer registers.
  */
 
-#ifndef MEMCHR
-# define MEMCHR __memchr_nosimd
-#endif
-
 /* Arguments and results.  */
 #define srcin           x0
 #define chrin           x1
@@ -62,7 +58,7 @@
 
 #define REP8_7f         0x7f7f7f7f7f7f7f7f
 
-ENTRY_ALIGN (MEMCHR, 6)
+ENTRY (__memchr_nosimd)
 
         PTR_ARG (0)
         SIZE_ARG (2)
@@ -219,5 +215,4 @@ L(none_chr):
         mov     result, 0
         ret
 
-END (MEMCHR)
-libc_hidden_builtin_def (MEMCHR)
+END (__memchr_nosimd)
diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
index 65528405bb..e7a62cafbe 100644
--- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
@@ -41,9 +41,6 @@
 #define cl_remainder    x10     // CACHE_LINE_SIZE remainder
 
 #if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMCPY __memcpy_a64fx
-# define MEMMOVE __memmove_a64fx
 
         .arch armv8.2-a+sve
 
@@ -176,7 +173,7 @@
         ret
 .endm
 
-ENTRY (MEMCPY)
+ENTRY (__memcpy_a64fx)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -334,11 +331,10 @@ L(L2_dc_zva):
         add     dest_ptr, dest_ptr, CACHE_LINE_SIZE * 2
         b       L(unroll8)
 
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
+END (__memcpy_a64fx)
 
 
-ENTRY (MEMMOVE)
+ENTRY_ALIGN (__memmove_a64fx, 4)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -400,7 +396,5 @@ L(bwd_last):
         mov     src_ptr, src
         b       L(last)
 
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-# endif /* IS_IN (libc) */
+END (__memmove_a64fx)
 #endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S
index 1b40971324..5e177fedc4 100644
--- a/sysdeps/aarch64/multiarch/memcpy_falkor.S
+++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S
@@ -71,7 +71,7 @@
    The non-temporal stores help optimize cache utilization.  */
 
 #if IS_IN (libc)
-ENTRY_ALIGN (__memcpy_falkor, 6)
+ENTRY (__memcpy_falkor)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -198,7 +198,6 @@ L(loop64):
         ret
 
 END (__memcpy_falkor)
-libc_hidden_builtin_def (__memcpy_falkor)
 
 
 /* RATIONALE:
@@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor)
 
    For small and medium cases memcpy is used.  */
 
-ENTRY_ALIGN (__memmove_falkor, 6)
+ENTRY (__memmove_falkor)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -311,5 +310,4 @@ L(move_long):
 3:      ret
 
 END (__memmove_falkor)
-libc_hidden_builtin_def (__memmove_falkor)
 #endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S
index 6bc8390fe8..71d2f84f63 100644
--- a/sysdeps/aarch64/multiarch/memcpy_sve.S
+++ b/sysdeps/aarch64/multiarch/memcpy_sve.S
@@ -141,7 +141,6 @@ L(copy64_from_end):
         ret
 
 END (__memcpy_sve)
-libc_hidden_builtin_def (__memcpy_sve)
 
 
 ENTRY (__memmove_sve)
@@ -208,5 +207,4 @@ L(return):
         ret
 
 END (__memmove_sve)
-libc_hidden_builtin_def (__memmove_sve)
 #endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index 3ab1e04583..31aed7023f 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -65,21 +65,7 @@
    Overlapping large forward memmoves use a loop that copies backwards.
 */
 
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#if IS_IN (libc)
-
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-
-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6)
         b.lo    L(move_long)
 
         /* Common case falls through into memcpy.  */
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-ENTRY (MEMCPY)
+END (__memmove_thunderx)
+
+ENTRY (__memcpy_thunderx)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -316,7 +302,4 @@ L(move_long):
         stp     C_l, C_h, [dstin]
 3:      ret
 
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
-
-#endif
+END (__memcpy_thunderx)
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
index f7456ca029..55a024576e 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
@@ -75,27 +75,12 @@
 #define I_v     v16
 #define J_v     v17
 
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#if IS_IN (libc)
-
-#undef MEMCPY
-#define MEMCPY __memcpy_thunderx2
-#undef MEMMOVE
-#define MEMMOVE __memmove_thunderx2
-
-
 /* Overlapping large forward memmoves use a loop that copies backwards.
    Otherwise memcpy is used. Small moves branch to memcopy16 directly.
    The longer memcpy cases fall through to the memcpy head.
 */
 
-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx2)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6)
         ccmp    tmp1, count, 2, hi
         b.lo    L(move_long)
 
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
+END (__memmove_thunderx2)
 
 
 /* Copies are split into 3 main cases: small copies of up to 16 bytes,
@@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE)
 
 #define MEMCPY_PREFETCH_LDR 640
 
-        .p2align 4
-ENTRY (MEMCPY)
+ENTRY (__memcpy_thunderx2)
 
         PTR_ARG (0)
         PTR_ARG (1)
@@ -449,7 +432,7 @@ L(move_long):
 
 3:      ret
 
-END (MEMCPY)
+END (__memcpy_thunderx2)
 
         .section .rodata
         .p2align 4
@@ -472,6 +455,3 @@ L(ext_table):
         .word   L(ext_size_13) -.
         .word   L(ext_size_14) -.
         .word   L(ext_size_15) -.
-
-libc_hidden_builtin_def (MEMCPY)
-#endif
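A side note on L(ext_table) above: each entry stores a label offset relative to the entry's own address (.word L(ext_size_N) -.), so the table is position-independent and needs no load-time relocation. Below is a rough C analogue of relative-offset dispatch using GCC's labels-as-values extension; demo_dispatch and its labels are hypothetical names, and these offsets are taken from a single base label rather than from each entry.

#include <stdio.h>

static const char *
demo_dispatch (unsigned int n)
{
  /* Offsets relative to the base label, computed at build time; this is
     the relocatable form shown in the GCC labels-as-values docs.  */
  static const int offsets[] =
    { &&size0 - &&size0, &&size1 - &&size0, &&size2 - &&size0 };
  if (n > 2)
    return "out of range";
  goto *(&&size0 + offsets[n]);
size0: return "zero bytes";
size1: return "one byte";
size2: return "two bytes";
}

int
main (void)
{
  printf ("%s\n", demo_dispatch (2));
  return 0;
}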
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
index ce54e5418b..26c92c58a8 100644
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -37,8 +37,6 @@
 #define cl_remainder    x11     // CACHE_LINE_SIZE remainder
 
 #if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMSET __memset_a64fx
 
         .arch armv8.2-a+sve
 
@@ -109,7 +107,7 @@
         ret
 .endm
 
-ENTRY (MEMSET)
+ENTRY (__memset_a64fx)
 
         PTR_ARG (0)
         SIZE_ARG (2)
@@ -261,8 +259,6 @@ L(L2_dc_zva):
         cbnz    rest, L(unroll8)
         ret
 
-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
+END (__memset_a64fx)
 
-#endif /* IS_IN (libc) */
 #endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S
index 2b5bd431ff..a98c73babb 100644
--- a/sysdeps/aarch64/multiarch/memset_base64.S
+++ b/sysdeps/aarch64/multiarch/memset_base64.S
@@ -34,7 +34,7 @@
  *
  */
 
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)
 
         PTR_ARG (0)
         SIZE_ARG (2)
@@ -183,4 +183,3 @@ L(zva_64):
 #endif
 
 END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
index 891c46e60e..a7f5f35284 100644
--- a/sysdeps/aarch64/multiarch/memset_emag.S
+++ b/sysdeps/aarch64/multiarch/memset_emag.S
@@ -19,8 +19,7 @@
 
 #include <sysdep.h>
 
-#if IS_IN (libc)
-# define MEMSET __memset_emag
+#define MEMSET __memset_emag
 
 /*
 * Using DC ZVA to zero memory does not produce better performance if
@@ -30,7 +29,6 @@
 * workloads.
 */
 
-# define DC_ZVA_THRESHOLD 0
+#define DC_ZVA_THRESHOLD 0
 
-# include "./memset_base64.S"
-#endif
+#include "./memset_base64.S"
diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S
index a4e1dd711c..a6e895f5f2 100644
--- a/sysdeps/aarch64/multiarch/memset_generic.S
+++ b/sysdeps/aarch64/multiarch/memset_generic.S
@@ -21,9 +21,15 @@
 
 #if IS_IN (libc)
 # define MEMSET __memset_generic
+
+/* Do not hide the generic version of memset, we use it internally.  */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name)
+
 /* Add a hidden definition for use within libc.so.  */
 # ifdef SHARED
         .globl __GI_memset; __GI_memset = __memset_generic
 # endif
-# include <../memset.S>
 #endif
+
+#include <../memset.S>
diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S
index 9206afbcee..3a9276cef0 100644
--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S
+++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S
@@ -20,16 +20,13 @@
 #include <sysdep.h>
 #include <sysdeps/aarch64/memset-reg.h>
 
-#if IS_IN (libc)
-# define MEMSET __memset_kunpeng
-
 /* Assumptions:
  *
  * ARMv8-a, AArch64, unaligned accesses
  *
 */
 
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (__memset_kunpeng)
 
         PTR_ARG (0)
         SIZE_ARG (2)
@@ -108,6 +105,4 @@ L(set_long):
         stp     q0, q0, [dstend, -32]
         ret
 
-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
-#endif
+END (__memset_kunpeng)
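The memset_generic.S hunk above relies on a suppression idiom: libc_hidden_builtin_def is redefined to expand to nothing before the shared memset.S is included, so the variant build emits no hidden builtin definition of its own. Here is a compact sketch of that idiom, with a hypothetical DEMO_HIDDEN_DEF macro standing in for the glibc one.

#include <stdio.h>

/* Stand-in for libc_hidden_builtin_def: by default it would emit an
   extra internal reference for NAME.  */
#define DEMO_HIDDEN_DEF(name) \
  __typeof__ (name) *name##_internal_ref = name;

int demo_memset_variant (void) { return 42; }

/* The suppression step: redefine the macro to nothing before shared
   code expands it, so no extra symbol is emitted for this variant.  */
#undef DEMO_HIDDEN_DEF
#define DEMO_HIDDEN_DEF(name)

DEMO_HIDDEN_DEF (demo_memset_variant)  /* expands to nothing */

int
main (void)
{
  printf ("%d\n", demo_memset_variant ());
  return 0;
}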
diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
deleted file mode 100644
index f195c22256..0000000000
--- a/sysdeps/aarch64/multiarch/rtld-memset.S
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Memset for aarch64, for the dynamic linker.
-   Copyright (C) 2017-2021 Free Software Foundation, Inc.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if IS_IN (rtld)
-# define MEMSET memset
-# include <sysdeps/aarch64/memset.S>
-#endif
diff --git a/sysdeps/aarch64/multiarch/strlen.c b/sysdeps/aarch64/multiarch/strlen.c
index 8f38de69b5..66375be58e 100644
--- a/sysdeps/aarch64/multiarch/strlen.c
+++ b/sysdeps/aarch64/multiarch/strlen.c
@@ -28,10 +28,10 @@
 
 extern __typeof (__redirect_strlen) __strlen;
 
-extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden;
+extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden;
 extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden;
 
-libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd));
+libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd));
 
 # undef strlen
 strong_alias (__strlen, strlen);
diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
index 5f2ce07e61..ca246c0bad 100644
--- a/sysdeps/aarch64/multiarch/strlen_asimd.S
+++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
@@ -203,4 +203,3 @@ L(page_cross):
         ret
 
 END (__strlen_asimd)
-libc_hidden_builtin_def (__strlen_asimd)
diff --git a/sysdeps/aarch64/multiarch/strlen_mte.S b/sysdeps/aarch64/multiarch/strlen_generic.S
similarity index 85%
rename from sysdeps/aarch64/multiarch/strlen_mte.S
rename to sysdeps/aarch64/multiarch/strlen_generic.S
index efc6e4df66..350435da0b 100644
--- a/sysdeps/aarch64/multiarch/strlen_mte.S
+++ b/sysdeps/aarch64/multiarch/strlen_generic.S
@@ -17,14 +17,14 @@
    <https://www.gnu.org/licenses/>.  */
 
 /* The actual strlen code is in ../strlen.S.  If we are building libc this file
-   defines __strlen_mte.  Otherwise the include of ../strlen.S will define
-   the normal __strlen entry points.  */
+   defines __strlen_generic.  Otherwise the include of ../strlen.S will define
+   the normal __strlen entry points.  */
 
 #include <sysdep.h>
 
 #if IS_IN (libc)
 
-# define STRLEN __strlen_mte
+# define STRLEN __strlen_generic
 
 /* Do not hide the generic version of strlen, we use it internally.  */
 # undef libc_hidden_builtin_def
@@ -32,7 +32,7 @@
 
 # ifdef SHARED
 /* It doesn't make sense to send libc-internal strlen calls through a PLT.  */
-  .globl __GI_strlen; __GI_strlen = __strlen_mte
+  .globl __GI_strlen; __GI_strlen = __strlen_generic
 # endif
 #endif
 
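The .globl __GI_strlen; __GI_strlen = __strlen_generic line above is the assembly spelling of a hidden alias: libc-internal callers bind to __GI_strlen directly at static link time, so those calls never go through the PLT. Below is a rough C analogue using GCC's alias and visibility attributes on an ELF target; the demo_* names are hypothetical.

#include <stdio.h>
#include <string.h>

/* The exported implementation, playing the role of __strlen_generic.  */
size_t
demo_strlen_generic (const char *s)
{
  return strlen (s);
}

/* A hidden alias for the same code, playing the role of __GI_strlen:
   it is invisible outside the object, and calls to it resolve directly
   rather than through the PLT.  */
extern __typeof__ (demo_strlen_generic) demo_GI_strlen
  __attribute__ ((alias ("demo_strlen_generic"), visibility ("hidden")));

int
main (void)
{
  printf ("%zu\n", demo_GI_strlen ("plt"));
  return 0;
}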