From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1895) id DC9313858D1E; Wed, 1 Nov 2023 13:50:21 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DC9313858D1E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1698846621; bh=7HeaVyiUYkNr6lP0HXgczGpLzDFisWQAjrS8itjq+OE=; h=From:To:Subject:Date:From; b=n9JOdcoVNx6OTubYB15moeJjhsEpXTLOcByaQz/8N6wImKCq5ut0BBsUhb2gz1xn7 wzuh77JBI+P7fDaVBy1RAgvWJElgpT1x1hsCrtVn9GEzF6a4sUkn9wEmEr2cg/lXCu 0m9NcGiDpMo7NiYFasS6MN1YFN2MetRKtsBZrTgM= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Wilco Dijkstra To: glibc-cvs@sourceware.org Subject: [glibc] AArch64: Cleanup ifuncs X-Act-Checkin: glibc X-Git-Author: Wilco Dijkstra X-Git-Refname: refs/heads/master X-Git-Oldrev: 9db31d7456a68f7fc448b28dceced26db96f4d66 X-Git-Newrev: 9fd3409842b3e2d31cff5dbd6f96066c430f0aa2 Message-Id: <20231101135021.DC9313858D1E@sourceware.org> Date: Wed, 1 Nov 2023 13:50:21 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=9fd3409842b3e2d31cff5dbd6f96066c430f0aa2 commit 9fd3409842b3e2d31cff5dbd6f96066c430f0aa2 Author: Wilco Dijkstra Date: Tue Oct 24 13:51:07 2023 +0100 AArch64: Cleanup ifuncs Cleanup ifuncs. Remove uses of libc_hidden_builtin_def, use ENTRY rather than ENTRY_ALIGN, remove unnecessary defines and conditional compilation. Rename strlen_mte to strlen_generic. Remove rtld-memset. Reviewed-by: Szabolcs Nagy Diff: --- sysdeps/aarch64/memset.S | 2 +- sysdeps/aarch64/multiarch/Makefile | 2 +- sysdeps/aarch64/multiarch/ifunc-impl-list.c | 2 +- sysdeps/aarch64/multiarch/memchr_nosimd.S | 9 ++----- sysdeps/aarch64/multiarch/memcpy_a64fx.S | 14 ++++------- sysdeps/aarch64/multiarch/memcpy_falkor.S | 6 ++--- sysdeps/aarch64/multiarch/memcpy_sve.S | 2 -- sysdeps/aarch64/multiarch/memcpy_thunderx.S | 27 ++++----------------- sysdeps/aarch64/multiarch/memcpy_thunderx2.S | 28 ++++------------------ sysdeps/aarch64/multiarch/memset_a64fx.S | 8 ++----- sysdeps/aarch64/multiarch/memset_base64.S | 3 +-- sysdeps/aarch64/multiarch/memset_emag.S | 8 +++---- sysdeps/aarch64/multiarch/memset_generic.S | 8 ++++++- sysdeps/aarch64/multiarch/memset_kunpeng.S | 9 ++----- sysdeps/aarch64/multiarch/rtld-memset.S | 25 ------------------- sysdeps/aarch64/multiarch/strlen.c | 4 ++-- sysdeps/aarch64/multiarch/strlen_asimd.S | 1 - .../multiarch/{strlen_mte.S => strlen_generic.S} | 8 +++---- 18 files changed, 41 insertions(+), 125 deletions(-) diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S index 50e5da3e7a..bf3cf85c8a 100644 --- a/sysdeps/aarch64/memset.S +++ b/sysdeps/aarch64/memset.S @@ -29,7 +29,7 @@ * */ -ENTRY_ALIGN (MEMSET, 6) +ENTRY (MEMSET) PTR_ARG (0) SIZE_ARG (2) diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index e6099548b9..a1a4de3cd9 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -17,6 +17,6 @@ sysdep_routines += \ memset_kunpeng \ memset_mops \ strlen_asimd \ - strlen_mte \ + strlen_generic \ # sysdep_routines endif diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index da7f115377..836e8317a5 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -70,7 +70,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strlen, IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_mte)) + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic)) return 0; } diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S index 57e48375e9..7800751899 100644 --- a/sysdeps/aarch64/multiarch/memchr_nosimd.S +++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S @@ -26,10 +26,6 @@ * Use base integer registers. */ -#ifndef MEMCHR -# define MEMCHR __memchr_nosimd -#endif - /* Arguments and results. */ #define srcin x0 #define chrin x1 @@ -62,7 +58,7 @@ #define REP8_7f 0x7f7f7f7f7f7f7f7f -ENTRY_ALIGN (MEMCHR, 6) +ENTRY (__memchr_nosimd) PTR_ARG (0) SIZE_ARG (2) @@ -219,5 +215,4 @@ L(none_chr): mov result, 0 ret -END (MEMCHR) -libc_hidden_builtin_def (MEMCHR) +END (__memchr_nosimd) diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S index f89b5b670a..baff7e96d0 100644 --- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S +++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S @@ -39,9 +39,6 @@ #define vlen8 x8 #if HAVE_AARCH64_SVE_ASM -# if IS_IN (libc) -# define MEMCPY __memcpy_a64fx -# define MEMMOVE __memmove_a64fx .arch armv8.2-a+sve @@ -97,7 +94,7 @@ #undef BTI_C #define BTI_C -ENTRY (MEMCPY) +ENTRY (__memcpy_a64fx) PTR_ARG (0) PTR_ARG (1) @@ -234,11 +231,10 @@ L(last_bytes): st1b z3.b, p0, [dstend, -1, mul vl] ret -END (MEMCPY) -libc_hidden_builtin_def (MEMCPY) +END (__memcpy_a64fx) -ENTRY_ALIGN (MEMMOVE, 4) +ENTRY_ALIGN (__memmove_a64fx, 4) PTR_ARG (0) PTR_ARG (1) @@ -307,7 +303,5 @@ L(full_overlap): mov dst, dstin b L(last_bytes) -END (MEMMOVE) -libc_hidden_builtin_def (MEMMOVE) -# endif /* IS_IN (libc) */ +END (__memmove_a64fx) #endif /* HAVE_AARCH64_SVE_ASM */ diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S index ec0e4ade24..67c4ab34eb 100644 --- a/sysdeps/aarch64/multiarch/memcpy_falkor.S +++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S @@ -71,7 +71,7 @@ The non-temporal stores help optimize cache utilization. */ #if IS_IN (libc) -ENTRY_ALIGN (__memcpy_falkor, 6) +ENTRY (__memcpy_falkor) PTR_ARG (0) PTR_ARG (1) @@ -198,7 +198,6 @@ L(loop64): ret END (__memcpy_falkor) -libc_hidden_builtin_def (__memcpy_falkor) /* RATIONALE: @@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor) For small and medium cases memcpy is used. */ -ENTRY_ALIGN (__memmove_falkor, 6) +ENTRY (__memmove_falkor) PTR_ARG (0) PTR_ARG (1) @@ -311,5 +310,4 @@ L(move_long): 3: ret END (__memmove_falkor) -libc_hidden_builtin_def (__memmove_falkor) #endif diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S index d11be6a443..2f14f91366 100644 --- a/sysdeps/aarch64/multiarch/memcpy_sve.S +++ b/sysdeps/aarch64/multiarch/memcpy_sve.S @@ -141,7 +141,6 @@ L(copy64_from_end): ret END (__memcpy_sve) -libc_hidden_builtin_def (__memcpy_sve) ENTRY (__memmove_sve) @@ -208,5 +207,4 @@ L(return): ret END (__memmove_sve) -libc_hidden_builtin_def (__memmove_sve) #endif diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S index 366287587f..14269b1a47 100644 --- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S @@ -65,21 +65,7 @@ Overlapping large forward memmoves use a loop that copies backwards. */ -#ifndef MEMMOVE -# define MEMMOVE memmove -#endif -#ifndef MEMCPY -# define MEMCPY memcpy -#endif - -#if IS_IN (libc) - -# undef MEMCPY -# define MEMCPY __memcpy_thunderx -# undef MEMMOVE -# define MEMMOVE __memmove_thunderx - -ENTRY_ALIGN (MEMMOVE, 6) +ENTRY (__memmove_thunderx) PTR_ARG (0) PTR_ARG (1) @@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6) b.lo L(move_long) /* Common case falls through into memcpy. */ -END (MEMMOVE) -libc_hidden_builtin_def (MEMMOVE) -ENTRY (MEMCPY) +END (__memmove_thunderx) + +ENTRY (__memcpy_thunderx) PTR_ARG (0) PTR_ARG (1) @@ -316,7 +302,4 @@ L(move_long): stp C_l, C_h, [dstin] 3: ret -END (MEMCPY) -libc_hidden_builtin_def (MEMCPY) - -#endif +END (__memcpy_thunderx) diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S index d3d6f1debc..93993b9e03 100644 --- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S @@ -75,27 +75,12 @@ #define I_v v16 #define J_v v17 -#ifndef MEMMOVE -# define MEMMOVE memmove -#endif -#ifndef MEMCPY -# define MEMCPY memcpy -#endif - -#if IS_IN (libc) - -#undef MEMCPY -#define MEMCPY __memcpy_thunderx2 -#undef MEMMOVE -#define MEMMOVE __memmove_thunderx2 - - /* Overlapping large forward memmoves use a loop that copies backwards. Otherwise memcpy is used. Small moves branch to memcopy16 directly. The longer memcpy cases fall through to the memcpy head. */ -ENTRY_ALIGN (MEMMOVE, 6) +ENTRY (__memmove_thunderx2) PTR_ARG (0) PTR_ARG (1) @@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6) ccmp tmp1, count, 2, hi b.lo L(move_long) -END (MEMMOVE) -libc_hidden_builtin_def (MEMMOVE) +END (__memmove_thunderx2) /* Copies are split into 3 main cases: small copies of up to 16 bytes, @@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE) #define MEMCPY_PREFETCH_LDR 640 - .p2align 4 -ENTRY (MEMCPY) +ENTRY (__memcpy_thunderx2) PTR_ARG (0) PTR_ARG (1) @@ -449,7 +432,7 @@ L(move_long): 3: ret -END (MEMCPY) +END (__memcpy_thunderx2) .section .rodata .p2align 4 @@ -472,6 +455,3 @@ L(ext_table): .word L(ext_size_13) -. .word L(ext_size_14) -. .word L(ext_size_15) -. - -libc_hidden_builtin_def (MEMCPY) -#endif diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S index d520355143..7176f3d284 100644 --- a/sysdeps/aarch64/multiarch/memset_a64fx.S +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S @@ -33,8 +33,6 @@ #define vector_length x9 #if HAVE_AARCH64_SVE_ASM -# if IS_IN (libc) -# define MEMSET __memset_a64fx .arch armv8.2-a+sve @@ -49,7 +47,7 @@ #undef BTI_C #define BTI_C -ENTRY (MEMSET) +ENTRY (__memset_a64fx) PTR_ARG (0) SIZE_ARG (2) @@ -166,8 +164,6 @@ L(L2): add count, count, CACHE_LINE_SIZE b L(last) -END (MEMSET) -libc_hidden_builtin_def (MEMSET) +END (__memset_a64fx) -#endif /* IS_IN (libc) */ #endif /* HAVE_AARCH64_SVE_ASM */ diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S index 35296a6dec..0e8f709fa5 100644 --- a/sysdeps/aarch64/multiarch/memset_base64.S +++ b/sysdeps/aarch64/multiarch/memset_base64.S @@ -34,7 +34,7 @@ * */ -ENTRY_ALIGN (MEMSET, 6) +ENTRY (MEMSET) PTR_ARG (0) SIZE_ARG (2) @@ -183,4 +183,3 @@ L(zva_64): #endif END (MEMSET) -libc_hidden_builtin_def (MEMSET) diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S index 17d609cead..6fecad4fae 100644 --- a/sysdeps/aarch64/multiarch/memset_emag.S +++ b/sysdeps/aarch64/multiarch/memset_emag.S @@ -19,8 +19,7 @@ #include -#if IS_IN (libc) -# define MEMSET __memset_emag +#define MEMSET __memset_emag /* * Using DC ZVA to zero memory does not produce better performance if @@ -30,7 +29,6 @@ * workloads. */ -# define DC_ZVA_THRESHOLD 0 +#define DC_ZVA_THRESHOLD 0 -# include "./memset_base64.S" -#endif +#include "./memset_base64.S" diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S index 9c23e482bf..6c1f0daac8 100644 --- a/sysdeps/aarch64/multiarch/memset_generic.S +++ b/sysdeps/aarch64/multiarch/memset_generic.S @@ -21,9 +21,15 @@ #if IS_IN (libc) # define MEMSET __memset_generic + +/* Do not hide the generic version of memset, we use it internally. */ +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) + /* Add a hidden definition for use within libc.so. */ # ifdef SHARED .globl __GI_memset; __GI_memset = __memset_generic # endif -# include #endif + +#include <../memset.S> diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S index 86c46434fd..4a54373398 100644 --- a/sysdeps/aarch64/multiarch/memset_kunpeng.S +++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S @@ -20,16 +20,13 @@ #include #include -#if IS_IN (libc) -# define MEMSET __memset_kunpeng - /* Assumptions: * * ARMv8-a, AArch64, unaligned accesses * */ -ENTRY_ALIGN (MEMSET, 6) +ENTRY (__memset_kunpeng) PTR_ARG (0) SIZE_ARG (2) @@ -108,6 +105,4 @@ L(set_long): stp q0, q0, [dstend, -32] ret -END (MEMSET) -libc_hidden_builtin_def (MEMSET) -#endif +END (__memset_kunpeng) diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S deleted file mode 100644 index 4b035ed8b2..0000000000 --- a/sysdeps/aarch64/multiarch/rtld-memset.S +++ /dev/null @@ -1,25 +0,0 @@ -/* Memset for aarch64, for the dynamic linker. - Copyright (C) 2017-2023 Free Software Foundation, Inc. - - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - . */ - -#include - -#if IS_IN (rtld) -# define MEMSET memset -# include -#endif diff --git a/sysdeps/aarch64/multiarch/strlen.c b/sysdeps/aarch64/multiarch/strlen.c index bbdd3de8c4..728bd1936a 100644 --- a/sysdeps/aarch64/multiarch/strlen.c +++ b/sysdeps/aarch64/multiarch/strlen.c @@ -28,10 +28,10 @@ extern __typeof (__redirect_strlen) __strlen; -extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden; extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden; -libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd)); +libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd)); # undef strlen strong_alias (__strlen, strlen); diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S index 490439491d..aee5ef9f78 100644 --- a/sysdeps/aarch64/multiarch/strlen_asimd.S +++ b/sysdeps/aarch64/multiarch/strlen_asimd.S @@ -203,4 +203,3 @@ L(page_cross): ret END (__strlen_asimd) -libc_hidden_builtin_def (__strlen_asimd) diff --git a/sysdeps/aarch64/multiarch/strlen_mte.S b/sysdeps/aarch64/multiarch/strlen_generic.S similarity index 85% rename from sysdeps/aarch64/multiarch/strlen_mte.S rename to sysdeps/aarch64/multiarch/strlen_generic.S index 1c1220b767..2346296a18 100644 --- a/sysdeps/aarch64/multiarch/strlen_mte.S +++ b/sysdeps/aarch64/multiarch/strlen_generic.S @@ -17,14 +17,14 @@ . */ /* The actual strlen code is in ../strlen.S. If we are building libc this file - defines __strlen_mte. Otherwise the include of ../strlen.S will define - the normal __strlen entry points. */ + defines __strlen_generic. Otherwise the include of ../strlen.S will define + the normal __strlen entry points. */ #include #if IS_IN (libc) -# define STRLEN __strlen_mte +# define STRLEN __strlen_generic /* Do not hide the generic version of strlen, we use it internally. */ # undef libc_hidden_builtin_def @@ -32,7 +32,7 @@ # ifdef SHARED /* It doesn't make sense to send libc-internal strlen calls through a PLT. */ - .globl __GI_strlen; __GI_strlen = __strlen_mte + .globl __GI_strlen; __GI_strlen = __strlen_generic # endif #endif