* [PATCH] AArch64: Cleanup ifuncs
@ 2023-10-24 12:33 Wilco Dijkstra
2023-11-01 13:23 ` Szabolcs Nagy
0 siblings, 1 reply; 2+ messages in thread
From: Wilco Dijkstra @ 2023-10-24 12:33 UTC (permalink / raw)
To: 'GNU C Library'; +Cc: Szabolcs Nagy, Adhemerval Zanella
Clean up the AArch64 ifuncs: remove uses of libc_hidden_builtin_def, use ENTRY
rather than ENTRY_ALIGN, and remove unnecessary defines and conditional
compilation. Rename strlen_mte to strlen_generic. Remove rtld-memset.
Passes regression testing; OK to commit?
---
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 50e5da3e7aec3a7b4ef23676c35c34896e8078e6..bf3cf85c8a95fd8c03ae13c4173fe507040ee8cd 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -29,7 +29,7 @@
*
*/
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)
PTR_ARG (0)
SIZE_ARG (2)
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index e6099548b96b80b656f3f22cd8d5598634647360..a1a4de3cd93c48db6e47eebc9c111186efca53fb 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -17,6 +17,6 @@ sysdep_routines += \
memset_kunpeng \
memset_mops \
strlen_asimd \
- strlen_mte \
+ strlen_generic \
# sysdep_routines
endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index da7f1153778efd41a15ef5aa900252f9e492b18d..836e8317a5d3b652134d199cf685499983b1a3fc 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -70,7 +70,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strlen,
IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd)
- IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_mte))
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic))
return 0;
}
diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S
index 57e48375e91edd74ee0a69e747e09d995ae8fd4f..780075189902919345c5eb15cba7ea0929043d6a 100644
--- a/sysdeps/aarch64/multiarch/memchr_nosimd.S
+++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S
@@ -26,10 +26,6 @@
* Use base integer registers.
*/
-#ifndef MEMCHR
-# define MEMCHR __memchr_nosimd
-#endif
-
/* Arguments and results. */
#define srcin x0
#define chrin x1
@@ -62,7 +58,7 @@
#define REP8_7f 0x7f7f7f7f7f7f7f7f
-ENTRY_ALIGN (MEMCHR, 6)
+ENTRY (__memchr_nosimd)
PTR_ARG (0)
SIZE_ARG (2)
@@ -219,5 +215,4 @@ L(none_chr):
mov result, 0
ret
-END (MEMCHR)
-libc_hidden_builtin_def (MEMCHR)
+END (__memchr_nosimd)
diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
index f89b5b670a16ceb944eb816d318e02628a239df8..baff7e96d0045dac5a6a7a8d555e997998c978dd 100644
--- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
@@ -39,9 +39,6 @@
#define vlen8 x8
#if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMCPY __memcpy_a64fx
-# define MEMMOVE __memmove_a64fx
.arch armv8.2-a+sve
@@ -97,7 +94,7 @@
#undef BTI_C
#define BTI_C
-ENTRY (MEMCPY)
+ENTRY (__memcpy_a64fx)
PTR_ARG (0)
PTR_ARG (1)
@@ -234,11 +231,10 @@ L(last_bytes):
st1b z3.b, p0, [dstend, -1, mul vl]
ret
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
+END (__memcpy_a64fx)
-ENTRY_ALIGN (MEMMOVE, 4)
+ENTRY_ALIGN (__memmove_a64fx, 4)
PTR_ARG (0)
PTR_ARG (1)
@@ -307,7 +303,5 @@ L(full_overlap):
mov dst, dstin
b L(last_bytes)
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-# endif /* IS_IN (libc) */
+END (__memmove_a64fx)
#endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S
index ec0e4ade24bbad76979d2e34a36eff7703d30264..67c4ab34eba40c37c6aae08be6cb5e11e2a82d17 100644
--- a/sysdeps/aarch64/multiarch/memcpy_falkor.S
+++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S
@@ -71,7 +71,7 @@
The non-temporal stores help optimize cache utilization. */
#if IS_IN (libc)
-ENTRY_ALIGN (__memcpy_falkor, 6)
+ENTRY (__memcpy_falkor)
PTR_ARG (0)
PTR_ARG (1)
@@ -198,7 +198,6 @@ L(loop64):
ret
END (__memcpy_falkor)
-libc_hidden_builtin_def (__memcpy_falkor)
/* RATIONALE:
@@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor)
For small and medium cases memcpy is used. */
-ENTRY_ALIGN (__memmove_falkor, 6)
+ENTRY (__memmove_falkor)
PTR_ARG (0)
PTR_ARG (1)
@@ -311,5 +310,4 @@ L(move_long):
3: ret
END (__memmove_falkor)
-libc_hidden_builtin_def (__memmove_falkor)
#endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S
index d11be6a44301af4bfd7fa4900555b769dc58d34d..2f14f9136698c5b3130482e2c15c5d4ceb198bda 100644
--- a/sysdeps/aarch64/multiarch/memcpy_sve.S
+++ b/sysdeps/aarch64/multiarch/memcpy_sve.S
@@ -141,7 +141,6 @@ L(copy64_from_end):
ret
END (__memcpy_sve)
-libc_hidden_builtin_def (__memcpy_sve)
ENTRY (__memmove_sve)
@@ -208,5 +207,4 @@ L(return):
ret
END (__memmove_sve)
-libc_hidden_builtin_def (__memmove_sve)
#endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index 366287587f4aec5c00c35320fd1f862e45400d08..14269b1a47137f178e289e31026ab2874ccc3173 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -65,21 +65,7 @@
Overlapping large forward memmoves use a loop that copies backwards.
*/
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#if IS_IN (libc)
-
-# undef MEMCPY
-# define MEMCPY __memcpy_thunderx
-# undef MEMMOVE
-# define MEMMOVE __memmove_thunderx
-
-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx)
PTR_ARG (0)
PTR_ARG (1)
@@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6)
b.lo L(move_long)
/* Common case falls through into memcpy. */
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
-ENTRY (MEMCPY)
+END (__memmove_thunderx)
+
+ENTRY (__memcpy_thunderx)
PTR_ARG (0)
PTR_ARG (1)
@@ -316,7 +302,4 @@ L(move_long):
stp C_l, C_h, [dstin]
3: ret
-END (MEMCPY)
-libc_hidden_builtin_def (MEMCPY)
-
-#endif
+END (__memcpy_thunderx)
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
index d3d6f1debcc936b81aa62b170969b149faa9913d..93993b9e03b95075dbc9aef13f44c2b9a2b7800f 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
@@ -75,27 +75,12 @@
#define I_v v16
#define J_v v17
-#ifndef MEMMOVE
-# define MEMMOVE memmove
-#endif
-#ifndef MEMCPY
-# define MEMCPY memcpy
-#endif
-
-#if IS_IN (libc)
-
-#undef MEMCPY
-#define MEMCPY __memcpy_thunderx2
-#undef MEMMOVE
-#define MEMMOVE __memmove_thunderx2
-
-
/* Overlapping large forward memmoves use a loop that copies backwards.
Otherwise memcpy is used. Small moves branch to memcopy16 directly.
The longer memcpy cases fall through to the memcpy head.
*/
-ENTRY_ALIGN (MEMMOVE, 6)
+ENTRY (__memmove_thunderx2)
PTR_ARG (0)
PTR_ARG (1)
@@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6)
ccmp tmp1, count, 2, hi
b.lo L(move_long)
-END (MEMMOVE)
-libc_hidden_builtin_def (MEMMOVE)
+END (__memmove_thunderx2)
/* Copies are split into 3 main cases: small copies of up to 16 bytes,
@@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE)
#define MEMCPY_PREFETCH_LDR 640
- .p2align 4
-ENTRY (MEMCPY)
+ENTRY (__memcpy_thunderx2)
PTR_ARG (0)
PTR_ARG (1)
@@ -449,7 +432,7 @@ L(move_long):
3: ret
-END (MEMCPY)
+END (__memcpy_thunderx2)
.section .rodata
.p2align 4
@@ -472,6 +455,3 @@ L(ext_table):
.word L(ext_size_13) -.
.word L(ext_size_14) -.
.word L(ext_size_15) -.
-
-libc_hidden_builtin_def (MEMCPY)
-#endif
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
index d520355143ead2acdaf6a7e1652d2417df8087e8..7176f3d284ab12f7a354781db668c0520f47702d 100644
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -33,8 +33,6 @@
#define vector_length x9
#if HAVE_AARCH64_SVE_ASM
-# if IS_IN (libc)
-# define MEMSET __memset_a64fx
.arch armv8.2-a+sve
@@ -49,7 +47,7 @@
#undef BTI_C
#define BTI_C
-ENTRY (MEMSET)
+ENTRY (__memset_a64fx)
PTR_ARG (0)
SIZE_ARG (2)
@@ -166,8 +164,6 @@ L(L2):
add count, count, CACHE_LINE_SIZE
b L(last)
-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
+END (__memset_a64fx)
-#endif /* IS_IN (libc) */
#endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S
index 35296a6dec4944fa74af176bf280e17b11a027fd..0e8f709fa58478d6e9d62020c576bb9be108866c 100644
--- a/sysdeps/aarch64/multiarch/memset_base64.S
+++ b/sysdeps/aarch64/multiarch/memset_base64.S
@@ -34,7 +34,7 @@
*
*/
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (MEMSET)
PTR_ARG (0)
SIZE_ARG (2)
@@ -183,4 +183,3 @@ L(zva_64):
#endif
END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
index 17d609cead96030fbd42f2997d081c6740396af3..6fecad4fae699f9967da94ddc88867afd5c59414 100644
--- a/sysdeps/aarch64/multiarch/memset_emag.S
+++ b/sysdeps/aarch64/multiarch/memset_emag.S
@@ -19,8 +19,7 @@
#include <sysdep.h>
-#if IS_IN (libc)
-# define MEMSET __memset_emag
+#define MEMSET __memset_emag
/*
* Using DC ZVA to zero memory does not produce better performance if
@@ -30,7 +29,6 @@
* workloads.
*/
-# define DC_ZVA_THRESHOLD 0
+#define DC_ZVA_THRESHOLD 0
-# include "./memset_base64.S"
-#endif
+#include "./memset_base64.S"
diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S
index 9c23e482bf90d8b466d3e3d1e2d2d4a053b434bb..6c1f0daac8615c3e83e067db9b61e77c1c2c2fdd 100644
--- a/sysdeps/aarch64/multiarch/memset_generic.S
+++ b/sysdeps/aarch64/multiarch/memset_generic.S
@@ -21,9 +21,15 @@
#if IS_IN (libc)
# define MEMSET __memset_generic
+
+/* Do not hide the generic version of memset, we use it internally. */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name)
+
/* Add a hidden definition for use within libc.so. */
# ifdef SHARED
.globl __GI_memset; __GI_memset = __memset_generic
# endif
-# include <sysdeps/aarch64/memset.S>
#endif
+
+#include <../memset.S>
diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S
index 86c46434fdd30460d0655850d25f69d75b193b2e..4a54373398ff14151c49490c06749277de664cd9 100644
--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S
+++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S
@@ -20,16 +20,13 @@
#include <sysdep.h>
#include <sysdeps/aarch64/memset-reg.h>
-#if IS_IN (libc)
-# define MEMSET __memset_kunpeng
-
/* Assumptions:
*
* ARMv8-a, AArch64, unaligned accesses
*
*/
-ENTRY_ALIGN (MEMSET, 6)
+ENTRY (__memset_kunpeng)
PTR_ARG (0)
SIZE_ARG (2)
@@ -108,6 +105,4 @@ L(set_long):
stp q0, q0, [dstend, -32]
ret
-END (MEMSET)
-libc_hidden_builtin_def (MEMSET)
-#endif
+END (__memset_kunpeng)
diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
deleted file mode 100644
index 4b035ed8b2cfc80cc37babe6cb2f146804feb46f..0000000000000000000000000000000000000000
--- a/sysdeps/aarch64/multiarch/rtld-memset.S
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Memset for aarch64, for the dynamic linker.
- Copyright (C) 2017-2023 Free Software Foundation, Inc.
-
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#if IS_IN (rtld)
-# define MEMSET memset
-# include <sysdeps/aarch64/memset.S>
-#endif
diff --git a/sysdeps/aarch64/multiarch/strlen.c b/sysdeps/aarch64/multiarch/strlen.c
index bbdd3de8c4a8dca2e86f6fcfbe810be2fb1867b1..728bd1936a218c77f931c7327d402bcc873c50f9 100644
--- a/sysdeps/aarch64/multiarch/strlen.c
+++ b/sysdeps/aarch64/multiarch/strlen.c
@@ -28,10 +28,10 @@
extern __typeof (__redirect_strlen) __strlen;
-extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden;
+extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden;
extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden;
-libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd));
+libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd));
# undef strlen
strong_alias (__strlen, strlen);
diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
index 490439491d19c3f14b0228f42248bc8aa6e9e8bd..aee5ef9f78adb1f769dbc48ef9aae8966f120c24 100644
--- a/sysdeps/aarch64/multiarch/strlen_asimd.S
+++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
@@ -203,4 +203,3 @@ L(page_cross):
ret
END (__strlen_asimd)
-libc_hidden_builtin_def (__strlen_asimd)
diff --git a/sysdeps/aarch64/multiarch/strlen_mte.S b/sysdeps/aarch64/multiarch/strlen_generic.S
similarity index 85%
rename from sysdeps/aarch64/multiarch/strlen_mte.S
rename to sysdeps/aarch64/multiarch/strlen_generic.S
index 1c1220b7675fcc83953aba9ceca79deedca9242b..2346296a188af76880435ff446b9fdd76a174072 100644
--- a/sysdeps/aarch64/multiarch/strlen_mte.S
+++ b/sysdeps/aarch64/multiarch/strlen_generic.S
@@ -17,14 +17,14 @@
<https://www.gnu.org/licenses/>. */
/* The actual strlen code is in ../strlen.S. If we are building libc this file
- defines __strlen_mte. Otherwise the include of ../strlen.S will define
- the normal __strlen entry points. */
+ defines __strlen_generic. Otherwise the include of ../strlen.S will define
+ the normal __strlen entry points. */
#include <sysdep.h>
#if IS_IN (libc)
-# define STRLEN __strlen_mte
+# define STRLEN __strlen_generic
/* Do not hide the generic version of strlen, we use it internally. */
# undef libc_hidden_builtin_def
@@ -32,7 +32,7 @@
# ifdef SHARED
/* It doesn't make sense to send libc-internal strlen calls through a PLT. */
- .globl __GI_strlen; __GI_strlen = __strlen_mte
+ .globl __GI_strlen; __GI_strlen = __strlen_generic
# endif
#endif
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] AArch64: Cleanup ifuncs
2023-10-24 12:33 [PATCH] AArch64: Cleanup ifuncs Wilco Dijkstra
@ 2023-11-01 13:23 ` Szabolcs Nagy
0 siblings, 0 replies; 2+ messages in thread
From: Szabolcs Nagy @ 2023-11-01 13:23 UTC (permalink / raw)
To: Wilco Dijkstra, 'GNU C Library'; +Cc: Adhemerval Zanella
The 10/24/2023 13:33, Wilco Dijkstra wrote:
> Cleanup ifuncs. Remove uses of libc_hidden_builtin_def, use ENTRY rather than
> ENTRY_ALIGN, remove unnecessary defines and conditional compilation. Rename
> strlen_mte to strlen_generic. Remove rtld-memset.
>
> Passes regress, OK for commit?
OK.
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
>
> ---
>
> diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
> index 50e5da3e7aec3a7b4ef23676c35c34896e8078e6..bf3cf85c8a95fd8c03ae13c4173fe507040ee8cd 100644
> --- a/sysdeps/aarch64/memset.S
> +++ b/sysdeps/aarch64/memset.S
> @@ -29,7 +29,7 @@
> *
> */
>
> -ENTRY_ALIGN (MEMSET, 6)
> +ENTRY (MEMSET)
>
> PTR_ARG (0)
> SIZE_ARG (2)
> diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
> index e6099548b96b80b656f3f22cd8d5598634647360..a1a4de3cd93c48db6e47eebc9c111186efca53fb 100644
> --- a/sysdeps/aarch64/multiarch/Makefile
> +++ b/sysdeps/aarch64/multiarch/Makefile
> @@ -17,6 +17,6 @@ sysdep_routines += \
> memset_kunpeng \
> memset_mops \
> strlen_asimd \
> - strlen_mte \
> + strlen_generic \
> # sysdep_routines
> endif
> diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> index da7f1153778efd41a15ef5aa900252f9e492b18d..836e8317a5d3b652134d199cf685499983b1a3fc 100644
> --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> @@ -70,7 +70,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
> IFUNC_IMPL (i, name, strlen,
> IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd)
> - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_mte))
> + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic))
>
> return 0;
> }
> diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S
> index 57e48375e91edd74ee0a69e747e09d995ae8fd4f..780075189902919345c5eb15cba7ea0929043d6a 100644
> --- a/sysdeps/aarch64/multiarch/memchr_nosimd.S
> +++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S
> @@ -26,10 +26,6 @@
> * Use base integer registers.
> */
>
> -#ifndef MEMCHR
> -# define MEMCHR __memchr_nosimd
> -#endif
> -
> /* Arguments and results. */
> #define srcin x0
> #define chrin x1
> @@ -62,7 +58,7 @@
> #define REP8_7f 0x7f7f7f7f7f7f7f7f
>
>
> -ENTRY_ALIGN (MEMCHR, 6)
> +ENTRY (__memchr_nosimd)
>
> PTR_ARG (0)
> SIZE_ARG (2)
> @@ -219,5 +215,4 @@ L(none_chr):
> mov result, 0
> ret
>
> -END (MEMCHR)
> -libc_hidden_builtin_def (MEMCHR)
> +END (__memchr_nosimd)
> diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
> index f89b5b670a16ceb944eb816d318e02628a239df8..baff7e96d0045dac5a6a7a8d555e997998c978dd 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
> @@ -39,9 +39,6 @@
> #define vlen8 x8
>
> #if HAVE_AARCH64_SVE_ASM
> -# if IS_IN (libc)
> -# define MEMCPY __memcpy_a64fx
> -# define MEMMOVE __memmove_a64fx
>
> .arch armv8.2-a+sve
>
> @@ -97,7 +94,7 @@
> #undef BTI_C
> #define BTI_C
>
> -ENTRY (MEMCPY)
> +ENTRY (__memcpy_a64fx)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -234,11 +231,10 @@ L(last_bytes):
> st1b z3.b, p0, [dstend, -1, mul vl]
> ret
>
> -END (MEMCPY)
> -libc_hidden_builtin_def (MEMCPY)
> +END (__memcpy_a64fx)
>
>
> -ENTRY_ALIGN (MEMMOVE, 4)
> +ENTRY_ALIGN (__memmove_a64fx, 4)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -307,7 +303,5 @@ L(full_overlap):
> mov dst, dstin
> b L(last_bytes)
>
> -END (MEMMOVE)
> -libc_hidden_builtin_def (MEMMOVE)
> -# endif /* IS_IN (libc) */
> +END (__memmove_a64fx)
> #endif /* HAVE_AARCH64_SVE_ASM */
> diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S
> index ec0e4ade24bbad76979d2e34a36eff7703d30264..67c4ab34eba40c37c6aae08be6cb5e11e2a82d17 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_falkor.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S
> @@ -71,7 +71,7 @@
> The non-temporal stores help optimize cache utilization. */
>
> #if IS_IN (libc)
> -ENTRY_ALIGN (__memcpy_falkor, 6)
> +ENTRY (__memcpy_falkor)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -198,7 +198,6 @@ L(loop64):
> ret
>
> END (__memcpy_falkor)
> -libc_hidden_builtin_def (__memcpy_falkor)
>
>
> /* RATIONALE:
> @@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor)
>
> For small and medium cases memcpy is used. */
>
> -ENTRY_ALIGN (__memmove_falkor, 6)
> +ENTRY (__memmove_falkor)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -311,5 +310,4 @@ L(move_long):
> 3: ret
>
> END (__memmove_falkor)
> -libc_hidden_builtin_def (__memmove_falkor)
> #endif
> diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S
> index d11be6a44301af4bfd7fa4900555b769dc58d34d..2f14f9136698c5b3130482e2c15c5d4ceb198bda 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_sve.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_sve.S
> @@ -141,7 +141,6 @@ L(copy64_from_end):
> ret
>
> END (__memcpy_sve)
> -libc_hidden_builtin_def (__memcpy_sve)
>
>
> ENTRY (__memmove_sve)
> @@ -208,5 +207,4 @@ L(return):
> ret
>
> END (__memmove_sve)
> -libc_hidden_builtin_def (__memmove_sve)
> #endif
> diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> index 366287587f4aec5c00c35320fd1f862e45400d08..14269b1a47137f178e289e31026ab2874ccc3173 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> @@ -65,21 +65,7 @@
> Overlapping large forward memmoves use a loop that copies backwards.
> */
>
> -#ifndef MEMMOVE
> -# define MEMMOVE memmove
> -#endif
> -#ifndef MEMCPY
> -# define MEMCPY memcpy
> -#endif
> -
> -#if IS_IN (libc)
> -
> -# undef MEMCPY
> -# define MEMCPY __memcpy_thunderx
> -# undef MEMMOVE
> -# define MEMMOVE __memmove_thunderx
> -
> -ENTRY_ALIGN (MEMMOVE, 6)
> +ENTRY (__memmove_thunderx)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6)
> b.lo L(move_long)
>
> /* Common case falls through into memcpy. */
> -END (MEMMOVE)
> -libc_hidden_builtin_def (MEMMOVE)
> -ENTRY (MEMCPY)
> +END (__memmove_thunderx)
> +
> +ENTRY (__memcpy_thunderx)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -316,7 +302,4 @@ L(move_long):
> stp C_l, C_h, [dstin]
> 3: ret
>
> -END (MEMCPY)
> -libc_hidden_builtin_def (MEMCPY)
> -
> -#endif
> +END (__memcpy_thunderx)
> diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
> index d3d6f1debcc936b81aa62b170969b149faa9913d..93993b9e03b95075dbc9aef13f44c2b9a2b7800f 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S
> @@ -75,27 +75,12 @@
> #define I_v v16
> #define J_v v17
>
> -#ifndef MEMMOVE
> -# define MEMMOVE memmove
> -#endif
> -#ifndef MEMCPY
> -# define MEMCPY memcpy
> -#endif
> -
> -#if IS_IN (libc)
> -
> -#undef MEMCPY
> -#define MEMCPY __memcpy_thunderx2
> -#undef MEMMOVE
> -#define MEMMOVE __memmove_thunderx2
> -
> -
> /* Overlapping large forward memmoves use a loop that copies backwards.
> Otherwise memcpy is used. Small moves branch to memcopy16 directly.
> The longer memcpy cases fall through to the memcpy head.
> */
>
> -ENTRY_ALIGN (MEMMOVE, 6)
> +ENTRY (__memmove_thunderx2)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6)
> ccmp tmp1, count, 2, hi
> b.lo L(move_long)
>
> -END (MEMMOVE)
> -libc_hidden_builtin_def (MEMMOVE)
> +END (__memmove_thunderx2)
>
>
> /* Copies are split into 3 main cases: small copies of up to 16 bytes,
> @@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE)
>
> #define MEMCPY_PREFETCH_LDR 640
>
> - .p2align 4
> -ENTRY (MEMCPY)
> +ENTRY (__memcpy_thunderx2)
>
> PTR_ARG (0)
> PTR_ARG (1)
> @@ -449,7 +432,7 @@ L(move_long):
> 3: ret
>
>
> -END (MEMCPY)
> +END (__memcpy_thunderx2)
> .section .rodata
> .p2align 4
>
> @@ -472,6 +455,3 @@ L(ext_table):
> .word L(ext_size_13) -.
> .word L(ext_size_14) -.
> .word L(ext_size_15) -.
> -
> -libc_hidden_builtin_def (MEMCPY)
> -#endif
> diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
> index d520355143ead2acdaf6a7e1652d2417df8087e8..7176f3d284ab12f7a354781db668c0520f47702d 100644
> --- a/sysdeps/aarch64/multiarch/memset_a64fx.S
> +++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
> @@ -33,8 +33,6 @@
> #define vector_length x9
>
> #if HAVE_AARCH64_SVE_ASM
> -# if IS_IN (libc)
> -# define MEMSET __memset_a64fx
>
> .arch armv8.2-a+sve
>
> @@ -49,7 +47,7 @@
> #undef BTI_C
> #define BTI_C
>
> -ENTRY (MEMSET)
> +ENTRY (__memset_a64fx)
> PTR_ARG (0)
> SIZE_ARG (2)
>
> @@ -166,8 +164,6 @@ L(L2):
> add count, count, CACHE_LINE_SIZE
> b L(last)
>
> -END (MEMSET)
> -libc_hidden_builtin_def (MEMSET)
> +END (__memset_a64fx)
>
> -#endif /* IS_IN (libc) */
> #endif /* HAVE_AARCH64_SVE_ASM */
> diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S
> index 35296a6dec4944fa74af176bf280e17b11a027fd..0e8f709fa58478d6e9d62020c576bb9be108866c 100644
> --- a/sysdeps/aarch64/multiarch/memset_base64.S
> +++ b/sysdeps/aarch64/multiarch/memset_base64.S
> @@ -34,7 +34,7 @@
> *
> */
>
> -ENTRY_ALIGN (MEMSET, 6)
> +ENTRY (MEMSET)
>
> PTR_ARG (0)
> SIZE_ARG (2)
> @@ -183,4 +183,3 @@ L(zva_64):
> #endif
>
> END (MEMSET)
> -libc_hidden_builtin_def (MEMSET)
> diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
> index 17d609cead96030fbd42f2997d081c6740396af3..6fecad4fae699f9967da94ddc88867afd5c59414 100644
> --- a/sysdeps/aarch64/multiarch/memset_emag.S
> +++ b/sysdeps/aarch64/multiarch/memset_emag.S
> @@ -19,8 +19,7 @@
>
> #include <sysdep.h>
>
> -#if IS_IN (libc)
> -# define MEMSET __memset_emag
> +#define MEMSET __memset_emag
>
> /*
> * Using DC ZVA to zero memory does not produce better performance if
> @@ -30,7 +29,6 @@
> * workloads.
> */
>
> -# define DC_ZVA_THRESHOLD 0
> +#define DC_ZVA_THRESHOLD 0
>
> -# include "./memset_base64.S"
> -#endif
> +#include "./memset_base64.S"
> diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S
> index 9c23e482bf90d8b466d3e3d1e2d2d4a053b434bb..6c1f0daac8615c3e83e067db9b61e77c1c2c2fdd 100644
> --- a/sysdeps/aarch64/multiarch/memset_generic.S
> +++ b/sysdeps/aarch64/multiarch/memset_generic.S
> @@ -21,9 +21,15 @@
>
> #if IS_IN (libc)
> # define MEMSET __memset_generic
> +
> +/* Do not hide the generic version of memset, we use it internally. */
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(name)
> +
> /* Add a hidden definition for use within libc.so. */
> # ifdef SHARED
> .globl __GI_memset; __GI_memset = __memset_generic
> # endif
> -# include <sysdeps/aarch64/memset.S>
> #endif
> +
> +#include <../memset.S>
> diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S
> index 86c46434fdd30460d0655850d25f69d75b193b2e..4a54373398ff14151c49490c06749277de664cd9 100644
> --- a/sysdeps/aarch64/multiarch/memset_kunpeng.S
> +++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S
> @@ -20,16 +20,13 @@
> #include <sysdep.h>
> #include <sysdeps/aarch64/memset-reg.h>
>
> -#if IS_IN (libc)
> -# define MEMSET __memset_kunpeng
> -
> /* Assumptions:
> *
> * ARMv8-a, AArch64, unaligned accesses
> *
> */
>
> -ENTRY_ALIGN (MEMSET, 6)
> +ENTRY (__memset_kunpeng)
>
> PTR_ARG (0)
> SIZE_ARG (2)
> @@ -108,6 +105,4 @@ L(set_long):
> stp q0, q0, [dstend, -32]
> ret
>
> -END (MEMSET)
> -libc_hidden_builtin_def (MEMSET)
> -#endif
> +END (__memset_kunpeng)
> diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
> deleted file mode 100644
> index 4b035ed8b2cfc80cc37babe6cb2f146804feb46f..0000000000000000000000000000000000000000
> --- a/sysdeps/aarch64/multiarch/rtld-memset.S
> +++ /dev/null
> @@ -1,25 +0,0 @@
> -/* Memset for aarch64, for the dynamic linker.
> - Copyright (C) 2017-2023 Free Software Foundation, Inc.
> -
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library. If not, see
> - <https://www.gnu.org/licenses/>. */
> -
> -#include <sysdep.h>
> -
> -#if IS_IN (rtld)
> -# define MEMSET memset
> -# include <sysdeps/aarch64/memset.S>
> -#endif
> diff --git a/sysdeps/aarch64/multiarch/strlen.c b/sysdeps/aarch64/multiarch/strlen.c
> index bbdd3de8c4a8dca2e86f6fcfbe810be2fb1867b1..728bd1936a218c77f931c7327d402bcc873c50f9 100644
> --- a/sysdeps/aarch64/multiarch/strlen.c
> +++ b/sysdeps/aarch64/multiarch/strlen.c
> @@ -28,10 +28,10 @@
>
> extern __typeof (__redirect_strlen) __strlen;
>
> -extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden;
> +extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden;
> extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden;
>
> -libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd));
> +libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd));
>
> # undef strlen
> strong_alias (__strlen, strlen);
> diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
> index 490439491d19c3f14b0228f42248bc8aa6e9e8bd..aee5ef9f78adb1f769dbc48ef9aae8966f120c24 100644
> --- a/sysdeps/aarch64/multiarch/strlen_asimd.S
> +++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
> @@ -203,4 +203,3 @@ L(page_cross):
> ret
>
> END (__strlen_asimd)
> -libc_hidden_builtin_def (__strlen_asimd)
> diff --git a/sysdeps/aarch64/multiarch/strlen_mte.S b/sysdeps/aarch64/multiarch/strlen_generic.S
> similarity index 85%
> rename from sysdeps/aarch64/multiarch/strlen_mte.S
> rename to sysdeps/aarch64/multiarch/strlen_generic.S
> index 1c1220b7675fcc83953aba9ceca79deedca9242b..2346296a188af76880435ff446b9fdd76a174072 100644
> --- a/sysdeps/aarch64/multiarch/strlen_mte.S
> +++ b/sysdeps/aarch64/multiarch/strlen_generic.S
> @@ -17,14 +17,14 @@
> <https://www.gnu.org/licenses/>. */
>
> /* The actual strlen code is in ../strlen.S. If we are building libc this file
> - defines __strlen_mte. Otherwise the include of ../strlen.S will define
> - the normal __strlen entry points. */
> + defines __strlen_generic. Otherwise the include of ../strlen.S will define
> + the normal __strlen entry points. */
>
> #include <sysdep.h>
>
> #if IS_IN (libc)
>
> -# define STRLEN __strlen_mte
> +# define STRLEN __strlen_generic
>
> /* Do not hide the generic version of strlen, we use it internally. */
> # undef libc_hidden_builtin_def
> @@ -32,7 +32,7 @@
>
> # ifdef SHARED
> /* It doesn't make sense to send libc-internal strlen calls through a PLT. */
> - .globl __GI_strlen; __GI_strlen = __strlen_mte
> + .globl __GI_strlen; __GI_strlen = __strlen_generic
> # endif
> #endif
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-11-01 13:23 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-24 12:33 [PATCH] AArch64: Cleanup ifuncs Wilco Dijkstra
2023-11-01 13:23 ` Szabolcs Nagy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).