* [PATCH v1 1/3] x86: Align varshift table to 32-bytes @ 2022-06-09 4:16 Noah Goldstein 2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein ` (2 more replies) 0 siblings, 3 replies; 11+ messages in thread From: Noah Goldstein @ 2022-06-09 4:16 UTC (permalink / raw) To: libc-alpha This ensures the load will never split a cache line. --- sysdeps/x86_64/multiarch/varshift.c | 5 +++-- sysdeps/x86_64/multiarch/varshift.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c index c8210f0546..d27767520a 100644 --- a/sysdeps/x86_64/multiarch/varshift.c +++ b/sysdeps/x86_64/multiarch/varshift.c @@ -16,9 +16,10 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include "varshift.h" +#include <stdint.h> -const int8_t ___m128i_shift_right[31] attribute_hidden = +const int8_t ___m128i_shift_right[31] attribute_hidden + __attribute__((aligned(32))) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h index af30694488..ffd12d79e4 100644 --- a/sysdeps/x86_64/multiarch/varshift.h +++ b/sysdeps/x86_64/multiarch/varshift.h @@ -19,7 +19,8 @@ #include <stdint.h> #include <tmmintrin.h> -extern const int8_t ___m128i_shift_right[31] attribute_hidden; +extern const int8_t ___m128i_shift_right[31] attribute_hidden + __attribute__ ((aligned (32))); static __inline__ __m128i __m128i_shift_right (__m128i value, unsigned long int offset) -- 2.34.1 ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk 2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein @ 2022-06-09 4:16 ` Noah Goldstein 2022-06-09 15:28 ` H.J. Lu 2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein 2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu 2 siblings, 1 reply; 11+ messages in thread From: Noah Goldstein @ 2022-06-09 4:16 UTC (permalink / raw) To: libc-alpha No change to the actual logic of the functions. The goal is for avx/avx2 machines to rely less on sse instructions. Full xcheck passes on x86_64. --- sysdeps/x86_64/multiarch/Makefile | 21 ++++++++++----- .../multiarch/{ifunc-sse4_2.h => ifunc-avx.h} | 4 +++ sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 +++++ sysdeps/x86_64/multiarch/strcspn-c-avx.c | 21 +++++++++++++++ .../{strcspn-c.c => strcspn-c-sse4.c} | 26 ++++++++++++------- sysdeps/x86_64/multiarch/strcspn.c | 2 +- sysdeps/x86_64/multiarch/strpbrk-c-avx.c | 23 ++++++++++++++++ .../{strpbrk-c.c => strpbrk-c-sse4.c} | 6 ++--- sysdeps/x86_64/multiarch/strpbrk.c | 2 +- sysdeps/x86_64/multiarch/strspn-c-avx.c | 21 +++++++++++++++ .../multiarch/{strspn-c.c => strspn-c-sse4.c} | 15 ++++++++--- sysdeps/x86_64/multiarch/strspn.c | 2 +- 12 files changed, 122 insertions(+), 27 deletions(-) rename sysdeps/x86_64/multiarch/{ifunc-sse4_2.h => ifunc-avx.h} (89%) create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-avx.c rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-c-sse4.c} (90%) create mode 100644 sysdeps/x86_64/multiarch/strpbrk-c-avx.c rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-c-sse4.c} (89%) create mode 100644 sysdeps/x86_64/multiarch/strspn-c-avx.c rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-c-sse4.c} (92%) diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 3d153cac35..27f306c7c8 100644 --- 
a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -76,7 +76,8 @@ sysdep_routines += \ strcpy-evex \ strcpy-sse2 \ strcpy-sse2-unaligned \ - strcspn-c \ + strcspn-c-avx \ + strcspn-c-sse4 \ strcspn-sse2 \ strlen-avx2 \ strlen-avx2-rtm \ @@ -108,22 +109,28 @@ sysdep_routines += \ strnlen-evex \ strnlen-evex512 \ strnlen-sse2 \ - strpbrk-c \ + strpbrk-c-avx \ + strpbrk-c-sse4 \ strpbrk-sse2 \ strrchr-avx2 \ strrchr-avx2-rtm \ strrchr-evex \ strrchr-sse2 \ - strspn-c \ + strspn-c-avx \ + strspn-c-sse4 \ strspn-sse2 \ strstr-avx512 \ strstr-sse2-unaligned \ varshift \ # sysdep_routines -CFLAGS-varshift.c += -msse4 -CFLAGS-strcspn-c.c += -msse4 -CFLAGS-strpbrk-c.c += -msse4 -CFLAGS-strspn-c.c += -msse4 + +CFLAGS-strcspn-c-avx.c += -mavx +CFLAGS-strcspn-c-sse4.c += -msse4 +CFLAGS-strpbrk-c-avx.c += -mavx +CFLAGS-strpbrk-c-sse4.c += -msse4 +CFLAGS-strspn-c-avx.c += -mavx +CFLAGS-strspn-c-sse4.c += -msse4 + CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 endif diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h similarity index 89% rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h rename to sysdeps/x86_64/multiarch/ifunc-avx.h index b555ff2fac..891f3ddcac 100644 --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx.h @@ -21,12 +21,16 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + return OPTIMIZE (avx); + if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) return OPTIMIZE (sse42); diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 58f3ec8306..507c563669 
100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strcspn.c. */ IFUNC_IMPL (i, name, strcspn, + IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX), + __strcspn_avx) IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), __strcspn_sse42) IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) @@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ IFUNC_IMPL (i, name, strpbrk, + IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX), + __strpbrk_avx) IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), __strpbrk_sse42) IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) @@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strspn.c. */ IFUNC_IMPL (i, name, strspn, + IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX), + __strspn_avx) IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), __strspn_sse42) IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c new file mode 100644 index 0000000000..b8d983f79f --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c @@ -0,0 +1,21 @@ +/* strcspn with AVX intrinsics + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define STRCSPN __strcspn_avx +#define SECTION "avx" +#include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c similarity index 90% rename from sysdeps/x86_64/multiarch/strcspn-c.c rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c index c312fab8b1..848c3cfb14 100644 --- a/sysdeps/x86_64/multiarch/strcspn-c.c +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c @@ -52,9 +52,16 @@ when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset X for case 1. */ -#ifndef STRCSPN_SSE2 -# define STRCSPN_SSE2 __strcspn_sse2 -# define STRCSPN_SSE42 __strcspn_sse42 +#ifndef STRCSPN_FALLBACK +# define STRCSPN_FALLBACK __strcspn_sse2 +#endif + +#ifndef STRCSPN +# define STRCSPN __strcspn_sse42 +#endif + +#ifndef SECTION +# define SECTION "sse4.2" #endif #ifdef USE_AS_STRPBRK @@ -69,16 +76,15 @@ char * #else size_t #endif -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; - +STRCSPN_FALLBACK (const char *, const char *) attribute_hidden; #ifdef USE_AS_STRPBRK char * #else size_t #endif -__attribute__ ((section (".text.sse4.2"))) -STRCSPN_SSE42 (const char *s, const char *a) +__attribute__ ((section (".text." SECTION))) +STRCSPN (const char *s, const char *a) { if (*a == 0) RETURN (NULL, strlen (s)); @@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a) maskz_bits = _mm_movemask_epi8 (maskz); if (maskz_bits == 0) { - /* There is no NULL terminator. Don't use SSE4.2 if the length - of A > 16. */ + /* There is no NULL terminator. 
Don't use pcmpstri based approach if the + length of A > 16. */ if (a[16] != 0) - return STRCSPN_SSE2 (s, a); + return STRCSPN_FALLBACK (s, a); } aligned = s; diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c index 4848fa8677..63e1cf052e 100644 --- a/sysdeps/x86_64/multiarch/strcspn.c +++ b/sysdeps/x86_64/multiarch/strcspn.c @@ -24,7 +24,7 @@ # undef strcspn # define SYMBOL_NAME strcspn -# include "ifunc-sse4_2.h" +# include "ifunc-avx.h" libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c new file mode 100644 index 0000000000..2918013994 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c @@ -0,0 +1,23 @@ +/* strpbrk with AVX intrinsics + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STRPBRK +#define STRCSPN_FALLBACK __strpbrk_sse2 +#define STRCSPN __strpbrk_avx +#define SECTION "avx" +#include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c similarity index 89% rename from sysdeps/x86_64/multiarch/strpbrk-c.c rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c index abf4ff7f1a..2efd38d809 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-c.c +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c @@ -17,6 +17,6 @@ <https://www.gnu.org/licenses/>. */ #define USE_AS_STRPBRK -#define STRCSPN_SSE2 __strpbrk_sse2 -#define STRCSPN_SSE42 __strpbrk_sse42 -#include "strcspn-c.c" +#define STRCSPN_FALLBACK __strpbrk_sse2 +#define STRCSPN __strpbrk_sse42 +#include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c index 04e300ea71..ab5b04a482 100644 --- a/sysdeps/x86_64/multiarch/strpbrk.c +++ b/sysdeps/x86_64/multiarch/strpbrk.c @@ -24,7 +24,7 @@ # undef strpbrk # define SYMBOL_NAME strpbrk -# include "ifunc-sse4_2.h" +# include "ifunc-avx.h" libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c new file mode 100644 index 0000000000..9d5fdb9550 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c @@ -0,0 +1,21 @@ +/* strspn with AVX intrinsics + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#define STRSPN __strspn_avx +#define SECTION "avx" +#include "strspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c similarity index 92% rename from sysdeps/x86_64/multiarch/strspn-c.c rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c index 6124033ceb..6a91def2e0 100644 --- a/sysdeps/x86_64/multiarch/strspn-c.c +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c @@ -53,10 +53,17 @@ extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; +#ifndef STRSPN +# define STRSPN __strspn_sse42 +#endif + +#ifndef SECTION +# define SECTION "sse4.2" +#endif size_t -__attribute__ ((section (".text.sse4.2"))) -__strspn_sse42 (const char *s, const char *a) +__attribute__ ((section (".text." SECTION))) +STRSPN (const char *s, const char *a) { if (*a == 0) return 0; @@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a) maskz_bits = _mm_movemask_epi8 (maskz); if (maskz_bits == 0) { - /* There is no NULL terminator. Don't use SSE4.2 if the length - of A > 16. */ + /* There is no NULL terminator. Don't use pcmpstri based approach if the + length of A > 16. */ if (a[16] != 0) return __strspn_sse2 (s, a); } diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c index 07f5def155..c3c5e7a3cc 100644 --- a/sysdeps/x86_64/multiarch/strspn.c +++ b/sysdeps/x86_64/multiarch/strspn.c @@ -24,7 +24,7 @@ # undef strspn # define SYMBOL_NAME strspn -# include "ifunc-sse4_2.h" +# include "ifunc-avx.h" libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ()); -- 2.34.1 ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk 2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein @ 2022-06-09 15:28 ` H.J. Lu 0 siblings, 0 replies; 11+ messages in thread From: H.J. Lu @ 2022-06-09 15:28 UTC (permalink / raw) To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > No change to the actual logic of the functions. The goal is to so the > avx/avx2 machines rely less of sse instructions. These aren't the only SSE2 functions. The rest of glibc may be still compiled with SSE2. A different approach is to compile the whole glibc with x86-64 ISA level 3. > Full xcheck passes on x86_64. > --- > sysdeps/x86_64/multiarch/Makefile | 21 ++++++++++----- > .../multiarch/{ifunc-sse4_2.h => ifunc-avx.h} | 4 +++ > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 +++++ > sysdeps/x86_64/multiarch/strcspn-c-avx.c | 21 +++++++++++++++ > .../{strcspn-c.c => strcspn-c-sse4.c} | 26 ++++++++++++------- > sysdeps/x86_64/multiarch/strcspn.c | 2 +- > sysdeps/x86_64/multiarch/strpbrk-c-avx.c | 23 ++++++++++++++++ > .../{strpbrk-c.c => strpbrk-c-sse4.c} | 6 ++--- > sysdeps/x86_64/multiarch/strpbrk.c | 2 +- > sysdeps/x86_64/multiarch/strspn-c-avx.c | 21 +++++++++++++++ > .../multiarch/{strspn-c.c => strspn-c-sse4.c} | 15 ++++++++--- > sysdeps/x86_64/multiarch/strspn.c | 2 +- > 12 files changed, 122 insertions(+), 27 deletions(-) > rename sysdeps/x86_64/multiarch/{ifunc-sse4_2.h => ifunc-avx.h} (89%) > create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-avx.c > rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-c-sse4.c} (90%) > create mode 100644 sysdeps/x86_64/multiarch/strpbrk-c-avx.c > rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-c-sse4.c} (89%) > create mode 100644 sysdeps/x86_64/multiarch/strspn-c-avx.c > rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-c-sse4.c} (92%) > > diff 
--git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index 3d153cac35..27f306c7c8 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -76,7 +76,8 @@ sysdep_routines += \ > strcpy-evex \ > strcpy-sse2 \ > strcpy-sse2-unaligned \ > - strcspn-c \ > + strcspn-c-avx \ > + strcspn-c-sse4 \ > strcspn-sse2 \ > strlen-avx2 \ > strlen-avx2-rtm \ > @@ -108,22 +109,28 @@ sysdep_routines += \ > strnlen-evex \ > strnlen-evex512 \ > strnlen-sse2 \ > - strpbrk-c \ > + strpbrk-c-avx \ > + strpbrk-c-sse4 \ > strpbrk-sse2 \ > strrchr-avx2 \ > strrchr-avx2-rtm \ > strrchr-evex \ > strrchr-sse2 \ > - strspn-c \ > + strspn-c-avx \ > + strspn-c-sse4 \ > strspn-sse2 \ > strstr-avx512 \ > strstr-sse2-unaligned \ > varshift \ > # sysdep_routines > -CFLAGS-varshift.c += -msse4 > -CFLAGS-strcspn-c.c += -msse4 > -CFLAGS-strpbrk-c.c += -msse4 > -CFLAGS-strspn-c.c += -msse4 > + > +CFLAGS-strcspn-c-avx.c += -mavx > +CFLAGS-strcspn-c-sse4.c += -msse4 > +CFLAGS-strpbrk-c-avx.c += -mavx > +CFLAGS-strpbrk-c-sse4.c += -msse4 > +CFLAGS-strspn-c-avx.c += -mavx > +CFLAGS-strspn-c-sse4.c += -msse4 > + > CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 > endif > > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h > similarity index 89% > rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h > rename to sysdeps/x86_64/multiarch/ifunc-avx.h > index b555ff2fac..891f3ddcac 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > +++ b/sysdeps/x86_64/multiarch/ifunc-avx.h > @@ -21,12 +21,16 @@ > > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; > > static inline void * > IFUNC_SELECTOR (void) > { > const struct cpu_features* cpu_features = __get_cpu_features (); > > + if (CPU_FEATURES_ARCH_P (cpu_features, 
AVX_Fast_Unaligned_Load)) > + return OPTIMIZE (avx); > + > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) > return OPTIMIZE (sse42); > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > index 58f3ec8306..507c563669 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > @@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strcspn.c. */ > IFUNC_IMPL (i, name, strcspn, > + IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX), > + __strcspn_avx) > IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), > __strcspn_sse42) > IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) > @@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ > IFUNC_IMPL (i, name, strpbrk, > + IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX), > + __strpbrk_avx) > IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), > __strpbrk_sse42) > IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) > @@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > /* Support sysdeps/x86_64/multiarch/strspn.c. */ > IFUNC_IMPL (i, name, strspn, > + IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX), > + __strspn_avx) > IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), > __strspn_sse42) > IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) > diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c > new file mode 100644 > index 0000000000..b8d983f79f > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c > @@ -0,0 +1,21 @@ > +/* strcspn with AVX intrinsics > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define STRCSPN __strcspn_avx > +#define SECTION "avx" > +#include "strcspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > similarity index 90% > rename from sysdeps/x86_64/multiarch/strcspn-c.c > rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c > index c312fab8b1..848c3cfb14 100644 > --- a/sysdeps/x86_64/multiarch/strcspn-c.c > +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > @@ -52,9 +52,16 @@ > when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset > X for case 1. */ > > -#ifndef STRCSPN_SSE2 > -# define STRCSPN_SSE2 __strcspn_sse2 > -# define STRCSPN_SSE42 __strcspn_sse42 > +#ifndef STRCSPN_FALLBACK > +# define STRCSPN_FALLBACK __strcspn_sse2 > +#endif > + > +#ifndef STRCSPN > +# define STRCSPN __strcspn_sse42 > +#endif > + > +#ifndef SECTION > +# define SECTION "sse4.2" > #endif > > #ifdef USE_AS_STRPBRK > @@ -69,16 +76,15 @@ char * > #else > size_t > #endif > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; > - > +STRCSPN_FALLBACK (const char *, const char *) attribute_hidden; > > #ifdef USE_AS_STRPBRK > char * > #else > size_t > #endif > -__attribute__ ((section (".text.sse4.2"))) > -STRCSPN_SSE42 (const char *s, const char *a) > +__attribute__ ((section (".text." 
SECTION))) > +STRCSPN (const char *s, const char *a) > { > if (*a == 0) > RETURN (NULL, strlen (s)); > @@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a) > maskz_bits = _mm_movemask_epi8 (maskz); > if (maskz_bits == 0) > { > - /* There is no NULL terminator. Don't use SSE4.2 if the length > - of A > 16. */ > + /* There is no NULL terminator. Don't use pcmpstri based approach if the > + length of A > 16. */ > if (a[16] != 0) > - return STRCSPN_SSE2 (s, a); > + return STRCSPN_FALLBACK (s, a); > } > > aligned = s; > diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c > index 4848fa8677..63e1cf052e 100644 > --- a/sysdeps/x86_64/multiarch/strcspn.c > +++ b/sysdeps/x86_64/multiarch/strcspn.c > @@ -24,7 +24,7 @@ > # undef strcspn > > # define SYMBOL_NAME strcspn > -# include "ifunc-sse4_2.h" > +# include "ifunc-avx.h" > > libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c > new file mode 100644 > index 0000000000..2918013994 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c > @@ -0,0 +1,23 @@ > +/* strpbrk with AVX intrinsics > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. 
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define USE_AS_STRPBRK > +#define STRCSPN_FALLBACK __strpbrk_sse2 > +#define STRCSPN __strpbrk_avx > +#define SECTION "avx" > +#include "strcspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > similarity index 89% > rename from sysdeps/x86_64/multiarch/strpbrk-c.c > rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > index abf4ff7f1a..2efd38d809 100644 > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > @@ -17,6 +17,6 @@ > <https://www.gnu.org/licenses/>. */ > > #define USE_AS_STRPBRK > -#define STRCSPN_SSE2 __strpbrk_sse2 > -#define STRCSPN_SSE42 __strpbrk_sse42 > -#include "strcspn-c.c" > +#define STRCSPN_FALLBACK __strpbrk_sse2 > +#define STRCSPN __strpbrk_sse42 > +#include "strcspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c > index 04e300ea71..ab5b04a482 100644 > --- a/sysdeps/x86_64/multiarch/strpbrk.c > +++ b/sysdeps/x86_64/multiarch/strpbrk.c > @@ -24,7 +24,7 @@ > # undef strpbrk > > # define SYMBOL_NAME strpbrk > -# include "ifunc-sse4_2.h" > +# include "ifunc-avx.h" > > libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c > new file mode 100644 > index 0000000000..9d5fdb9550 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c > @@ -0,0 +1,21 @@ > +/* strspn with AVX intrinsics > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#define STRSPN __strspn_avx > +#define SECTION "avx" > +#include "strspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > similarity index 92% > rename from sysdeps/x86_64/multiarch/strspn-c.c > rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c > index 6124033ceb..6a91def2e0 100644 > --- a/sysdeps/x86_64/multiarch/strspn-c.c > +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > @@ -53,10 +53,17 @@ > > extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; > > +#ifndef STRSPN > +# define STRSPN __strspn_sse42 > +#endif > + > +#ifndef SECTION > +# define SECTION "sse4.2" > +#endif > > size_t > -__attribute__ ((section (".text.sse4.2"))) > -__strspn_sse42 (const char *s, const char *a) > +__attribute__ ((section (".text." SECTION))) > +STRSPN (const char *s, const char *a) > { > if (*a == 0) > return 0; > @@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a) > maskz_bits = _mm_movemask_epi8 (maskz); > if (maskz_bits == 0) > { > - /* There is no NULL terminator. Don't use SSE4.2 if the length > - of A > 16. */ > + /* There is no NULL terminator. Don't use pcmpstri based approach if the > + length of A > 16. 
*/ > if (a[16] != 0) > return __strspn_sse2 (s, a); > } > diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c > index 07f5def155..c3c5e7a3cc 100644 > --- a/sysdeps/x86_64/multiarch/strspn.c > +++ b/sysdeps/x86_64/multiarch/strspn.c > @@ -24,7 +24,7 @@ > # undef strspn > > # define SYMBOL_NAME strspn > -# include "ifunc-sse4_2.h" > +# include "ifunc-avx.h" > > libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ()); > > -- > 2.34.1 > -- H.J. ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity 2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein 2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein @ 2022-06-09 4:16 ` Noah Goldstein 2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein 2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein 2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu 2 siblings, 2 replies; 11+ messages in thread From: Noah Goldstein @ 2022-06-09 4:16 UTC (permalink / raw) To: libc-alpha No functions are changed. It just renames generic implementations from '{func}_sse2' to '{func}_generic'. This is just because the postfix "_sse2" was overloaded and was used for files that had hand-optimized sse2 assembly implementations and files that just redirected back to the generic implementation. Full xcheck passed on x86_64. --- sysdeps/x86_64/multiarch/Makefile | 6 +++--- sysdeps/x86_64/multiarch/ifunc-avx.h | 4 ++-- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 ++++++++-------- sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 ++++++-- sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 ++++++-- sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +- sysdeps/x86_64/multiarch/stpncpy.c | 1 + sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 2 +- .../multiarch/{strcspn-sse2.c => strcspn-c.c} | 2 +- sysdeps/x86_64/multiarch/strncat-c.c | 2 +- sysdeps/x86_64/multiarch/strncat.c | 1 + sysdeps/x86_64/multiarch/strncpy-c.c | 2 +- sysdeps/x86_64/multiarch/strncpy.c | 1 + sysdeps/x86_64/multiarch/strpbrk-c-avx.c | 2 +- sysdeps/x86_64/multiarch/strpbrk-c-sse4.c | 2 +- .../multiarch/{strpbrk-sse2.c => strpbrk-c.c} | 2 +- sysdeps/x86_64/multiarch/strspn-c-sse4.c | 4 ++-- .../multiarch/{strspn-sse2.c => strspn-c.c} | 2 +- sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +- sysdeps/x86_64/multiarch/wcscpy.c | 4 ++-- sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 ++-- sysdeps/x86_64/multiarch/wcsnlen.c | 1 + 22 files 
changed, 45 insertions(+), 33 deletions(-) rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-c.c} (96%) rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-c.c} (96%) rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-c.c} (96%) diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 27f306c7c8..9b1e0add1a 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -76,9 +76,9 @@ sysdep_routines += \ strcpy-evex \ strcpy-sse2 \ strcpy-sse2-unaligned \ + strcspn-c \ strcspn-c-avx \ strcspn-c-sse4 \ - strcspn-sse2 \ strlen-avx2 \ strlen-avx2-rtm \ strlen-evex \ @@ -109,16 +109,16 @@ sysdep_routines += \ strnlen-evex \ strnlen-evex512 \ strnlen-sse2 \ + strpbrk-c \ strpbrk-c-avx \ strpbrk-c-sse4 \ - strpbrk-sse2 \ strrchr-avx2 \ strrchr-avx2-rtm \ strrchr-evex \ strrchr-sse2 \ + strspn-c \ strspn-c-avx \ strspn-c-sse4 \ - strspn-sse2 \ strstr-avx512 \ strstr-sse2-unaligned \ varshift \ diff --git a/sysdeps/x86_64/multiarch/ifunc-avx.h b/sysdeps/x86_64/multiarch/ifunc-avx.h index 891f3ddcac..30efbd29d0 100644 --- a/sysdeps/x86_64/multiarch/ifunc-avx.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx.h @@ -19,7 +19,7 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; @@ -34,5 +34,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) return OPTIMIZE (sse42); - return OPTIMIZE (sse2); + return OPTIMIZE (generic); } diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 507c563669..23a2d7114d 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl 
*array, __stpncpy_evex) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic)) /* Support sysdeps/x86_64/multiarch/stpcpy.c. */ IFUNC_IMPL (i, name, stpcpy, @@ -533,7 +533,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strcspn_avx) IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), __strcspn_sse42) - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic)) /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ IFUNC_IMPL (i, name, strncasecmp, @@ -587,7 +587,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncat_evex) IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2)) + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic)) /* Support sysdeps/x86_64/multiarch/strncpy.c. */ IFUNC_IMPL (i, name, strncpy, @@ -603,7 +603,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncpy_evex) IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2)) + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic)) /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ IFUNC_IMPL (i, name, strpbrk, @@ -611,7 +611,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strpbrk_avx) IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), __strpbrk_sse42) - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic)) /* Support sysdeps/x86_64/multiarch/strspn.c. 
*/ @@ -620,7 +620,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strspn_avx) IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), __strspn_sse42) - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic)) /* Support sysdeps/x86_64/multiarch/strstr.c. */ IFUNC_IMPL (i, name, strstr, @@ -703,7 +703,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, wcscpy, IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3), __wcscpy_ssse3) - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2)) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic)) /* Support sysdeps/x86_64/multiarch/wcslen.c. */ IFUNC_IMPL (i, name, wcslen, @@ -755,7 +755,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, wcsnlen, CPU_FEATURE_USABLE (SSE4_1), __wcsnlen_sse4_1) - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2)) + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) /* Support sysdeps/x86_64/multiarch/wmemchr.c. 
*/ IFUNC_IMPL (i, name, wmemchr, diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h index a15afa44e9..80529458d1 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h @@ -20,7 +20,11 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +#ifndef GENERIC +# define GENERIC sse2 +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); - return OPTIMIZE (sse2); + return OPTIMIZE (GENERIC); } diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h index 2b29e7608a..88c1c502af 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h @@ -19,7 +19,11 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +#ifndef GENERIC +# define GENERIC sse2 +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) return OPTIMIZE (sse4_1); - return OPTIMIZE (sse2); + return OPTIMIZE (GENERIC); } diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c index b016e487e1..eb62fcf388 100644 --- a/sysdeps/x86_64/multiarch/stpncpy-c.c +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c @@ -1,4 +1,4 @@ -#define STPNCPY __stpncpy_sse2 +#define STPNCPY __stpncpy_generic #undef weak_alias #define weak_alias(ignored1, ignored2) 
#undef libc_hidden_def diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c index 82fa53957d..879bc83f0b 100644 --- a/sysdeps/x86_64/multiarch/stpncpy.c +++ b/sysdeps/x86_64/multiarch/stpncpy.c @@ -25,6 +25,7 @@ # undef stpncpy # undef __stpncpy +# define GENERIC generic # define SYMBOL_NAME stpncpy # include "ifunc-strcpy.h" diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c index 848c3cfb14..8541035ccb 100644 --- a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c @@ -53,7 +53,7 @@ X for case 1. */ #ifndef STRCSPN_FALLBACK -# define STRCSPN_FALLBACK __strcspn_sse2 +# define STRCSPN_FALLBACK __strcspn_generic #endif #ifndef STRCSPN diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-c.c similarity index 96% rename from sysdeps/x86_64/multiarch/strcspn-sse2.c rename to sysdeps/x86_64/multiarch/strcspn-c.c index 3a04bb39fc..423de2e2b2 100644 --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c +++ b/sysdeps/x86_64/multiarch/strcspn-c.c @@ -19,7 +19,7 @@ #if IS_IN (libc) # include <sysdep.h> -# define STRCSPN __strcspn_sse2 +# define STRCSPN __strcspn_generic # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(STRCSPN) diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c index 93a7fab7ea..b729c033d9 100644 --- a/sysdeps/x86_64/multiarch/strncat-c.c +++ b/sysdeps/x86_64/multiarch/strncat-c.c @@ -1,2 +1,2 @@ -#define STRNCAT __strncat_sse2 +#define STRNCAT __strncat_generic #include <string/strncat.c> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c index b649343a97..50fba8a41f 100644 --- a/sysdeps/x86_64/multiarch/strncat.c +++ b/sysdeps/x86_64/multiarch/strncat.c @@ -24,6 +24,7 @@ # undef strncat # define SYMBOL_NAME strncat +# define GENERIC generic # include "ifunc-strcpy.h" libc_ifunc_redirected (__redirect_strncat, strncat, 
IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c index 57c45ac7ab..183b0b8e0f 100644 --- a/sysdeps/x86_64/multiarch/strncpy-c.c +++ b/sysdeps/x86_64/multiarch/strncpy-c.c @@ -1,4 +1,4 @@ -#define STRNCPY __strncpy_sse2 +#define STRNCPY __strncpy_generic #undef libc_hidden_builtin_def #define libc_hidden_builtin_def(strncpy) diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c index 2a780a7e16..7fc7d72ec5 100644 --- a/sysdeps/x86_64/multiarch/strncpy.c +++ b/sysdeps/x86_64/multiarch/strncpy.c @@ -24,6 +24,7 @@ # undef strncpy # define SYMBOL_NAME strncpy +# define GENERIC generic # include "ifunc-strcpy.h" libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c index 2918013994..363daebd9e 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c +++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c @@ -17,7 +17,7 @@ <https://www.gnu.org/licenses/>. */ #define USE_AS_STRPBRK -#define STRCSPN_FALLBACK __strpbrk_sse2 +#define STRCSPN_FALLBACK __strpbrk_generic #define STRCSPN __strpbrk_avx #define SECTION "avx" #include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c index 2efd38d809..a02c951dfd 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c @@ -17,6 +17,6 @@ <https://www.gnu.org/licenses/>. 
*/ #define USE_AS_STRPBRK -#define STRCSPN_FALLBACK __strpbrk_sse2 +#define STRCSPN_FALLBACK __strpbrk_generic #define STRCSPN __strpbrk_sse42 #include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c.c similarity index 96% rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c rename to sysdeps/x86_64/multiarch/strpbrk-c.c index d03214c4fb..d31acfe495 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c @@ -19,7 +19,7 @@ #if IS_IN (libc) # include <sysdep.h> -# define STRPBRK __strpbrk_sse2 +# define STRPBRK __strpbrk_generic # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(STRPBRK) diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c index 6a91def2e0..9323a117ab 100644 --- a/sysdeps/x86_64/multiarch/strspn-c-sse4.c +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c @@ -51,7 +51,7 @@ We exit from the loop for case 1. */ -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; +extern size_t __strspn_generic (const char *, const char *) attribute_hidden; #ifndef STRSPN # define STRSPN __strspn_sse42 @@ -105,7 +105,7 @@ STRSPN (const char *s, const char *a) /* There is no NULL terminator. Don't use pcmpstri based approach if the length of A > 16. 
*/ if (a[16] != 0) - return __strspn_sse2 (s, a); + return __strspn_generic (s, a); } aligned = s; offset = (unsigned int) ((size_t) s & 15); diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-c.c similarity index 96% rename from sysdeps/x86_64/multiarch/strspn-sse2.c rename to sysdeps/x86_64/multiarch/strspn-c.c index 61cc6cb0a5..6b50c36432 100644 --- a/sysdeps/x86_64/multiarch/strspn-sse2.c +++ b/sysdeps/x86_64/multiarch/strspn-c.c @@ -19,7 +19,7 @@ #if IS_IN (libc) # include <sysdep.h> -# define STRSPN __strspn_sse2 +# define STRSPN __strspn_generic # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(STRSPN) diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c index 26d6984e9b..fa38dd898d 100644 --- a/sysdeps/x86_64/multiarch/wcscpy-c.c +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c @@ -1,5 +1,5 @@ #if IS_IN (libc) -# define WCSCPY __wcscpy_sse2 +# define WCSCPY __wcscpy_generic #endif #include <wcsmbs/wcscpy.c> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c index 6a2d1421d9..53c3228dc2 100644 --- a/sysdeps/x86_64/multiarch/wcscpy.c +++ b/sysdeps/x86_64/multiarch/wcscpy.c @@ -26,7 +26,7 @@ # define SYMBOL_NAME wcscpy # include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; static inline void * @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) return OPTIMIZE (ssse3); - return OPTIMIZE (sse2); + return OPTIMIZE (generic); } libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c index e1ec7cfbb5..1c9c04241a 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c @@ -1,9 +1,9 @@ #if IS_IN (libc) # include 
<wchar.h> -# define WCSNLEN __wcsnlen_sse2 +# define WCSNLEN __wcsnlen_generic -extern __typeof (wcsnlen) __wcsnlen_sse2; +extern __typeof (wcsnlen) __wcsnlen_generic; #endif #include "wcsmbs/wcsnlen.c" diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c index baa26666a8..05b7a211de 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen.c +++ b/sysdeps/x86_64/multiarch/wcsnlen.c @@ -24,6 +24,7 @@ # undef __wcsnlen # define SYMBOL_NAME wcsnlen +# define GENERIC generic # include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); -- 2.34.1 ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2] x86: Rename generic functions with unique postfix for clarity 2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein @ 2022-06-10 0:58 ` Noah Goldstein 2022-06-10 1:19 ` H.J. Lu 2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein 1 sibling, 1 reply; 11+ messages in thread From: Noah Goldstein @ 2022-06-10 0:58 UTC (permalink / raw) To: libc-alpha No functions are changed. It just renames generic implementations from '{func}_sse2' to '{func}_generic'. This is because the postfix "_sse2" was overloaded: it was used both for files that had hand-optimized sse2 assembly implementations and for files that just redirected back to the generic implementation. Full xcheck passed on x86_64. --- sysdeps/x86_64/multiarch/Makefile | 15 +- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +- sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +- sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +- sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +- sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +- sysdeps/x86_64/multiarch/stpncpy.c | 1 + sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 163 ++++++++++++++++++ sysdeps/x86_64/multiarch/strcspn-c.c | 151 +--------------- sysdeps/x86_64/multiarch/strcspn-sse2.c | 28 --- sysdeps/x86_64/multiarch/strncat-c.c | 2 +- sysdeps/x86_64/multiarch/strncat.c | 1 + sysdeps/x86_64/multiarch/strncpy-c.c | 2 +- sysdeps/x86_64/multiarch/strncpy.c | 1 + .../{strspn-sse2.c => strpbrk-c-sse4.c} | 18 +- sysdeps/x86_64/multiarch/strpbrk-c.c | 18 +- sysdeps/x86_64/multiarch/strpbrk-sse2.c | 28 --- sysdeps/x86_64/multiarch/strspn-c-sse4.c | 136 +++++++++++++++ sysdeps/x86_64/multiarch/strspn-c.c | 126 +------------- sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +- sysdeps/x86_64/multiarch/wcscpy.c | 4 +- sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 +- sysdeps/x86_64/multiarch/wcsnlen.c | 1 + 23 files changed, 376 insertions(+), 363 deletions(-) create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c delete mode 100644 
sysdeps/x86_64/multiarch/strcspn-sse2.c rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%) delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 3d153cac35..86c6ecdfc1 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -77,7 +77,7 @@ sysdep_routines += \ strcpy-sse2 \ strcpy-sse2-unaligned \ strcspn-c \ - strcspn-sse2 \ + strcspn-c-sse4 \ strlen-avx2 \ strlen-avx2-rtm \ strlen-evex \ @@ -109,21 +109,22 @@ sysdep_routines += \ strnlen-evex512 \ strnlen-sse2 \ strpbrk-c \ - strpbrk-sse2 \ + strpbrk-c-sse4 \ strrchr-avx2 \ strrchr-avx2-rtm \ strrchr-evex \ strrchr-sse2 \ strspn-c \ - strspn-sse2 \ + strspn-c-sse4 \ strstr-avx512 \ strstr-sse2-unaligned \ varshift \ # sysdep_routines -CFLAGS-varshift.c += -msse4 -CFLAGS-strcspn-c.c += -msse4 -CFLAGS-strpbrk-c.c += -msse4 -CFLAGS-strspn-c.c += -msse4 + +CFLAGS-strcspn-c-sse4.c += -msse4 +CFLAGS-strpbrk-c-sse4.c += -msse4 +CFLAGS-strspn-c-sse4.c += -msse4 + CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 endif diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 58f3ec8306..4cbd200d39 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __stpncpy_evex) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic)) /* Support sysdeps/x86_64/multiarch/stpcpy.c. 
*/ IFUNC_IMPL (i, name, stpcpy, @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcspn, IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), __strcspn_sse42) - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic)) /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ IFUNC_IMPL (i, name, strncasecmp, @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncat_evex) IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2)) + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic)) /* Support sysdeps/x86_64/multiarch/strncpy.c. */ IFUNC_IMPL (i, name, strncpy, @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncpy_evex) IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2)) + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic)) /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ IFUNC_IMPL (i, name, strpbrk, IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), __strpbrk_sse42) - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic)) /* Support sysdeps/x86_64/multiarch/strspn.c. */ IFUNC_IMPL (i, name, strspn, IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), __strspn_sse42) - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic)) /* Support sysdeps/x86_64/multiarch/strstr.c. 
*/ IFUNC_IMPL (i, name, strstr, @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, wcscpy, IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3), __wcscpy_ssse3) - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2)) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic)) /* Support sysdeps/x86_64/multiarch/wcslen.c. */ IFUNC_IMPL (i, name, wcslen, @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, wcsnlen, CPU_FEATURE_USABLE (SSE4_1), __wcsnlen_sse4_1) - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2)) + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ IFUNC_IMPL (i, name, wmemchr, diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h index b555ff2fac..ee36525bcf 100644 --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h @@ -19,7 +19,7 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; static inline void * @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) return OPTIMIZE (sse42); - return OPTIMIZE (sse2); + return OPTIMIZE (generic); } diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h index a15afa44e9..80529458d1 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h @@ -20,7 +20,11 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +#ifndef GENERIC +# define GENERIC sse2 +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof 
(REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); - return OPTIMIZE (sse2); + return OPTIMIZE (GENERIC); } diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h index 2b29e7608a..88c1c502af 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h @@ -19,7 +19,11 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +#ifndef GENERIC +# define GENERIC sse2 +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) return OPTIMIZE (sse4_1); - return OPTIMIZE (sse2); + return OPTIMIZE (GENERIC); } diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c index b016e487e1..eb62fcf388 100644 --- a/sysdeps/x86_64/multiarch/stpncpy-c.c +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c @@ -1,4 +1,4 @@ -#define STPNCPY __stpncpy_sse2 +#define STPNCPY __stpncpy_generic #undef weak_alias #define weak_alias(ignored1, ignored2) #undef libc_hidden_def diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c index 82fa53957d..879bc83f0b 100644 --- a/sysdeps/x86_64/multiarch/stpncpy.c +++ b/sysdeps/x86_64/multiarch/stpncpy.c @@ -25,6 +25,7 @@ # undef stpncpy # undef __stpncpy +# define GENERIC generic # define SYMBOL_NAME stpncpy # include "ifunc-strcpy.h" diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c new file mode 100644 index 0000000000..59f64f9fe8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c @@ -0,0 
+1,163 @@ +/* strcspn with SSE4.2 intrinsics + Copyright (C) 2009-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <nmmintrin.h> +#include <string.h> +#include "varshift.h" + +/* We use 0x2: + _SIDD_SBYTE_OPS + | _SIDD_CMP_EQUAL_ANY + | _SIDD_POSITIVE_POLARITY + | _SIDD_LEAST_SIGNIFICANT + on pcmpistri to compare xmm/mem128 + + 0 1 2 3 4 5 6 7 8 9 A B C D E F + X X X X X X X X X X X X X X X X + + against xmm + + 0 1 2 3 4 5 6 7 8 9 A B C D E F + A A A A A A A A A A A A A A A A + + to find out if the first 16byte data element has any byte A and + the offset of the first byte. There are 3 cases: + + 1. The first 16byte data element has the byte A at the offset X. + 2. The first 16byte data element has EOS and doesn't have the byte A. + 3. The first 16byte data element is valid and doesn't have the byte A. + + Here is the table of ECX, CFlag, ZFlag and SFlag for the 3 cases: + + 1 X 1 0/1 0 + 2 16 0 1 0 + 3 16 0 0 0 + + We exit from the loop for cases 1 and 2 with jbe which branches + when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset + X for case 1. 
*/ + +#ifndef STRCSPN_GENERIC +# define STRCSPN_GENERIC __strcspn_generic +# define STRCSPN_SSE42 __strcspn_sse42 +#endif + +#ifdef USE_AS_STRPBRK +# define RETURN(val1, val2) return val1 +#else +# define RETURN(val1, val2) return val2 +#endif + +extern +#ifdef USE_AS_STRPBRK +char * +#else +size_t +#endif +STRCSPN_GENERIC (const char *, const char *) attribute_hidden; + + +#ifdef USE_AS_STRPBRK +char * +#else +size_t +#endif +__attribute__ ((section (".text.sse4.2"))) +STRCSPN_SSE42 (const char *s, const char *a) +{ + if (*a == 0) + RETURN (NULL, strlen (s)); + + const char *aligned; + __m128i mask, maskz, zero; + unsigned int maskz_bits; + unsigned int offset = (unsigned int) ((size_t) a & 15); + zero = _mm_set1_epi8 (0); + if (offset != 0) + { + /* Load masks. */ + aligned = (const char *) ((size_t) a & -16L); + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); + maskz = _mm_cmpeq_epi8 (mask0, zero); + + /* Find where the NULL terminator is. */ + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; + if (maskz_bits != 0) + { + mask = __m128i_shift_right (mask0, offset); + offset = (unsigned int) ((size_t) s & 15); + if (offset) + goto start_unaligned; + + aligned = s; + goto start_loop; + } + } + + /* A is aligned. */ + mask = _mm_loadu_si128 ((__m128i *) a); + /* Find where the NULL terminator is. */ + maskz = _mm_cmpeq_epi8 (mask, zero); + maskz_bits = _mm_movemask_epi8 (maskz); + if (maskz_bits == 0) + { + /* There is no NULL terminator. Don't use SSE4.2 if the length + of A > 16. */ + if (a[16] != 0) + return STRCSPN_GENERIC (s, a); + } + + aligned = s; + offset = (unsigned int) ((size_t) s & 15); + if (offset != 0) + { + start_unaligned: + /* Check partial string. */ + aligned = (const char *) ((size_t) s & -16L); + __m128i value = _mm_load_si128 ((__m128i *) aligned); + + value = __m128i_shift_right (value, offset); + + unsigned int length = _mm_cmpistri (mask, value, 0x2); + /* No need to check ZFlag since ZFlag is always 1. 
*/ + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); + if (cflag) + RETURN ((char *) (s + length), length); + /* Find where the NULL terminator is. */ + unsigned int index = _mm_cmpistri (value, value, 0x3a); + if (index < 16 - offset) + RETURN (NULL, index); + aligned += 16; + } + +start_loop: + while (1) + { + __m128i value = _mm_load_si128 ((__m128i *) aligned); + unsigned int index = _mm_cmpistri (mask, value, 0x2); + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); + unsigned int zflag = _mm_cmpistrz (mask, value, 0x2); + if (cflag) + RETURN ((char *) (aligned + index), (size_t) (aligned + index - s)); + if (zflag) + RETURN (NULL, + /* Find where the NULL terminator is. */ + (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s)); + aligned += 16; + } +} diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c index c312fab8b1..423de2e2b2 100644 --- a/sysdeps/x86_64/multiarch/strcspn-c.c +++ b/sysdeps/x86_64/multiarch/strcspn-c.c @@ -1,5 +1,5 @@ -/* strcspn with SSE4.2 intrinsics - Copyright (C) 2009-2022 Free Software Foundation, Inc. +/* strcspn. + Copyright (C) 2017-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,148 +16,13 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <nmmintrin.h> -#include <string.h> -#include "varshift.h" +#if IS_IN (libc) -/* We use 0x2: - _SIDD_SBYTE_OPS - | _SIDD_CMP_EQUAL_ANY - | _SIDD_POSITIVE_POLARITY - | _SIDD_LEAST_SIGNIFICANT - on pcmpistri to compare xmm/mem128 +# include <sysdep.h> +# define STRCSPN __strcspn_generic - 0 1 2 3 4 5 6 7 8 9 A B C D E F - X X X X X X X X X X X X X X X X - - against xmm - - 0 1 2 3 4 5 6 7 8 9 A B C D E F - A A A A A A A A A A A A A A A A - - to find out if the first 16byte data element has any byte A and - the offset of the first byte. There are 3 cases: - - 1. 
The first 16byte data element has the byte A at the offset X. - 2. The first 16byte data element has EOS and doesn't have the byte A. - 3. The first 16byte data element is valid and doesn't have the byte A. - - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: - - 1 X 1 0/1 0 - 2 16 0 1 0 - 3 16 0 0 0 - - We exit from the loop for cases 1 and 2 with jbe which branches - when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset - X for case 1. */ - -#ifndef STRCSPN_SSE2 -# define STRCSPN_SSE2 __strcspn_sse2 -# define STRCSPN_SSE42 __strcspn_sse42 -#endif - -#ifdef USE_AS_STRPBRK -# define RETURN(val1, val2) return val1 -#else -# define RETURN(val1, val2) return val2 -#endif - -extern -#ifdef USE_AS_STRPBRK -char * -#else -size_t -#endif -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; - - -#ifdef USE_AS_STRPBRK -char * -#else -size_t +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(STRCSPN) #endif -__attribute__ ((section (".text.sse4.2"))) -STRCSPN_SSE42 (const char *s, const char *a) -{ - if (*a == 0) - RETURN (NULL, strlen (s)); - - const char *aligned; - __m128i mask, maskz, zero; - unsigned int maskz_bits; - unsigned int offset = (unsigned int) ((size_t) a & 15); - zero = _mm_set1_epi8 (0); - if (offset != 0) - { - /* Load masks. */ - aligned = (const char *) ((size_t) a & -16L); - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); - maskz = _mm_cmpeq_epi8 (mask0, zero); - - /* Find where the NULL terminator is. */ - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; - if (maskz_bits != 0) - { - mask = __m128i_shift_right (mask0, offset); - offset = (unsigned int) ((size_t) s & 15); - if (offset) - goto start_unaligned; - - aligned = s; - goto start_loop; - } - } - - /* A is aligned. */ - mask = _mm_loadu_si128 ((__m128i *) a); - /* Find where the NULL terminator is. */ - maskz = _mm_cmpeq_epi8 (mask, zero); - maskz_bits = _mm_movemask_epi8 (maskz); - if (maskz_bits == 0) - { - /* There is no NULL terminator. 
Don't use SSE4.2 if the length - of A > 16. */ - if (a[16] != 0) - return STRCSPN_SSE2 (s, a); - } - - aligned = s; - offset = (unsigned int) ((size_t) s & 15); - if (offset != 0) - { - start_unaligned: - /* Check partial string. */ - aligned = (const char *) ((size_t) s & -16L); - __m128i value = _mm_load_si128 ((__m128i *) aligned); - - value = __m128i_shift_right (value, offset); - - unsigned int length = _mm_cmpistri (mask, value, 0x2); - /* No need to check ZFlag since ZFlag is always 1. */ - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); - if (cflag) - RETURN ((char *) (s + length), length); - /* Find where the NULL terminator is. */ - unsigned int index = _mm_cmpistri (value, value, 0x3a); - if (index < 16 - offset) - RETURN (NULL, index); - aligned += 16; - } -start_loop: - while (1) - { - __m128i value = _mm_load_si128 ((__m128i *) aligned); - unsigned int index = _mm_cmpistri (mask, value, 0x2); - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); - unsigned int zflag = _mm_cmpistrz (mask, value, 0x2); - if (cflag) - RETURN ((char *) (aligned + index), (size_t) (aligned + index - s)); - if (zflag) - RETURN (NULL, - /* Find where the NULL terminator is. */ - (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s)); - aligned += 16; - } -} +#include <string/strcspn.c> diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c deleted file mode 100644 index 3a04bb39fc..0000000000 --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c +++ /dev/null @@ -1,28 +0,0 @@ -/* strcspn. - Copyright (C) 2017-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> -# define STRCSPN __strcspn_sse2 - -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(STRCSPN) -#endif - -#include <string/strcspn.c> diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c index 93a7fab7ea..b729c033d9 100644 --- a/sysdeps/x86_64/multiarch/strncat-c.c +++ b/sysdeps/x86_64/multiarch/strncat-c.c @@ -1,2 +1,2 @@ -#define STRNCAT __strncat_sse2 +#define STRNCAT __strncat_generic #include <string/strncat.c> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c index b649343a97..50fba8a41f 100644 --- a/sysdeps/x86_64/multiarch/strncat.c +++ b/sysdeps/x86_64/multiarch/strncat.c @@ -24,6 +24,7 @@ # undef strncat # define SYMBOL_NAME strncat +# define GENERIC generic # include "ifunc-strcpy.h" libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c index 57c45ac7ab..183b0b8e0f 100644 --- a/sysdeps/x86_64/multiarch/strncpy-c.c +++ b/sysdeps/x86_64/multiarch/strncpy-c.c @@ -1,4 +1,4 @@ -#define STRNCPY __strncpy_sse2 +#define STRNCPY __strncpy_generic #undef libc_hidden_builtin_def #define libc_hidden_builtin_def(strncpy) diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c index 2a780a7e16..7fc7d72ec5 100644 --- a/sysdeps/x86_64/multiarch/strncpy.c +++ b/sysdeps/x86_64/multiarch/strncpy.c @@ -24,6 +24,7 @@ # undef strncpy # define SYMBOL_NAME strncpy +# define GENERIC generic # include 
"ifunc-strcpy.h" libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c similarity index 74% rename from sysdeps/x86_64/multiarch/strspn-sse2.c rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c index 61cc6cb0a5..8700276773 100644 --- a/sysdeps/x86_64/multiarch/strspn-sse2.c +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c @@ -1,5 +1,5 @@ -/* strspn. - Copyright (C) 2017-2022 Free Software Foundation, Inc. +/* strpbrk with SSE4.2 intrinsics + Copyright (C) 2022 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,13 +16,7 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#if IS_IN (libc) - -# include <sysdep.h> -# define STRSPN __strspn_sse2 - -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(STRSPN) -#endif - -#include <string/strspn.c> +#define USE_AS_STRPBRK +#define STRCSPN_GENERIC __strpbrk_generic +#define STRCSPN_SSE42 __strpbrk_sse42 +#include "strcspn-c-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c index abf4ff7f1a..d31acfe495 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-c.c +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c @@ -1,5 +1,5 @@ -/* strpbrk with SSE4.2 intrinsics - Copyright (C) 2022 Free Software Foundation, Inc. +/* strpbrk. + Copyright (C) 2017-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,7 +16,13 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#define USE_AS_STRPBRK -#define STRCSPN_SSE2 __strpbrk_sse2 -#define STRCSPN_SSE42 __strpbrk_sse42 -#include "strcspn-c.c" +#if IS_IN (libc) + +# include <sysdep.h> +# define STRPBRK __strpbrk_generic + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(STRPBRK) +#endif + +#include <string/strpbrk.c> diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c deleted file mode 100644 index d03214c4fb..0000000000 --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c +++ /dev/null @@ -1,28 +0,0 @@ -/* strpbrk. - Copyright (C) 2017-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> -# define STRPBRK __strpbrk_sse2 - -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(STRPBRK) -#endif - -#include <string/strpbrk.c> diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c new file mode 100644 index 0000000000..d044916688 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c @@ -0,0 +1,136 @@ +/* strspn with SSE4.2 intrinsics + Copyright (C) 2009-2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <nmmintrin.h> +#include <string.h> +#include "varshift.h" + +/* We use 0x12: + _SIDD_SBYTE_OPS + | _SIDD_CMP_EQUAL_ANY + | _SIDD_NEGATIVE_POLARITY + | _SIDD_LEAST_SIGNIFICANT + on pcmpistri to compare xmm/mem128 + + 0 1 2 3 4 5 6 7 8 9 A B C D E F + X X X X X X X X X X X X X X X X + + against xmm + + 0 1 2 3 4 5 6 7 8 9 A B C D E F + A A A A A A A A A A A A A A A A + + to find out if the first 16byte data element has any non-A byte and + the offset of the first byte. There are 2 cases: + + 1. The first 16byte data element has the non-A byte, including + EOS, at the offset X. + 2. The first 16byte data element is valid and doesn't have the non-A + byte. + + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: + + case ECX CFlag ZFlag SFlag + 1 X 1 0/1 0 + 2 16 0 0 0 + + We exit from the loop for case 1. */ + +extern size_t __strspn_generic (const char *, const char *) attribute_hidden; + + +size_t +__attribute__ ((section (".text.sse4.2"))) +__strspn_sse42 (const char *s, const char *a) +{ + if (*a == 0) + return 0; + + const char *aligned; + __m128i mask, maskz, zero; + unsigned int maskz_bits; + unsigned int offset = (int) ((size_t) a & 15); + zero = _mm_set1_epi8 (0); + if (offset != 0) + { + /* Load masks. 
*/ + aligned = (const char *) ((size_t) a & -16L); + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); + maskz = _mm_cmpeq_epi8 (mask0, zero); + + /* Find where the NULL terminator is. */ + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; + if (maskz_bits != 0) + { + mask = __m128i_shift_right (mask0, offset); + offset = (unsigned int) ((size_t) s & 15); + if (offset) + goto start_unaligned; + + aligned = s; + goto start_loop; + } + } + + /* A is aligned. */ + mask = _mm_loadu_si128 ((__m128i *) a); + + /* Find where the NULL terminator is. */ + maskz = _mm_cmpeq_epi8 (mask, zero); + maskz_bits = _mm_movemask_epi8 (maskz); + if (maskz_bits == 0) + { + /* There is no NULL terminator. Don't use SSE4.2 if the length + of A > 16. */ + if (a[16] != 0) + return __strspn_generic (s, a); + } + aligned = s; + offset = (unsigned int) ((size_t) s & 15); + + if (offset != 0) + { + start_unaligned: + /* Check partial string. */ + aligned = (const char *) ((size_t) s & -16L); + __m128i value = _mm_load_si128 ((__m128i *) aligned); + __m128i adj_value = __m128i_shift_right (value, offset); + + unsigned int length = _mm_cmpistri (mask, adj_value, 0x12); + /* No need to check CFlag since it is always 1. */ + if (length < 16 - offset) + return length; + /* Find where the NULL terminator is. 
*/ + maskz = _mm_cmpeq_epi8 (value, zero); + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; + if (maskz_bits != 0) + return length; + aligned += 16; + } + +start_loop: + while (1) + { + __m128i value = _mm_load_si128 ((__m128i *) aligned); + unsigned int index = _mm_cmpistri (mask, value, 0x12); + unsigned int cflag = _mm_cmpistrc (mask, value, 0x12); + if (cflag) + return (size_t) (aligned + index - s); + aligned += 16; + } +} diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c index 6124033ceb..6b50c36432 100644 --- a/sysdeps/x86_64/multiarch/strspn-c.c +++ b/sysdeps/x86_64/multiarch/strspn-c.c @@ -1,5 +1,5 @@ -/* strspn with SSE4.2 intrinsics - Copyright (C) 2009-2022 Free Software Foundation, Inc. +/* strspn. + Copyright (C) 2017-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,121 +16,13 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <nmmintrin.h> -#include <string.h> -#include "varshift.h" +#if IS_IN (libc) -/* We use 0x12: - _SIDD_SBYTE_OPS - | _SIDD_CMP_EQUAL_ANY - | _SIDD_NEGATIVE_POLARITY - | _SIDD_LEAST_SIGNIFICANT - on pcmpistri to compare xmm/mem128 +# include <sysdep.h> +# define STRSPN __strspn_generic - 0 1 2 3 4 5 6 7 8 9 A B C D E F - X X X X X X X X X X X X X X X X +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(STRSPN) +#endif - against xmm - - 0 1 2 3 4 5 6 7 8 9 A B C D E F - A A A A A A A A A A A A A A A A - - to find out if the first 16byte data element has any non-A byte and - the offset of the first byte. There are 2 cases: - - 1. The first 16byte data element has the non-A byte, including - EOS, at the offset X. - 2. The first 16byte data element is valid and doesn't have the non-A - byte. 
- - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: - - case ECX CFlag ZFlag SFlag - 1 X 1 0/1 0 - 2 16 0 0 0 - - We exit from the loop for case 1. */ - -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; - - -size_t -__attribute__ ((section (".text.sse4.2"))) -__strspn_sse42 (const char *s, const char *a) -{ - if (*a == 0) - return 0; - - const char *aligned; - __m128i mask, maskz, zero; - unsigned int maskz_bits; - unsigned int offset = (int) ((size_t) a & 15); - zero = _mm_set1_epi8 (0); - if (offset != 0) - { - /* Load masks. */ - aligned = (const char *) ((size_t) a & -16L); - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); - maskz = _mm_cmpeq_epi8 (mask0, zero); - - /* Find where the NULL terminator is. */ - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; - if (maskz_bits != 0) - { - mask = __m128i_shift_right (mask0, offset); - offset = (unsigned int) ((size_t) s & 15); - if (offset) - goto start_unaligned; - - aligned = s; - goto start_loop; - } - } - - /* A is aligned. */ - mask = _mm_loadu_si128 ((__m128i *) a); - - /* Find where the NULL terminator is. */ - maskz = _mm_cmpeq_epi8 (mask, zero); - maskz_bits = _mm_movemask_epi8 (maskz); - if (maskz_bits == 0) - { - /* There is no NULL terminator. Don't use SSE4.2 if the length - of A > 16. */ - if (a[16] != 0) - return __strspn_sse2 (s, a); - } - aligned = s; - offset = (unsigned int) ((size_t) s & 15); - - if (offset != 0) - { - start_unaligned: - /* Check partial string. */ - aligned = (const char *) ((size_t) s & -16L); - __m128i value = _mm_load_si128 ((__m128i *) aligned); - __m128i adj_value = __m128i_shift_right (value, offset); - - unsigned int length = _mm_cmpistri (mask, adj_value, 0x12); - /* No need to check CFlag since it is always 1. */ - if (length < 16 - offset) - return length; - /* Find where the NULL terminator is. 
*/ - maskz = _mm_cmpeq_epi8 (value, zero); - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; - if (maskz_bits != 0) - return length; - aligned += 16; - } - -start_loop: - while (1) - { - __m128i value = _mm_load_si128 ((__m128i *) aligned); - unsigned int index = _mm_cmpistri (mask, value, 0x12); - unsigned int cflag = _mm_cmpistrc (mask, value, 0x12); - if (cflag) - return (size_t) (aligned + index - s); - aligned += 16; - } -} +#include <string/strspn.c> diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c index 26d6984e9b..fa38dd898d 100644 --- a/sysdeps/x86_64/multiarch/wcscpy-c.c +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c @@ -1,5 +1,5 @@ #if IS_IN (libc) -# define WCSCPY __wcscpy_sse2 +# define WCSCPY __wcscpy_generic #endif #include <wcsmbs/wcscpy.c> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c index 6a2d1421d9..53c3228dc2 100644 --- a/sysdeps/x86_64/multiarch/wcscpy.c +++ b/sysdeps/x86_64/multiarch/wcscpy.c @@ -26,7 +26,7 @@ # define SYMBOL_NAME wcscpy # include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; static inline void * @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) return OPTIMIZE (ssse3); - return OPTIMIZE (sse2); + return OPTIMIZE (generic); } libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c index e1ec7cfbb5..1c9c04241a 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c @@ -1,9 +1,9 @@ #if IS_IN (libc) # include <wchar.h> -# define WCSNLEN __wcsnlen_sse2 +# define WCSNLEN __wcsnlen_generic -extern __typeof (wcsnlen) __wcsnlen_sse2; +extern __typeof (wcsnlen) __wcsnlen_generic; #endif #include "wcsmbs/wcsnlen.c" diff 
--git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c index baa26666a8..05b7a211de 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen.c +++ b/sysdeps/x86_64/multiarch/wcsnlen.c @@ -24,6 +24,7 @@ # undef __wcsnlen # define SYMBOL_NAME wcsnlen +# define GENERIC generic # include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); -- 2.34.1 ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] x86: Rename generic functions with unique postfix for clarity 2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein @ 2022-06-10 1:19 ` H.J. Lu 2022-06-10 1:26 ` Noah Goldstein 0 siblings, 1 reply; 11+ messages in thread From: H.J. Lu @ 2022-06-10 1:19 UTC (permalink / raw) To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell On Thu, Jun 9, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > No functions are changed. It just renames generic implementations from > '{func}_sse2' to '{func}_generic'. This is just because the postfix > "_sse2" was overloaded and was used for files that had hand-optimized > sse2 assembly implementations and files that just redirected back > to the generic implementation. This change isn't small and its benefit is very small. Can it be part of a bigger change to support building glibc with -march=x86-64-vN? > Full xcheck passed on x86_64. > --- > sysdeps/x86_64/multiarch/Makefile | 15 +- > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +- > sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +- > sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +- > sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +- > sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +- > sysdeps/x86_64/multiarch/stpncpy.c | 1 + > sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 163 ++++++++++++++++++ > sysdeps/x86_64/multiarch/strcspn-c.c | 151 +--------------- > sysdeps/x86_64/multiarch/strcspn-sse2.c | 28 --- > sysdeps/x86_64/multiarch/strncat-c.c | 2 +- > sysdeps/x86_64/multiarch/strncat.c | 1 + > sysdeps/x86_64/multiarch/strncpy-c.c | 2 +- > sysdeps/x86_64/multiarch/strncpy.c | 1 + > .../{strspn-sse2.c => strpbrk-c-sse4.c} | 18 +- > sysdeps/x86_64/multiarch/strpbrk-c.c | 18 +- > sysdeps/x86_64/multiarch/strpbrk-sse2.c | 28 --- > sysdeps/x86_64/multiarch/strspn-c-sse4.c | 136 +++++++++++++++ > sysdeps/x86_64/multiarch/strspn-c.c | 126 +------------- > sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +- > sysdeps/x86_64/multiarch/wcscpy.c | 4 +- >
sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 +- > sysdeps/x86_64/multiarch/wcsnlen.c | 1 + > 23 files changed, 376 insertions(+), 363 deletions(-) > create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c > delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c > rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%) > delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c > create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index 3d153cac35..86c6ecdfc1 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -77,7 +77,7 @@ sysdep_routines += \ > strcpy-sse2 \ > strcpy-sse2-unaligned \ > strcspn-c \ > - strcspn-sse2 \ > + strcspn-c-sse4 \ > strlen-avx2 \ > strlen-avx2-rtm \ > strlen-evex \ > @@ -109,21 +109,22 @@ sysdep_routines += \ > strnlen-evex512 \ > strnlen-sse2 \ > strpbrk-c \ > - strpbrk-sse2 \ > + strpbrk-c-sse4 \ > strrchr-avx2 \ > strrchr-avx2-rtm \ > strrchr-evex \ > strrchr-sse2 \ > strspn-c \ > - strspn-sse2 \ > + strspn-c-sse4 \ > strstr-avx512 \ > strstr-sse2-unaligned \ > varshift \ > # sysdep_routines > -CFLAGS-varshift.c += -msse4 > -CFLAGS-strcspn-c.c += -msse4 > -CFLAGS-strpbrk-c.c += -msse4 > -CFLAGS-strspn-c.c += -msse4 > + > +CFLAGS-strcspn-c-sse4.c += -msse4 > +CFLAGS-strpbrk-c-sse4.c += -msse4 > +CFLAGS-strspn-c-sse4.c += -msse4 > + > CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 > endif > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > index 58f3ec8306..4cbd200d39 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > __stpncpy_evex) > IFUNC_IMPL_ADD (array, i, stpncpy, 1, > __stpncpy_sse2_unaligned) > - IFUNC_IMPL_ADD (array, i, 
stpncpy, 1, __stpncpy_sse2)) > + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic)) > > /* Support sysdeps/x86_64/multiarch/stpcpy.c. */ > IFUNC_IMPL (i, name, stpcpy, > @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > IFUNC_IMPL (i, name, strcspn, > IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), > __strcspn_sse42) > - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) > + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic)) > > /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ > IFUNC_IMPL (i, name, strncasecmp, > @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > __strncat_evex) > IFUNC_IMPL_ADD (array, i, strncat, 1, > __strncat_sse2_unaligned) > - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2)) > + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic)) > > /* Support sysdeps/x86_64/multiarch/strncpy.c. */ > IFUNC_IMPL (i, name, strncpy, > @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > __strncpy_evex) > IFUNC_IMPL_ADD (array, i, strncpy, 1, > __strncpy_sse2_unaligned) > - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2)) > + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic)) > > /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ > IFUNC_IMPL (i, name, strpbrk, > IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), > __strpbrk_sse42) > - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) > + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic)) > > > /* Support sysdeps/x86_64/multiarch/strspn.c. */ > IFUNC_IMPL (i, name, strspn, > IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), > __strspn_sse42) > - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) > + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic)) > > /* Support sysdeps/x86_64/multiarch/strstr.c. 
*/ > IFUNC_IMPL (i, name, strstr, > @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > IFUNC_IMPL (i, name, wcscpy, > IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3), > __wcscpy_ssse3) > - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2)) > + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic)) > > /* Support sysdeps/x86_64/multiarch/wcslen.c. */ > IFUNC_IMPL (i, name, wcslen, > @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > IFUNC_IMPL_ADD (array, i, wcsnlen, > CPU_FEATURE_USABLE (SSE4_1), > __wcsnlen_sse4_1) > - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2)) > + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) > > /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ > IFUNC_IMPL (i, name, wmemchr, > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > index b555ff2fac..ee36525bcf 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > @@ -19,7 +19,7 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > static inline void * > @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) > return OPTIMIZE (sse42); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (generic); > } > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h > index a15afa44e9..80529458d1 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h > +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h > @@ -20,7 +20,11 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +#ifndef GENERIC > +# define GENERIC sse2 > +#endif > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) 
attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) > attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) > return OPTIMIZE (sse2_unaligned); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (GENERIC); > } > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > index 2b29e7608a..88c1c502af 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > @@ -19,7 +19,11 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +#ifndef GENERIC > +# define GENERIC sse2 > +#endif > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) > return OPTIMIZE (sse4_1); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (GENERIC); > } > diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c > index b016e487e1..eb62fcf388 100644 > --- a/sysdeps/x86_64/multiarch/stpncpy-c.c > +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c > @@ -1,4 +1,4 @@ > -#define STPNCPY __stpncpy_sse2 > +#define STPNCPY __stpncpy_generic > #undef weak_alias > #define weak_alias(ignored1, ignored2) > #undef libc_hidden_def > diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c > index 82fa53957d..879bc83f0b 100644 > --- a/sysdeps/x86_64/multiarch/stpncpy.c > +++ b/sysdeps/x86_64/multiarch/stpncpy.c > @@ -25,6 +25,7 @@ > # undef stpncpy > # undef __stpncpy > > +# define GENERIC generic > # define SYMBOL_NAME stpncpy > # include 
"ifunc-strcpy.h" > > diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > new file mode 100644 > index 0000000000..59f64f9fe8 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > @@ -0,0 +1,163 @@ > +/* strcspn with SSE4.2 intrinsics > + Copyright (C) 2009-2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <nmmintrin.h> > +#include <string.h> > +#include "varshift.h" > + > +/* We use 0x2: > + _SIDD_SBYTE_OPS > + | _SIDD_CMP_EQUAL_ANY > + | _SIDD_POSITIVE_POLARITY > + | _SIDD_LEAST_SIGNIFICANT > + on pcmpistri to compare xmm/mem128 > + > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > + X X X X X X X X X X X X X X X X > + > + against xmm > + > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > + A A A A A A A A A A A A A A A A > + > + to find out if the first 16byte data element has any byte A and > + the offset of the first byte. There are 3 cases: > + > + 1. The first 16byte data element has the byte A at the offset X. > + 2. The first 16byte data element has EOS and doesn't have the byte A. > + 3. The first 16byte data element is valid and doesn't have the byte A. 
> + > + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: > + > + 1 X 1 0/1 0 > + 2 16 0 1 0 > + 3 16 0 0 0 > + > + We exit from the loop for cases 1 and 2 with jbe which branches > + when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset > + X for case 1. */ > + > +#ifndef STRCSPN_GENERIC > +# define STRCSPN_GENERIC __strcspn_generic > +# define STRCSPN_SSE42 __strcspn_sse42 > +#endif > + > +#ifdef USE_AS_STRPBRK > +# define RETURN(val1, val2) return val1 > +#else > +# define RETURN(val1, val2) return val2 > +#endif > + > +extern > +#ifdef USE_AS_STRPBRK > +char * > +#else > +size_t > +#endif > +STRCSPN_GENERIC (const char *, const char *) attribute_hidden; > + > + > +#ifdef USE_AS_STRPBRK > +char * > +#else > +size_t > +#endif > +__attribute__ ((section (".text.sse4.2"))) > +STRCSPN_SSE42 (const char *s, const char *a) > +{ > + if (*a == 0) > + RETURN (NULL, strlen (s)); > + > + const char *aligned; > + __m128i mask, maskz, zero; > + unsigned int maskz_bits; > + unsigned int offset = (unsigned int) ((size_t) a & 15); > + zero = _mm_set1_epi8 (0); > + if (offset != 0) > + { > + /* Load masks. */ > + aligned = (const char *) ((size_t) a & -16L); > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > + maskz = _mm_cmpeq_epi8 (mask0, zero); > + > + /* Find where the NULL terminator is. */ > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > + if (maskz_bits != 0) > + { > + mask = __m128i_shift_right (mask0, offset); > + offset = (unsigned int) ((size_t) s & 15); > + if (offset) > + goto start_unaligned; > + > + aligned = s; > + goto start_loop; > + } > + } > + > + /* A is aligned. */ > + mask = _mm_loadu_si128 ((__m128i *) a); > + /* Find where the NULL terminator is. */ > + maskz = _mm_cmpeq_epi8 (mask, zero); > + maskz_bits = _mm_movemask_epi8 (maskz); > + if (maskz_bits == 0) > + { > + /* There is no NULL terminator. Don't use SSE4.2 if the length > + of A > 16. 
*/ > + if (a[16] != 0) > + return STRCSPN_GENERIC (s, a); > + } > + > + aligned = s; > + offset = (unsigned int) ((size_t) s & 15); > + if (offset != 0) > + { > + start_unaligned: > + /* Check partial string. */ > + aligned = (const char *) ((size_t) s & -16L); > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > + > + value = __m128i_shift_right (value, offset); > + > + unsigned int length = _mm_cmpistri (mask, value, 0x2); > + /* No need to check ZFlag since ZFlag is always 1. */ > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > + if (cflag) > + RETURN ((char *) (s + length), length); > + /* Find where the NULL terminator is. */ > + unsigned int index = _mm_cmpistri (value, value, 0x3a); > + if (index < 16 - offset) > + RETURN (NULL, index); > + aligned += 16; > + } > + > +start_loop: > + while (1) > + { > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > + unsigned int index = _mm_cmpistri (mask, value, 0x2); > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > + unsigned int zflag = _mm_cmpistrz (mask, value, 0x2); > + if (cflag) > + RETURN ((char *) (aligned + index), (size_t) (aligned + index - s)); > + if (zflag) > + RETURN (NULL, > + /* Find where the NULL terminator is. */ > + (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s)); > + aligned += 16; > + } > +} > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c > index c312fab8b1..423de2e2b2 100644 > --- a/sysdeps/x86_64/multiarch/strcspn-c.c > +++ b/sysdeps/x86_64/multiarch/strcspn-c.c > @@ -1,5 +1,5 @@ > -/* strcspn with SSE4.2 intrinsics > - Copyright (C) 2009-2022 Free Software Foundation, Inc. > +/* strcspn. > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > The GNU C Library is free software; you can redistribute it and/or > @@ -16,148 +16,13 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#include <nmmintrin.h> > -#include <string.h> > -#include "varshift.h" > +#if IS_IN (libc) > > -/* We use 0x2: > - _SIDD_SBYTE_OPS > - | _SIDD_CMP_EQUAL_ANY > - | _SIDD_POSITIVE_POLARITY > - | _SIDD_LEAST_SIGNIFICANT > - on pcmpistri to compare xmm/mem128 > +# include <sysdep.h> > +# define STRCSPN __strcspn_generic > > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > - X X X X X X X X X X X X X X X X > - > - against xmm > - > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > - A A A A A A A A A A A A A A A A > - > - to find out if the first 16byte data element has any byte A and > - the offset of the first byte. There are 3 cases: > - > - 1. The first 16byte data element has the byte A at the offset X. > - 2. The first 16byte data element has EOS and doesn't have the byte A. > - 3. The first 16byte data element is valid and doesn't have the byte A. > - > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: > - > - 1 X 1 0/1 0 > - 2 16 0 1 0 > - 3 16 0 0 0 > - > - We exit from the loop for cases 1 and 2 with jbe which branches > - when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset > - X for case 1. 
*/ > - > -#ifndef STRCSPN_SSE2 > -# define STRCSPN_SSE2 __strcspn_sse2 > -# define STRCSPN_SSE42 __strcspn_sse42 > -#endif > - > -#ifdef USE_AS_STRPBRK > -# define RETURN(val1, val2) return val1 > -#else > -# define RETURN(val1, val2) return val2 > -#endif > - > -extern > -#ifdef USE_AS_STRPBRK > -char * > -#else > -size_t > -#endif > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; > - > - > -#ifdef USE_AS_STRPBRK > -char * > -#else > -size_t > +# undef libc_hidden_builtin_def > +# define libc_hidden_builtin_def(STRCSPN) > #endif > -__attribute__ ((section (".text.sse4.2"))) > -STRCSPN_SSE42 (const char *s, const char *a) > -{ > - if (*a == 0) > - RETURN (NULL, strlen (s)); > - > - const char *aligned; > - __m128i mask, maskz, zero; > - unsigned int maskz_bits; > - unsigned int offset = (unsigned int) ((size_t) a & 15); > - zero = _mm_set1_epi8 (0); > - if (offset != 0) > - { > - /* Load masks. */ > - aligned = (const char *) ((size_t) a & -16L); > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > - maskz = _mm_cmpeq_epi8 (mask0, zero); > - > - /* Find where the NULL terminator is. */ > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > - if (maskz_bits != 0) > - { > - mask = __m128i_shift_right (mask0, offset); > - offset = (unsigned int) ((size_t) s & 15); > - if (offset) > - goto start_unaligned; > - > - aligned = s; > - goto start_loop; > - } > - } > - > - /* A is aligned. */ > - mask = _mm_loadu_si128 ((__m128i *) a); > - /* Find where the NULL terminator is. */ > - maskz = _mm_cmpeq_epi8 (mask, zero); > - maskz_bits = _mm_movemask_epi8 (maskz); > - if (maskz_bits == 0) > - { > - /* There is no NULL terminator. Don't use SSE4.2 if the length > - of A > 16. */ > - if (a[16] != 0) > - return STRCSPN_SSE2 (s, a); > - } > - > - aligned = s; > - offset = (unsigned int) ((size_t) s & 15); > - if (offset != 0) > - { > - start_unaligned: > - /* Check partial string. 
*/ > - aligned = (const char *) ((size_t) s & -16L); > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > - > - value = __m128i_shift_right (value, offset); > - > - unsigned int length = _mm_cmpistri (mask, value, 0x2); > - /* No need to check ZFlag since ZFlag is always 1. */ > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > - if (cflag) > - RETURN ((char *) (s + length), length); > - /* Find where the NULL terminator is. */ > - unsigned int index = _mm_cmpistri (value, value, 0x3a); > - if (index < 16 - offset) > - RETURN (NULL, index); > - aligned += 16; > - } > > -start_loop: > - while (1) > - { > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > - unsigned int index = _mm_cmpistri (mask, value, 0x2); > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > - unsigned int zflag = _mm_cmpistrz (mask, value, 0x2); > - if (cflag) > - RETURN ((char *) (aligned + index), (size_t) (aligned + index - s)); > - if (zflag) > - RETURN (NULL, > - /* Find where the NULL terminator is. */ > - (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s)); > - aligned += 16; > - } > -} > +#include <string/strcspn.c> > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c > deleted file mode 100644 > index 3a04bb39fc..0000000000 > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c > +++ /dev/null > @@ -1,28 +0,0 @@ > -/* strcspn. > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. */ > - > -#if IS_IN (libc) > - > -# include <sysdep.h> > -# define STRCSPN __strcspn_sse2 > - > -# undef libc_hidden_builtin_def > -# define libc_hidden_builtin_def(STRCSPN) > -#endif > - > -#include <string/strcspn.c> > diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c > index 93a7fab7ea..b729c033d9 100644 > --- a/sysdeps/x86_64/multiarch/strncat-c.c > +++ b/sysdeps/x86_64/multiarch/strncat-c.c > @@ -1,2 +1,2 @@ > -#define STRNCAT __strncat_sse2 > +#define STRNCAT __strncat_generic > #include <string/strncat.c> > diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c > index b649343a97..50fba8a41f 100644 > --- a/sysdeps/x86_64/multiarch/strncat.c > +++ b/sysdeps/x86_64/multiarch/strncat.c > @@ -24,6 +24,7 @@ > # undef strncat > > # define SYMBOL_NAME strncat > +# define GENERIC generic > # include "ifunc-strcpy.h" > > libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ()); > diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c > index 57c45ac7ab..183b0b8e0f 100644 > --- a/sysdeps/x86_64/multiarch/strncpy-c.c > +++ b/sysdeps/x86_64/multiarch/strncpy-c.c > @@ -1,4 +1,4 @@ > -#define STRNCPY __strncpy_sse2 > +#define STRNCPY __strncpy_generic > #undef libc_hidden_builtin_def > #define libc_hidden_builtin_def(strncpy) > > diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c > index 2a780a7e16..7fc7d72ec5 100644 > --- a/sysdeps/x86_64/multiarch/strncpy.c > +++ b/sysdeps/x86_64/multiarch/strncpy.c > @@ -24,6 +24,7 @@ > # undef strncpy > > # define SYMBOL_NAME strncpy > +# define GENERIC generic > # include "ifunc-strcpy.h" > > libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR 
()); > diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > similarity index 74% > rename from sysdeps/x86_64/multiarch/strspn-sse2.c > rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > index 61cc6cb0a5..8700276773 100644 > --- a/sysdeps/x86_64/multiarch/strspn-sse2.c > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > @@ -1,5 +1,5 @@ > -/* strspn. > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > +/* strpbrk with SSE4.2 intrinsics > + Copyright (C) 2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > The GNU C Library is free software; you can redistribute it and/or > @@ -16,13 +16,7 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#if IS_IN (libc) > - > -# include <sysdep.h> > -# define STRSPN __strspn_sse2 > - > -# undef libc_hidden_builtin_def > -# define libc_hidden_builtin_def(STRSPN) > -#endif > - > -#include <string/strspn.c> > +#define USE_AS_STRPBRK > +#define STRCSPN_GENERIC __strpbrk_generic > +#define STRCSPN_SSE42 __strpbrk_sse42 > +#include "strcspn-c-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c > index abf4ff7f1a..d31acfe495 100644 > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c > +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c > @@ -1,5 +1,5 @@ > -/* strpbrk with SSE4.2 intrinsics > - Copyright (C) 2022 Free Software Foundation, Inc. > +/* strpbrk. > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > The GNU C Library is free software; you can redistribute it and/or > @@ -16,7 +16,13 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#define USE_AS_STRPBRK > -#define STRCSPN_SSE2 __strpbrk_sse2 > -#define STRCSPN_SSE42 __strpbrk_sse42 > -#include "strcspn-c.c" > +#if IS_IN (libc) > + > +# include <sysdep.h> > +# define STRPBRK __strpbrk_generic > + > +# undef libc_hidden_builtin_def > +# define libc_hidden_builtin_def(STRPBRK) > +#endif > + > +#include <string/strpbrk.c> > diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c > deleted file mode 100644 > index d03214c4fb..0000000000 > --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c > +++ /dev/null > @@ -1,28 +0,0 @@ > -/* strpbrk. > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > - <https://www.gnu.org/licenses/>. */ > - > -#if IS_IN (libc) > - > -# include <sysdep.h> > -# define STRPBRK __strpbrk_sse2 > - > -# undef libc_hidden_builtin_def > -# define libc_hidden_builtin_def(STRPBRK) > -#endif > - > -#include <string/strpbrk.c> > diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > new file mode 100644 > index 0000000000..d044916688 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > @@ -0,0 +1,136 @@ > +/* strspn with SSE4.2 intrinsics > + Copyright (C) 2009-2022 Free Software Foundation, Inc. 
> + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <nmmintrin.h> > +#include <string.h> > +#include "varshift.h" > + > +/* We use 0x12: > + _SIDD_SBYTE_OPS > + | _SIDD_CMP_EQUAL_ANY > + | _SIDD_NEGATIVE_POLARITY > + | _SIDD_LEAST_SIGNIFICANT > + on pcmpistri to compare xmm/mem128 > + > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > + X X X X X X X X X X X X X X X X > + > + against xmm > + > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > + A A A A A A A A A A A A A A A A > + > + to find out if the first 16byte data element has any non-A byte and > + the offset of the first byte. There are 2 cases: > + > + 1. The first 16byte data element has the non-A byte, including > + EOS, at the offset X. > + 2. The first 16byte data element is valid and doesn't have the non-A > + byte. > + > + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: > + > + case ECX CFlag ZFlag SFlag > + 1 X 1 0/1 0 > + 2 16 0 0 0 > + > + We exit from the loop for case 1. 
*/ > + > +extern size_t __strspn_generic (const char *, const char *) attribute_hidden; > + > + > +size_t > +__attribute__ ((section (".text.sse4.2"))) > +__strspn_sse42 (const char *s, const char *a) > +{ > + if (*a == 0) > + return 0; > + > + const char *aligned; > + __m128i mask, maskz, zero; > + unsigned int maskz_bits; > + unsigned int offset = (int) ((size_t) a & 15); > + zero = _mm_set1_epi8 (0); > + if (offset != 0) > + { > + /* Load masks. */ > + aligned = (const char *) ((size_t) a & -16L); > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > + maskz = _mm_cmpeq_epi8 (mask0, zero); > + > + /* Find where the NULL terminator is. */ > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > + if (maskz_bits != 0) > + { > + mask = __m128i_shift_right (mask0, offset); > + offset = (unsigned int) ((size_t) s & 15); > + if (offset) > + goto start_unaligned; > + > + aligned = s; > + goto start_loop; > + } > + } > + > + /* A is aligned. */ > + mask = _mm_loadu_si128 ((__m128i *) a); > + > + /* Find where the NULL terminator is. */ > + maskz = _mm_cmpeq_epi8 (mask, zero); > + maskz_bits = _mm_movemask_epi8 (maskz); > + if (maskz_bits == 0) > + { > + /* There is no NULL terminator. Don't use SSE4.2 if the length > + of A > 16. */ > + if (a[16] != 0) > + return __strspn_generic (s, a); > + } > + aligned = s; > + offset = (unsigned int) ((size_t) s & 15); > + > + if (offset != 0) > + { > + start_unaligned: > + /* Check partial string. */ > + aligned = (const char *) ((size_t) s & -16L); > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > + __m128i adj_value = __m128i_shift_right (value, offset); > + > + unsigned int length = _mm_cmpistri (mask, adj_value, 0x12); > + /* No need to check CFlag since it is always 1. */ > + if (length < 16 - offset) > + return length; > + /* Find where the NULL terminator is. 
*/ > + maskz = _mm_cmpeq_epi8 (value, zero); > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > + if (maskz_bits != 0) > + return length; > + aligned += 16; > + } > + > +start_loop: > + while (1) > + { > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > + unsigned int index = _mm_cmpistri (mask, value, 0x12); > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x12); > + if (cflag) > + return (size_t) (aligned + index - s); > + aligned += 16; > + } > +} > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c > index 6124033ceb..6b50c36432 100644 > --- a/sysdeps/x86_64/multiarch/strspn-c.c > +++ b/sysdeps/x86_64/multiarch/strspn-c.c > @@ -1,5 +1,5 @@ > -/* strspn with SSE4.2 intrinsics > - Copyright (C) 2009-2022 Free Software Foundation, Inc. > +/* strspn. > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > The GNU C Library is free software; you can redistribute it and/or > @@ -16,121 +16,13 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#include <nmmintrin.h> > -#include <string.h> > -#include "varshift.h" > +#if IS_IN (libc) > > -/* We use 0x12: > - _SIDD_SBYTE_OPS > - | _SIDD_CMP_EQUAL_ANY > - | _SIDD_NEGATIVE_POLARITY > - | _SIDD_LEAST_SIGNIFICANT > - on pcmpistri to compare xmm/mem128 > +# include <sysdep.h> > +# define STRSPN __strspn_generic > > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > - X X X X X X X X X X X X X X X X > +# undef libc_hidden_builtin_def > +# define libc_hidden_builtin_def(STRSPN) > +#endif > > - against xmm > - > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > - A A A A A A A A A A A A A A A A > - > - to find out if the first 16byte data element has any non-A byte and > - the offset of the first byte. There are 2 cases: > - > - 1. The first 16byte data element has the non-A byte, including > - EOS, at the offset X. > - 2. The first 16byte data element is valid and doesn't have the non-A > - byte. 
> - > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: > - > - case ECX CFlag ZFlag SFlag > - 1 X 1 0/1 0 > - 2 16 0 0 0 > - > - We exit from the loop for case 1. */ > - > -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; > - > - > -size_t > -__attribute__ ((section (".text.sse4.2"))) > -__strspn_sse42 (const char *s, const char *a) > -{ > - if (*a == 0) > - return 0; > - > - const char *aligned; > - __m128i mask, maskz, zero; > - unsigned int maskz_bits; > - unsigned int offset = (int) ((size_t) a & 15); > - zero = _mm_set1_epi8 (0); > - if (offset != 0) > - { > - /* Load masks. */ > - aligned = (const char *) ((size_t) a & -16L); > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > - maskz = _mm_cmpeq_epi8 (mask0, zero); > - > - /* Find where the NULL terminator is. */ > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > - if (maskz_bits != 0) > - { > - mask = __m128i_shift_right (mask0, offset); > - offset = (unsigned int) ((size_t) s & 15); > - if (offset) > - goto start_unaligned; > - > - aligned = s; > - goto start_loop; > - } > - } > - > - /* A is aligned. */ > - mask = _mm_loadu_si128 ((__m128i *) a); > - > - /* Find where the NULL terminator is. */ > - maskz = _mm_cmpeq_epi8 (mask, zero); > - maskz_bits = _mm_movemask_epi8 (maskz); > - if (maskz_bits == 0) > - { > - /* There is no NULL terminator. Don't use SSE4.2 if the length > - of A > 16. */ > - if (a[16] != 0) > - return __strspn_sse2 (s, a); > - } > - aligned = s; > - offset = (unsigned int) ((size_t) s & 15); > - > - if (offset != 0) > - { > - start_unaligned: > - /* Check partial string. */ > - aligned = (const char *) ((size_t) s & -16L); > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > - __m128i adj_value = __m128i_shift_right (value, offset); > - > - unsigned int length = _mm_cmpistri (mask, adj_value, 0x12); > - /* No need to check CFlag since it is always 1. 
*/ > - if (length < 16 - offset) > - return length; > - /* Find where the NULL terminator is. */ > - maskz = _mm_cmpeq_epi8 (value, zero); > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > - if (maskz_bits != 0) > - return length; > - aligned += 16; > - } > - > -start_loop: > - while (1) > - { > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > - unsigned int index = _mm_cmpistri (mask, value, 0x12); > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x12); > - if (cflag) > - return (size_t) (aligned + index - s); > - aligned += 16; > - } > -} > +#include <string/strspn.c> > diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c > index 26d6984e9b..fa38dd898d 100644 > --- a/sysdeps/x86_64/multiarch/wcscpy-c.c > +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c > @@ -1,5 +1,5 @@ > #if IS_IN (libc) > -# define WCSCPY __wcscpy_sse2 > +# define WCSCPY __wcscpy_generic > #endif > > #include <wcsmbs/wcscpy.c> > diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c > index 6a2d1421d9..53c3228dc2 100644 > --- a/sysdeps/x86_64/multiarch/wcscpy.c > +++ b/sysdeps/x86_64/multiarch/wcscpy.c > @@ -26,7 +26,7 @@ > # define SYMBOL_NAME wcscpy > # include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; > > static inline void * > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) > return OPTIMIZE (ssse3); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (generic); > } > > libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c > index e1ec7cfbb5..1c9c04241a 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c > +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c > @@ -1,9 +1,9 @@ > #if IS_IN (libc) > # 
include <wchar.h> > > -# define WCSNLEN __wcsnlen_sse2 > +# define WCSNLEN __wcsnlen_generic > > -extern __typeof (wcsnlen) __wcsnlen_sse2; > +extern __typeof (wcsnlen) __wcsnlen_generic; > #endif > > #include "wcsmbs/wcsnlen.c" > diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c > index baa26666a8..05b7a211de 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen.c > +++ b/sysdeps/x86_64/multiarch/wcsnlen.c > @@ -24,6 +24,7 @@ > # undef __wcsnlen > > # define SYMBOL_NAME wcsnlen > +# define GENERIC generic > # include "ifunc-wcslen.h" > > libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); > -- > 2.34.1 > -- H.J. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] x86: Rename generic functions with unique postfix for clarity 2022-06-10 1:19 ` H.J. Lu @ 2022-06-10 1:26 ` Noah Goldstein 0 siblings, 0 replies; 11+ messages in thread From: Noah Goldstein @ 2022-06-10 1:26 UTC (permalink / raw) To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell On Thu, Jun 9, 2022 at 6:20 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Thu, Jun 9, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > No functions are changed. It just renames generic implementations from > > '{func}_sse2' to '{func}_generic'. This is just because the postfix > > "_sse2" was overloaded and was used for files that had hand-optimized > > sse2 assembly implementations and files that just redirected back > > to the generic implementation. > > This change isn't small and its benefit is very small. Can it be the part of > a big change to support building glibc with > > -march=x86-64-vN kk > > > Full xcheck passed on x86_64. > > --- > > sysdeps/x86_64/multiarch/Makefile | 15 +- > > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +- > > sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +- > > sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +- > > sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +- > > sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +- > > sysdeps/x86_64/multiarch/stpncpy.c | 1 + > > sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 163 ++++++++++++++++++ > > sysdeps/x86_64/multiarch/strcspn-c.c | 151 +--------------- > > sysdeps/x86_64/multiarch/strcspn-sse2.c | 28 --- > > sysdeps/x86_64/multiarch/strncat-c.c | 2 +- > > sysdeps/x86_64/multiarch/strncat.c | 1 + > > sysdeps/x86_64/multiarch/strncpy-c.c | 2 +- > > sysdeps/x86_64/multiarch/strncpy.c | 1 + > > .../{strspn-sse2.c => strpbrk-c-sse4.c} | 18 +- > > sysdeps/x86_64/multiarch/strpbrk-c.c | 18 +- > > sysdeps/x86_64/multiarch/strpbrk-sse2.c | 28 --- > > sysdeps/x86_64/multiarch/strspn-c-sse4.c | 136 +++++++++++++++ > > sysdeps/x86_64/multiarch/strspn-c.c | 126 +------------- > > 
sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +- > > sysdeps/x86_64/multiarch/wcscpy.c | 4 +- > > sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 +- > > sysdeps/x86_64/multiarch/wcsnlen.c | 1 + > > 23 files changed, 376 insertions(+), 363 deletions(-) > > create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c > > delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c > > rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%) > > delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c > > create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c > > > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > > index 3d153cac35..86c6ecdfc1 100644 > > --- a/sysdeps/x86_64/multiarch/Makefile > > +++ b/sysdeps/x86_64/multiarch/Makefile > > @@ -77,7 +77,7 @@ sysdep_routines += \ > > strcpy-sse2 \ > > strcpy-sse2-unaligned \ > > strcspn-c \ > > - strcspn-sse2 \ > > + strcspn-c-sse4 \ > > strlen-avx2 \ > > strlen-avx2-rtm \ > > strlen-evex \ > > @@ -109,21 +109,22 @@ sysdep_routines += \ > > strnlen-evex512 \ > > strnlen-sse2 \ > > strpbrk-c \ > > - strpbrk-sse2 \ > > + strpbrk-c-sse4 \ > > strrchr-avx2 \ > > strrchr-avx2-rtm \ > > strrchr-evex \ > > strrchr-sse2 \ > > strspn-c \ > > - strspn-sse2 \ > > + strspn-c-sse4 \ > > strstr-avx512 \ > > strstr-sse2-unaligned \ > > varshift \ > > # sysdep_routines > > -CFLAGS-varshift.c += -msse4 > > -CFLAGS-strcspn-c.c += -msse4 > > -CFLAGS-strpbrk-c.c += -msse4 > > -CFLAGS-strspn-c.c += -msse4 > > + > > +CFLAGS-strcspn-c-sse4.c += -msse4 > > +CFLAGS-strpbrk-c-sse4.c += -msse4 > > +CFLAGS-strspn-c-sse4.c += -msse4 > > + > > CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 > > endif > > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > index 58f3ec8306..4cbd200d39 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > > @@ -372,7 
+372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > __stpncpy_evex) > > IFUNC_IMPL_ADD (array, i, stpncpy, 1, > > __stpncpy_sse2_unaligned) > > - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) > > + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic)) > > > > /* Support sysdeps/x86_64/multiarch/stpcpy.c. */ > > IFUNC_IMPL (i, name, stpcpy, > > @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > IFUNC_IMPL (i, name, strcspn, > > IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), > > __strcspn_sse42) > > - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) > > + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic)) > > > > /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ > > IFUNC_IMPL (i, name, strncasecmp, > > @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > __strncat_evex) > > IFUNC_IMPL_ADD (array, i, strncat, 1, > > __strncat_sse2_unaligned) > > - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2)) > > + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic)) > > > > /* Support sysdeps/x86_64/multiarch/strncpy.c. */ > > IFUNC_IMPL (i, name, strncpy, > > @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > __strncpy_evex) > > IFUNC_IMPL_ADD (array, i, strncpy, 1, > > __strncpy_sse2_unaligned) > > - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2)) > > + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic)) > > > > /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ > > IFUNC_IMPL (i, name, strpbrk, > > IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), > > __strpbrk_sse42) > > - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) > > + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic)) > > > > > > /* Support sysdeps/x86_64/multiarch/strspn.c. 
*/ > > IFUNC_IMPL (i, name, strspn, > > IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), > > __strspn_sse42) > > - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) > > + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic)) > > > > /* Support sysdeps/x86_64/multiarch/strstr.c. */ > > IFUNC_IMPL (i, name, strstr, > > @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > IFUNC_IMPL (i, name, wcscpy, > > IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3), > > __wcscpy_ssse3) > > - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2)) > > + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic)) > > > > /* Support sysdeps/x86_64/multiarch/wcslen.c. */ > > IFUNC_IMPL (i, name, wcslen, > > @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > > IFUNC_IMPL_ADD (array, i, wcsnlen, > > CPU_FEATURE_USABLE (SSE4_1), > > __wcsnlen_sse4_1) > > - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2)) > > + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) > > > > /* Support sysdeps/x86_64/multiarch/wmemchr.c. 
*/ > > IFUNC_IMPL (i, name, wmemchr, > > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > > index b555ff2fac..ee36525bcf 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > > +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > > @@ -19,7 +19,7 @@ > > > > #include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > > > static inline void * > > @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void) > > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) > > return OPTIMIZE (sse42); > > > > - return OPTIMIZE (sse2); > > + return OPTIMIZE (generic); > > } > > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h > > index a15afa44e9..80529458d1 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h > > +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h > > @@ -20,7 +20,11 @@ > > > > #include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > +#ifndef GENERIC > > +# define GENERIC sse2 > > +#endif > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) > > attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void) > > if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) > > return OPTIMIZE (sse2_unaligned); > > > > - return OPTIMIZE (sse2); > > + return OPTIMIZE (GENERIC); > > } > > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > > index 2b29e7608a..88c1c502af 100644 > > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h > > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > > @@ -19,7 +19,11 @@ > > > > #include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) 
OPTIMIZE (sse2) attribute_hidden; > > +#ifndef GENERIC > > +# define GENERIC sse2 > > +#endif > > + > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > > @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void) > > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) > > return OPTIMIZE (sse4_1); > > > > - return OPTIMIZE (sse2); > > + return OPTIMIZE (GENERIC); > > } > > diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c > > index b016e487e1..eb62fcf388 100644 > > --- a/sysdeps/x86_64/multiarch/stpncpy-c.c > > +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c > > @@ -1,4 +1,4 @@ > > -#define STPNCPY __stpncpy_sse2 > > +#define STPNCPY __stpncpy_generic > > #undef weak_alias > > #define weak_alias(ignored1, ignored2) > > #undef libc_hidden_def > > diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c > > index 82fa53957d..879bc83f0b 100644 > > --- a/sysdeps/x86_64/multiarch/stpncpy.c > > +++ b/sysdeps/x86_64/multiarch/stpncpy.c > > @@ -25,6 +25,7 @@ > > # undef stpncpy > > # undef __stpncpy > > > > +# define GENERIC generic > > # define SYMBOL_NAME stpncpy > > # include "ifunc-strcpy.h" > > > > diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > > new file mode 100644 > > index 0000000000..59f64f9fe8 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c > > @@ -0,0 +1,163 @@ > > +/* strcspn with SSE4.2 intrinsics > > + Copyright (C) 2009-2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. 
> > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. > > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <nmmintrin.h> > > +#include <string.h> > > +#include "varshift.h" > > + > > +/* We use 0x2: > > + _SIDD_SBYTE_OPS > > + | _SIDD_CMP_EQUAL_ANY > > + | _SIDD_POSITIVE_POLARITY > > + | _SIDD_LEAST_SIGNIFICANT > > + on pcmpistri to compare xmm/mem128 > > + > > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > > + X X X X X X X X X X X X X X X X > > + > > + against xmm > > + > > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > > + A A A A A A A A A A A A A A A A > > + > > + to find out if the first 16byte data element has any byte A and > > + the offset of the first byte. There are 3 cases: > > + > > + 1. The first 16byte data element has the byte A at the offset X. > > + 2. The first 16byte data element has EOS and doesn't have the byte A. > > + 3. The first 16byte data element is valid and doesn't have the byte A. > > + > > + Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases: > > + > > + 1 X 1 0/1 0 > > + 2 16 0 1 0 > > + 3 16 0 0 0 > > + > > + We exit from the loop for cases 1 and 2 with jbe which branches > > + when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset > > + X for case 1. 
*/ > > + > > +#ifndef STRCSPN_GENERIC > > +# define STRCSPN_GENERIC __strcspn_generic > > +# define STRCSPN_SSE42 __strcspn_sse42 > > +#endif > > + > > +#ifdef USE_AS_STRPBRK > > +# define RETURN(val1, val2) return val1 > > +#else > > +# define RETURN(val1, val2) return val2 > > +#endif > > + > > +extern > > +#ifdef USE_AS_STRPBRK > > +char * > > +#else > > +size_t > > +#endif > > +STRCSPN_GENERIC (const char *, const char *) attribute_hidden; > > + > > + > > +#ifdef USE_AS_STRPBRK > > +char * > > +#else > > +size_t > > +#endif > > +__attribute__ ((section (".text.sse4.2"))) > > +STRCSPN_SSE42 (const char *s, const char *a) > > +{ > > + if (*a == 0) > > + RETURN (NULL, strlen (s)); > > + > > + const char *aligned; > > + __m128i mask, maskz, zero; > > + unsigned int maskz_bits; > > + unsigned int offset = (unsigned int) ((size_t) a & 15); > > + zero = _mm_set1_epi8 (0); > > + if (offset != 0) > > + { > > + /* Load masks. */ > > + aligned = (const char *) ((size_t) a & -16L); > > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > > + maskz = _mm_cmpeq_epi8 (mask0, zero); > > + > > + /* Find where the NULL terminator is. */ > > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > > + if (maskz_bits != 0) > > + { > > + mask = __m128i_shift_right (mask0, offset); > > + offset = (unsigned int) ((size_t) s & 15); > > + if (offset) > > + goto start_unaligned; > > + > > + aligned = s; > > + goto start_loop; > > + } > > + } > > + > > + /* A is aligned. */ > > + mask = _mm_loadu_si128 ((__m128i *) a); > > + /* Find where the NULL terminator is. */ > > + maskz = _mm_cmpeq_epi8 (mask, zero); > > + maskz_bits = _mm_movemask_epi8 (maskz); > > + if (maskz_bits == 0) > > + { > > + /* There is no NULL terminator. Don't use SSE4.2 if the length > > + of A > 16. 
*/ > > + if (a[16] != 0) > > + return STRCSPN_GENERIC (s, a); > > + } > > + > > + aligned = s; > > + offset = (unsigned int) ((size_t) s & 15); > > + if (offset != 0) > > + { > > + start_unaligned: > > + /* Check partial string. */ > > + aligned = (const char *) ((size_t) s & -16L); > > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > > + > > + value = __m128i_shift_right (value, offset); > > + > > + unsigned int length = _mm_cmpistri (mask, value, 0x2); > > + /* No need to check ZFlag since ZFlag is always 1. */ > > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > > + if (cflag) > > + RETURN ((char *) (s + length), length); > > + /* Find where the NULL terminator is. */ > > + unsigned int index = _mm_cmpistri (value, value, 0x3a); > > + if (index < 16 - offset) > > + RETURN (NULL, index); > > + aligned += 16; > > + } > > + > > +start_loop: > > + while (1) > > + { > > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > > + unsigned int index = _mm_cmpistri (mask, value, 0x2); > > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > > + unsigned int zflag = _mm_cmpistrz (mask, value, 0x2); > > + if (cflag) > > + RETURN ((char *) (aligned + index), (size_t) (aligned + index - s)); > > + if (zflag) > > + RETURN (NULL, > > + /* Find where the NULL terminator is. */ > > + (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s)); > > + aligned += 16; > > + } > > +} > > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c > > index c312fab8b1..423de2e2b2 100644 > > --- a/sysdeps/x86_64/multiarch/strcspn-c.c > > +++ b/sysdeps/x86_64/multiarch/strcspn-c.c > > @@ -1,5 +1,5 @@ > > -/* strcspn with SSE4.2 intrinsics > > - Copyright (C) 2009-2022 Free Software Foundation, Inc. > > +/* strcspn. > > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. 
> > > > The GNU C Library is free software; you can redistribute it and/or > > @@ -16,148 +16,13 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#include <nmmintrin.h> > > -#include <string.h> > > -#include "varshift.h" > > +#if IS_IN (libc) > > > > -/* We use 0x2: > > - _SIDD_SBYTE_OPS > > - | _SIDD_CMP_EQUAL_ANY > > - | _SIDD_POSITIVE_POLARITY > > - | _SIDD_LEAST_SIGNIFICANT > > - on pcmpistri to compare xmm/mem128 > > +# include <sysdep.h> > > +# define STRCSPN __strcspn_generic > > > > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > > - X X X X X X X X X X X X X X X X > > - > > - against xmm > > - > > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > > - A A A A A A A A A A A A A A A A > > - > > - to find out if the first 16byte data element has any byte A and > > - the offset of the first byte. There are 3 cases: > > - > > - 1. The first 16byte data element has the byte A at the offset X. > > - 2. The first 16byte data element has EOS and doesn't have the byte A. > > - 3. The first 16byte data element is valid and doesn't have the byte A. > > - > > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: > > - > > - 1 X 1 0/1 0 > > - 2 16 0 1 0 > > - 3 16 0 0 0 > > - > > - We exit from the loop for cases 1 and 2 with jbe which branches > > - when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset > > - X for case 1. 
*/ > > - > > -#ifndef STRCSPN_SSE2 > > -# define STRCSPN_SSE2 __strcspn_sse2 > > -# define STRCSPN_SSE42 __strcspn_sse42 > > -#endif > > - > > -#ifdef USE_AS_STRPBRK > > -# define RETURN(val1, val2) return val1 > > -#else > > -# define RETURN(val1, val2) return val2 > > -#endif > > - > > -extern > > -#ifdef USE_AS_STRPBRK > > -char * > > -#else > > -size_t > > -#endif > > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; > > - > > - > > -#ifdef USE_AS_STRPBRK > > -char * > > -#else > > -size_t > > +# undef libc_hidden_builtin_def > > +# define libc_hidden_builtin_def(STRCSPN) > > #endif > > -__attribute__ ((section (".text.sse4.2"))) > > -STRCSPN_SSE42 (const char *s, const char *a) > > -{ > > - if (*a == 0) > > - RETURN (NULL, strlen (s)); > > - > > - const char *aligned; > > - __m128i mask, maskz, zero; > > - unsigned int maskz_bits; > > - unsigned int offset = (unsigned int) ((size_t) a & 15); > > - zero = _mm_set1_epi8 (0); > > - if (offset != 0) > > - { > > - /* Load masks. */ > > - aligned = (const char *) ((size_t) a & -16L); > > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > > - maskz = _mm_cmpeq_epi8 (mask0, zero); > > - > > - /* Find where the NULL terminator is. */ > > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > > - if (maskz_bits != 0) > > - { > > - mask = __m128i_shift_right (mask0, offset); > > - offset = (unsigned int) ((size_t) s & 15); > > - if (offset) > > - goto start_unaligned; > > - > > - aligned = s; > > - goto start_loop; > > - } > > - } > > - > > - /* A is aligned. */ > > - mask = _mm_loadu_si128 ((__m128i *) a); > > - /* Find where the NULL terminator is. */ > > - maskz = _mm_cmpeq_epi8 (mask, zero); > > - maskz_bits = _mm_movemask_epi8 (maskz); > > - if (maskz_bits == 0) > > - { > > - /* There is no NULL terminator. Don't use SSE4.2 if the length > > - of A > 16. 
*/ > > - if (a[16] != 0) > > - return STRCSPN_SSE2 (s, a); > > - } > > - > > - aligned = s; > > - offset = (unsigned int) ((size_t) s & 15); > > - if (offset != 0) > > - { > > - start_unaligned: > > - /* Check partial string. */ > > - aligned = (const char *) ((size_t) s & -16L); > > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > > - > > - value = __m128i_shift_right (value, offset); > > - > > - unsigned int length = _mm_cmpistri (mask, value, 0x2); > > - /* No need to check ZFlag since ZFlag is always 1. */ > > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > > - if (cflag) > > - RETURN ((char *) (s + length), length); > > - /* Find where the NULL terminator is. */ > > - unsigned int index = _mm_cmpistri (value, value, 0x3a); > > - if (index < 16 - offset) > > - RETURN (NULL, index); > > - aligned += 16; > > - } > > > > -start_loop: > > - while (1) > > - { > > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > > - unsigned int index = _mm_cmpistri (mask, value, 0x2); > > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2); > > - unsigned int zflag = _mm_cmpistrz (mask, value, 0x2); > > - if (cflag) > > - RETURN ((char *) (aligned + index), (size_t) (aligned + index - s)); > > - if (zflag) > > - RETURN (NULL, > > - /* Find where the NULL terminator is. */ > > - (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s)); > > - aligned += 16; > > - } > > -} > > +#include <string/strcspn.c> > > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c > > deleted file mode 100644 > > index 3a04bb39fc..0000000000 > > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c > > +++ /dev/null > > @@ -1,28 +0,0 @@ > > -/* strcspn. > > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > > - This file is part of the GNU C Library. 
> > - > > - The GNU C Library is free software; you can redistribute it and/or > > - modify it under the terms of the GNU Lesser General Public > > - License as published by the Free Software Foundation; either > > - version 2.1 of the License, or (at your option) any later version. > > - > > - The GNU C Library is distributed in the hope that it will be useful, > > - but WITHOUT ANY WARRANTY; without even the implied warranty of > > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - Lesser General Public License for more details. > > - > > - You should have received a copy of the GNU Lesser General Public > > - License along with the GNU C Library; if not, see > > - <https://www.gnu.org/licenses/>. */ > > - > > -#if IS_IN (libc) > > - > > -# include <sysdep.h> > > -# define STRCSPN __strcspn_sse2 > > - > > -# undef libc_hidden_builtin_def > > -# define libc_hidden_builtin_def(STRCSPN) > > -#endif > > - > > -#include <string/strcspn.c> > > diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c > > index 93a7fab7ea..b729c033d9 100644 > > --- a/sysdeps/x86_64/multiarch/strncat-c.c > > +++ b/sysdeps/x86_64/multiarch/strncat-c.c > > @@ -1,2 +1,2 @@ > > -#define STRNCAT __strncat_sse2 > > +#define STRNCAT __strncat_generic > > #include <string/strncat.c> > > diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c > > index b649343a97..50fba8a41f 100644 > > --- a/sysdeps/x86_64/multiarch/strncat.c > > +++ b/sysdeps/x86_64/multiarch/strncat.c > > @@ -24,6 +24,7 @@ > > # undef strncat > > > > # define SYMBOL_NAME strncat > > +# define GENERIC generic > > # include "ifunc-strcpy.h" > > > > libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c > > index 57c45ac7ab..183b0b8e0f 100644 > > --- a/sysdeps/x86_64/multiarch/strncpy-c.c > > +++ b/sysdeps/x86_64/multiarch/strncpy-c.c > > @@ 
-1,4 +1,4 @@ > > -#define STRNCPY __strncpy_sse2 > > +#define STRNCPY __strncpy_generic > > #undef libc_hidden_builtin_def > > #define libc_hidden_builtin_def(strncpy) > > > > diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c > > index 2a780a7e16..7fc7d72ec5 100644 > > --- a/sysdeps/x86_64/multiarch/strncpy.c > > +++ b/sysdeps/x86_64/multiarch/strncpy.c > > @@ -24,6 +24,7 @@ > > # undef strncpy > > > > # define SYMBOL_NAME strncpy > > +# define GENERIC generic > > # include "ifunc-strcpy.h" > > > > libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > > similarity index 74% > > rename from sysdeps/x86_64/multiarch/strspn-sse2.c > > rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > > index 61cc6cb0a5..8700276773 100644 > > --- a/sysdeps/x86_64/multiarch/strspn-sse2.c > > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c > > @@ -1,5 +1,5 @@ > > -/* strspn. > > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > > +/* strpbrk with SSE4.2 intrinsics > > + Copyright (C) 2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > The GNU C Library is free software; you can redistribute it and/or > > @@ -16,13 +16,7 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#if IS_IN (libc) > > - > > -# include <sysdep.h> > > -# define STRSPN __strspn_sse2 > > - > > -# undef libc_hidden_builtin_def > > -# define libc_hidden_builtin_def(STRSPN) > > -#endif > > - > > -#include <string/strspn.c> > > +#define USE_AS_STRPBRK > > +#define STRCSPN_GENERIC __strpbrk_generic > > +#define STRCSPN_SSE42 __strpbrk_sse42 > > +#include "strcspn-c-sse4.c" > > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c > > index abf4ff7f1a..d31acfe495 100644 > > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c > > +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c > > @@ -1,5 +1,5 @@ > > -/* strpbrk with SSE4.2 intrinsics > > - Copyright (C) 2022 Free Software Foundation, Inc. > > +/* strpbrk. > > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > The GNU C Library is free software; you can redistribute it and/or > > @@ -16,7 +16,13 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#define USE_AS_STRPBRK > > -#define STRCSPN_SSE2 __strpbrk_sse2 > > -#define STRCSPN_SSE42 __strpbrk_sse42 > > -#include "strcspn-c.c" > > +#if IS_IN (libc) > > + > > +# include <sysdep.h> > > +# define STRPBRK __strpbrk_generic > > + > > +# undef libc_hidden_builtin_def > > +# define libc_hidden_builtin_def(STRPBRK) > > +#endif > > + > > +#include <string/strpbrk.c> > > diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c > > deleted file mode 100644 > > index d03214c4fb..0000000000 > > --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c > > +++ /dev/null > > @@ -1,28 +0,0 @@ > > -/* strpbrk. > > - Copyright (C) 2017-2022 Free Software Foundation, Inc. > > - This file is part of the GNU C Library. 
> > - > > - The GNU C Library is free software; you can redistribute it and/or > > - modify it under the terms of the GNU Lesser General Public > > - License as published by the Free Software Foundation; either > > - version 2.1 of the License, or (at your option) any later version. > > - > > - The GNU C Library is distributed in the hope that it will be useful, > > - but WITHOUT ANY WARRANTY; without even the implied warranty of > > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > - Lesser General Public License for more details. > > - > > - You should have received a copy of the GNU Lesser General Public > > - License along with the GNU C Library; if not, see > > - <https://www.gnu.org/licenses/>. */ > > - > > -#if IS_IN (libc) > > - > > -# include <sysdep.h> > > -# define STRPBRK __strpbrk_sse2 > > - > > -# undef libc_hidden_builtin_def > > -# define libc_hidden_builtin_def(STRPBRK) > > -#endif > > - > > -#include <string/strpbrk.c> > > diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > > new file mode 100644 > > index 0000000000..d044916688 > > --- /dev/null > > +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c > > @@ -0,0 +1,136 @@ > > +/* strspn with SSE4.2 intrinsics > > + Copyright (C) 2009-2022 Free Software Foundation, Inc. > > + This file is part of the GNU C Library. > > + > > + The GNU C Library is free software; you can redistribute it and/or > > + modify it under the terms of the GNU Lesser General Public > > + License as published by the Free Software Foundation; either > > + version 2.1 of the License, or (at your option) any later version. > > + > > + The GNU C Library is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + Lesser General Public License for more details. 
> > + > > + You should have received a copy of the GNU Lesser General Public > > + License along with the GNU C Library; if not, see > > + <https://www.gnu.org/licenses/>. */ > > + > > +#include <nmmintrin.h> > > +#include <string.h> > > +#include "varshift.h" > > + > > +/* We use 0x12: > > + _SIDD_SBYTE_OPS > > + | _SIDD_CMP_EQUAL_ANY > > + | _SIDD_NEGATIVE_POLARITY > > + | _SIDD_LEAST_SIGNIFICANT > > + on pcmpistri to compare xmm/mem128 > > + > > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > > + X X X X X X X X X X X X X X X X > > + > > + against xmm > > + > > + 0 1 2 3 4 5 6 7 8 9 A B C D E F > > + A A A A A A A A A A A A A A A A > > + > > + to find out if the first 16byte data element has any non-A byte and > > + the offset of the first byte. There are 2 cases: > > + > > + 1. The first 16byte data element has the non-A byte, including > > + EOS, at the offset X. > > + 2. The first 16byte data element is valid and doesn't have the non-A > > + byte. > > + > > + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: > > + > > + case ECX CFlag ZFlag SFlag > > + 1 X 1 0/1 0 > > + 2 16 0 0 0 > > + > > + We exit from the loop for case 1. */ > > + > > +extern size_t __strspn_generic (const char *, const char *) attribute_hidden; > > + > > + > > +size_t > > +__attribute__ ((section (".text.sse4.2"))) > > +__strspn_sse42 (const char *s, const char *a) > > +{ > > + if (*a == 0) > > + return 0; > > + > > + const char *aligned; > > + __m128i mask, maskz, zero; > > + unsigned int maskz_bits; > > + unsigned int offset = (int) ((size_t) a & 15); > > + zero = _mm_set1_epi8 (0); > > + if (offset != 0) > > + { > > + /* Load masks. */ > > + aligned = (const char *) ((size_t) a & -16L); > > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > > + maskz = _mm_cmpeq_epi8 (mask0, zero); > > + > > + /* Find where the NULL terminator is. 
*/ > > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > > + if (maskz_bits != 0) > > + { > > + mask = __m128i_shift_right (mask0, offset); > > + offset = (unsigned int) ((size_t) s & 15); > > + if (offset) > > + goto start_unaligned; > > + > > + aligned = s; > > + goto start_loop; > > + } > > + } > > + > > + /* A is aligned. */ > > + mask = _mm_loadu_si128 ((__m128i *) a); > > + > > + /* Find where the NULL terminator is. */ > > + maskz = _mm_cmpeq_epi8 (mask, zero); > > + maskz_bits = _mm_movemask_epi8 (maskz); > > + if (maskz_bits == 0) > > + { > > + /* There is no NULL terminator. Don't use SSE4.2 if the length > > + of A > 16. */ > > + if (a[16] != 0) > > + return __strspn_generic (s, a); > > + } > > + aligned = s; > > + offset = (unsigned int) ((size_t) s & 15); > > + > > + if (offset != 0) > > + { > > + start_unaligned: > > + /* Check partial string. */ > > + aligned = (const char *) ((size_t) s & -16L); > > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > > + __m128i adj_value = __m128i_shift_right (value, offset); > > + > > + unsigned int length = _mm_cmpistri (mask, adj_value, 0x12); > > + /* No need to check CFlag since it is always 1. */ > > + if (length < 16 - offset) > > + return length; > > + /* Find where the NULL terminator is. 
*/ > > + maskz = _mm_cmpeq_epi8 (value, zero); > > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > > + if (maskz_bits != 0) > > + return length; > > + aligned += 16; > > + } > > + > > +start_loop: > > + while (1) > > + { > > + __m128i value = _mm_load_si128 ((__m128i *) aligned); > > + unsigned int index = _mm_cmpistri (mask, value, 0x12); > > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x12); > > + if (cflag) > > + return (size_t) (aligned + index - s); > > + aligned += 16; > > + } > > +} > > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c > > index 6124033ceb..6b50c36432 100644 > > --- a/sysdeps/x86_64/multiarch/strspn-c.c > > +++ b/sysdeps/x86_64/multiarch/strspn-c.c > > @@ -1,5 +1,5 @@ > > -/* strspn with SSE4.2 intrinsics > > - Copyright (C) 2009-2022 Free Software Foundation, Inc. > > +/* strspn. > > + Copyright (C) 2017-2022 Free Software Foundation, Inc. > > This file is part of the GNU C Library. > > > > The GNU C Library is free software; you can redistribute it and/or > > @@ -16,121 +16,13 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. */ > > > > -#include <nmmintrin.h> > > -#include <string.h> > > -#include "varshift.h" > > +#if IS_IN (libc) > > > > -/* We use 0x12: > > - _SIDD_SBYTE_OPS > > - | _SIDD_CMP_EQUAL_ANY > > - | _SIDD_NEGATIVE_POLARITY > > - | _SIDD_LEAST_SIGNIFICANT > > - on pcmpistri to compare xmm/mem128 > > +# include <sysdep.h> > > +# define STRSPN __strspn_generic > > > > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > > - X X X X X X X X X X X X X X X X > > +# undef libc_hidden_builtin_def > > +# define libc_hidden_builtin_def(STRSPN) > > +#endif > > > > - against xmm > > - > > - 0 1 2 3 4 5 6 7 8 9 A B C D E F > > - A A A A A A A A A A A A A A A A > > - > > - to find out if the first 16byte data element has any non-A byte and > > - the offset of the first byte. There are 2 cases: > > - > > - 1. 
The first 16byte data element has the non-A byte, including > > - EOS, at the offset X. > > - 2. The first 16byte data element is valid and doesn't have the non-A > > - byte. > > - > > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases: > > - > > - case ECX CFlag ZFlag SFlag > > - 1 X 1 0/1 0 > > - 2 16 0 0 0 > > - > > - We exit from the loop for case 1. */ > > - > > -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; > > - > > - > > -size_t > > -__attribute__ ((section (".text.sse4.2"))) > > -__strspn_sse42 (const char *s, const char *a) > > -{ > > - if (*a == 0) > > - return 0; > > - > > - const char *aligned; > > - __m128i mask, maskz, zero; > > - unsigned int maskz_bits; > > - unsigned int offset = (int) ((size_t) a & 15); > > - zero = _mm_set1_epi8 (0); > > - if (offset != 0) > > - { > > - /* Load masks. */ > > - aligned = (const char *) ((size_t) a & -16L); > > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); > > - maskz = _mm_cmpeq_epi8 (mask0, zero); > > - > > - /* Find where the NULL terminator is. */ > > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > > - if (maskz_bits != 0) > > - { > > - mask = __m128i_shift_right (mask0, offset); > > - offset = (unsigned int) ((size_t) s & 15); > > - if (offset) > > - goto start_unaligned; > > - > > - aligned = s; > > - goto start_loop; > > - } > > - } > > - > > - /* A is aligned. */ > > - mask = _mm_loadu_si128 ((__m128i *) a); > > - > > - /* Find where the NULL terminator is. */ > > - maskz = _mm_cmpeq_epi8 (mask, zero); > > - maskz_bits = _mm_movemask_epi8 (maskz); > > - if (maskz_bits == 0) > > - { > > - /* There is no NULL terminator. Don't use SSE4.2 if the length > > - of A > 16. */ > > - if (a[16] != 0) > > - return __strspn_sse2 (s, a); > > - } > > - aligned = s; > > - offset = (unsigned int) ((size_t) s & 15); > > - > > - if (offset != 0) > > - { > > - start_unaligned: > > - /* Check partial string. 
*/ > > - aligned = (const char *) ((size_t) s & -16L); > > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > > - __m128i adj_value = __m128i_shift_right (value, offset); > > - > > - unsigned int length = _mm_cmpistri (mask, adj_value, 0x12); > > - /* No need to check CFlag since it is always 1. */ > > - if (length < 16 - offset) > > - return length; > > - /* Find where the NULL terminator is. */ > > - maskz = _mm_cmpeq_epi8 (value, zero); > > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset; > > - if (maskz_bits != 0) > > - return length; > > - aligned += 16; > > - } > > - > > -start_loop: > > - while (1) > > - { > > - __m128i value = _mm_load_si128 ((__m128i *) aligned); > > - unsigned int index = _mm_cmpistri (mask, value, 0x12); > > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x12); > > - if (cflag) > > - return (size_t) (aligned + index - s); > > - aligned += 16; > > - } > > -} > > +#include <string/strspn.c> > > diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c > > index 26d6984e9b..fa38dd898d 100644 > > --- a/sysdeps/x86_64/multiarch/wcscpy-c.c > > +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c > > @@ -1,5 +1,5 @@ > > #if IS_IN (libc) > > -# define WCSCPY __wcscpy_sse2 > > +# define WCSCPY __wcscpy_generic > > #endif > > > > #include <wcsmbs/wcscpy.c> > > diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c > > index 6a2d1421d9..53c3228dc2 100644 > > --- a/sysdeps/x86_64/multiarch/wcscpy.c > > +++ b/sysdeps/x86_64/multiarch/wcscpy.c > > @@ -26,7 +26,7 @@ > > # define SYMBOL_NAME wcscpy > > # include <init-arch.h> > > > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; > > extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; > > > > static inline void * > > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void) > > if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) > > return OPTIMIZE (ssse3); > 
> > > - return OPTIMIZE (sse2); > > + return OPTIMIZE (generic); > > } > > > > libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c > > index e1ec7cfbb5..1c9c04241a 100644 > > --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c > > +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c > > @@ -1,9 +1,9 @@ > > #if IS_IN (libc) > > # include <wchar.h> > > > > -# define WCSNLEN __wcsnlen_sse2 > > +# define WCSNLEN __wcsnlen_generic > > > > -extern __typeof (wcsnlen) __wcsnlen_sse2; > > +extern __typeof (wcsnlen) __wcsnlen_generic; > > #endif > > > > #include "wcsmbs/wcsnlen.c" > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c > > index baa26666a8..05b7a211de 100644 > > --- a/sysdeps/x86_64/multiarch/wcsnlen.c > > +++ b/sysdeps/x86_64/multiarch/wcsnlen.c > > @@ -24,6 +24,7 @@ > > # undef __wcsnlen > > > > # define SYMBOL_NAME wcsnlen > > +# define GENERIC generic > > # include "ifunc-wcslen.h" > > > > libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); > > -- > > 2.34.1 > > > > > -- > H.J. ^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3] x86: Rename generic functions with unique postfix for clarity 2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein 2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein @ 2022-06-16 22:11 ` Noah Goldstein 2022-06-16 22:43 ` H.J. Lu 1 sibling, 1 reply; 11+ messages in thread From: Noah Goldstein @ 2022-06-16 22:11 UTC (permalink / raw) To: libc-alpha No functions are changed. It just renames generic implementations from '{func}_sse2' to '{func}_generic'. This is just because the postfix "_sse2" was overloaded and was used for files that had hand-optimized sse2 assembly implementations and files that just redirected back to the generic implementation. Full xcheck passed on x86_64. --- Note this change is in preparation for further changes to the file organization in the multiarch directory. sysdeps/x86_64/multiarch/Makefile | 33 ++++++++++--------- sysdeps/x86_64/multiarch/ifunc-avx2.h | 8 +++-- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 18 +++++----- sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +-- sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +++-- sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +++-- sysdeps/x86_64/multiarch/stpncpy-c.c | 7 ---- sysdeps/x86_64/multiarch/stpncpy-generic.c | 26 +++++++++++++++ sysdeps/x86_64/multiarch/stpncpy.c | 1 + .../{strcspn-sse2.c => strcspn-generic.c} | 2 +- .../multiarch/{strcspn-c.c => strcspn-sse4.c} | 8 ++--- sysdeps/x86_64/multiarch/strncat-c.c | 2 -- sysdeps/x86_64/multiarch/strncat-generic.c | 21 ++++++++++++ sysdeps/x86_64/multiarch/strncat.c | 1 + sysdeps/x86_64/multiarch/strncpy-c.c | 5 --- sysdeps/x86_64/multiarch/strncpy-generic.c | 24 ++++++++++++++ sysdeps/x86_64/multiarch/strncpy.c | 1 + .../{strpbrk-sse2.c => strpbrk-generic.c} | 2 +- .../multiarch/{strpbrk-c.c => strpbrk-sse4.c} | 4 +-- .../{strspn-sse2.c => strspn-generic.c} | 2 +- .../multiarch/{strspn-c.c => strspn-sse4.c} | 4 +-- sysdeps/x86_64/multiarch/wcscpy-c.c | 5 --- 
sysdeps/x86_64/multiarch/wcscpy-generic.c | 24 ++++++++++++++ sysdeps/x86_64/multiarch/wcscpy.c | 4 +-- .../{wcsncmp-sse2.c => wcsncmp-generic.c} | 4 +-- sysdeps/x86_64/multiarch/wcsncmp.c | 2 ++ sysdeps/x86_64/multiarch/wcsnlen-c.c | 9 ----- sysdeps/x86_64/multiarch/wcsnlen-generic.c | 28 ++++++++++++++++ sysdeps/x86_64/multiarch/wcsnlen.c | 1 + 29 files changed, 190 insertions(+), 76 deletions(-) delete mode 100644 sysdeps/x86_64/multiarch/stpncpy-c.c create mode 100644 sysdeps/x86_64/multiarch/stpncpy-generic.c rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-generic.c} (96%) rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-sse4.c} (96%) delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c create mode 100644 sysdeps/x86_64/multiarch/strncat-generic.c delete mode 100644 sysdeps/x86_64/multiarch/strncpy-c.c create mode 100644 sysdeps/x86_64/multiarch/strncpy-generic.c rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-generic.c} (96%) rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-sse4.c} (92%) rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-generic.c} (96%) rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-sse4.c} (97%) delete mode 100644 sysdeps/x86_64/multiarch/wcscpy-c.c create mode 100644 sysdeps/x86_64/multiarch/wcscpy-generic.c rename sysdeps/x86_64/multiarch/{wcsncmp-sse2.c => wcsncmp-generic.c} (92%) delete mode 100644 sysdeps/x86_64/multiarch/wcsnlen-c.c create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-generic.c diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 3d153cac35..666ee4d5d6 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -43,7 +43,7 @@ sysdep_routines += \ stpcpy-sse2-unaligned \ stpncpy-avx2 \ stpncpy-avx2-rtm \ - stpncpy-c \ + stpncpy-generic \ stpncpy-evex \ stpncpy-sse2-unaligned \ strcasecmp_l-avx2 \ @@ -76,8 +76,8 @@ sysdep_routines += \ strcpy-evex \ strcpy-sse2 \ strcpy-sse2-unaligned \ - strcspn-c \ - 
strcspn-sse2 \ + strcspn-generic \ + strcspn-sse4 \ strlen-avx2 \ strlen-avx2-rtm \ strlen-evex \ @@ -90,7 +90,7 @@ sysdep_routines += \ strncase_l-sse4_2 \ strncat-avx2 \ strncat-avx2-rtm \ - strncat-c \ + strncat-generic \ strncat-evex \ strncat-sse2-unaligned \ strncmp-avx2 \ @@ -100,7 +100,7 @@ sysdep_routines += \ strncmp-sse4_2 \ strncpy-avx2 \ strncpy-avx2-rtm \ - strncpy-c \ + strncpy-generic \ strncpy-evex \ strncpy-sse2-unaligned \ strnlen-avx2 \ @@ -108,22 +108,23 @@ sysdep_routines += \ strnlen-evex \ strnlen-evex512 \ strnlen-sse2 \ - strpbrk-c \ - strpbrk-sse2 \ + strpbrk-generic \ + strpbrk-sse4 \ strrchr-avx2 \ strrchr-avx2-rtm \ strrchr-evex \ strrchr-sse2 \ - strspn-c \ - strspn-sse2 \ + strspn-generic \ + strspn-sse4 \ strstr-avx512 \ strstr-sse2-unaligned \ varshift \ # sysdep_routines -CFLAGS-varshift.c += -msse4 -CFLAGS-strcspn-c.c += -msse4 -CFLAGS-strpbrk-c.c += -msse4 -CFLAGS-strspn-c.c += -msse4 + +CFLAGS-strcspn-sse4.c += -msse4 +CFLAGS-strpbrk-sse4.c += -msse4 +CFLAGS-strspn-sse4.c += -msse4 + CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 endif @@ -137,7 +138,7 @@ sysdep_routines += \ wcscmp-avx2-rtm \ wcscmp-evex \ wcscmp-sse2 \ - wcscpy-c \ + wcscpy-generic \ wcscpy-ssse3 \ wcslen-avx2 \ wcslen-avx2-rtm \ @@ -147,11 +148,11 @@ sysdep_routines += \ wcslen-sse4_1 \ wcsncmp-avx2 \ wcsncmp-avx2-rtm \ + wcsncmp-generic \ wcsncmp-evex \ - wcsncmp-sse2 \ wcsnlen-avx2 \ wcsnlen-avx2-rtm \ - wcsnlen-c \ + wcsnlen-generic \ wcsnlen-evex \ wcsnlen-evex512 \ wcsnlen-sse4_1 \ diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h index 4289df29ec..1d9cdfcfec 100644 --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h @@ -19,7 +19,11 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +#ifndef GENERIC +# define GENERIC sse2 +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) 
attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; @@ -44,5 +48,5 @@ IFUNC_SELECTOR (void) return OPTIMIZE (avx2); } - return OPTIMIZE (sse2); + return OPTIMIZE (GENERIC); } diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index dc595752e0..883362f63d 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -367,7 +367,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __stpncpy_evex) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic)) /* Support sysdeps/x86_64/multiarch/stpcpy.c. */ IFUNC_IMPL (i, name, stpcpy, @@ -526,7 +526,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcspn, IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), __strcspn_sse42) - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic)) /* Support sysdeps/x86_64/multiarch/strncase_l.c. */ IFUNC_IMPL (i, name, strncasecmp, @@ -580,7 +580,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncat_evex) IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2)) + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic)) /* Support sysdeps/x86_64/multiarch/strncpy.c. 
*/ IFUNC_IMPL (i, name, strncpy, @@ -596,20 +596,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncpy_evex) IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2)) + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic)) /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ IFUNC_IMPL (i, name, strpbrk, IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), __strpbrk_sse42) - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic)) /* Support sysdeps/x86_64/multiarch/strspn.c. */ IFUNC_IMPL (i, name, strspn, IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), __strspn_sse42) - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic)) /* Support sysdeps/x86_64/multiarch/strstr.c. */ IFUNC_IMPL (i, name, strstr, @@ -686,13 +686,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, && CPU_FEATURE_USABLE (AVX512BW) && CPU_FEATURE_USABLE (BMI2)), __wcsncmp_evex) - IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2)) + IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic)) /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ IFUNC_IMPL (i, name, wcscpy, IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3), __wcscpy_ssse3) - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2)) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic)) /* Support sysdeps/x86_64/multiarch/wcslen.c. */ IFUNC_IMPL (i, name, wcslen, @@ -744,7 +744,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, wcsnlen, CPU_FEATURE_USABLE (SSE4_1), __wcsnlen_sse4_1) - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2)) + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) /* Support sysdeps/x86_64/multiarch/wmemchr.c. 
*/ IFUNC_IMPL (i, name, wmemchr, diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h index b555ff2fac..ee36525bcf 100644 --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h @@ -19,7 +19,7 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; static inline void * @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) return OPTIMIZE (sse42); - return OPTIMIZE (sse2); + return OPTIMIZE (generic); } diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h index a15afa44e9..80529458d1 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h @@ -20,7 +20,11 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +#ifndef GENERIC +# define GENERIC sse2 +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); - return OPTIMIZE (sse2); + return OPTIMIZE (GENERIC); } diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h index 2b29e7608a..88c1c502af 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h @@ -19,7 +19,11 @@ #include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +#ifndef GENERIC +# define GENERIC sse2 +#endif + +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; 
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) return OPTIMIZE (sse4_1); - return OPTIMIZE (sse2); + return OPTIMIZE (GENERIC); } diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c deleted file mode 100644 index b016e487e1..0000000000 --- a/sysdeps/x86_64/multiarch/stpncpy-c.c +++ /dev/null @@ -1,7 +0,0 @@ -#define STPNCPY __stpncpy_sse2 -#undef weak_alias -#define weak_alias(ignored1, ignored2) -#undef libc_hidden_def -#define libc_hidden_def(stpncpy) - -#include <string/stpncpy.c> diff --git a/sysdeps/x86_64/multiarch/stpncpy-generic.c b/sysdeps/x86_64/multiarch/stpncpy-generic.c new file mode 100644 index 0000000000..87826845b0 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpncpy-generic.c @@ -0,0 +1,26 @@ +/* stpncpy. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + + +#define STPNCPY __stpncpy_generic +#undef weak_alias +#define weak_alias(ignored1, ignored2) +#undef libc_hidden_def +#define libc_hidden_def(stpncpy) + +#include <string/stpncpy.c> diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c index 82fa53957d..879bc83f0b 100644 --- a/sysdeps/x86_64/multiarch/stpncpy.c +++ b/sysdeps/x86_64/multiarch/stpncpy.c @@ -25,6 +25,7 @@ # undef stpncpy # undef __stpncpy +# define GENERIC generic # define SYMBOL_NAME stpncpy # include "ifunc-strcpy.h" diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-generic.c similarity index 96% rename from sysdeps/x86_64/multiarch/strcspn-sse2.c rename to sysdeps/x86_64/multiarch/strcspn-generic.c index 3a04bb39fc..423de2e2b2 100644 --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c +++ b/sysdeps/x86_64/multiarch/strcspn-generic.c @@ -19,7 +19,7 @@ #if IS_IN (libc) # include <sysdep.h> -# define STRCSPN __strcspn_sse2 +# define STRCSPN __strcspn_generic # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(STRCSPN) diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-sse4.c similarity index 96% rename from sysdeps/x86_64/multiarch/strcspn-c.c rename to sysdeps/x86_64/multiarch/strcspn-sse4.c index c312fab8b1..59f64f9fe8 100644 --- a/sysdeps/x86_64/multiarch/strcspn-c.c +++ b/sysdeps/x86_64/multiarch/strcspn-sse4.c @@ -52,8 +52,8 @@ when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset X for case 1. */ -#ifndef STRCSPN_SSE2 -# define STRCSPN_SSE2 __strcspn_sse2 +#ifndef STRCSPN_GENERIC +# define STRCSPN_GENERIC __strcspn_generic # define STRCSPN_SSE42 __strcspn_sse42 #endif @@ -69,7 +69,7 @@ char * #else size_t #endif -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; +STRCSPN_GENERIC (const char *, const char *) attribute_hidden; #ifdef USE_AS_STRPBRK @@ -119,7 +119,7 @@ STRCSPN_SSE42 (const char *s, const char *a) /* There is no NULL terminator. 
Don't use SSE4.2 if the length of A > 16. */ if (a[16] != 0) - return STRCSPN_SSE2 (s, a); + return STRCSPN_GENERIC (s, a); } aligned = s; diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c deleted file mode 100644 index 93a7fab7ea..0000000000 --- a/sysdeps/x86_64/multiarch/strncat-c.c +++ /dev/null @@ -1,2 +0,0 @@ -#define STRNCAT __strncat_sse2 -#include <string/strncat.c> diff --git a/sysdeps/x86_64/multiarch/strncat-generic.c b/sysdeps/x86_64/multiarch/strncat-generic.c new file mode 100644 index 0000000000..0090669cd1 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncat-generic.c @@ -0,0 +1,21 @@ +/* strncat. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + + +#define STRNCAT __strncat_generic +#include <string/strncat.c> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c index b649343a97..50fba8a41f 100644 --- a/sysdeps/x86_64/multiarch/strncat.c +++ b/sysdeps/x86_64/multiarch/strncat.c @@ -24,6 +24,7 @@ # undef strncat # define SYMBOL_NAME strncat +# define GENERIC generic # include "ifunc-strcpy.h" libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c deleted file mode 100644 index 57c45ac7ab..0000000000 --- a/sysdeps/x86_64/multiarch/strncpy-c.c +++ /dev/null @@ -1,5 +0,0 @@ -#define STRNCPY __strncpy_sse2 -#undef libc_hidden_builtin_def -#define libc_hidden_builtin_def(strncpy) - -#include <string/strncpy.c> diff --git a/sysdeps/x86_64/multiarch/strncpy-generic.c b/sysdeps/x86_64/multiarch/strncpy-generic.c new file mode 100644 index 0000000000..9916153dd5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncpy-generic.c @@ -0,0 +1,24 @@ +/* strncpy. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + + +#define STRNCPY __strncpy_generic +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(strncpy) + +#include <string/strncpy.c> diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c index 2a780a7e16..7fc7d72ec5 100644 --- a/sysdeps/x86_64/multiarch/strncpy.c +++ b/sysdeps/x86_64/multiarch/strncpy.c @@ -24,6 +24,7 @@ # undef strncpy # define SYMBOL_NAME strncpy +# define GENERIC generic # include "ifunc-strcpy.h" libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-generic.c similarity index 96% rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c rename to sysdeps/x86_64/multiarch/strpbrk-generic.c index d03214c4fb..d31acfe495 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c +++ b/sysdeps/x86_64/multiarch/strpbrk-generic.c @@ -19,7 +19,7 @@ #if IS_IN (libc) # include <sysdep.h> -# define STRPBRK __strpbrk_sse2 +# define STRPBRK __strpbrk_generic # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(STRPBRK) diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-sse4.c similarity index 92% rename from sysdeps/x86_64/multiarch/strpbrk-c.c rename to sysdeps/x86_64/multiarch/strpbrk-sse4.c index abf4ff7f1a..bf74d660d5 100644 --- a/sysdeps/x86_64/multiarch/strpbrk-c.c +++ b/sysdeps/x86_64/multiarch/strpbrk-sse4.c @@ -17,6 +17,6 @@ <https://www.gnu.org/licenses/>. 
*/ #define USE_AS_STRPBRK -#define STRCSPN_SSE2 __strpbrk_sse2 +#define STRCSPN_GENERIC __strpbrk_generic #define STRCSPN_SSE42 __strpbrk_sse42 -#include "strcspn-c.c" +#include "strcspn-sse4.c" diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-generic.c similarity index 96% rename from sysdeps/x86_64/multiarch/strspn-sse2.c rename to sysdeps/x86_64/multiarch/strspn-generic.c index 61cc6cb0a5..6b50c36432 100644 --- a/sysdeps/x86_64/multiarch/strspn-sse2.c +++ b/sysdeps/x86_64/multiarch/strspn-generic.c @@ -19,7 +19,7 @@ #if IS_IN (libc) # include <sysdep.h> -# define STRSPN __strspn_sse2 +# define STRSPN __strspn_generic # undef libc_hidden_builtin_def # define libc_hidden_builtin_def(STRSPN) diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-sse4.c similarity index 97% rename from sysdeps/x86_64/multiarch/strspn-c.c rename to sysdeps/x86_64/multiarch/strspn-sse4.c index 6124033ceb..d044916688 100644 --- a/sysdeps/x86_64/multiarch/strspn-c.c +++ b/sysdeps/x86_64/multiarch/strspn-sse4.c @@ -51,7 +51,7 @@ We exit from the loop for case 1. */ -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; +extern size_t __strspn_generic (const char *, const char *) attribute_hidden; size_t @@ -98,7 +98,7 @@ __strspn_sse42 (const char *s, const char *a) /* There is no NULL terminator. Don't use SSE4.2 if the length of A > 16. 
*/ if (a[16] != 0) - return __strspn_sse2 (s, a); + return __strspn_generic (s, a); } aligned = s; offset = (unsigned int) ((size_t) s & 15); diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c deleted file mode 100644 index 26d6984e9b..0000000000 --- a/sysdeps/x86_64/multiarch/wcscpy-c.c +++ /dev/null @@ -1,5 +0,0 @@ -#if IS_IN (libc) -# define WCSCPY __wcscpy_sse2 -#endif - -#include <wcsmbs/wcscpy.c> diff --git a/sysdeps/x86_64/multiarch/wcscpy-generic.c b/sysdeps/x86_64/multiarch/wcscpy-generic.c new file mode 100644 index 0000000000..5ea905f33f --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcscpy-generic.c @@ -0,0 +1,24 @@ +/* wcscpy. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + + +#if IS_IN (libc) +# define WCSCPY __wcscpy_generic +#endif + +#include <wcsmbs/wcscpy.c> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c index 6a2d1421d9..53c3228dc2 100644 --- a/sysdeps/x86_64/multiarch/wcscpy.c +++ b/sysdeps/x86_64/multiarch/wcscpy.c @@ -26,7 +26,7 @@ # define SYMBOL_NAME wcscpy # include <init-arch.h> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; static inline void * @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void) if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) return OPTIMIZE (ssse3); - return OPTIMIZE (sse2); + return OPTIMIZE (generic); } libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c similarity index 92% rename from sysdeps/x86_64/multiarch/wcsncmp-sse2.c rename to sysdeps/x86_64/multiarch/wcsncmp-generic.c index 8d9cbbb900..658d541886 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c +++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c @@ -1,4 +1,4 @@ -/* wcsncmp optimized with SSE2. +/* wcsncmp. Copyright (C) 2018-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,5 +16,5 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#define WCSNCMP __wcsncmp_sse2 +#define WCSNCMP __wcsncmp_generic #include <wcsmbs/wcsncmp.c> diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c index 5e00af2ca5..1836f794dd 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp.c +++ b/sysdeps/x86_64/multiarch/wcsncmp.c @@ -24,6 +24,8 @@ # undef wcsncmp # undef __wcsncmp +# define GENERIC generic + # define SYMBOL_NAME wcsncmp # include "ifunc-avx2.h" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c deleted file mode 100644 index e1ec7cfbb5..0000000000 --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c +++ /dev/null @@ -1,9 +0,0 @@ -#if IS_IN (libc) -# include <wchar.h> - -# define WCSNLEN __wcsnlen_sse2 - -extern __typeof (wcsnlen) __wcsnlen_sse2; -#endif - -#include "wcsmbs/wcsnlen.c" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c new file mode 100644 index 0000000000..2d75da7709 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c @@ -0,0 +1,28 @@ +/* wcsnlen. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + + +#if IS_IN (libc) +# include <wchar.h> + +# define WCSNLEN __wcsnlen_generic + +extern __typeof (wcsnlen) __wcsnlen_generic; +#endif + +#include "wcsmbs/wcsnlen.c" diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c index baa26666a8..05b7a211de 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen.c +++ b/sysdeps/x86_64/multiarch/wcsnlen.c @@ -24,6 +24,7 @@ # undef __wcsnlen # define SYMBOL_NAME wcsnlen +# define GENERIC generic # include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); -- 2.34.1 ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] x86: Rename generic functions with unique postfix for clarity 2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein @ 2022-06-16 22:43 ` H.J. Lu 0 siblings, 0 replies; 11+ messages in thread From: H.J. Lu @ 2022-06-16 22:43 UTC (permalink / raw) To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell On Thu, Jun 16, 2022 at 3:12 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > No functions are changed. It just renames generic implementations from > '{func}_sse2' to '{func}_generic'. This is just because the postfix > "_sse2" was overloaded and was used for files that had hand-optimized > sse2 assembly implementations and files that just redirected back > to the generic implementation. > > Full xcheck passed on x86_64. > --- > Note this change is in preparation for further changes to the file > organization in the multiarch directory. > sysdeps/x86_64/multiarch/Makefile | 33 ++++++++++--------- > sysdeps/x86_64/multiarch/ifunc-avx2.h | 8 +++-- > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 18 +++++----- > sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +-- > sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +++-- > sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +++-- > sysdeps/x86_64/multiarch/stpncpy-c.c | 7 ---- > sysdeps/x86_64/multiarch/stpncpy-generic.c | 26 +++++++++++++++ > sysdeps/x86_64/multiarch/stpncpy.c | 1 + > .../{strcspn-sse2.c => strcspn-generic.c} | 2 +- > .../multiarch/{strcspn-c.c => strcspn-sse4.c} | 8 ++--- > sysdeps/x86_64/multiarch/strncat-c.c | 2 -- > sysdeps/x86_64/multiarch/strncat-generic.c | 21 ++++++++++++ > sysdeps/x86_64/multiarch/strncat.c | 1 + > sysdeps/x86_64/multiarch/strncpy-c.c | 5 --- > sysdeps/x86_64/multiarch/strncpy-generic.c | 24 ++++++++++++++ > sysdeps/x86_64/multiarch/strncpy.c | 1 + > .../{strpbrk-sse2.c => strpbrk-generic.c} | 2 +- > .../multiarch/{strpbrk-c.c => strpbrk-sse4.c} | 4 +-- > .../{strspn-sse2.c => strspn-generic.c} | 2 +- > .../multiarch/{strspn-c.c => strspn-sse4.c} | 4 +-- >
sysdeps/x86_64/multiarch/wcscpy-c.c | 5 --- > sysdeps/x86_64/multiarch/wcscpy-generic.c | 24 ++++++++++++++ > sysdeps/x86_64/multiarch/wcscpy.c | 4 +-- > .../{wcsncmp-sse2.c => wcsncmp-generic.c} | 4 +-- > sysdeps/x86_64/multiarch/wcsncmp.c | 2 ++ > sysdeps/x86_64/multiarch/wcsnlen-c.c | 9 ----- > sysdeps/x86_64/multiarch/wcsnlen-generic.c | 28 ++++++++++++++++ > sysdeps/x86_64/multiarch/wcsnlen.c | 1 + > 29 files changed, 190 insertions(+), 76 deletions(-) > delete mode 100644 sysdeps/x86_64/multiarch/stpncpy-c.c > create mode 100644 sysdeps/x86_64/multiarch/stpncpy-generic.c > rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-generic.c} (96%) > rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-sse4.c} (96%) > delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c > create mode 100644 sysdeps/x86_64/multiarch/strncat-generic.c > delete mode 100644 sysdeps/x86_64/multiarch/strncpy-c.c > create mode 100644 sysdeps/x86_64/multiarch/strncpy-generic.c > rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-generic.c} (96%) > rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-sse4.c} (92%) > rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-generic.c} (96%) > rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-sse4.c} (97%) > delete mode 100644 sysdeps/x86_64/multiarch/wcscpy-c.c > create mode 100644 sysdeps/x86_64/multiarch/wcscpy-generic.c > rename sysdeps/x86_64/multiarch/{wcsncmp-sse2.c => wcsncmp-generic.c} (92%) > delete mode 100644 sysdeps/x86_64/multiarch/wcsnlen-c.c > create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-generic.c > > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile > index 3d153cac35..666ee4d5d6 100644 > --- a/sysdeps/x86_64/multiarch/Makefile > +++ b/sysdeps/x86_64/multiarch/Makefile > @@ -43,7 +43,7 @@ sysdep_routines += \ > stpcpy-sse2-unaligned \ > stpncpy-avx2 \ > stpncpy-avx2-rtm \ > - stpncpy-c \ > + stpncpy-generic \ > stpncpy-evex \ > stpncpy-sse2-unaligned \ > 
strcasecmp_l-avx2 \ > @@ -76,8 +76,8 @@ sysdep_routines += \ > strcpy-evex \ > strcpy-sse2 \ > strcpy-sse2-unaligned \ > - strcspn-c \ > - strcspn-sse2 \ > + strcspn-generic \ > + strcspn-sse4 \ > strlen-avx2 \ > strlen-avx2-rtm \ > strlen-evex \ > @@ -90,7 +90,7 @@ sysdep_routines += \ > strncase_l-sse4_2 \ > strncat-avx2 \ > strncat-avx2-rtm \ > - strncat-c \ > + strncat-generic \ > strncat-evex \ > strncat-sse2-unaligned \ > strncmp-avx2 \ > @@ -100,7 +100,7 @@ sysdep_routines += \ > strncmp-sse4_2 \ > strncpy-avx2 \ > strncpy-avx2-rtm \ > - strncpy-c \ > + strncpy-generic \ > strncpy-evex \ > strncpy-sse2-unaligned \ > strnlen-avx2 \ > @@ -108,22 +108,23 @@ sysdep_routines += \ > strnlen-evex \ > strnlen-evex512 \ > strnlen-sse2 \ > - strpbrk-c \ > - strpbrk-sse2 \ > + strpbrk-generic \ > + strpbrk-sse4 \ > strrchr-avx2 \ > strrchr-avx2-rtm \ > strrchr-evex \ > strrchr-sse2 \ > - strspn-c \ > - strspn-sse2 \ > + strspn-generic \ > + strspn-sse4 \ > strstr-avx512 \ > strstr-sse2-unaligned \ > varshift \ > # sysdep_routines > -CFLAGS-varshift.c += -msse4 > -CFLAGS-strcspn-c.c += -msse4 > -CFLAGS-strpbrk-c.c += -msse4 > -CFLAGS-strspn-c.c += -msse4 > + > +CFLAGS-strcspn-sse4.c += -msse4 > +CFLAGS-strpbrk-sse4.c += -msse4 > +CFLAGS-strspn-sse4.c += -msse4 > + > CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3 > endif > > @@ -137,7 +138,7 @@ sysdep_routines += \ > wcscmp-avx2-rtm \ > wcscmp-evex \ > wcscmp-sse2 \ > - wcscpy-c \ > + wcscpy-generic \ > wcscpy-ssse3 \ > wcslen-avx2 \ > wcslen-avx2-rtm \ > @@ -147,11 +148,11 @@ sysdep_routines += \ > wcslen-sse4_1 \ > wcsncmp-avx2 \ > wcsncmp-avx2-rtm \ > + wcsncmp-generic \ > wcsncmp-evex \ > - wcsncmp-sse2 \ > wcsnlen-avx2 \ > wcsnlen-avx2-rtm \ > - wcsnlen-c \ > + wcsnlen-generic \ > wcsnlen-evex \ > wcsnlen-evex512 \ > wcsnlen-sse4_1 \ > diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h > index 4289df29ec..1d9cdfcfec 100644 > --- 
a/sysdeps/x86_64/multiarch/ifunc-avx2.h > +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h > @@ -19,7 +19,11 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +#ifndef GENERIC > +# define GENERIC sse2 > +#endif > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; > @@ -44,5 +48,5 @@ IFUNC_SELECTOR (void) > return OPTIMIZE (avx2); > } > > - return OPTIMIZE (sse2); > + return OPTIMIZE (GENERIC); > } > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > index dc595752e0..883362f63d 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c > @@ -367,7 +367,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > __stpncpy_evex) > IFUNC_IMPL_ADD (array, i, stpncpy, 1, > __stpncpy_sse2_unaligned) > - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) > + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic)) > > /* Support sysdeps/x86_64/multiarch/stpcpy.c. */ > IFUNC_IMPL (i, name, stpcpy, > @@ -526,7 +526,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > IFUNC_IMPL (i, name, strcspn, > IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2), > __strcspn_sse42) > - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2)) > + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic)) > > /* Support sysdeps/x86_64/multiarch/strncase_l.c. 
*/ > IFUNC_IMPL (i, name, strncasecmp, > @@ -580,7 +580,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > __strncat_evex) > IFUNC_IMPL_ADD (array, i, strncat, 1, > __strncat_sse2_unaligned) > - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2)) > + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic)) > > /* Support sysdeps/x86_64/multiarch/strncpy.c. */ > IFUNC_IMPL (i, name, strncpy, > @@ -596,20 +596,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > __strncpy_evex) > IFUNC_IMPL_ADD (array, i, strncpy, 1, > __strncpy_sse2_unaligned) > - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2)) > + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic)) > > /* Support sysdeps/x86_64/multiarch/strpbrk.c. */ > IFUNC_IMPL (i, name, strpbrk, > IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2), > __strpbrk_sse42) > - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2)) > + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic)) > > > /* Support sysdeps/x86_64/multiarch/strspn.c. */ > IFUNC_IMPL (i, name, strspn, > IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2), > __strspn_sse42) > - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2)) > + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic)) > > /* Support sysdeps/x86_64/multiarch/strstr.c. */ > IFUNC_IMPL (i, name, strstr, > @@ -686,13 +686,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > && CPU_FEATURE_USABLE (AVX512BW) > && CPU_FEATURE_USABLE (BMI2)), > __wcsncmp_evex) > - IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2)) > + IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic)) > > /* Support sysdeps/x86_64/multiarch/wcscpy.c. 
*/ > IFUNC_IMPL (i, name, wcscpy, > IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3), > __wcscpy_ssse3) > - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2)) > + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic)) > > /* Support sysdeps/x86_64/multiarch/wcslen.c. */ > IFUNC_IMPL (i, name, wcslen, > @@ -744,7 +744,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, > IFUNC_IMPL_ADD (array, i, wcsnlen, > CPU_FEATURE_USABLE (SSE4_1), > __wcsnlen_sse4_1) > - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2)) > + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic)) > > /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ > IFUNC_IMPL (i, name, wmemchr, > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > index b555ff2fac..ee36525bcf 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h > @@ -19,7 +19,7 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; > > static inline void * > @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)) > return OPTIMIZE (sse42); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (generic); > } > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h > index a15afa44e9..80529458d1 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h > +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h > @@ -20,7 +20,11 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +#ifndef GENERIC > +# define GENERIC sse2 > +#endif > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) > attribute_hidden; > extern __typeof 
(REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) > return OPTIMIZE (sse2_unaligned); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (GENERIC); > } > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > index 2b29e7608a..88c1c502af 100644 > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h > @@ -19,7 +19,11 @@ > > #include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +#ifndef GENERIC > +# define GENERIC sse2 > +#endif > + > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; > @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1)) > return OPTIMIZE (sse4_1); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (GENERIC); > } > diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c > deleted file mode 100644 > index b016e487e1..0000000000 > --- a/sysdeps/x86_64/multiarch/stpncpy-c.c > +++ /dev/null > @@ -1,7 +0,0 @@ > -#define STPNCPY __stpncpy_sse2 > -#undef weak_alias > -#define weak_alias(ignored1, ignored2) > -#undef libc_hidden_def > -#define libc_hidden_def(stpncpy) > - > -#include <string/stpncpy.c> > diff --git a/sysdeps/x86_64/multiarch/stpncpy-generic.c b/sysdeps/x86_64/multiarch/stpncpy-generic.c > new file mode 100644 > index 0000000000..87826845b0 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/stpncpy-generic.c > @@ -0,0 +1,26 @@ > +/* stpncpy. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > + > +#define STPNCPY __stpncpy_generic > +#undef weak_alias > +#define weak_alias(ignored1, ignored2) > +#undef libc_hidden_def > +#define libc_hidden_def(stpncpy) > + > +#include <string/stpncpy.c> > diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c > index 82fa53957d..879bc83f0b 100644 > --- a/sysdeps/x86_64/multiarch/stpncpy.c > +++ b/sysdeps/x86_64/multiarch/stpncpy.c > @@ -25,6 +25,7 @@ > # undef stpncpy > # undef __stpncpy > > +# define GENERIC generic > # define SYMBOL_NAME stpncpy > # include "ifunc-strcpy.h" > > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-generic.c > similarity index 96% > rename from sysdeps/x86_64/multiarch/strcspn-sse2.c > rename to sysdeps/x86_64/multiarch/strcspn-generic.c > index 3a04bb39fc..423de2e2b2 100644 > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c > +++ b/sysdeps/x86_64/multiarch/strcspn-generic.c > @@ -19,7 +19,7 @@ > #if IS_IN (libc) > > # include <sysdep.h> > -# define STRCSPN __strcspn_sse2 > +# define STRCSPN __strcspn_generic > > # undef libc_hidden_builtin_def > # define libc_hidden_builtin_def(STRCSPN) > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-sse4.c > similarity index 96% > rename from 
sysdeps/x86_64/multiarch/strcspn-c.c > rename to sysdeps/x86_64/multiarch/strcspn-sse4.c > index c312fab8b1..59f64f9fe8 100644 > --- a/sysdeps/x86_64/multiarch/strcspn-c.c > +++ b/sysdeps/x86_64/multiarch/strcspn-sse4.c > @@ -52,8 +52,8 @@ > when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset > X for case 1. */ > > -#ifndef STRCSPN_SSE2 > -# define STRCSPN_SSE2 __strcspn_sse2 > +#ifndef STRCSPN_GENERIC > +# define STRCSPN_GENERIC __strcspn_generic > # define STRCSPN_SSE42 __strcspn_sse42 > #endif > > @@ -69,7 +69,7 @@ char * > #else > size_t > #endif > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden; > +STRCSPN_GENERIC (const char *, const char *) attribute_hidden; > > > #ifdef USE_AS_STRPBRK > @@ -119,7 +119,7 @@ STRCSPN_SSE42 (const char *s, const char *a) > /* There is no NULL terminator. Don't use SSE4.2 if the length > of A > 16. */ > if (a[16] != 0) > - return STRCSPN_SSE2 (s, a); > + return STRCSPN_GENERIC (s, a); > } > > aligned = s; > diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c > deleted file mode 100644 > index 93a7fab7ea..0000000000 > --- a/sysdeps/x86_64/multiarch/strncat-c.c > +++ /dev/null > @@ -1,2 +0,0 @@ > -#define STRNCAT __strncat_sse2 > -#include <string/strncat.c> > diff --git a/sysdeps/x86_64/multiarch/strncat-generic.c b/sysdeps/x86_64/multiarch/strncat-generic.c > new file mode 100644 > index 0000000000..0090669cd1 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strncat-generic.c > @@ -0,0 +1,21 @@ > +/* strncat. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > + > +#define STRNCAT __strncat_generic > +#include <string/strncat.c> > diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c > index b649343a97..50fba8a41f 100644 > --- a/sysdeps/x86_64/multiarch/strncat.c > +++ b/sysdeps/x86_64/multiarch/strncat.c > @@ -24,6 +24,7 @@ > # undef strncat > > # define SYMBOL_NAME strncat > +# define GENERIC generic > # include "ifunc-strcpy.h" > > libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ()); > diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c > deleted file mode 100644 > index 57c45ac7ab..0000000000 > --- a/sysdeps/x86_64/multiarch/strncpy-c.c > +++ /dev/null > @@ -1,5 +0,0 @@ > -#define STRNCPY __strncpy_sse2 > -#undef libc_hidden_builtin_def > -#define libc_hidden_builtin_def(strncpy) > - > -#include <string/strncpy.c> > diff --git a/sysdeps/x86_64/multiarch/strncpy-generic.c b/sysdeps/x86_64/multiarch/strncpy-generic.c > new file mode 100644 > index 0000000000..9916153dd5 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/strncpy-generic.c > @@ -0,0 +1,24 @@ > +/* strncpy. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > + > +#define STRNCPY __strncpy_generic > +#undef libc_hidden_builtin_def > +#define libc_hidden_builtin_def(strncpy) > + > +#include <string/strncpy.c> > diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c > index 2a780a7e16..7fc7d72ec5 100644 > --- a/sysdeps/x86_64/multiarch/strncpy.c > +++ b/sysdeps/x86_64/multiarch/strncpy.c > @@ -24,6 +24,7 @@ > # undef strncpy > > # define SYMBOL_NAME strncpy > +# define GENERIC generic > # include "ifunc-strcpy.h" > > libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ()); > diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-generic.c > similarity index 96% > rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c > rename to sysdeps/x86_64/multiarch/strpbrk-generic.c > index d03214c4fb..d31acfe495 100644 > --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c > +++ b/sysdeps/x86_64/multiarch/strpbrk-generic.c > @@ -19,7 +19,7 @@ > #if IS_IN (libc) > > # include <sysdep.h> > -# define STRPBRK __strpbrk_sse2 > +# define STRPBRK __strpbrk_generic > > # undef libc_hidden_builtin_def > # define libc_hidden_builtin_def(STRPBRK) > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-sse4.c > similarity index 92% > rename from sysdeps/x86_64/multiarch/strpbrk-c.c > rename to sysdeps/x86_64/multiarch/strpbrk-sse4.c > index abf4ff7f1a..bf74d660d5 100644 > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c > +++ b/sysdeps/x86_64/multiarch/strpbrk-sse4.c > @@ -17,6 +17,6 @@ > <https://www.gnu.org/licenses/>. 
*/ > > #define USE_AS_STRPBRK > -#define STRCSPN_SSE2 __strpbrk_sse2 > +#define STRCSPN_GENERIC __strpbrk_generic > #define STRCSPN_SSE42 __strpbrk_sse42 > -#include "strcspn-c.c" > +#include "strcspn-sse4.c" > diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-generic.c > similarity index 96% > rename from sysdeps/x86_64/multiarch/strspn-sse2.c > rename to sysdeps/x86_64/multiarch/strspn-generic.c > index 61cc6cb0a5..6b50c36432 100644 > --- a/sysdeps/x86_64/multiarch/strspn-sse2.c > +++ b/sysdeps/x86_64/multiarch/strspn-generic.c > @@ -19,7 +19,7 @@ > #if IS_IN (libc) > > # include <sysdep.h> > -# define STRSPN __strspn_sse2 > +# define STRSPN __strspn_generic > > # undef libc_hidden_builtin_def > # define libc_hidden_builtin_def(STRSPN) > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-sse4.c > similarity index 97% > rename from sysdeps/x86_64/multiarch/strspn-c.c > rename to sysdeps/x86_64/multiarch/strspn-sse4.c > index 6124033ceb..d044916688 100644 > --- a/sysdeps/x86_64/multiarch/strspn-c.c > +++ b/sysdeps/x86_64/multiarch/strspn-sse4.c > @@ -51,7 +51,7 @@ > > We exit from the loop for case 1. */ > > -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden; > +extern size_t __strspn_generic (const char *, const char *) attribute_hidden; > > > size_t > @@ -98,7 +98,7 @@ __strspn_sse42 (const char *s, const char *a) > /* There is no NULL terminator. Don't use SSE4.2 if the length > of A > 16. 
*/ > if (a[16] != 0) > - return __strspn_sse2 (s, a); > + return __strspn_generic (s, a); > } > aligned = s; > offset = (unsigned int) ((size_t) s & 15); > diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c > deleted file mode 100644 > index 26d6984e9b..0000000000 > --- a/sysdeps/x86_64/multiarch/wcscpy-c.c > +++ /dev/null > @@ -1,5 +0,0 @@ > -#if IS_IN (libc) > -# define WCSCPY __wcscpy_sse2 > -#endif > - > -#include <wcsmbs/wcscpy.c> > diff --git a/sysdeps/x86_64/multiarch/wcscpy-generic.c b/sysdeps/x86_64/multiarch/wcscpy-generic.c > new file mode 100644 > index 0000000000..5ea905f33f > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/wcscpy-generic.c > @@ -0,0 +1,24 @@ > +/* wcscpy. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > + > +#if IS_IN (libc) > +# define WCSCPY __wcscpy_generic > +#endif > + > +#include <wcsmbs/wcscpy.c> > diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c > index 6a2d1421d9..53c3228dc2 100644 > --- a/sysdeps/x86_64/multiarch/wcscpy.c > +++ b/sysdeps/x86_64/multiarch/wcscpy.c > @@ -26,7 +26,7 @@ > # define SYMBOL_NAME wcscpy > # include <init-arch.h> > > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; > extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; > > static inline void * > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void) > if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3)) > return OPTIMIZE (ssse3); > > - return OPTIMIZE (sse2); > + return OPTIMIZE (generic); > } > > libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ()); > diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c > similarity index 92% > rename from sysdeps/x86_64/multiarch/wcsncmp-sse2.c > rename to sysdeps/x86_64/multiarch/wcsncmp-generic.c > index 8d9cbbb900..658d541886 100644 > --- a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c > +++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c > @@ -1,4 +1,4 @@ > -/* wcsncmp optimized with SSE2. > +/* wcsncmp. > Copyright (C) 2018-2022 Free Software Foundation, Inc. > This file is part of the GNU C Library. > > @@ -16,5 +16,5 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#define WCSNCMP __wcsncmp_sse2 > +#define WCSNCMP __wcsncmp_generic > #include <wcsmbs/wcsncmp.c> > diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c > index 5e00af2ca5..1836f794dd 100644 > --- a/sysdeps/x86_64/multiarch/wcsncmp.c > +++ b/sysdeps/x86_64/multiarch/wcsncmp.c > @@ -24,6 +24,8 @@ > # undef wcsncmp > # undef __wcsncmp > > +# define GENERIC generic > + > # define SYMBOL_NAME wcsncmp > # include "ifunc-avx2.h" > > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c > deleted file mode 100644 > index e1ec7cfbb5..0000000000 > --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c > +++ /dev/null > @@ -1,9 +0,0 @@ > -#if IS_IN (libc) > -# include <wchar.h> > - > -# define WCSNLEN __wcsnlen_sse2 > - > -extern __typeof (wcsnlen) __wcsnlen_sse2; > -#endif > - > -#include "wcsmbs/wcsnlen.c" > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c > new file mode 100644 > index 0000000000..2d75da7709 > --- /dev/null > +++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c > @@ -0,0 +1,28 @@ > +/* wcsnlen. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > + > +#if IS_IN (libc) > +# include <wchar.h> > + > +# define WCSNLEN __wcsnlen_generic > + > +extern __typeof (wcsnlen) __wcsnlen_generic; > +#endif > + > +#include "wcsmbs/wcsnlen.c" > diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c > index baa26666a8..05b7a211de 100644 > --- a/sysdeps/x86_64/multiarch/wcsnlen.c > +++ b/sysdeps/x86_64/multiarch/wcsnlen.c > @@ -24,6 +24,7 @@ > # undef __wcsnlen > > # define SYMBOL_NAME wcsnlen > +# define GENERIC generic > # include "ifunc-wcslen.h" > > libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); > -- > 2.34.1 > LGTM. Thanks. -- H.J. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v1 1/3] x86: Align varshift table to 32-bytes 2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein 2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein 2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein @ 2022-06-09 15:14 ` H.J. Lu 2022-07-14 2:51 ` Sunil Pandey 2 siblings, 1 reply; 11+ messages in thread From: H.J. Lu @ 2022-06-09 15:14 UTC (permalink / raw) To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > This ensures the load will never split a cache line. > --- > sysdeps/x86_64/multiarch/varshift.c | 5 +++-- > sysdeps/x86_64/multiarch/varshift.h | 3 ++- > 2 files changed, 5 insertions(+), 3 deletions(-) > > diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c > index c8210f0546..d27767520a 100644 > --- a/sysdeps/x86_64/multiarch/varshift.c > +++ b/sysdeps/x86_64/multiarch/varshift.c > @@ -16,9 +16,10 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. 
*/ > > -#include "varshift.h" > +#include <stdint.h> > > -const int8_t ___m128i_shift_right[31] attribute_hidden = > +const int8_t ___m128i_shift_right[31] attribute_hidden > + __attribute__((aligned(32))) = > { > 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, > -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 > diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h > index af30694488..ffd12d79e4 100644 > --- a/sysdeps/x86_64/multiarch/varshift.h > +++ b/sysdeps/x86_64/multiarch/varshift.h > @@ -19,7 +19,8 @@ > #include <stdint.h> > #include <tmmintrin.h> > > -extern const int8_t ___m128i_shift_right[31] attribute_hidden; > +extern const int8_t ___m128i_shift_right[31] attribute_hidden > + __attribute__ ((aligned (32))); > > static __inline__ __m128i > __m128i_shift_right (__m128i value, unsigned long int offset) > -- > 2.34.1 > LGTM. Thanks. -- H.J. ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v1 1/3] x86: Align varshift table to 32-bytes 2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu @ 2022-07-14 2:51 ` Sunil Pandey 0 siblings, 0 replies; 11+ messages in thread From: Sunil Pandey @ 2022-07-14 2:51 UTC (permalink / raw) To: H.J. Lu; +Cc: Noah Goldstein, GNU C Library On Thu, Jun 9, 2022 at 8:15 AM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > This ensures the load will never split a cache line. > > --- > > sysdeps/x86_64/multiarch/varshift.c | 5 +++-- > > sysdeps/x86_64/multiarch/varshift.h | 3 ++- > > 2 files changed, 5 insertions(+), 3 deletions(-) > > > > diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c > > index c8210f0546..d27767520a 100644 > > --- a/sysdeps/x86_64/multiarch/varshift.c > > +++ b/sysdeps/x86_64/multiarch/varshift.c > > @@ -16,9 +16,10 @@ > > License along with the GNU C Library; if not, see > > <https://www.gnu.org/licenses/>. 
*/ > > > > -#include "varshift.h" > > +#include <stdint.h> > > > > -const int8_t ___m128i_shift_right[31] attribute_hidden = > > +const int8_t ___m128i_shift_right[31] attribute_hidden > > + __attribute__((aligned(32))) = > > { > > 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, > > -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 > > diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h > > index af30694488..ffd12d79e4 100644 > > --- a/sysdeps/x86_64/multiarch/varshift.h > > +++ b/sysdeps/x86_64/multiarch/varshift.h > > @@ -19,7 +19,8 @@ > > #include <stdint.h> > > #include <tmmintrin.h> > > > > -extern const int8_t ___m128i_shift_right[31] attribute_hidden; > > +extern const int8_t ___m128i_shift_right[31] attribute_hidden > > + __attribute__ ((aligned (32))); > > > > static __inline__ __m128i > > __m128i_shift_right (__m128i value, unsigned long int offset) > > -- > > 2.34.1 > > > > LGTM. > > Thanks. > > -- > H.J. I would like to backport this patch to release branches. Any comments or objections? --Sunil ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2022-07-14 2:52 UTC | newest] Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein 2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein 2022-06-09 15:28 ` H.J. Lu 2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein 2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein 2022-06-10 1:19 ` H.J. Lu 2022-06-10 1:26 ` Noah Goldstein 2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein 2022-06-16 22:43 ` H.J. Lu 2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu 2022-07-14 2:51 ` Sunil Pandey
This is a public inbox. See the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for the read-only IMAP folder(s) and NNTP newsgroup(s).