From: Noah Goldstein <goldstein.w.n@gmail.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>,
"Carlos O'Donell" <carlos@systemhalted.org>
Subject: Re: [PATCH v2] x86: Rename generic functions with unique postfix for clarity
Date: Thu, 9 Jun 2022 18:26:57 -0700 [thread overview]
Message-ID: <CAFUsyf+Vc-ED2UhRcGE7WxxtS-OYL6+U9sBV-DKHo9+e9QgEgw@mail.gmail.com> (raw)
In-Reply-To: <CAMe9rOpcws-9d=getSo7itF-puA0sVxSQpWg3LuLQb2ZDu8sRQ@mail.gmail.com>
On Thu, Jun 9, 2022 at 6:20 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Jun 9, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > No functions are changed. It just renames generic implementations from
> > '{func}_sse2' to '{func}_generic'. This is just because the postfix
> > "_sse2" was overloaded and was used for files that had hand-optimized
> > sse2 assembly implementations and files that just redirected back
> > to the generic implementation.
>
> This change isn't small and its benefit is very small. Can it be part of
> a bigger change to support building glibc with
>
> -march=x86-64-vN
OK.
>
> > Full xcheck passed on x86_64.
> > ---
> > sysdeps/x86_64/multiarch/Makefile | 15 +-
> > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +-
> > sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +-
> > sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +-
> > sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +-
> > sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +-
> > sysdeps/x86_64/multiarch/stpncpy.c | 1 +
> > sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 163 ++++++++++++++++++
> > sysdeps/x86_64/multiarch/strcspn-c.c | 151 +---------------
> > sysdeps/x86_64/multiarch/strcspn-sse2.c | 28 ---
> > sysdeps/x86_64/multiarch/strncat-c.c | 2 +-
> > sysdeps/x86_64/multiarch/strncat.c | 1 +
> > sysdeps/x86_64/multiarch/strncpy-c.c | 2 +-
> > sysdeps/x86_64/multiarch/strncpy.c | 1 +
> > .../{strspn-sse2.c => strpbrk-c-sse4.c} | 18 +-
> > sysdeps/x86_64/multiarch/strpbrk-c.c | 18 +-
> > sysdeps/x86_64/multiarch/strpbrk-sse2.c | 28 ---
> > sysdeps/x86_64/multiarch/strspn-c-sse4.c | 136 +++++++++++++++
> > sysdeps/x86_64/multiarch/strspn-c.c | 126 +-------------
> > sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +-
> > sysdeps/x86_64/multiarch/wcscpy.c | 4 +-
> > sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 +-
> > sysdeps/x86_64/multiarch/wcsnlen.c | 1 +
> > 23 files changed, 376 insertions(+), 363 deletions(-)
> > create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c
> > rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%)
> > delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c
> >
> > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> > index 3d153cac35..86c6ecdfc1 100644
> > --- a/sysdeps/x86_64/multiarch/Makefile
> > +++ b/sysdeps/x86_64/multiarch/Makefile
> > @@ -77,7 +77,7 @@ sysdep_routines += \
> > strcpy-sse2 \
> > strcpy-sse2-unaligned \
> > strcspn-c \
> > - strcspn-sse2 \
> > + strcspn-c-sse4 \
> > strlen-avx2 \
> > strlen-avx2-rtm \
> > strlen-evex \
> > @@ -109,21 +109,22 @@ sysdep_routines += \
> > strnlen-evex512 \
> > strnlen-sse2 \
> > strpbrk-c \
> > - strpbrk-sse2 \
> > + strpbrk-c-sse4 \
> > strrchr-avx2 \
> > strrchr-avx2-rtm \
> > strrchr-evex \
> > strrchr-sse2 \
> > strspn-c \
> > - strspn-sse2 \
> > + strspn-c-sse4 \
> > strstr-avx512 \
> > strstr-sse2-unaligned \
> > varshift \
> > # sysdep_routines
> > -CFLAGS-varshift.c += -msse4
> > -CFLAGS-strcspn-c.c += -msse4
> > -CFLAGS-strpbrk-c.c += -msse4
> > -CFLAGS-strspn-c.c += -msse4
> > +
> > +CFLAGS-strcspn-c-sse4.c += -msse4
> > +CFLAGS-strpbrk-c-sse4.c += -msse4
> > +CFLAGS-strspn-c-sse4.c += -msse4
> > +
> > CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
> > endif
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index 58f3ec8306..4cbd200d39 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > __stpncpy_evex)
> > IFUNC_IMPL_ADD (array, i, stpncpy, 1,
> > __stpncpy_sse2_unaligned)
> > - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
> > + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/stpcpy.c. */
> > IFUNC_IMPL (i, name, stpcpy,
> > @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, strcspn,
> > IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
> > __strcspn_sse42)
> > - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> > + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strncase_l.c. */
> > IFUNC_IMPL (i, name, strncasecmp,
> > @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > __strncat_evex)
> > IFUNC_IMPL_ADD (array, i, strncat, 1,
> > __strncat_sse2_unaligned)
> > - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> > + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strncpy.c. */
> > IFUNC_IMPL (i, name, strncpy,
> > @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > __strncpy_evex)
> > IFUNC_IMPL_ADD (array, i, strncpy, 1,
> > __strncpy_sse2_unaligned)
> > - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
> > + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strpbrk.c. */
> > IFUNC_IMPL (i, name, strpbrk,
> > IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
> > __strpbrk_sse42)
> > - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> > + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
> >
> >
> > /* Support sysdeps/x86_64/multiarch/strspn.c. */
> > IFUNC_IMPL (i, name, strspn,
> > IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
> > __strspn_sse42)
> > - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> > + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strstr.c. */
> > IFUNC_IMPL (i, name, strstr,
> > @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, wcscpy,
> > IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
> > __wcscpy_ssse3)
> > - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
> > + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/wcslen.c. */
> > IFUNC_IMPL (i, name, wcslen,
> > @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL_ADD (array, i, wcsnlen,
> > CPU_FEATURE_USABLE (SSE4_1),
> > __wcsnlen_sse4_1)
> > - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
> > + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/wmemchr.c. */
> > IFUNC_IMPL (i, name, wmemchr,
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > index b555ff2fac..ee36525bcf 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > @@ -19,7 +19,7 @@
> >
> > #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
> >
> > static inline void *
> > @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
> > return OPTIMIZE (sse42);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (generic);
> > }
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > index a15afa44e9..80529458d1 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > @@ -20,7 +20,11 @@
> >
> > #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +#ifndef GENERIC
> > +# define GENERIC sse2
> > +#endif
> > +
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
> > attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> > @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > return OPTIMIZE (sse2_unaligned);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (GENERIC);
> > }
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > index 2b29e7608a..88c1c502af 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > @@ -19,7 +19,11 @@
> >
> > #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +#ifndef GENERIC
> > +# define GENERIC sse2
> > +#endif
> > +
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> > @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
> > return OPTIMIZE (sse4_1);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (GENERIC);
> > }
> > diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
> > index b016e487e1..eb62fcf388 100644
> > --- a/sysdeps/x86_64/multiarch/stpncpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
> > @@ -1,4 +1,4 @@
> > -#define STPNCPY __stpncpy_sse2
> > +#define STPNCPY __stpncpy_generic
> > #undef weak_alias
> > #define weak_alias(ignored1, ignored2)
> > #undef libc_hidden_def
> > diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
> > index 82fa53957d..879bc83f0b 100644
> > --- a/sysdeps/x86_64/multiarch/stpncpy.c
> > +++ b/sysdeps/x86_64/multiarch/stpncpy.c
> > @@ -25,6 +25,7 @@
> > # undef stpncpy
> > # undef __stpncpy
> >
> > +# define GENERIC generic
> > # define SYMBOL_NAME stpncpy
> > # include "ifunc-strcpy.h"
> >
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > new file mode 100644
> > index 0000000000..59f64f9fe8
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > @@ -0,0 +1,163 @@
> > +/* strcspn with SSE4.2 intrinsics
> > + Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#include <nmmintrin.h>
> > +#include <string.h>
> > +#include "varshift.h"
> > +
> > +/* We use 0x2:
> > + _SIDD_SBYTE_OPS
> > + | _SIDD_CMP_EQUAL_ANY
> > + | _SIDD_POSITIVE_POLARITY
> > + | _SIDD_LEAST_SIGNIFICANT
> > + on pcmpistri to compare xmm/mem128
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + X X X X X X X X X X X X X X X X
> > +
> > + against xmm
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + A A A A A A A A A A A A A A A A
> > +
> > + to find out if the first 16byte data element has any byte A and
> > + the offset of the first byte. There are 3 cases:
> > +
> > + 1. The first 16byte data element has the byte A at the offset X.
> > + 2. The first 16byte data element has EOS and doesn't have the byte A.
> > + 3. The first 16byte data element is valid and doesn't have the byte A.
> > +
> > + Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
> > +
> > + 1 X 1 0/1 0
> > + 2 16 0 1 0
> > + 3 16 0 0 0
> > +
> > + We exit from the loop for cases 1 and 2 with jbe which branches
> > + when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> > + X for case 1. */
> > +
> > +#ifndef STRCSPN_GENERIC
> > +# define STRCSPN_GENERIC __strcspn_generic
> > +# define STRCSPN_SSE42 __strcspn_sse42
> > +#endif
> > +
> > +#ifdef USE_AS_STRPBRK
> > +# define RETURN(val1, val2) return val1
> > +#else
> > +# define RETURN(val1, val2) return val2
> > +#endif
> > +
> > +extern
> > +#ifdef USE_AS_STRPBRK
> > +char *
> > +#else
> > +size_t
> > +#endif
> > +STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
> > +
> > +
> > +#ifdef USE_AS_STRPBRK
> > +char *
> > +#else
> > +size_t
> > +#endif
> > +__attribute__ ((section (".text.sse4.2")))
> > +STRCSPN_SSE42 (const char *s, const char *a)
> > +{
> > + if (*a == 0)
> > + RETURN (NULL, strlen (s));
> > +
> > + const char *aligned;
> > + __m128i mask, maskz, zero;
> > + unsigned int maskz_bits;
> > + unsigned int offset = (unsigned int) ((size_t) a & 15);
> > + zero = _mm_set1_epi8 (0);
> > + if (offset != 0)
> > + {
> > + /* Load masks. */
> > + aligned = (const char *) ((size_t) a & -16L);
> > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > + maskz = _mm_cmpeq_epi8 (mask0, zero);
> > +
> > + /* Find where the NULL terminator is. */
> > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > + if (maskz_bits != 0)
> > + {
> > + mask = __m128i_shift_right (mask0, offset);
> > + offset = (unsigned int) ((size_t) s & 15);
> > + if (offset)
> > + goto start_unaligned;
> > +
> > + aligned = s;
> > + goto start_loop;
> > + }
> > + }
> > +
> > + /* A is aligned. */
> > + mask = _mm_loadu_si128 ((__m128i *) a);
> > + /* Find where the NULL terminator is. */
> > + maskz = _mm_cmpeq_epi8 (mask, zero);
> > + maskz_bits = _mm_movemask_epi8 (maskz);
> > + if (maskz_bits == 0)
> > + {
> > + /* There is no NULL terminator. Don't use SSE4.2 if the length
> > + of A > 16. */
> > + if (a[16] != 0)
> > + return STRCSPN_GENERIC (s, a);
> > + }
> > +
> > + aligned = s;
> > + offset = (unsigned int) ((size_t) s & 15);
> > + if (offset != 0)
> > + {
> > + start_unaligned:
> > + /* Check partial string. */
> > + aligned = (const char *) ((size_t) s & -16L);
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > +
> > + value = __m128i_shift_right (value, offset);
> > +
> > + unsigned int length = _mm_cmpistri (mask, value, 0x2);
> > + /* No need to check ZFlag since ZFlag is always 1. */
> > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > + if (cflag)
> > + RETURN ((char *) (s + length), length);
> > + /* Find where the NULL terminator is. */
> > + unsigned int index = _mm_cmpistri (value, value, 0x3a);
> > + if (index < 16 - offset)
> > + RETURN (NULL, index);
> > + aligned += 16;
> > + }
> > +
> > +start_loop:
> > + while (1)
> > + {
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > + unsigned int index = _mm_cmpistri (mask, value, 0x2);
> > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > + unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> > + if (cflag)
> > + RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> > + if (zflag)
> > + RETURN (NULL,
> > + /* Find where the NULL terminator is. */
> > + (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> > + aligned += 16;
> > + }
> > +}
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
> > index c312fab8b1..423de2e2b2 100644
> > --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> > +++ b/sysdeps/x86_64/multiarch/strcspn-c.c
> > @@ -1,5 +1,5 @@
> > -/* strcspn with SSE4.2 intrinsics
> > - Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +/* strcspn.
> > + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,148 +16,13 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#include <nmmintrin.h>
> > -#include <string.h>
> > -#include "varshift.h"
> > +#if IS_IN (libc)
> >
> > -/* We use 0x2:
> > - _SIDD_SBYTE_OPS
> > - | _SIDD_CMP_EQUAL_ANY
> > - | _SIDD_POSITIVE_POLARITY
> > - | _SIDD_LEAST_SIGNIFICANT
> > - on pcmpistri to compare xmm/mem128
> > +# include <sysdep.h>
> > +# define STRCSPN __strcspn_generic
> >
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - X X X X X X X X X X X X X X X X
> > -
> > - against xmm
> > -
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - A A A A A A A A A A A A A A A A
> > -
> > - to find out if the first 16byte data element has any byte A and
> > - the offset of the first byte. There are 3 cases:
> > -
> > - 1. The first 16byte data element has the byte A at the offset X.
> > - 2. The first 16byte data element has EOS and doesn't have the byte A.
> > - 3. The first 16byte data element is valid and doesn't have the byte A.
> > -
> > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > -
> > - 1 X 1 0/1 0
> > - 2 16 0 1 0
> > - 3 16 0 0 0
> > -
> > - We exit from the loop for cases 1 and 2 with jbe which branches
> > - when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> > - X for case 1. */
> > -
> > -#ifndef STRCSPN_SSE2
> > -# define STRCSPN_SSE2 __strcspn_sse2
> > -# define STRCSPN_SSE42 __strcspn_sse42
> > -#endif
> > -
> > -#ifdef USE_AS_STRPBRK
> > -# define RETURN(val1, val2) return val1
> > -#else
> > -# define RETURN(val1, val2) return val2
> > -#endif
> > -
> > -extern
> > -#ifdef USE_AS_STRPBRK
> > -char *
> > -#else
> > -size_t
> > -#endif
> > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> > -
> > -
> > -#ifdef USE_AS_STRPBRK
> > -char *
> > -#else
> > -size_t
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRCSPN)
> > #endif
> > -__attribute__ ((section (".text.sse4.2")))
> > -STRCSPN_SSE42 (const char *s, const char *a)
> > -{
> > - if (*a == 0)
> > - RETURN (NULL, strlen (s));
> > -
> > - const char *aligned;
> > - __m128i mask, maskz, zero;
> > - unsigned int maskz_bits;
> > - unsigned int offset = (unsigned int) ((size_t) a & 15);
> > - zero = _mm_set1_epi8 (0);
> > - if (offset != 0)
> > - {
> > - /* Load masks. */
> > - aligned = (const char *) ((size_t) a & -16L);
> > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > - maskz = _mm_cmpeq_epi8 (mask0, zero);
> > -
> > - /* Find where the NULL terminator is. */
> > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > - if (maskz_bits != 0)
> > - {
> > - mask = __m128i_shift_right (mask0, offset);
> > - offset = (unsigned int) ((size_t) s & 15);
> > - if (offset)
> > - goto start_unaligned;
> > -
> > - aligned = s;
> > - goto start_loop;
> > - }
> > - }
> > -
> > - /* A is aligned. */
> > - mask = _mm_loadu_si128 ((__m128i *) a);
> > - /* Find where the NULL terminator is. */
> > - maskz = _mm_cmpeq_epi8 (mask, zero);
> > - maskz_bits = _mm_movemask_epi8 (maskz);
> > - if (maskz_bits == 0)
> > - {
> > - /* There is no NULL terminator. Don't use SSE4.2 if the length
> > - of A > 16. */
> > - if (a[16] != 0)
> > - return STRCSPN_SSE2 (s, a);
> > - }
> > -
> > - aligned = s;
> > - offset = (unsigned int) ((size_t) s & 15);
> > - if (offset != 0)
> > - {
> > - start_unaligned:
> > - /* Check partial string. */
> > - aligned = (const char *) ((size_t) s & -16L);
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > -
> > - value = __m128i_shift_right (value, offset);
> > -
> > - unsigned int length = _mm_cmpistri (mask, value, 0x2);
> > - /* No need to check ZFlag since ZFlag is always 1. */
> > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > - if (cflag)
> > - RETURN ((char *) (s + length), length);
> > - /* Find where the NULL terminator is. */
> > - unsigned int index = _mm_cmpistri (value, value, 0x3a);
> > - if (index < 16 - offset)
> > - RETURN (NULL, index);
> > - aligned += 16;
> > - }
> >
> > -start_loop:
> > - while (1)
> > - {
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > - unsigned int index = _mm_cmpistri (mask, value, 0x2);
> > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > - unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> > - if (cflag)
> > - RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> > - if (zflag)
> > - RETURN (NULL,
> > - /* Find where the NULL terminator is. */
> > - (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> > - aligned += 16;
> > - }
> > -}
> > +#include <string/strcspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c
> > deleted file mode 100644
> > index 3a04bb39fc..0000000000
> > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
> > +++ /dev/null
> > @@ -1,28 +0,0 @@
> > -/* strcspn.
> > - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > - This file is part of the GNU C Library.
> > -
> > - The GNU C Library is free software; you can redistribute it and/or
> > - modify it under the terms of the GNU Lesser General Public
> > - License as published by the Free Software Foundation; either
> > - version 2.1 of the License, or (at your option) any later version.
> > -
> > - The GNU C Library is distributed in the hope that it will be useful,
> > - but WITHOUT ANY WARRANTY; without even the implied warranty of
> > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > - Lesser General Public License for more details.
> > -
> > - You should have received a copy of the GNU Lesser General Public
> > - License along with the GNU C Library; if not, see
> > - <https://www.gnu.org/licenses/>. */
> > -
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRCSPN __strcspn_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRCSPN)
> > -#endif
> > -
> > -#include <string/strcspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
> > index 93a7fab7ea..b729c033d9 100644
> > --- a/sysdeps/x86_64/multiarch/strncat-c.c
> > +++ b/sysdeps/x86_64/multiarch/strncat-c.c
> > @@ -1,2 +1,2 @@
> > -#define STRNCAT __strncat_sse2
> > +#define STRNCAT __strncat_generic
> > #include <string/strncat.c>
> > diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
> > index b649343a97..50fba8a41f 100644
> > --- a/sysdeps/x86_64/multiarch/strncat.c
> > +++ b/sysdeps/x86_64/multiarch/strncat.c
> > @@ -24,6 +24,7 @@
> > # undef strncat
> >
> > # define SYMBOL_NAME strncat
> > +# define GENERIC generic
> > # include "ifunc-strcpy.h"
> >
> > libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
> > index 57c45ac7ab..183b0b8e0f 100644
> > --- a/sysdeps/x86_64/multiarch/strncpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/strncpy-c.c
> > @@ -1,4 +1,4 @@
> > -#define STRNCPY __strncpy_sse2
> > +#define STRNCPY __strncpy_generic
> > #undef libc_hidden_builtin_def
> > #define libc_hidden_builtin_def(strncpy)
> >
> > diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
> > index 2a780a7e16..7fc7d72ec5 100644
> > --- a/sysdeps/x86_64/multiarch/strncpy.c
> > +++ b/sysdeps/x86_64/multiarch/strncpy.c
> > @@ -24,6 +24,7 @@
> > # undef strncpy
> >
> > # define SYMBOL_NAME strncpy
> > +# define GENERIC generic
> > # include "ifunc-strcpy.h"
> >
> > libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > similarity index 74%
> > rename from sysdeps/x86_64/multiarch/strspn-sse2.c
> > rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > index 61cc6cb0a5..8700276773 100644
> > --- a/sysdeps/x86_64/multiarch/strspn-sse2.c
> > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > @@ -1,5 +1,5 @@
> > -/* strspn.
> > - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > +/* strpbrk with SSE4.2 intrinsics
> > + Copyright (C) 2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,13 +16,7 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRSPN __strspn_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRSPN)
> > -#endif
> > -
> > -#include <string/strspn.c>
> > +#define USE_AS_STRPBRK
> > +#define STRCSPN_GENERIC __strpbrk_generic
> > +#define STRCSPN_SSE42 __strpbrk_sse42
> > +#include "strcspn-c-sse4.c"
> > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
> > index abf4ff7f1a..d31acfe495 100644
> > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> > +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
> > @@ -1,5 +1,5 @@
> > -/* strpbrk with SSE4.2 intrinsics
> > - Copyright (C) 2022 Free Software Foundation, Inc.
> > +/* strpbrk.
> > + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,7 +16,13 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#define USE_AS_STRPBRK
> > -#define STRCSPN_SSE2 __strpbrk_sse2
> > -#define STRCSPN_SSE42 __strpbrk_sse42
> > -#include "strcspn-c.c"
> > +#if IS_IN (libc)
> > +
> > +# include <sysdep.h>
> > +# define STRPBRK __strpbrk_generic
> > +
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRPBRK)
> > +#endif
> > +
> > +#include <string/strpbrk.c>
> > diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > deleted file mode 100644
> > index d03214c4fb..0000000000
> > --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > +++ /dev/null
> > @@ -1,28 +0,0 @@
> > -/* strpbrk.
> > - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > - This file is part of the GNU C Library.
> > -
> > - The GNU C Library is free software; you can redistribute it and/or
> > - modify it under the terms of the GNU Lesser General Public
> > - License as published by the Free Software Foundation; either
> > - version 2.1 of the License, or (at your option) any later version.
> > -
> > - The GNU C Library is distributed in the hope that it will be useful,
> > - but WITHOUT ANY WARRANTY; without even the implied warranty of
> > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > - Lesser General Public License for more details.
> > -
> > - You should have received a copy of the GNU Lesser General Public
> > - License along with the GNU C Library; if not, see
> > - <https://www.gnu.org/licenses/>. */
> > -
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRPBRK __strpbrk_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRPBRK)
> > -#endif
> > -
> > -#include <string/strpbrk.c>
> > diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> > new file mode 100644
> > index 0000000000..d044916688
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> > @@ -0,0 +1,136 @@
> > +/* strspn with SSE4.2 intrinsics
> > + Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#include <nmmintrin.h>
> > +#include <string.h>
> > +#include "varshift.h"
> > +
> > +/* We use 0x12:
> > + _SIDD_SBYTE_OPS
> > + | _SIDD_CMP_EQUAL_ANY
> > + | _SIDD_NEGATIVE_POLARITY
> > + | _SIDD_LEAST_SIGNIFICANT
> > + on pcmpistri to compare xmm/mem128
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + X X X X X X X X X X X X X X X X
> > +
> > + against xmm
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + A A A A A A A A A A A A A A A A
> > +
> > + to find out if the first 16byte data element has any non-A byte and
> > + the offset of the first byte. There are 2 cases:
> > +
> > + 1. The first 16byte data element has the non-A byte, including
> > + EOS, at the offset X.
> > + 2. The first 16byte data element is valid and doesn't have the non-A
> > + byte.
> > +
> > + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > +
> > + case ECX CFlag ZFlag SFlag
> > + 1 X 1 0/1 0
> > + 2 16 0 0 0
> > +
> > + We exit from the loop for case 1. */
> > +
> > +extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
> > +
> > +
> > +size_t
> > +__attribute__ ((section (".text.sse4.2")))
> > +__strspn_sse42 (const char *s, const char *a)
> > +{
> > + if (*a == 0)
> > + return 0;
> > +
> > + const char *aligned;
> > + __m128i mask, maskz, zero;
> > + unsigned int maskz_bits;
> > + unsigned int offset = (int) ((size_t) a & 15);
> > + zero = _mm_set1_epi8 (0);
> > + if (offset != 0)
> > + {
> > + /* Load masks. */
> > + aligned = (const char *) ((size_t) a & -16L);
> > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > + maskz = _mm_cmpeq_epi8 (mask0, zero);
> > +
> > + /* Find where the NULL terminator is. */
> > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > + if (maskz_bits != 0)
> > + {
> > + mask = __m128i_shift_right (mask0, offset);
> > + offset = (unsigned int) ((size_t) s & 15);
> > + if (offset)
> > + goto start_unaligned;
> > +
> > + aligned = s;
> > + goto start_loop;
> > + }
> > + }
> > +
> > + /* A is aligned. */
> > + mask = _mm_loadu_si128 ((__m128i *) a);
> > +
> > + /* Find where the NULL terminator is. */
> > + maskz = _mm_cmpeq_epi8 (mask, zero);
> > + maskz_bits = _mm_movemask_epi8 (maskz);
> > + if (maskz_bits == 0)
> > + {
> > + /* There is no NULL terminator. Don't use SSE4.2 if the length
> > + of A > 16. */
> > + if (a[16] != 0)
> > + return __strspn_generic (s, a);
> > + }
> > + aligned = s;
> > + offset = (unsigned int) ((size_t) s & 15);
> > +
> > + if (offset != 0)
> > + {
> > + start_unaligned:
> > + /* Check partial string. */
> > + aligned = (const char *) ((size_t) s & -16L);
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > + __m128i adj_value = __m128i_shift_right (value, offset);
> > +
> > + unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> > + /* No need to check CFlag since it is always 1. */
> > + if (length < 16 - offset)
> > + return length;
> > + /* Find where the NULL terminator is. */
> > + maskz = _mm_cmpeq_epi8 (value, zero);
> > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > + if (maskz_bits != 0)
> > + return length;
> > + aligned += 16;
> > + }
> > +
> > +start_loop:
> > + while (1)
> > + {
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > + unsigned int index = _mm_cmpistri (mask, value, 0x12);
> > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> > + if (cflag)
> > + return (size_t) (aligned + index - s);
> > + aligned += 16;
> > + }
> > +}
> > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
> > index 6124033ceb..6b50c36432 100644
> > --- a/sysdeps/x86_64/multiarch/strspn-c.c
> > +++ b/sysdeps/x86_64/multiarch/strspn-c.c
> > @@ -1,5 +1,5 @@
> > -/* strspn with SSE4.2 intrinsics
> > - Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +/* strspn.
> > + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,121 +16,13 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#include <nmmintrin.h>
> > -#include <string.h>
> > -#include "varshift.h"
> > +#if IS_IN (libc)
> >
> > -/* We use 0x12:
> > - _SIDD_SBYTE_OPS
> > - | _SIDD_CMP_EQUAL_ANY
> > - | _SIDD_NEGATIVE_POLARITY
> > - | _SIDD_LEAST_SIGNIFICANT
> > - on pcmpistri to compare xmm/mem128
> > +# include <sysdep.h>
> > +# define STRSPN __strspn_generic
> >
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - X X X X X X X X X X X X X X X X
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRSPN)
> > +#endif
> >
> > - against xmm
> > -
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - A A A A A A A A A A A A A A A A
> > -
> > - to find out if the first 16byte data element has any non-A byte and
> > - the offset of the first byte. There are 2 cases:
> > -
> > - 1. The first 16byte data element has the non-A byte, including
> > - EOS, at the offset X.
> > - 2. The first 16byte data element is valid and doesn't have the non-A
> > - byte.
> > -
> > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > -
> > - case ECX CFlag ZFlag SFlag
> > - 1 X 1 0/1 0
> > - 2 16 0 0 0
> > -
> > - We exit from the loop for case 1. */
> > -
> > -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
> > -
> > -
> > -size_t
> > -__attribute__ ((section (".text.sse4.2")))
> > -__strspn_sse42 (const char *s, const char *a)
> > -{
> > - if (*a == 0)
> > - return 0;
> > -
> > - const char *aligned;
> > - __m128i mask, maskz, zero;
> > - unsigned int maskz_bits;
> > - unsigned int offset = (int) ((size_t) a & 15);
> > - zero = _mm_set1_epi8 (0);
> > - if (offset != 0)
> > - {
> > - /* Load masks. */
> > - aligned = (const char *) ((size_t) a & -16L);
> > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > - maskz = _mm_cmpeq_epi8 (mask0, zero);
> > -
> > - /* Find where the NULL terminator is. */
> > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > - if (maskz_bits != 0)
> > - {
> > - mask = __m128i_shift_right (mask0, offset);
> > - offset = (unsigned int) ((size_t) s & 15);
> > - if (offset)
> > - goto start_unaligned;
> > -
> > - aligned = s;
> > - goto start_loop;
> > - }
> > - }
> > -
> > - /* A is aligned. */
> > - mask = _mm_loadu_si128 ((__m128i *) a);
> > -
> > - /* Find where the NULL terminator is. */
> > - maskz = _mm_cmpeq_epi8 (mask, zero);
> > - maskz_bits = _mm_movemask_epi8 (maskz);
> > - if (maskz_bits == 0)
> > - {
> > - /* There is no NULL terminator. Don't use SSE4.2 if the length
> > - of A > 16. */
> > - if (a[16] != 0)
> > - return __strspn_sse2 (s, a);
> > - }
> > - aligned = s;
> > - offset = (unsigned int) ((size_t) s & 15);
> > -
> > - if (offset != 0)
> > - {
> > - start_unaligned:
> > - /* Check partial string. */
> > - aligned = (const char *) ((size_t) s & -16L);
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > - __m128i adj_value = __m128i_shift_right (value, offset);
> > -
> > - unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> > - /* No need to check CFlag since it is always 1. */
> > - if (length < 16 - offset)
> > - return length;
> > - /* Find where the NULL terminator is. */
> > - maskz = _mm_cmpeq_epi8 (value, zero);
> > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > - if (maskz_bits != 0)
> > - return length;
> > - aligned += 16;
> > - }
> > -
> > -start_loop:
> > - while (1)
> > - {
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > - unsigned int index = _mm_cmpistri (mask, value, 0x12);
> > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> > - if (cflag)
> > - return (size_t) (aligned + index - s);
> > - aligned += 16;
> > - }
> > -}
> > +#include <string/strspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
> > index 26d6984e9b..fa38dd898d 100644
> > --- a/sysdeps/x86_64/multiarch/wcscpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
> > @@ -1,5 +1,5 @@
> > #if IS_IN (libc)
> > -# define WCSCPY __wcscpy_sse2
> > +# define WCSCPY __wcscpy_generic
> > #endif
> >
> > #include <wcsmbs/wcscpy.c>
> > diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> > index 6a2d1421d9..53c3228dc2 100644
> > --- a/sysdeps/x86_64/multiarch/wcscpy.c
> > +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> > @@ -26,7 +26,7 @@
> > # define SYMBOL_NAME wcscpy
> > # include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
> >
> > static inline void *
> > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
> > return OPTIMIZE (ssse3);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (generic);
> > }
> >
> > libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > index e1ec7cfbb5..1c9c04241a 100644
> > --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > @@ -1,9 +1,9 @@
> > #if IS_IN (libc)
> > # include <wchar.h>
> >
> > -# define WCSNLEN __wcsnlen_sse2
> > +# define WCSNLEN __wcsnlen_generic
> >
> > -extern __typeof (wcsnlen) __wcsnlen_sse2;
> > +extern __typeof (wcsnlen) __wcsnlen_generic;
> > #endif
> >
> > #include "wcsmbs/wcsnlen.c"
> > diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
> > index baa26666a8..05b7a211de 100644
> > --- a/sysdeps/x86_64/multiarch/wcsnlen.c
> > +++ b/sysdeps/x86_64/multiarch/wcsnlen.c
> > @@ -24,6 +24,7 @@
> > # undef __wcsnlen
> >
> > # define SYMBOL_NAME wcsnlen
> > +# define GENERIC generic
> > # include "ifunc-wcslen.h"
> >
> > libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
next prev parent reply other threads: [~2022-06-10 1:27 UTC|newest]
Thread overview: 11+ messages / expand [flat|nested] mbox.gz Atom feed top
2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
2022-06-09 15:28 ` H.J. Lu
2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein
2022-06-10 1:19 ` H.J. Lu
2022-06-10 1:26 ` Noah Goldstein [this message]
2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein
2022-06-16 22:43 ` H.J. Lu
2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
2022-07-14 2:51 ` Sunil Pandey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAFUsyf+Vc-ED2UhRcGE7WxxtS-OYL6+U9sBV-DKHo9+e9QgEgw@mail.gmail.com \
--to=goldstein.w.n@gmail.com \
--cc=carlos@systemhalted.org \
--cc=hjl.tools@gmail.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).