public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v1 1/3] x86: Align varshift table to 32-bytes
@ 2022-06-09  4:16 Noah Goldstein
  2022-06-09  4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Noah Goldstein @ 2022-06-09  4:16 UTC (permalink / raw)
  To: libc-alpha

This ensures the load will never split a cache line.
---
 sysdeps/x86_64/multiarch/varshift.c | 5 +++--
 sysdeps/x86_64/multiarch/varshift.h | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
index c8210f0546..d27767520a 100644
--- a/sysdeps/x86_64/multiarch/varshift.c
+++ b/sysdeps/x86_64/multiarch/varshift.c
@@ -16,9 +16,10 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include "varshift.h"
+#include <stdint.h>
 
-const int8_t ___m128i_shift_right[31] attribute_hidden =
+const int8_t ___m128i_shift_right[31] attribute_hidden
+    __attribute__((aligned(32))) =
   {
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
index af30694488..ffd12d79e4 100644
--- a/sysdeps/x86_64/multiarch/varshift.h
+++ b/sysdeps/x86_64/multiarch/varshift.h
@@ -19,7 +19,8 @@
 #include <stdint.h>
 #include <tmmintrin.h>
 
-extern const int8_t ___m128i_shift_right[31] attribute_hidden;
+extern const int8_t ___m128i_shift_right[31] attribute_hidden
+    __attribute__ ((aligned (32)));
 
 static __inline__ __m128i
 __m128i_shift_right (__m128i value, unsigned long int offset)
-- 
2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk
  2022-06-09  4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
@ 2022-06-09  4:16 ` Noah Goldstein
  2022-06-09 15:28   ` H.J. Lu
  2022-06-09  4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
  2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
  2 siblings, 1 reply; 11+ messages in thread
From: Noah Goldstein @ 2022-06-09  4:16 UTC (permalink / raw)
  To: libc-alpha

No change to the actual logic of the functions. The goal is for
avx/avx2 machines to rely less on sse instructions.

Full xcheck passes on x86_64.
---
 sysdeps/x86_64/multiarch/Makefile             | 21 ++++++++++-----
 .../multiarch/{ifunc-sse4_2.h => ifunc-avx.h} |  4 +++
 sysdeps/x86_64/multiarch/ifunc-impl-list.c    |  6 +++++
 sysdeps/x86_64/multiarch/strcspn-c-avx.c      | 21 +++++++++++++++
 .../{strcspn-c.c => strcspn-c-sse4.c}         | 26 ++++++++++++-------
 sysdeps/x86_64/multiarch/strcspn.c            |  2 +-
 sysdeps/x86_64/multiarch/strpbrk-c-avx.c      | 23 ++++++++++++++++
 .../{strpbrk-c.c => strpbrk-c-sse4.c}         |  6 ++---
 sysdeps/x86_64/multiarch/strpbrk.c            |  2 +-
 sysdeps/x86_64/multiarch/strspn-c-avx.c       | 21 +++++++++++++++
 .../multiarch/{strspn-c.c => strspn-c-sse4.c} | 15 ++++++++---
 sysdeps/x86_64/multiarch/strspn.c             |  2 +-
 12 files changed, 122 insertions(+), 27 deletions(-)
 rename sysdeps/x86_64/multiarch/{ifunc-sse4_2.h => ifunc-avx.h} (89%)
 create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-avx.c
 rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-c-sse4.c} (90%)
 create mode 100644 sysdeps/x86_64/multiarch/strpbrk-c-avx.c
 rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-c-sse4.c} (89%)
 create mode 100644 sysdeps/x86_64/multiarch/strspn-c-avx.c
 rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-c-sse4.c} (92%)

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3d153cac35..27f306c7c8 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -76,7 +76,8 @@ sysdep_routines += \
   strcpy-evex \
   strcpy-sse2 \
   strcpy-sse2-unaligned \
-  strcspn-c \
+  strcspn-c-avx \
+  strcspn-c-sse4 \
   strcspn-sse2 \
   strlen-avx2 \
   strlen-avx2-rtm \
@@ -108,22 +109,28 @@ sysdep_routines += \
   strnlen-evex \
   strnlen-evex512 \
   strnlen-sse2 \
-  strpbrk-c \
+  strpbrk-c-avx \
+  strpbrk-c-sse4 \
   strpbrk-sse2 \
   strrchr-avx2 \
   strrchr-avx2-rtm \
   strrchr-evex \
   strrchr-sse2 \
-  strspn-c \
+  strspn-c-avx \
+  strspn-c-sse4 \
   strspn-sse2 \
   strstr-avx512 \
   strstr-sse2-unaligned \
   varshift \
 # sysdep_routines
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+
+CFLAGS-strcspn-c-avx.c += -mavx
+CFLAGS-strcspn-c-sse4.c += -msse4
+CFLAGS-strpbrk-c-avx.c += -mavx
+CFLAGS-strpbrk-c-sse4.c += -msse4
+CFLAGS-strspn-c-avx.c += -mavx
+CFLAGS-strspn-c-sse4.c += -msse4
+
 CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
 endif
 
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h
similarity index 89%
rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h
rename to sysdeps/x86_64/multiarch/ifunc-avx.h
index b555ff2fac..891f3ddcac 100644
--- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx.h
@@ -21,12 +21,16 @@
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
 
 static inline void *
 IFUNC_SELECTOR (void)
 {
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx);
+
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
     return OPTIMIZE (sse42);
 
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 58f3ec8306..507c563669 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/x86_64/multiarch/strcspn.c.  */
   IFUNC_IMPL (i, name, strcspn,
+	      IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX),
+			      __strcspn_avx)
 	      IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strcspn_sse42)
 	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
@@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
   IFUNC_IMPL (i, name, strpbrk,
+	      IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX),
+			      __strpbrk_avx)
 	      IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
 			      __strpbrk_sse42)
 	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
@@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/x86_64/multiarch/strspn.c.  */
   IFUNC_IMPL (i, name, strspn,
+	      IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX),
+			      __strspn_avx)
 	      IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strspn_sse42)
 	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
new file mode 100644
index 0000000000..b8d983f79f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
@@ -0,0 +1,21 @@
+/* strcspn with AVX intrinsics
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define STRCSPN __strcspn_avx
+#define SECTION "avx"
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
similarity index 90%
rename from sysdeps/x86_64/multiarch/strcspn-c.c
rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c
index c312fab8b1..848c3cfb14 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
@@ -52,9 +52,16 @@
    when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
    X for case 1.  */
 
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
-# define STRCSPN_SSE42 __strcspn_sse42
+#ifndef STRCSPN_FALLBACK
+# define STRCSPN_FALLBACK __strcspn_sse2
+#endif
+
+#ifndef STRCSPN
+# define STRCSPN __strcspn_sse42
+#endif
+
+#ifndef SECTION
+# define SECTION "sse4.2"
 #endif
 
 #ifdef USE_AS_STRPBRK
@@ -69,16 +76,15 @@ char *
 #else
 size_t
 #endif
-STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
-
+STRCSPN_FALLBACK (const char *, const char *) attribute_hidden;
 
 #ifdef USE_AS_STRPBRK
 char *
 #else
 size_t
 #endif
-__attribute__ ((section (".text.sse4.2")))
-STRCSPN_SSE42 (const char *s, const char *a)
+__attribute__ ((section (".text." SECTION)))
+STRCSPN (const char *s, const char *a)
 {
   if (*a == 0)
     RETURN (NULL, strlen (s));
@@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a)
   maskz_bits = _mm_movemask_epi8 (maskz);
   if (maskz_bits == 0)
     {
-      /* There is no NULL terminator.  Don't use SSE4.2 if the length
-         of A > 16.  */
+      /* There is no NULL terminator.  Don't use pcmpstri based approach if the
+	 length of A > 16.  */
       if (a[16] != 0)
-        return STRCSPN_SSE2 (s, a);
+        return STRCSPN_FALLBACK (s, a);
     }
 
   aligned = s;
diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c
index 4848fa8677..63e1cf052e 100644
--- a/sysdeps/x86_64/multiarch/strcspn.c
+++ b/sysdeps/x86_64/multiarch/strcspn.c
@@ -24,7 +24,7 @@
 # undef strcspn
 
 # define SYMBOL_NAME strcspn
-# include "ifunc-sse4_2.h"
+# include "ifunc-avx.h"
 
 libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ());
 
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
new file mode 100644
index 0000000000..2918013994
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
@@ -0,0 +1,23 @@
+/* strpbrk with AVX intrinsics
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define USE_AS_STRPBRK
+#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN __strpbrk_avx
+#define SECTION "avx"
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
similarity index 89%
rename from sysdeps/x86_64/multiarch/strpbrk-c.c
rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
index abf4ff7f1a..2efd38d809 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
@@ -17,6 +17,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define USE_AS_STRPBRK
-#define STRCSPN_SSE2 __strpbrk_sse2
-#define STRCSPN_SSE42 __strpbrk_sse42
-#include "strcspn-c.c"
+#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN __strpbrk_sse42
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c
index 04e300ea71..ab5b04a482 100644
--- a/sysdeps/x86_64/multiarch/strpbrk.c
+++ b/sysdeps/x86_64/multiarch/strpbrk.c
@@ -24,7 +24,7 @@
 # undef strpbrk
 
 # define SYMBOL_NAME strpbrk
-# include "ifunc-sse4_2.h"
+# include "ifunc-avx.h"
 
 libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ());
 
diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c
new file mode 100644
index 0000000000..9d5fdb9550
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c
@@ -0,0 +1,21 @@
+/* strspn with AVX intrinsics
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define STRSPN __strspn_avx
+#define SECTION "avx"
+#include "strspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
similarity index 92%
rename from sysdeps/x86_64/multiarch/strspn-c.c
rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c
index 6124033ceb..6a91def2e0 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
@@ -53,10 +53,17 @@
 
 extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
 
+#ifndef STRSPN
+# define STRSPN __strspn_sse42
+#endif
+
+#ifndef SECTION
+# define SECTION "sse4.2"
+#endif
 
 size_t
-__attribute__ ((section (".text.sse4.2")))
-__strspn_sse42 (const char *s, const char *a)
+__attribute__ ((section (".text." SECTION)))
+STRSPN (const char *s, const char *a)
 {
   if (*a == 0)
     return 0;
@@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a)
   maskz_bits = _mm_movemask_epi8 (maskz);
   if (maskz_bits == 0)
     {
-      /* There is no NULL terminator.  Don't use SSE4.2 if the length
-         of A > 16.  */
+      /* There is no NULL terminator.  Don't use pcmpstri based approach if the
+	 length of A > 16.  */
       if (a[16] != 0)
         return __strspn_sse2 (s, a);
     }
diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c
index 07f5def155..c3c5e7a3cc 100644
--- a/sysdeps/x86_64/multiarch/strspn.c
+++ b/sysdeps/x86_64/multiarch/strspn.c
@@ -24,7 +24,7 @@
 # undef strspn
 
 # define SYMBOL_NAME strspn
-# include "ifunc-sse4_2.h"
+# include "ifunc-avx.h"
 
 libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ());
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity
  2022-06-09  4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
  2022-06-09  4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
@ 2022-06-09  4:16 ` Noah Goldstein
  2022-06-10  0:58   ` [PATCH v2] " Noah Goldstein
  2022-06-16 22:11   ` [PATCH v3] " Noah Goldstein
  2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
  2 siblings, 2 replies; 11+ messages in thread
From: Noah Goldstein @ 2022-06-09  4:16 UTC (permalink / raw)
  To: libc-alpha

No functions are changed. It just renames generic implementations from
'{func}_sse2' to '{func}_generic'. This is because the postfix "_sse2"
was overloaded: it was used both for files that had hand-optimized
sse2 assembly implementations and for files that just redirected back
to the generic implementation.

Full xcheck passed on x86_64.
---
 sysdeps/x86_64/multiarch/Makefile                |  6 +++---
 sysdeps/x86_64/multiarch/ifunc-avx.h             |  4 ++--
 sysdeps/x86_64/multiarch/ifunc-impl-list.c       | 16 ++++++++--------
 sysdeps/x86_64/multiarch/ifunc-strcpy.h          |  8 ++++++--
 sysdeps/x86_64/multiarch/ifunc-wcslen.h          |  8 ++++++--
 sysdeps/x86_64/multiarch/stpncpy-c.c             |  2 +-
 sysdeps/x86_64/multiarch/stpncpy.c               |  1 +
 sysdeps/x86_64/multiarch/strcspn-c-sse4.c        |  2 +-
 .../multiarch/{strcspn-sse2.c => strcspn-c.c}    |  2 +-
 sysdeps/x86_64/multiarch/strncat-c.c             |  2 +-
 sysdeps/x86_64/multiarch/strncat.c               |  1 +
 sysdeps/x86_64/multiarch/strncpy-c.c             |  2 +-
 sysdeps/x86_64/multiarch/strncpy.c               |  1 +
 sysdeps/x86_64/multiarch/strpbrk-c-avx.c         |  2 +-
 sysdeps/x86_64/multiarch/strpbrk-c-sse4.c        |  2 +-
 .../multiarch/{strpbrk-sse2.c => strpbrk-c.c}    |  2 +-
 sysdeps/x86_64/multiarch/strspn-c-sse4.c         |  4 ++--
 .../multiarch/{strspn-sse2.c => strspn-c.c}      |  2 +-
 sysdeps/x86_64/multiarch/wcscpy-c.c              |  2 +-
 sysdeps/x86_64/multiarch/wcscpy.c                |  4 ++--
 sysdeps/x86_64/multiarch/wcsnlen-c.c             |  4 ++--
 sysdeps/x86_64/multiarch/wcsnlen.c               |  1 +
 22 files changed, 45 insertions(+), 33 deletions(-)
 rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-c.c} (96%)
 rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-c.c} (96%)
 rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-c.c} (96%)

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 27f306c7c8..9b1e0add1a 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -76,9 +76,9 @@ sysdep_routines += \
   strcpy-evex \
   strcpy-sse2 \
   strcpy-sse2-unaligned \
+  strcspn-c \
   strcspn-c-avx \
   strcspn-c-sse4 \
-  strcspn-sse2 \
   strlen-avx2 \
   strlen-avx2-rtm \
   strlen-evex \
@@ -109,16 +109,16 @@ sysdep_routines += \
   strnlen-evex \
   strnlen-evex512 \
   strnlen-sse2 \
+  strpbrk-c \
   strpbrk-c-avx \
   strpbrk-c-sse4 \
-  strpbrk-sse2 \
   strrchr-avx2 \
   strrchr-avx2-rtm \
   strrchr-evex \
   strrchr-sse2 \
+  strspn-c \
   strspn-c-avx \
   strspn-c-sse4 \
-  strspn-sse2 \
   strstr-avx512 \
   strstr-sse2-unaligned \
   varshift \
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx.h b/sysdeps/x86_64/multiarch/ifunc-avx.h
index 891f3ddcac..30efbd29d0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx.h
@@ -19,7 +19,7 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
 
@@ -34,5 +34,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
     return OPTIMIZE (sse42);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (generic);
 }
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 507c563669..23a2d7114d 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __stpncpy_evex)
 	      IFUNC_IMPL_ADD (array, i, stpncpy, 1,
 			      __stpncpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/stpcpy.c.  */
   IFUNC_IMPL (i, name, stpcpy,
@@ -533,7 +533,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strcspn_avx)
 	      IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strcspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
+	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
 
   /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
   IFUNC_IMPL (i, name, strncasecmp,
@@ -587,7 +587,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncat_evex)
 	      IFUNC_IMPL_ADD (array, i, strncat, 1,
 			      __strncat_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
+	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
 
   /* Support sysdeps/x86_64/multiarch/strncpy.c.  */
   IFUNC_IMPL (i, name, strncpy,
@@ -603,7 +603,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncpy_evex)
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1,
 			      __strncpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
   IFUNC_IMPL (i, name, strpbrk,
@@ -611,7 +611,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strpbrk_avx)
 	      IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
 			      __strpbrk_sse42)
-	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
+	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
 
 
   /* Support sysdeps/x86_64/multiarch/strspn.c.  */
@@ -620,7 +620,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strspn_avx)
 	      IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
+	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
 
   /* Support sysdeps/x86_64/multiarch/strstr.c.  */
   IFUNC_IMPL (i, name, strstr,
@@ -703,7 +703,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, wcscpy,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
 			      __wcscpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
   IFUNC_IMPL (i, name, wcslen,
@@ -755,7 +755,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcsnlen,
 			      CPU_FEATURE_USABLE (SSE4_1),
 			      __wcsnlen_sse4_1)
-	      IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+	      IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
 
   /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
   IFUNC_IMPL (i, name, wmemchr,
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
index a15afa44e9..80529458d1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
@@ -20,7 +20,11 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
@@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return OPTIMIZE (sse2_unaligned);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (GENERIC);
 }
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
index 2b29e7608a..88c1c502af 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
@@ -19,7 +19,11 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
@@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
     return OPTIMIZE (sse4_1);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (GENERIC);
 }
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
index b016e487e1..eb62fcf388 100644
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
@@ -1,4 +1,4 @@
-#define STPNCPY __stpncpy_sse2
+#define STPNCPY __stpncpy_generic
 #undef weak_alias
 #define weak_alias(ignored1, ignored2)
 #undef libc_hidden_def
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
index 82fa53957d..879bc83f0b 100644
--- a/sysdeps/x86_64/multiarch/stpncpy.c
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -25,6 +25,7 @@
 # undef stpncpy
 # undef __stpncpy
 
+# define GENERIC generic
 # define SYMBOL_NAME stpncpy
 # include "ifunc-strcpy.h"
 
diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
index 848c3cfb14..8541035ccb 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
@@ -53,7 +53,7 @@
    X for case 1.  */
 
 #ifndef STRCSPN_FALLBACK
-# define STRCSPN_FALLBACK __strcspn_sse2
+# define STRCSPN_FALLBACK __strcspn_generic
 #endif
 
 #ifndef STRCSPN
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-c.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strcspn-sse2.c
rename to sysdeps/x86_64/multiarch/strcspn-c.c
index 3a04bb39fc..423de2e2b2 100644
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -19,7 +19,7 @@
 #if IS_IN (libc)
 
 # include <sysdep.h>
-# define STRCSPN __strcspn_sse2
+# define STRCSPN __strcspn_generic
 
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(STRCSPN)
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
index 93a7fab7ea..b729c033d9 100644
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -1,2 +1,2 @@
-#define STRNCAT __strncat_sse2
+#define STRNCAT __strncat_generic
 #include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
index b649343a97..50fba8a41f 100644
--- a/sysdeps/x86_64/multiarch/strncat.c
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -24,6 +24,7 @@
 # undef strncat
 
 # define SYMBOL_NAME strncat
+# define GENERIC generic
 # include "ifunc-strcpy.h"
 
 libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
index 57c45ac7ab..183b0b8e0f 100644
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ b/sysdeps/x86_64/multiarch/strncpy-c.c
@@ -1,4 +1,4 @@
-#define STRNCPY __strncpy_sse2
+#define STRNCPY __strncpy_generic
 #undef libc_hidden_builtin_def
 #define libc_hidden_builtin_def(strncpy)
 
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
index 2a780a7e16..7fc7d72ec5 100644
--- a/sysdeps/x86_64/multiarch/strncpy.c
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -24,6 +24,7 @@
 # undef strncpy
 
 # define SYMBOL_NAME strncpy
+# define GENERIC generic
 # include "ifunc-strcpy.h"
 
 libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
index 2918013994..363daebd9e 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
@@ -17,7 +17,7 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define USE_AS_STRPBRK
-#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN_FALLBACK __strpbrk_generic
 #define STRCSPN __strpbrk_avx
 #define SECTION "avx"
 #include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
index 2efd38d809..a02c951dfd 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
@@ -17,6 +17,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define USE_AS_STRPBRK
-#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN_FALLBACK __strpbrk_generic
 #define STRCSPN __strpbrk_sse42
 #include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c
rename to sysdeps/x86_64/multiarch/strpbrk-c.c
index d03214c4fb..d31acfe495 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
@@ -19,7 +19,7 @@
 #if IS_IN (libc)
 
 # include <sysdep.h>
-# define STRPBRK __strpbrk_sse2
+# define STRPBRK __strpbrk_generic
 
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(STRPBRK)
diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
index 6a91def2e0..9323a117ab 100644
--- a/sysdeps/x86_64/multiarch/strspn-c-sse4.c
+++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
@@ -51,7 +51,7 @@
 
    We exit from the loop for case 1.  */
 
-extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
+extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
 
 #ifndef STRSPN
 # define STRSPN __strspn_sse42
@@ -105,7 +105,7 @@ STRSPN (const char *s, const char *a)
       /* There is no NULL terminator.  Don't use pcmpstri based approach if the
 	 length of A > 16.  */
       if (a[16] != 0)
-        return __strspn_sse2 (s, a);
+        return __strspn_generic (s, a);
     }
   aligned = s;
   offset = (unsigned int) ((size_t) s & 15);
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-c.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strspn-sse2.c
rename to sysdeps/x86_64/multiarch/strspn-c.c
index 61cc6cb0a5..6b50c36432 100644
--- a/sysdeps/x86_64/multiarch/strspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -19,7 +19,7 @@
 #if IS_IN (libc)
 
 # include <sysdep.h>
-# define STRSPN __strspn_sse2
+# define STRSPN __strspn_generic
 
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(STRSPN)
diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
index 26d6984e9b..fa38dd898d 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-c.c
+++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
@@ -1,5 +1,5 @@
 #if IS_IN (libc)
-# define WCSCPY  __wcscpy_sse2
+# define WCSCPY  __wcscpy_generic
 #endif
 
 #include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
index 6a2d1421d9..53c3228dc2 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.c
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -26,7 +26,7 @@
 # define SYMBOL_NAME wcscpy
 # include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
 
 static inline void *
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
     return OPTIMIZE (ssse3);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (generic);
 }
 
 libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
index e1ec7cfbb5..1c9c04241a 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
@@ -1,9 +1,9 @@
 #if IS_IN (libc)
 # include <wchar.h>
 
-# define WCSNLEN __wcsnlen_sse2
+# define WCSNLEN __wcsnlen_generic
 
-extern __typeof (wcsnlen) __wcsnlen_sse2;
+extern __typeof (wcsnlen) __wcsnlen_generic;
 #endif
 
 #include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index baa26666a8..05b7a211de 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -24,6 +24,7 @@
 # undef __wcsnlen
 
 # define SYMBOL_NAME wcsnlen
+# define GENERIC generic
 # include "ifunc-wcslen.h"
 
 libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
-- 
2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 1/3] x86: Align varshift table to 32-bytes
  2022-06-09  4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
  2022-06-09  4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
  2022-06-09  4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
@ 2022-06-09 15:14 ` H.J. Lu
  2022-07-14  2:51   ` Sunil Pandey
  2 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2022-06-09 15:14 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> This ensures the load will never split a cache line.
> ---
>  sysdeps/x86_64/multiarch/varshift.c | 5 +++--
>  sysdeps/x86_64/multiarch/varshift.h | 3 ++-
>  2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
> index c8210f0546..d27767520a 100644
> --- a/sysdeps/x86_64/multiarch/varshift.c
> +++ b/sysdeps/x86_64/multiarch/varshift.c
> @@ -16,9 +16,10 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include "varshift.h"
> +#include <stdint.h>
>
> -const int8_t ___m128i_shift_right[31] attribute_hidden =
> +const int8_t ___m128i_shift_right[31] attribute_hidden
> +    __attribute__((aligned(32))) =
>    {
>      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
>      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
> diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
> index af30694488..ffd12d79e4 100644
> --- a/sysdeps/x86_64/multiarch/varshift.h
> +++ b/sysdeps/x86_64/multiarch/varshift.h
> @@ -19,7 +19,8 @@
>  #include <stdint.h>
>  #include <tmmintrin.h>
>
> -extern const int8_t ___m128i_shift_right[31] attribute_hidden;
> +extern const int8_t ___m128i_shift_right[31] attribute_hidden
> +    __attribute__ ((aligned (32)));
>
>  static __inline__ __m128i
>  __m128i_shift_right (__m128i value, unsigned long int offset)
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn,  and strpbrk
  2022-06-09  4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
@ 2022-06-09 15:28   ` H.J. Lu
  0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2022-06-09 15:28 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> No change to the actual logic of the functions. The goal is to so the
> avx/avx2 machines rely less of sse instructions.

These aren't the only SSE2 functions.  The rest of glibc may still be compiled
with SSE2.  A different approach is to compile the whole of glibc with x86-64
ISA level 3.

> Full xcheck passes on x86_64.
> ---
>  sysdeps/x86_64/multiarch/Makefile             | 21 ++++++++++-----
>  .../multiarch/{ifunc-sse4_2.h => ifunc-avx.h} |  4 +++
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c    |  6 +++++
>  sysdeps/x86_64/multiarch/strcspn-c-avx.c      | 21 +++++++++++++++
>  .../{strcspn-c.c => strcspn-c-sse4.c}         | 26 ++++++++++++-------
>  sysdeps/x86_64/multiarch/strcspn.c            |  2 +-
>  sysdeps/x86_64/multiarch/strpbrk-c-avx.c      | 23 ++++++++++++++++
>  .../{strpbrk-c.c => strpbrk-c-sse4.c}         |  6 ++---
>  sysdeps/x86_64/multiarch/strpbrk.c            |  2 +-
>  sysdeps/x86_64/multiarch/strspn-c-avx.c       | 21 +++++++++++++++
>  .../multiarch/{strspn-c.c => strspn-c-sse4.c} | 15 ++++++++---
>  sysdeps/x86_64/multiarch/strspn.c             |  2 +-
>  12 files changed, 122 insertions(+), 27 deletions(-)
>  rename sysdeps/x86_64/multiarch/{ifunc-sse4_2.h => ifunc-avx.h} (89%)
>  create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-avx.c
>  rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-c-sse4.c} (90%)
>  create mode 100644 sysdeps/x86_64/multiarch/strpbrk-c-avx.c
>  rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-c-sse4.c} (89%)
>  create mode 100644 sysdeps/x86_64/multiarch/strspn-c-avx.c
>  rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-c-sse4.c} (92%)
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 3d153cac35..27f306c7c8 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -76,7 +76,8 @@ sysdep_routines += \
>    strcpy-evex \
>    strcpy-sse2 \
>    strcpy-sse2-unaligned \
> -  strcspn-c \
> +  strcspn-c-avx \
> +  strcspn-c-sse4 \
>    strcspn-sse2 \
>    strlen-avx2 \
>    strlen-avx2-rtm \
> @@ -108,22 +109,28 @@ sysdep_routines += \
>    strnlen-evex \
>    strnlen-evex512 \
>    strnlen-sse2 \
> -  strpbrk-c \
> +  strpbrk-c-avx \
> +  strpbrk-c-sse4 \
>    strpbrk-sse2 \
>    strrchr-avx2 \
>    strrchr-avx2-rtm \
>    strrchr-evex \
>    strrchr-sse2 \
> -  strspn-c \
> +  strspn-c-avx \
> +  strspn-c-sse4 \
>    strspn-sse2 \
>    strstr-avx512 \
>    strstr-sse2-unaligned \
>    varshift \
>  # sysdep_routines
> -CFLAGS-varshift.c += -msse4
> -CFLAGS-strcspn-c.c += -msse4
> -CFLAGS-strpbrk-c.c += -msse4
> -CFLAGS-strspn-c.c += -msse4
> +
> +CFLAGS-strcspn-c-avx.c += -mavx
> +CFLAGS-strcspn-c-sse4.c += -msse4
> +CFLAGS-strpbrk-c-avx.c += -mavx
> +CFLAGS-strpbrk-c-sse4.c += -msse4
> +CFLAGS-strspn-c-avx.c += -mavx
> +CFLAGS-strspn-c-sse4.c += -msse4
> +
>  CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
>  endif
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h
> similarity index 89%
> rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> rename to sysdeps/x86_64/multiarch/ifunc-avx.h
> index b555ff2fac..891f3ddcac 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-avx.h
> @@ -21,12 +21,16 @@
>
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
>
>  static inline void *
>  IFUNC_SELECTOR (void)
>  {
>    const struct cpu_features* cpu_features = __get_cpu_features ();
>
> +  if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
> +    return OPTIMIZE (avx);
> +
>    if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
>      return OPTIMIZE (sse42);
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 58f3ec8306..507c563669 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
>    /* Support sysdeps/x86_64/multiarch/strcspn.c.  */
>    IFUNC_IMPL (i, name, strcspn,
> +             IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX),
> +                             __strcspn_avx)
>               IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
>                               __strcspn_sse42)
>               IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> @@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
>    /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
>    IFUNC_IMPL (i, name, strpbrk,
> +             IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX),
> +                             __strpbrk_avx)
>               IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
>                               __strpbrk_sse42)
>               IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> @@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
>    /* Support sysdeps/x86_64/multiarch/strspn.c.  */
>    IFUNC_IMPL (i, name, strspn,
> +             IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX),
> +                             __strspn_avx)
>               IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
>                               __strspn_sse42)
>               IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
> new file mode 100644
> index 0000000000..b8d983f79f
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
> @@ -0,0 +1,21 @@
> +/* strcspn with AVX intrinsics
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define STRCSPN __strcspn_avx
> +#define SECTION "avx"
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> similarity index 90%
> rename from sysdeps/x86_64/multiarch/strcspn-c.c
> rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> index c312fab8b1..848c3cfb14 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> @@ -52,9 +52,16 @@
>     when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
>     X for case 1.  */
>
> -#ifndef STRCSPN_SSE2
> -# define STRCSPN_SSE2 __strcspn_sse2
> -# define STRCSPN_SSE42 __strcspn_sse42
> +#ifndef STRCSPN_FALLBACK
> +# define STRCSPN_FALLBACK __strcspn_sse2
> +#endif
> +
> +#ifndef STRCSPN
> +# define STRCSPN __strcspn_sse42
> +#endif
> +
> +#ifndef SECTION
> +# define SECTION "sse4.2"
>  #endif
>
>  #ifdef USE_AS_STRPBRK
> @@ -69,16 +76,15 @@ char *
>  #else
>  size_t
>  #endif
> -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> -
> +STRCSPN_FALLBACK (const char *, const char *) attribute_hidden;
>
>  #ifdef USE_AS_STRPBRK
>  char *
>  #else
>  size_t
>  #endif
> -__attribute__ ((section (".text.sse4.2")))
> -STRCSPN_SSE42 (const char *s, const char *a)
> +__attribute__ ((section (".text." SECTION)))
> +STRCSPN (const char *s, const char *a)
>  {
>    if (*a == 0)
>      RETURN (NULL, strlen (s));
> @@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a)
>    maskz_bits = _mm_movemask_epi8 (maskz);
>    if (maskz_bits == 0)
>      {
> -      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> -         of A > 16.  */
> +      /* There is no NULL terminator.  Don't use pcmpstri based approach if the
> +        length of A > 16.  */
>        if (a[16] != 0)
> -        return STRCSPN_SSE2 (s, a);
> +        return STRCSPN_FALLBACK (s, a);
>      }
>
>    aligned = s;
> diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c
> index 4848fa8677..63e1cf052e 100644
> --- a/sysdeps/x86_64/multiarch/strcspn.c
> +++ b/sysdeps/x86_64/multiarch/strcspn.c
> @@ -24,7 +24,7 @@
>  # undef strcspn
>
>  # define SYMBOL_NAME strcspn
> -# include "ifunc-sse4_2.h"
> +# include "ifunc-avx.h"
>
>  libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ());
>
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
> new file mode 100644
> index 0000000000..2918013994
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
> @@ -0,0 +1,23 @@
> +/* strpbrk with AVX intrinsics
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define USE_AS_STRPBRK
> +#define STRCSPN_FALLBACK __strpbrk_sse2
> +#define STRCSPN __strpbrk_avx
> +#define SECTION "avx"
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> similarity index 89%
> rename from sysdeps/x86_64/multiarch/strpbrk-c.c
> rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> index abf4ff7f1a..2efd38d809 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> @@ -17,6 +17,6 @@
>     <https://www.gnu.org/licenses/>.  */
>
>  #define USE_AS_STRPBRK
> -#define STRCSPN_SSE2 __strpbrk_sse2
> -#define STRCSPN_SSE42 __strpbrk_sse42
> -#include "strcspn-c.c"
> +#define STRCSPN_FALLBACK __strpbrk_sse2
> +#define STRCSPN __strpbrk_sse42
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c
> index 04e300ea71..ab5b04a482 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk.c
> @@ -24,7 +24,7 @@
>  # undef strpbrk
>
>  # define SYMBOL_NAME strpbrk
> -# include "ifunc-sse4_2.h"
> +# include "ifunc-avx.h"
>
>  libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ());
>
> diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c
> new file mode 100644
> index 0000000000..9d5fdb9550
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c
> @@ -0,0 +1,21 @@
> +/* strspn with AVX intrinsics
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define STRSPN __strspn_avx
> +#define SECTION "avx"
> +#include "strspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> similarity index 92%
> rename from sysdeps/x86_64/multiarch/strspn-c.c
> rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c
> index 6124033ceb..6a91def2e0 100644
> --- a/sysdeps/x86_64/multiarch/strspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> @@ -53,10 +53,17 @@
>
>  extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
>
> +#ifndef STRSPN
> +# define STRSPN __strspn_sse42
> +#endif
> +
> +#ifndef SECTION
> +# define SECTION "sse4.2"
> +#endif
>
>  size_t
> -__attribute__ ((section (".text.sse4.2")))
> -__strspn_sse42 (const char *s, const char *a)
> +__attribute__ ((section (".text." SECTION)))
> +STRSPN (const char *s, const char *a)
>  {
>    if (*a == 0)
>      return 0;
> @@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a)
>    maskz_bits = _mm_movemask_epi8 (maskz);
>    if (maskz_bits == 0)
>      {
> -      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> -         of A > 16.  */
> +      /* There is no NULL terminator.  Don't use pcmpstri based approach if the
> +        length of A > 16.  */
>        if (a[16] != 0)
>          return __strspn_sse2 (s, a);
>      }
> diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c
> index 07f5def155..c3c5e7a3cc 100644
> --- a/sysdeps/x86_64/multiarch/strspn.c
> +++ b/sysdeps/x86_64/multiarch/strspn.c
> @@ -24,7 +24,7 @@
>  # undef strspn
>
>  # define SYMBOL_NAME strspn
> -# include "ifunc-sse4_2.h"
> +# include "ifunc-avx.h"
>
>  libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ());
>
> --
> 2.34.1
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v2] x86: Rename generic functions with unique postfix for clarity
  2022-06-09  4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
@ 2022-06-10  0:58   ` Noah Goldstein
  2022-06-10  1:19     ` H.J. Lu
  2022-06-16 22:11   ` [PATCH v3] " Noah Goldstein
  1 sibling, 1 reply; 11+ messages in thread
From: Noah Goldstein @ 2022-06-10  0:58 UTC (permalink / raw)
  To: libc-alpha

No functions are changed. It just renames generic implementations from
'{func}_sse2' to '{func}_generic'. This is because the postfix "_sse2"
was overloaded: it was used both for files that had hand-optimized
sse2 assembly implementations and for files that just redirected back
to the generic implementation.

Full xcheck passed on x86_64.
---
 sysdeps/x86_64/multiarch/Makefile             |  15 +-
 sysdeps/x86_64/multiarch/ifunc-impl-list.c    |  16 +-
 sysdeps/x86_64/multiarch/ifunc-sse4_2.h       |   4 +-
 sysdeps/x86_64/multiarch/ifunc-strcpy.h       |   8 +-
 sysdeps/x86_64/multiarch/ifunc-wcslen.h       |   8 +-
 sysdeps/x86_64/multiarch/stpncpy-c.c          |   2 +-
 sysdeps/x86_64/multiarch/stpncpy.c            |   1 +
 sysdeps/x86_64/multiarch/strcspn-c-sse4.c     | 163 ++++++++++++++++++
 sysdeps/x86_64/multiarch/strcspn-c.c          | 151 +---------------
 sysdeps/x86_64/multiarch/strcspn-sse2.c       |  28 ---
 sysdeps/x86_64/multiarch/strncat-c.c          |   2 +-
 sysdeps/x86_64/multiarch/strncat.c            |   1 +
 sysdeps/x86_64/multiarch/strncpy-c.c          |   2 +-
 sysdeps/x86_64/multiarch/strncpy.c            |   1 +
 .../{strspn-sse2.c => strpbrk-c-sse4.c}       |  18 +-
 sysdeps/x86_64/multiarch/strpbrk-c.c          |  18 +-
 sysdeps/x86_64/multiarch/strpbrk-sse2.c       |  28 ---
 sysdeps/x86_64/multiarch/strspn-c-sse4.c      | 136 +++++++++++++++
 sysdeps/x86_64/multiarch/strspn-c.c           | 126 +-------------
 sysdeps/x86_64/multiarch/wcscpy-c.c           |   2 +-
 sysdeps/x86_64/multiarch/wcscpy.c             |   4 +-
 sysdeps/x86_64/multiarch/wcsnlen-c.c          |   4 +-
 sysdeps/x86_64/multiarch/wcsnlen.c            |   1 +
 23 files changed, 376 insertions(+), 363 deletions(-)
 create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c
 delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c
 rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%)
 delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c
 create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3d153cac35..86c6ecdfc1 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -77,7 +77,7 @@ sysdep_routines += \
   strcpy-sse2 \
   strcpy-sse2-unaligned \
   strcspn-c \
-  strcspn-sse2 \
+  strcspn-c-sse4 \
   strlen-avx2 \
   strlen-avx2-rtm \
   strlen-evex \
@@ -109,21 +109,22 @@ sysdep_routines += \
   strnlen-evex512 \
   strnlen-sse2 \
   strpbrk-c \
-  strpbrk-sse2 \
+  strpbrk-c-sse4 \
   strrchr-avx2 \
   strrchr-avx2-rtm \
   strrchr-evex \
   strrchr-sse2 \
   strspn-c \
-  strspn-sse2 \
+  strspn-c-sse4 \
   strstr-avx512 \
   strstr-sse2-unaligned \
   varshift \
 # sysdep_routines
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+
+CFLAGS-strcspn-c-sse4.c += -msse4
+CFLAGS-strpbrk-c-sse4.c += -msse4
+CFLAGS-strspn-c-sse4.c += -msse4
+
 CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
 endif
 
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 58f3ec8306..4cbd200d39 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __stpncpy_evex)
 	      IFUNC_IMPL_ADD (array, i, stpncpy, 1,
 			      __stpncpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/stpcpy.c.  */
   IFUNC_IMPL (i, name, stpcpy,
@@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcspn,
 	      IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strcspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
+	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
 
   /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
   IFUNC_IMPL (i, name, strncasecmp,
@@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncat_evex)
 	      IFUNC_IMPL_ADD (array, i, strncat, 1,
 			      __strncat_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
+	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
 
   /* Support sysdeps/x86_64/multiarch/strncpy.c.  */
   IFUNC_IMPL (i, name, strncpy,
@@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncpy_evex)
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1,
 			      __strncpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
   IFUNC_IMPL (i, name, strpbrk,
 	      IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
 			      __strpbrk_sse42)
-	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
+	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
 
 
   /* Support sysdeps/x86_64/multiarch/strspn.c.  */
   IFUNC_IMPL (i, name, strspn,
 	      IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
+	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
 
   /* Support sysdeps/x86_64/multiarch/strstr.c.  */
   IFUNC_IMPL (i, name, strstr,
@@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, wcscpy,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
 			      __wcscpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
   IFUNC_IMPL (i, name, wcslen,
@@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcsnlen,
 			      CPU_FEATURE_USABLE (SSE4_1),
 			      __wcsnlen_sse4_1)
-	      IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+	      IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
 
   /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
   IFUNC_IMPL (i, name, wmemchr,
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
index b555ff2fac..ee36525bcf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
@@ -19,7 +19,7 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
 
 static inline void *
@@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
     return OPTIMIZE (sse42);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (generic);
 }
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
index a15afa44e9..80529458d1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
@@ -20,7 +20,11 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
@@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return OPTIMIZE (sse2_unaligned);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (GENERIC);
 }
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
index 2b29e7608a..88c1c502af 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
@@ -19,7 +19,11 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
@@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
     return OPTIMIZE (sse4_1);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (GENERIC);
 }
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
index b016e487e1..eb62fcf388 100644
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
@@ -1,4 +1,4 @@
-#define STPNCPY __stpncpy_sse2
+#define STPNCPY __stpncpy_generic
 #undef weak_alias
 #define weak_alias(ignored1, ignored2)
 #undef libc_hidden_def
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
index 82fa53957d..879bc83f0b 100644
--- a/sysdeps/x86_64/multiarch/stpncpy.c
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -25,6 +25,7 @@
 # undef stpncpy
 # undef __stpncpy
 
+# define GENERIC generic
 # define SYMBOL_NAME stpncpy
 # include "ifunc-strcpy.h"
 
diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
new file mode 100644
index 0000000000..59f64f9fe8
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
@@ -0,0 +1,163 @@
+/* strcspn with SSE4.2 intrinsics
+   Copyright (C) 2009-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <nmmintrin.h>
+#include <string.h>
+#include "varshift.h"
+
+/* We use 0x2:
+	_SIDD_SBYTE_OPS
+	| _SIDD_CMP_EQUAL_ANY
+	| _SIDD_POSITIVE_POLARITY
+	| _SIDD_LEAST_SIGNIFICANT
+   on pcmpistri to compare xmm/mem128
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   X X X X X X X X X X X X X X X X
+
+   against xmm
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   A A A A A A A A A A A A A A A A
+
+   to find out if the first 16byte data element has any byte A and
+   the offset of the first byte.  There are 3 cases:
+
+   1. The first 16byte data element has the byte A at the offset X.
+   2. The first 16byte data element has EOS and doesn't have the byte A.
+   3. The first 16byte data element is valid and doesn't have the byte A.
+
+   Here is the table of ECX, CFlag, ZFlag and SFlag for the 3 cases:
+
+    1		 X	  1	 0/1	  0
+    2		16	  0	  1	  0
+    3		16	  0	  0	  0
+
+   We exit from the loop for cases 1 and 2 with jbe which branches
+   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
+   X for case 1.  */
+
+#ifndef STRCSPN_GENERIC
+# define STRCSPN_GENERIC __strcspn_generic
+# define STRCSPN_SSE42 __strcspn_sse42
+#endif
+
+#ifdef USE_AS_STRPBRK
+# define RETURN(val1, val2) return val1
+#else
+# define RETURN(val1, val2) return val2
+#endif
+
+extern
+#ifdef USE_AS_STRPBRK
+char *
+#else
+size_t
+#endif
+STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
+
+
+#ifdef USE_AS_STRPBRK
+char *
+#else
+size_t
+#endif
+__attribute__ ((section (".text.sse4.2")))
+STRCSPN_SSE42 (const char *s, const char *a)
+{
+  if (*a == 0)
+    RETURN (NULL, strlen (s));
+
+  const char *aligned;
+  __m128i mask, maskz, zero;
+  unsigned int maskz_bits;
+  unsigned int offset = (unsigned int) ((size_t) a & 15);
+  zero = _mm_set1_epi8 (0);
+  if (offset != 0)
+    {
+      /* Load masks.  */
+      aligned = (const char *) ((size_t) a & -16L);
+      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
+      maskz = _mm_cmpeq_epi8 (mask0, zero);
+
+      /* Find where the NULL terminator is.  */
+      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+      if (maskz_bits != 0)
+        {
+          mask = __m128i_shift_right (mask0, offset);
+          offset = (unsigned int) ((size_t) s & 15);
+          if (offset)
+            goto start_unaligned;
+
+          aligned = s;
+          goto start_loop;
+        }
+    }
+
+  /* A is aligned.  */
+  mask = _mm_loadu_si128 ((__m128i *) a);
+  /* Find where the NULL terminator is.  */
+  maskz = _mm_cmpeq_epi8 (mask, zero);
+  maskz_bits = _mm_movemask_epi8 (maskz);
+  if (maskz_bits == 0)
+    {
+      /* There is no NULL terminator.  Don't use SSE4.2 if the length
+         of A > 16.  */
+      if (a[16] != 0)
+        return STRCSPN_GENERIC (s, a);
+    }
+
+  aligned = s;
+  offset = (unsigned int) ((size_t) s & 15);
+  if (offset != 0)
+    {
+    start_unaligned:
+      /* Check partial string.  */
+      aligned = (const char *) ((size_t) s & -16L);
+      __m128i value = _mm_load_si128 ((__m128i *) aligned);
+
+      value = __m128i_shift_right (value, offset);
+
+      unsigned int length = _mm_cmpistri (mask, value, 0x2);
+      /* No need to check ZFlag since ZFlag is always 1.  */
+      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+      if (cflag)
+	RETURN ((char *) (s + length), length);
+      /* Find where the NULL terminator is.  */
+      unsigned int index = _mm_cmpistri (value, value, 0x3a);
+      if (index < 16 - offset)
+	RETURN (NULL, index);
+      aligned += 16;
+    }
+
+start_loop:
+  while (1)
+    {
+      __m128i value = _mm_load_si128 ((__m128i *) aligned);
+      unsigned int index = _mm_cmpistri (mask, value, 0x2);
+      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+      unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
+      if (cflag)
+	RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
+      if (zflag)
+	RETURN (NULL,
+		/* Find where the NULL terminator is.  */
+		(size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
+      aligned += 16;
+    }
+}
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index c312fab8b1..423de2e2b2 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -1,5 +1,5 @@
-/* strcspn with SSE4.2 intrinsics
-   Copyright (C) 2009-2022 Free Software Foundation, Inc.
+/* strcspn.
+   Copyright (C) 2017-2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,148 +16,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
+#if IS_IN (libc)
 
-/* We use 0x2:
-	_SIDD_SBYTE_OPS
-	| _SIDD_CMP_EQUAL_ANY
-	| _SIDD_POSITIVE_POLARITY
-	| _SIDD_LEAST_SIGNIFICANT
-   on pcmpistri to compare xmm/mem128
+# include <sysdep.h>
+# define STRCSPN __strcspn_generic
 
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   X X X X X X X X X X X X X X X X
-
-   against xmm
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   A A A A A A A A A A A A A A A A
-
-   to find out if the first 16byte data element has any byte A and
-   the offset of the first byte.  There are 3 cases:
-
-   1. The first 16byte data element has the byte A at the offset X.
-   2. The first 16byte data element has EOS and doesn't have the byte A.
-   3. The first 16byte data element is valid and doesn't have the byte A.
-
-   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
-    1		 X	  1	 0/1	  0
-    2		16	  0	  1	  0
-    3		16	  0	  0	  0
-
-   We exit from the loop for cases 1 and 2 with jbe which branches
-   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
-   X for case 1.  */
-
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
-# define STRCSPN_SSE42 __strcspn_sse42
-#endif
-
-#ifdef USE_AS_STRPBRK
-# define RETURN(val1, val2) return val1
-#else
-# define RETURN(val1, val2) return val2
-#endif
-
-extern
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
-#endif
-STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
-
-
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRCSPN)
 #endif
-__attribute__ ((section (".text.sse4.2")))
-STRCSPN_SSE42 (const char *s, const char *a)
-{
-  if (*a == 0)
-    RETURN (NULL, strlen (s));
-
-  const char *aligned;
-  __m128i mask, maskz, zero;
-  unsigned int maskz_bits;
-  unsigned int offset = (unsigned int) ((size_t) a & 15);
-  zero = _mm_set1_epi8 (0);
-  if (offset != 0)
-    {
-      /* Load masks.  */
-      aligned = (const char *) ((size_t) a & -16L);
-      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
-      maskz = _mm_cmpeq_epi8 (mask0, zero);
-
-      /* Find where the NULL terminator is.  */
-      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
-      if (maskz_bits != 0)
-        {
-          mask = __m128i_shift_right (mask0, offset);
-          offset = (unsigned int) ((size_t) s & 15);
-          if (offset)
-            goto start_unaligned;
-
-          aligned = s;
-          goto start_loop;
-        }
-    }
-
-  /* A is aligned.  */
-  mask = _mm_loadu_si128 ((__m128i *) a);
-  /* Find where the NULL terminator is.  */
-  maskz = _mm_cmpeq_epi8 (mask, zero);
-  maskz_bits = _mm_movemask_epi8 (maskz);
-  if (maskz_bits == 0)
-    {
-      /* There is no NULL terminator.  Don't use SSE4.2 if the length
-         of A > 16.  */
-      if (a[16] != 0)
-        return STRCSPN_SSE2 (s, a);
-    }
-
-  aligned = s;
-  offset = (unsigned int) ((size_t) s & 15);
-  if (offset != 0)
-    {
-    start_unaligned:
-      /* Check partial string.  */
-      aligned = (const char *) ((size_t) s & -16L);
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-
-      value = __m128i_shift_right (value, offset);
-
-      unsigned int length = _mm_cmpistri (mask, value, 0x2);
-      /* No need to check ZFlag since ZFlag is always 1.  */
-      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
-      if (cflag)
-	RETURN ((char *) (s + length), length);
-      /* Find where the NULL terminator is.  */
-      unsigned int index = _mm_cmpistri (value, value, 0x3a);
-      if (index < 16 - offset)
-	RETURN (NULL, index);
-      aligned += 16;
-    }
 
-start_loop:
-  while (1)
-    {
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-      unsigned int index = _mm_cmpistri (mask, value, 0x2);
-      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
-      unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
-      if (cflag)
-	RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
-      if (zflag)
-	RETURN (NULL,
-		/* Find where the NULL terminator is.  */
-		(size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
-      aligned += 16;
-    }
-}
+#include <string/strcspn.c>
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c
deleted file mode 100644
index 3a04bb39fc..0000000000
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* strcspn.
-   Copyright (C) 2017-2022 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRCSPN __strcspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRCSPN)
-#endif
-
-#include <string/strcspn.c>
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
index 93a7fab7ea..b729c033d9 100644
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -1,2 +1,2 @@
-#define STRNCAT __strncat_sse2
+#define STRNCAT __strncat_generic
 #include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
index b649343a97..50fba8a41f 100644
--- a/sysdeps/x86_64/multiarch/strncat.c
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -24,6 +24,7 @@
 # undef strncat
 
 # define SYMBOL_NAME strncat
+# define GENERIC generic
 # include "ifunc-strcpy.h"
 
 libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
index 57c45ac7ab..183b0b8e0f 100644
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ b/sysdeps/x86_64/multiarch/strncpy-c.c
@@ -1,4 +1,4 @@
-#define STRNCPY __strncpy_sse2
+#define STRNCPY __strncpy_generic
 #undef libc_hidden_builtin_def
 #define libc_hidden_builtin_def(strncpy)
 
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
index 2a780a7e16..7fc7d72ec5 100644
--- a/sysdeps/x86_64/multiarch/strncpy.c
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -24,6 +24,7 @@
 # undef strncpy
 
 # define SYMBOL_NAME strncpy
+# define GENERIC generic
 # include "ifunc-strcpy.h"
 
 libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
similarity index 74%
rename from sysdeps/x86_64/multiarch/strspn-sse2.c
rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
index 61cc6cb0a5..8700276773 100644
--- a/sysdeps/x86_64/multiarch/strspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
@@ -1,5 +1,5 @@
-/* strspn.
-   Copyright (C) 2017-2022 Free Software Foundation, Inc.
+/* strpbrk with SSE4.2 intrinsics
+   Copyright (C) 2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRSPN __strspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRSPN)
-#endif
-
-#include <string/strspn.c>
+#define USE_AS_STRPBRK
+#define STRCSPN_GENERIC __strpbrk_generic
+#define STRCSPN_SSE42 __strpbrk_sse42
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
index abf4ff7f1a..d31acfe495 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
@@ -1,5 +1,5 @@
-/* strpbrk with SSE4.2 intrinsics
-   Copyright (C) 2022 Free Software Foundation, Inc.
+/* strpbrk.
+   Copyright (C) 2017-2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define USE_AS_STRPBRK
-#define STRCSPN_SSE2 __strpbrk_sse2
-#define STRCSPN_SSE42 __strpbrk_sse42
-#include "strcspn-c.c"
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRPBRK __strpbrk_generic
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRPBRK)
+#endif
+
+#include <string/strpbrk.c>
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c
deleted file mode 100644
index d03214c4fb..0000000000
--- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* strpbrk.
-   Copyright (C) 2017-2022 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRPBRK __strpbrk_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRPBRK)
-#endif
-
-#include <string/strpbrk.c>
diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
new file mode 100644
index 0000000000..d044916688
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
@@ -0,0 +1,136 @@
+/* strspn with SSE4.2 intrinsics
+   Copyright (C) 2009-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <nmmintrin.h>
+#include <string.h>
+#include "varshift.h"
+
+/* We use 0x12:
+	_SIDD_SBYTE_OPS
+	| _SIDD_CMP_EQUAL_ANY
+	| _SIDD_NEGATIVE_POLARITY
+	| _SIDD_LEAST_SIGNIFICANT
+   on pcmpistri to compare xmm/mem128
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   X X X X X X X X X X X X X X X X
+
+   against xmm
+
+   0 1 2 3 4 5 6 7 8 9 A B C D E F
+   A A A A A A A A A A A A A A A A
+
+   to find out if the first 16byte data element has any non-A byte and
+   the offset of the first byte.  There are 2 cases:
+
+   1. The first 16byte data element has the non-A byte, including
+      EOS, at the offset X.
+   2. The first 16byte data element is valid and doesn't have the non-A
+      byte.
+
+   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
+
+   case		ECX	CFlag	ZFlag	SFlag
+    1		 X	  1	 0/1	  0
+    2		16	  0	  0	  0
+
+   We exit from the loop for case 1.  */
+
+extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
+
+
+size_t
+__attribute__ ((section (".text.sse4.2")))
+__strspn_sse42 (const char *s, const char *a)
+{
+  if (*a == 0)
+    return 0;
+
+  const char *aligned;
+  __m128i mask, maskz, zero;
+  unsigned int maskz_bits;
+  unsigned int offset = (int) ((size_t) a & 15);
+  zero = _mm_set1_epi8 (0);
+  if (offset != 0)
+    {
+      /* Load masks.  */
+      aligned = (const char *) ((size_t) a & -16L);
+      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
+      maskz = _mm_cmpeq_epi8 (mask0, zero);
+
+      /* Find where the NULL terminator is.  */
+      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+      if (maskz_bits != 0)
+        {
+          mask = __m128i_shift_right (mask0, offset);
+          offset = (unsigned int) ((size_t) s & 15);
+          if (offset)
+            goto start_unaligned;
+
+          aligned = s;
+          goto start_loop;
+        }
+    }
+
+  /* A is aligned.  */
+  mask = _mm_loadu_si128 ((__m128i *) a);
+
+  /* Find where the NULL terminator is.  */
+  maskz = _mm_cmpeq_epi8 (mask, zero);
+  maskz_bits = _mm_movemask_epi8 (maskz);
+  if (maskz_bits == 0)
+    {
+      /* There is no NULL terminator.  Don't use SSE4.2 if the length
+         of A > 16.  */
+      if (a[16] != 0)
+        return __strspn_generic (s, a);
+    }
+  aligned = s;
+  offset = (unsigned int) ((size_t) s & 15);
+
+  if (offset != 0)
+    {
+    start_unaligned:
+      /* Check partial string.  */
+      aligned = (const char *) ((size_t) s & -16L);
+      __m128i value = _mm_load_si128 ((__m128i *) aligned);
+      __m128i adj_value = __m128i_shift_right (value, offset);
+
+      unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
+      /* No need to check CFlag since it is always 1.  */
+      if (length < 16 - offset)
+	return length;
+      /* Find where the NULL terminator is.  */
+      maskz = _mm_cmpeq_epi8 (value, zero);
+      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+      if (maskz_bits != 0)
+	return length;
+      aligned += 16;
+    }
+
+start_loop:
+  while (1)
+    {
+      __m128i value = _mm_load_si128 ((__m128i *) aligned);
+      unsigned int index = _mm_cmpistri (mask, value, 0x12);
+      unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
+      if (cflag)
+	return (size_t) (aligned + index - s);
+      aligned += 16;
+    }
+}
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 6124033ceb..6b50c36432 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -1,5 +1,5 @@
-/* strspn with SSE4.2 intrinsics
-   Copyright (C) 2009-2022 Free Software Foundation, Inc.
+/* strspn.
+   Copyright (C) 2017-2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,121 +16,13 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
+#if IS_IN (libc)
 
-/* We use 0x12:
-	_SIDD_SBYTE_OPS
-	| _SIDD_CMP_EQUAL_ANY
-	| _SIDD_NEGATIVE_POLARITY
-	| _SIDD_LEAST_SIGNIFICANT
-   on pcmpistri to compare xmm/mem128
+# include <sysdep.h>
+# define STRSPN __strspn_generic
 
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   X X X X X X X X X X X X X X X X
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRSPN)
+#endif
 
-   against xmm
-
-   0 1 2 3 4 5 6 7 8 9 A B C D E F
-   A A A A A A A A A A A A A A A A
-
-   to find out if the first 16byte data element has any non-A byte and
-   the offset of the first byte.  There are 2 cases:
-
-   1. The first 16byte data element has the non-A byte, including
-      EOS, at the offset X.
-   2. The first 16byte data element is valid and doesn't have the non-A
-      byte.
-
-   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
-   case		ECX	CFlag	ZFlag	SFlag
-    1		 X	  1	 0/1	  0
-    2		16	  0	  0	  0
-
-   We exit from the loop for case 1.  */
-
-extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
-
-
-size_t
-__attribute__ ((section (".text.sse4.2")))
-__strspn_sse42 (const char *s, const char *a)
-{
-  if (*a == 0)
-    return 0;
-
-  const char *aligned;
-  __m128i mask, maskz, zero;
-  unsigned int maskz_bits;
-  unsigned int offset = (int) ((size_t) a & 15);
-  zero = _mm_set1_epi8 (0);
-  if (offset != 0)
-    {
-      /* Load masks.  */
-      aligned = (const char *) ((size_t) a & -16L);
-      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
-      maskz = _mm_cmpeq_epi8 (mask0, zero);
-
-      /* Find where the NULL terminator is.  */
-      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
-      if (maskz_bits != 0)
-        {
-          mask = __m128i_shift_right (mask0, offset);
-          offset = (unsigned int) ((size_t) s & 15);
-          if (offset)
-            goto start_unaligned;
-
-          aligned = s;
-          goto start_loop;
-        }
-    }
-
-  /* A is aligned.  */
-  mask = _mm_loadu_si128 ((__m128i *) a);
-
-  /* Find where the NULL terminator is.  */
-  maskz = _mm_cmpeq_epi8 (mask, zero);
-  maskz_bits = _mm_movemask_epi8 (maskz);
-  if (maskz_bits == 0)
-    {
-      /* There is no NULL terminator.  Don't use SSE4.2 if the length
-         of A > 16.  */
-      if (a[16] != 0)
-        return __strspn_sse2 (s, a);
-    }
-  aligned = s;
-  offset = (unsigned int) ((size_t) s & 15);
-
-  if (offset != 0)
-    {
-    start_unaligned:
-      /* Check partial string.  */
-      aligned = (const char *) ((size_t) s & -16L);
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-      __m128i adj_value = __m128i_shift_right (value, offset);
-
-      unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
-      /* No need to check CFlag since it is always 1.  */
-      if (length < 16 - offset)
-	return length;
-      /* Find where the NULL terminator is.  */
-      maskz = _mm_cmpeq_epi8 (value, zero);
-      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
-      if (maskz_bits != 0)
-	return length;
-      aligned += 16;
-    }
-
-start_loop:
-  while (1)
-    {
-      __m128i value = _mm_load_si128 ((__m128i *) aligned);
-      unsigned int index = _mm_cmpistri (mask, value, 0x12);
-      unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
-      if (cflag)
-	return (size_t) (aligned + index - s);
-      aligned += 16;
-    }
-}
+#include <string/strspn.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
index 26d6984e9b..fa38dd898d 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-c.c
+++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
@@ -1,5 +1,5 @@
 #if IS_IN (libc)
-# define WCSCPY  __wcscpy_sse2
+# define WCSCPY  __wcscpy_generic
 #endif
 
 #include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
index 6a2d1421d9..53c3228dc2 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.c
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -26,7 +26,7 @@
 # define SYMBOL_NAME wcscpy
 # include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
 
 static inline void *
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
     return OPTIMIZE (ssse3);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (generic);
 }
 
 libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
index e1ec7cfbb5..1c9c04241a 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
@@ -1,9 +1,9 @@
 #if IS_IN (libc)
 # include <wchar.h>
 
-# define WCSNLEN __wcsnlen_sse2
+# define WCSNLEN __wcsnlen_generic
 
-extern __typeof (wcsnlen) __wcsnlen_sse2;
+extern __typeof (wcsnlen) __wcsnlen_generic;
 #endif
 
 #include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index baa26666a8..05b7a211de 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -24,6 +24,7 @@
 # undef __wcsnlen
 
 # define SYMBOL_NAME wcsnlen
+# define GENERIC generic
 # include "ifunc-wcslen.h"
 
 libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
-- 
2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] x86: Rename generic functions with unique postfix for clarity
  2022-06-10  0:58   ` [PATCH v2] " Noah Goldstein
@ 2022-06-10  1:19     ` H.J. Lu
  2022-06-10  1:26       ` Noah Goldstein
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2022-06-10  1:19 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Thu, Jun 9, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> No functions are changed. It just renames generic implementations from
> '{func}_sse2' to '{func}_generic'. This is because the postfix
> "_sse2" was overloaded: it was used both for files that had hand-optimized
> sse2 assembly implementations and for files that just redirected back
> to the generic implementation.

This change isn't small and its benefit is very small.  Can it be part of
a bigger change to support building glibc with -march=x86-64-vN?

> Full xcheck passed on x86_64.
> ---
>  sysdeps/x86_64/multiarch/Makefile             |  15 +-
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c    |  16 +-
>  sysdeps/x86_64/multiarch/ifunc-sse4_2.h       |   4 +-
>  sysdeps/x86_64/multiarch/ifunc-strcpy.h       |   8 +-
>  sysdeps/x86_64/multiarch/ifunc-wcslen.h       |   8 +-
>  sysdeps/x86_64/multiarch/stpncpy-c.c          |   2 +-
>  sysdeps/x86_64/multiarch/stpncpy.c            |   1 +
>  sysdeps/x86_64/multiarch/strcspn-c-sse4.c     | 163 ++++++++++++++++++
>  sysdeps/x86_64/multiarch/strcspn-c.c          | 151 +---------------
>  sysdeps/x86_64/multiarch/strcspn-sse2.c       |  28 ---
>  sysdeps/x86_64/multiarch/strncat-c.c          |   2 +-
>  sysdeps/x86_64/multiarch/strncat.c            |   1 +
>  sysdeps/x86_64/multiarch/strncpy-c.c          |   2 +-
>  sysdeps/x86_64/multiarch/strncpy.c            |   1 +
>  .../{strspn-sse2.c => strpbrk-c-sse4.c}       |  18 +-
>  sysdeps/x86_64/multiarch/strpbrk-c.c          |  18 +-
>  sysdeps/x86_64/multiarch/strpbrk-sse2.c       |  28 ---
>  sysdeps/x86_64/multiarch/strspn-c-sse4.c      | 136 +++++++++++++++
>  sysdeps/x86_64/multiarch/strspn-c.c           | 126 +-------------
>  sysdeps/x86_64/multiarch/wcscpy-c.c           |   2 +-
>  sysdeps/x86_64/multiarch/wcscpy.c             |   4 +-
>  sysdeps/x86_64/multiarch/wcsnlen-c.c          |   4 +-
>  sysdeps/x86_64/multiarch/wcsnlen.c            |   1 +
>  23 files changed, 376 insertions(+), 363 deletions(-)
>  create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c
>  delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c
>  rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%)
>  delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c
>  create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 3d153cac35..86c6ecdfc1 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -77,7 +77,7 @@ sysdep_routines += \
>    strcpy-sse2 \
>    strcpy-sse2-unaligned \
>    strcspn-c \
> -  strcspn-sse2 \
> +  strcspn-c-sse4 \
>    strlen-avx2 \
>    strlen-avx2-rtm \
>    strlen-evex \
> @@ -109,21 +109,22 @@ sysdep_routines += \
>    strnlen-evex512 \
>    strnlen-sse2 \
>    strpbrk-c \
> -  strpbrk-sse2 \
> +  strpbrk-c-sse4 \
>    strrchr-avx2 \
>    strrchr-avx2-rtm \
>    strrchr-evex \
>    strrchr-sse2 \
>    strspn-c \
> -  strspn-sse2 \
> +  strspn-c-sse4 \
>    strstr-avx512 \
>    strstr-sse2-unaligned \
>    varshift \
>  # sysdep_routines
> -CFLAGS-varshift.c += -msse4
> -CFLAGS-strcspn-c.c += -msse4
> -CFLAGS-strpbrk-c.c += -msse4
> -CFLAGS-strspn-c.c += -msse4
> +
> +CFLAGS-strcspn-c-sse4.c += -msse4
> +CFLAGS-strpbrk-c-sse4.c += -msse4
> +CFLAGS-strspn-c-sse4.c += -msse4
> +
>  CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
>  endif
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 58f3ec8306..4cbd200d39 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               __stpncpy_evex)
>               IFUNC_IMPL_ADD (array, i, stpncpy, 1,
>                               __stpncpy_sse2_unaligned)
> -             IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
> +             IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
>
>    /* Support sysdeps/x86_64/multiarch/stpcpy.c.  */
>    IFUNC_IMPL (i, name, stpcpy,
> @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, strcspn,
>               IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
>                               __strcspn_sse42)
> -             IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> +             IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
>    IFUNC_IMPL (i, name, strncasecmp,
> @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               __strncat_evex)
>               IFUNC_IMPL_ADD (array, i, strncat, 1,
>                               __strncat_sse2_unaligned)
> -             IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> +             IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strncpy.c.  */
>    IFUNC_IMPL (i, name, strncpy,
> @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               __strncpy_evex)
>               IFUNC_IMPL_ADD (array, i, strncpy, 1,
>                               __strncpy_sse2_unaligned)
> -             IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
> +             IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
>    IFUNC_IMPL (i, name, strpbrk,
>               IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
>                               __strpbrk_sse42)
> -             IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> +             IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
>
>
>    /* Support sysdeps/x86_64/multiarch/strspn.c.  */
>    IFUNC_IMPL (i, name, strspn,
>               IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
>                               __strspn_sse42)
> -             IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> +             IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
>    IFUNC_IMPL (i, name, strstr,
> @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, wcscpy,
>               IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
>                               __wcscpy_ssse3)
> -             IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
> +             IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
>
>    /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
>    IFUNC_IMPL (i, name, wcslen,
> @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>               IFUNC_IMPL_ADD (array, i, wcsnlen,
>                               CPU_FEATURE_USABLE (SSE4_1),
>                               __wcsnlen_sse4_1)
> -             IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
> +             IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
>
>    /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
>    IFUNC_IMPL (i, name, wmemchr,
> diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> index b555ff2fac..ee36525bcf 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> @@ -19,7 +19,7 @@
>
>  #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
>
>  static inline void *
> @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
>      return OPTIMIZE (sse42);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (generic);
>  }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> index a15afa44e9..80529458d1 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> @@ -20,7 +20,11 @@
>
>  #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
>    attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
>      return OPTIMIZE (sse2_unaligned);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (GENERIC);
>  }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> index 2b29e7608a..88c1c502af 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> @@ -19,7 +19,11 @@
>
>  #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
>      return OPTIMIZE (sse4_1);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (GENERIC);
>  }
> diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
> index b016e487e1..eb62fcf388 100644
> --- a/sysdeps/x86_64/multiarch/stpncpy-c.c
> +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
> @@ -1,4 +1,4 @@
> -#define STPNCPY __stpncpy_sse2
> +#define STPNCPY __stpncpy_generic
>  #undef weak_alias
>  #define weak_alias(ignored1, ignored2)
>  #undef libc_hidden_def
> diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
> index 82fa53957d..879bc83f0b 100644
> --- a/sysdeps/x86_64/multiarch/stpncpy.c
> +++ b/sysdeps/x86_64/multiarch/stpncpy.c
> @@ -25,6 +25,7 @@
>  # undef stpncpy
>  # undef __stpncpy
>
> +# define GENERIC generic
>  # define SYMBOL_NAME stpncpy
>  # include "ifunc-strcpy.h"
>
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> new file mode 100644
> index 0000000000..59f64f9fe8
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> @@ -0,0 +1,163 @@
> +/* strcspn with SSE4.2 intrinsics
> +   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <nmmintrin.h>
> +#include <string.h>
> +#include "varshift.h"
> +
> +/* We use 0x2:
> +       _SIDD_SBYTE_OPS
> +       | _SIDD_CMP_EQUAL_ANY
> +       | _SIDD_POSITIVE_POLARITY
> +       | _SIDD_LEAST_SIGNIFICANT
> +   on pcmpistri to compare xmm/mem128
> +
> +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> +   X X X X X X X X X X X X X X X X
> +
> +   against xmm
> +
> +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> +   A A A A A A A A A A A A A A A A
> +
> +   to find out if the first 16byte data element has any byte A and
> +   the offset of the first byte.  There are 3 cases:
> +
> +   1. The first 16byte data element has the byte A at the offset X.
> +   2. The first 16byte data element has EOS and doesn't have the byte A.
> +   3. The first 16byte data element is valid and doesn't have the byte A.
> +
> +   Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
> +
> +    1           X        1      0/1      0
> +    2          16        0       1       0
> +    3          16        0       0       0
> +
> +   We exit from the loop for cases 1 and 2 with jbe which branches
> +   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
> +   X for case 1.  */
> +
> +#ifndef STRCSPN_GENERIC
> +# define STRCSPN_GENERIC __strcspn_generic
> +# define STRCSPN_SSE42 __strcspn_sse42
> +#endif
> +
> +#ifdef USE_AS_STRPBRK
> +# define RETURN(val1, val2) return val1
> +#else
> +# define RETURN(val1, val2) return val2
> +#endif
> +
> +extern
> +#ifdef USE_AS_STRPBRK
> +char *
> +#else
> +size_t
> +#endif
> +STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
> +
> +
> +#ifdef USE_AS_STRPBRK
> +char *
> +#else
> +size_t
> +#endif
> +__attribute__ ((section (".text.sse4.2")))
> +STRCSPN_SSE42 (const char *s, const char *a)
> +{
> +  if (*a == 0)
> +    RETURN (NULL, strlen (s));
> +
> +  const char *aligned;
> +  __m128i mask, maskz, zero;
> +  unsigned int maskz_bits;
> +  unsigned int offset = (unsigned int) ((size_t) a & 15);
> +  zero = _mm_set1_epi8 (0);
> +  if (offset != 0)
> +    {
> +      /* Load masks.  */
> +      aligned = (const char *) ((size_t) a & -16L);
> +      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> +      maskz = _mm_cmpeq_epi8 (mask0, zero);
> +
> +      /* Find where the NULL terminator is.  */
> +      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> +      if (maskz_bits != 0)
> +        {
> +          mask = __m128i_shift_right (mask0, offset);
> +          offset = (unsigned int) ((size_t) s & 15);
> +          if (offset)
> +            goto start_unaligned;
> +
> +          aligned = s;
> +          goto start_loop;
> +        }
> +    }
> +
> +  /* A is aligned.  */
> +  mask = _mm_loadu_si128 ((__m128i *) a);
> +  /* Find where the NULL terminator is.  */
> +  maskz = _mm_cmpeq_epi8 (mask, zero);
> +  maskz_bits = _mm_movemask_epi8 (maskz);
> +  if (maskz_bits == 0)
> +    {
> +      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> +         of A > 16.  */
> +      if (a[16] != 0)
> +        return STRCSPN_GENERIC (s, a);
> +    }
> +
> +  aligned = s;
> +  offset = (unsigned int) ((size_t) s & 15);
> +  if (offset != 0)
> +    {
> +    start_unaligned:
> +      /* Check partial string.  */
> +      aligned = (const char *) ((size_t) s & -16L);
> +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> +
> +      value = __m128i_shift_right (value, offset);
> +
> +      unsigned int length = _mm_cmpistri (mask, value, 0x2);
> +      /* No need to check ZFlag since ZFlag is always 1.  */
> +      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> +      if (cflag)
> +       RETURN ((char *) (s + length), length);
> +      /* Find where the NULL terminator is.  */
> +      unsigned int index = _mm_cmpistri (value, value, 0x3a);
> +      if (index < 16 - offset)
> +       RETURN (NULL, index);
> +      aligned += 16;
> +    }
> +
> +start_loop:
> +  while (1)
> +    {
> +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> +      unsigned int index = _mm_cmpistri (mask, value, 0x2);
> +      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> +      unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> +      if (cflag)
> +       RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> +      if (zflag)
> +       RETURN (NULL,
> +               /* Find where the NULL terminator is.  */
> +               (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> +      aligned += 16;
> +    }
> +}
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
> index c312fab8b1..423de2e2b2 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-c.c
> @@ -1,5 +1,5 @@
> -/* strcspn with SSE4.2 intrinsics
> -   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> +/* strcspn.
> +   Copyright (C) 2017-2022 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -16,148 +16,13 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <nmmintrin.h>
> -#include <string.h>
> -#include "varshift.h"
> +#if IS_IN (libc)
>
> -/* We use 0x2:
> -       _SIDD_SBYTE_OPS
> -       | _SIDD_CMP_EQUAL_ANY
> -       | _SIDD_POSITIVE_POLARITY
> -       | _SIDD_LEAST_SIGNIFICANT
> -   on pcmpistri to compare xmm/mem128
> +# include <sysdep.h>
> +# define STRCSPN __strcspn_generic
>
> -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> -   X X X X X X X X X X X X X X X X
> -
> -   against xmm
> -
> -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> -   A A A A A A A A A A A A A A A A
> -
> -   to find out if the first 16byte data element has any byte A and
> -   the offset of the first byte.  There are 3 cases:
> -
> -   1. The first 16byte data element has the byte A at the offset X.
> -   2. The first 16byte data element has EOS and doesn't have the byte A.
> -   3. The first 16byte data element is valid and doesn't have the byte A.
> -
> -   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> -
> -    1           X        1      0/1      0
> -    2          16        0       1       0
> -    3          16        0       0       0
> -
> -   We exit from the loop for cases 1 and 2 with jbe which branches
> -   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
> -   X for case 1.  */
> -
> -#ifndef STRCSPN_SSE2
> -# define STRCSPN_SSE2 __strcspn_sse2
> -# define STRCSPN_SSE42 __strcspn_sse42
> -#endif
> -
> -#ifdef USE_AS_STRPBRK
> -# define RETURN(val1, val2) return val1
> -#else
> -# define RETURN(val1, val2) return val2
> -#endif
> -
> -extern
> -#ifdef USE_AS_STRPBRK
> -char *
> -#else
> -size_t
> -#endif
> -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> -
> -
> -#ifdef USE_AS_STRPBRK
> -char *
> -#else
> -size_t
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(STRCSPN)
>  #endif
> -__attribute__ ((section (".text.sse4.2")))
> -STRCSPN_SSE42 (const char *s, const char *a)
> -{
> -  if (*a == 0)
> -    RETURN (NULL, strlen (s));
> -
> -  const char *aligned;
> -  __m128i mask, maskz, zero;
> -  unsigned int maskz_bits;
> -  unsigned int offset = (unsigned int) ((size_t) a & 15);
> -  zero = _mm_set1_epi8 (0);
> -  if (offset != 0)
> -    {
> -      /* Load masks.  */
> -      aligned = (const char *) ((size_t) a & -16L);
> -      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> -      maskz = _mm_cmpeq_epi8 (mask0, zero);
> -
> -      /* Find where the NULL terminator is.  */
> -      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> -      if (maskz_bits != 0)
> -        {
> -          mask = __m128i_shift_right (mask0, offset);
> -          offset = (unsigned int) ((size_t) s & 15);
> -          if (offset)
> -            goto start_unaligned;
> -
> -          aligned = s;
> -          goto start_loop;
> -        }
> -    }
> -
> -  /* A is aligned.  */
> -  mask = _mm_loadu_si128 ((__m128i *) a);
> -  /* Find where the NULL terminator is.  */
> -  maskz = _mm_cmpeq_epi8 (mask, zero);
> -  maskz_bits = _mm_movemask_epi8 (maskz);
> -  if (maskz_bits == 0)
> -    {
> -      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> -         of A > 16.  */
> -      if (a[16] != 0)
> -        return STRCSPN_SSE2 (s, a);
> -    }
> -
> -  aligned = s;
> -  offset = (unsigned int) ((size_t) s & 15);
> -  if (offset != 0)
> -    {
> -    start_unaligned:
> -      /* Check partial string.  */
> -      aligned = (const char *) ((size_t) s & -16L);
> -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> -
> -      value = __m128i_shift_right (value, offset);
> -
> -      unsigned int length = _mm_cmpistri (mask, value, 0x2);
> -      /* No need to check ZFlag since ZFlag is always 1.  */
> -      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> -      if (cflag)
> -       RETURN ((char *) (s + length), length);
> -      /* Find where the NULL terminator is.  */
> -      unsigned int index = _mm_cmpistri (value, value, 0x3a);
> -      if (index < 16 - offset)
> -       RETURN (NULL, index);
> -      aligned += 16;
> -    }
>
> -start_loop:
> -  while (1)
> -    {
> -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> -      unsigned int index = _mm_cmpistri (mask, value, 0x2);
> -      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> -      unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> -      if (cflag)
> -       RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> -      if (zflag)
> -       RETURN (NULL,
> -               /* Find where the NULL terminator is.  */
> -               (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> -      aligned += 16;
> -    }
> -}
> +#include <string/strcspn.c>
> diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c
> deleted file mode 100644
> index 3a04bb39fc..0000000000
> --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
> +++ /dev/null
> @@ -1,28 +0,0 @@
> -/* strcspn.
> -   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#if IS_IN (libc)
> -
> -# include <sysdep.h>
> -# define STRCSPN __strcspn_sse2
> -
> -# undef libc_hidden_builtin_def
> -# define libc_hidden_builtin_def(STRCSPN)
> -#endif
> -
> -#include <string/strcspn.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
> index 93a7fab7ea..b729c033d9 100644
> --- a/sysdeps/x86_64/multiarch/strncat-c.c
> +++ b/sysdeps/x86_64/multiarch/strncat-c.c
> @@ -1,2 +1,2 @@
> -#define STRNCAT __strncat_sse2
> +#define STRNCAT __strncat_generic
>  #include <string/strncat.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
> index b649343a97..50fba8a41f 100644
> --- a/sysdeps/x86_64/multiarch/strncat.c
> +++ b/sysdeps/x86_64/multiarch/strncat.c
> @@ -24,6 +24,7 @@
>  # undef strncat
>
>  # define SYMBOL_NAME strncat
> +# define GENERIC generic
>  # include "ifunc-strcpy.h"
>
>  libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
> index 57c45ac7ab..183b0b8e0f 100644
> --- a/sysdeps/x86_64/multiarch/strncpy-c.c
> +++ b/sysdeps/x86_64/multiarch/strncpy-c.c
> @@ -1,4 +1,4 @@
> -#define STRNCPY __strncpy_sse2
> +#define STRNCPY __strncpy_generic
>  #undef libc_hidden_builtin_def
>  #define libc_hidden_builtin_def(strncpy)
>
> diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
> index 2a780a7e16..7fc7d72ec5 100644
> --- a/sysdeps/x86_64/multiarch/strncpy.c
> +++ b/sysdeps/x86_64/multiarch/strncpy.c
> @@ -24,6 +24,7 @@
>  # undef strncpy
>
>  # define SYMBOL_NAME strncpy
> +# define GENERIC generic
>  # include "ifunc-strcpy.h"
>
>  libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> similarity index 74%
> rename from sysdeps/x86_64/multiarch/strspn-sse2.c
> rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> index 61cc6cb0a5..8700276773 100644
> --- a/sysdeps/x86_64/multiarch/strspn-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> @@ -1,5 +1,5 @@
> -/* strspn.
> -   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> +/* strpbrk with SSE4.2 intrinsics
> +   Copyright (C) 2022 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -16,13 +16,7 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#if IS_IN (libc)
> -
> -# include <sysdep.h>
> -# define STRSPN __strspn_sse2
> -
> -# undef libc_hidden_builtin_def
> -# define libc_hidden_builtin_def(STRSPN)
> -#endif
> -
> -#include <string/strspn.c>
> +#define USE_AS_STRPBRK
> +#define STRCSPN_GENERIC __strpbrk_generic
> +#define STRCSPN_SSE42 __strpbrk_sse42
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
> index abf4ff7f1a..d31acfe495 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
> @@ -1,5 +1,5 @@
> -/* strpbrk with SSE4.2 intrinsics
> -   Copyright (C) 2022 Free Software Foundation, Inc.
> +/* strpbrk.
> +   Copyright (C) 2017-2022 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -16,7 +16,13 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define USE_AS_STRPBRK
> -#define STRCSPN_SSE2 __strpbrk_sse2
> -#define STRCSPN_SSE42 __strpbrk_sse42
> -#include "strcspn-c.c"
> +#if IS_IN (libc)
> +
> +# include <sysdep.h>
> +# define STRPBRK __strpbrk_generic
> +
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(STRPBRK)
> +#endif
> +
> +#include <string/strpbrk.c>
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> deleted file mode 100644
> index d03214c4fb..0000000000
> --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> +++ /dev/null
> @@ -1,28 +0,0 @@
> -/* strpbrk.
> -   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#if IS_IN (libc)
> -
> -# include <sysdep.h>
> -# define STRPBRK __strpbrk_sse2
> -
> -# undef libc_hidden_builtin_def
> -# define libc_hidden_builtin_def(STRPBRK)
> -#endif
> -
> -#include <string/strpbrk.c>
> diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> new file mode 100644
> index 0000000000..d044916688
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> @@ -0,0 +1,136 @@
> +/* strspn with SSE4.2 intrinsics
> +   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <nmmintrin.h>
> +#include <string.h>
> +#include "varshift.h"
> +
> +/* We use 0x12:
> +       _SIDD_SBYTE_OPS
> +       | _SIDD_CMP_EQUAL_ANY
> +       | _SIDD_NEGATIVE_POLARITY
> +       | _SIDD_LEAST_SIGNIFICANT
> +   on pcmpistri to compare xmm/mem128
> +
> +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> +   X X X X X X X X X X X X X X X X
> +
> +   against xmm
> +
> +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> +   A A A A A A A A A A A A A A A A
> +
> +   to find out if the first 16byte data element has any non-A byte and
> +   the offset of the first byte.  There are 2 cases:
> +
> +   1. The first 16byte data element has the non-A byte, including
> +      EOS, at the offset X.
> +   2. The first 16byte data element is valid and doesn't have the non-A
> +      byte.
> +
> +   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> +
> +   case                ECX     CFlag   ZFlag   SFlag
> +    1           X        1      0/1      0
> +    2          16        0       0       0
> +
> +   We exit from the loop for case 1.  */
> +
> +extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
> +
> +
> +size_t
> +__attribute__ ((section (".text.sse4.2")))
> +__strspn_sse42 (const char *s, const char *a)
> +{
> +  if (*a == 0)
> +    return 0;
> +
> +  const char *aligned;
> +  __m128i mask, maskz, zero;
> +  unsigned int maskz_bits;
> +  unsigned int offset = (int) ((size_t) a & 15);
> +  zero = _mm_set1_epi8 (0);
> +  if (offset != 0)
> +    {
> +      /* Load masks.  */
> +      aligned = (const char *) ((size_t) a & -16L);
> +      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> +      maskz = _mm_cmpeq_epi8 (mask0, zero);
> +
> +      /* Find where the NULL terminator is.  */
> +      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> +      if (maskz_bits != 0)
> +        {
> +          mask = __m128i_shift_right (mask0, offset);
> +          offset = (unsigned int) ((size_t) s & 15);
> +          if (offset)
> +            goto start_unaligned;
> +
> +          aligned = s;
> +          goto start_loop;
> +        }
> +    }
> +
> +  /* A is aligned.  */
> +  mask = _mm_loadu_si128 ((__m128i *) a);
> +
> +  /* Find where the NULL terminator is.  */
> +  maskz = _mm_cmpeq_epi8 (mask, zero);
> +  maskz_bits = _mm_movemask_epi8 (maskz);
> +  if (maskz_bits == 0)
> +    {
> +      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> +         of A > 16.  */
> +      if (a[16] != 0)
> +        return __strspn_generic (s, a);
> +    }
> +  aligned = s;
> +  offset = (unsigned int) ((size_t) s & 15);
> +
> +  if (offset != 0)
> +    {
> +    start_unaligned:
> +      /* Check partial string.  */
> +      aligned = (const char *) ((size_t) s & -16L);
> +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> +      __m128i adj_value = __m128i_shift_right (value, offset);
> +
> +      unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> +      /* No need to check CFlag since it is always 1.  */
> +      if (length < 16 - offset)
> +       return length;
> +      /* Find where the NULL terminator is.  */
> +      maskz = _mm_cmpeq_epi8 (value, zero);
> +      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> +      if (maskz_bits != 0)
> +       return length;
> +      aligned += 16;
> +    }
> +
> +start_loop:
> +  while (1)
> +    {
> +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> +      unsigned int index = _mm_cmpistri (mask, value, 0x12);
> +      unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> +      if (cflag)
> +       return (size_t) (aligned + index - s);
> +      aligned += 16;
> +    }
> +}
> diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
> index 6124033ceb..6b50c36432 100644
> --- a/sysdeps/x86_64/multiarch/strspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strspn-c.c
> @@ -1,5 +1,5 @@
> -/* strspn with SSE4.2 intrinsics
> -   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> +/* strspn.
> +   Copyright (C) 2017-2022 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -16,121 +16,13 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <nmmintrin.h>
> -#include <string.h>
> -#include "varshift.h"
> +#if IS_IN (libc)
>
> -/* We use 0x12:
> -       _SIDD_SBYTE_OPS
> -       | _SIDD_CMP_EQUAL_ANY
> -       | _SIDD_NEGATIVE_POLARITY
> -       | _SIDD_LEAST_SIGNIFICANT
> -   on pcmpistri to compare xmm/mem128
> +# include <sysdep.h>
> +# define STRSPN __strspn_generic
>
> -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> -   X X X X X X X X X X X X X X X X
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(STRSPN)
> +#endif
>
> -   against xmm
> -
> -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> -   A A A A A A A A A A A A A A A A
> -
> -   to find out if the first 16byte data element has any non-A byte and
> -   the offset of the first byte.  There are 2 cases:
> -
> -   1. The first 16byte data element has the non-A byte, including
> -      EOS, at the offset X.
> -   2. The first 16byte data element is valid and doesn't have the non-A
> -      byte.
> -
> -   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> -
> -   case                ECX     CFlag   ZFlag   SFlag
> -    1           X        1      0/1      0
> -    2          16        0       0       0
> -
> -   We exit from the loop for case 1.  */
> -
> -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
> -
> -
> -size_t
> -__attribute__ ((section (".text.sse4.2")))
> -__strspn_sse42 (const char *s, const char *a)
> -{
> -  if (*a == 0)
> -    return 0;
> -
> -  const char *aligned;
> -  __m128i mask, maskz, zero;
> -  unsigned int maskz_bits;
> -  unsigned int offset = (int) ((size_t) a & 15);
> -  zero = _mm_set1_epi8 (0);
> -  if (offset != 0)
> -    {
> -      /* Load masks.  */
> -      aligned = (const char *) ((size_t) a & -16L);
> -      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> -      maskz = _mm_cmpeq_epi8 (mask0, zero);
> -
> -      /* Find where the NULL terminator is.  */
> -      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> -      if (maskz_bits != 0)
> -        {
> -          mask = __m128i_shift_right (mask0, offset);
> -          offset = (unsigned int) ((size_t) s & 15);
> -          if (offset)
> -            goto start_unaligned;
> -
> -          aligned = s;
> -          goto start_loop;
> -        }
> -    }
> -
> -  /* A is aligned.  */
> -  mask = _mm_loadu_si128 ((__m128i *) a);
> -
> -  /* Find where the NULL terminator is.  */
> -  maskz = _mm_cmpeq_epi8 (mask, zero);
> -  maskz_bits = _mm_movemask_epi8 (maskz);
> -  if (maskz_bits == 0)
> -    {
> -      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> -         of A > 16.  */
> -      if (a[16] != 0)
> -        return __strspn_sse2 (s, a);
> -    }
> -  aligned = s;
> -  offset = (unsigned int) ((size_t) s & 15);
> -
> -  if (offset != 0)
> -    {
> -    start_unaligned:
> -      /* Check partial string.  */
> -      aligned = (const char *) ((size_t) s & -16L);
> -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> -      __m128i adj_value = __m128i_shift_right (value, offset);
> -
> -      unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> -      /* No need to check CFlag since it is always 1.  */
> -      if (length < 16 - offset)
> -       return length;
> -      /* Find where the NULL terminator is.  */
> -      maskz = _mm_cmpeq_epi8 (value, zero);
> -      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> -      if (maskz_bits != 0)
> -       return length;
> -      aligned += 16;
> -    }
> -
> -start_loop:
> -  while (1)
> -    {
> -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> -      unsigned int index = _mm_cmpistri (mask, value, 0x12);
> -      unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> -      if (cflag)
> -       return (size_t) (aligned + index - s);
> -      aligned += 16;
> -    }
> -}
> +#include <string/strspn.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
> index 26d6984e9b..fa38dd898d 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy-c.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
> @@ -1,5 +1,5 @@
>  #if IS_IN (libc)
> -# define WCSCPY  __wcscpy_sse2
> +# define WCSCPY  __wcscpy_generic
>  #endif
>
>  #include <wcsmbs/wcscpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> index 6a2d1421d9..53c3228dc2 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> @@ -26,7 +26,7 @@
>  # define SYMBOL_NAME wcscpy
>  # include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
>
>  static inline void *
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
>      return OPTIMIZE (ssse3);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (generic);
>  }
>
>  libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> index e1ec7cfbb5..1c9c04241a 100644
> --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
> +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> @@ -1,9 +1,9 @@
>  #if IS_IN (libc)
>  # include <wchar.h>
>
> -# define WCSNLEN __wcsnlen_sse2
> +# define WCSNLEN __wcsnlen_generic
>
> -extern __typeof (wcsnlen) __wcsnlen_sse2;
> +extern __typeof (wcsnlen) __wcsnlen_generic;
>  #endif
>
>  #include "wcsmbs/wcsnlen.c"
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
> index baa26666a8..05b7a211de 100644
> --- a/sysdeps/x86_64/multiarch/wcsnlen.c
> +++ b/sysdeps/x86_64/multiarch/wcsnlen.c
> @@ -24,6 +24,7 @@
>  # undef __wcsnlen
>
>  # define SYMBOL_NAME wcsnlen
> +# define GENERIC generic
>  # include "ifunc-wcslen.h"
>
>  libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
> --
> 2.34.1
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] x86: Rename generic functions with unique postfix for clarity
  2022-06-10  1:19     ` H.J. Lu
@ 2022-06-10  1:26       ` Noah Goldstein
  0 siblings, 0 replies; 11+ messages in thread
From: Noah Goldstein @ 2022-06-10  1:26 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell

On Thu, Jun 9, 2022 at 6:20 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Jun 9, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > No functions are changed. It just renames generic implementations from
> > '{func}_sse2' to '{func}_generic'. This is just because the postfix
> > "_sse2" was overloaded and was used for files that had hand-optimized
> > sse2 assembly implementations and files that just redirected back
> > to the generic implementation.
>
> This change isn't small and its benefit is very small.  Can it be part of
> a big change to support building glibc with
>
> -march=x86-64-vN

kk
>
> > Full xcheck passed on x86_64.
> > ---
> >  sysdeps/x86_64/multiarch/Makefile             |  15 +-
> >  sysdeps/x86_64/multiarch/ifunc-impl-list.c    |  16 +-
> >  sysdeps/x86_64/multiarch/ifunc-sse4_2.h       |   4 +-
> >  sysdeps/x86_64/multiarch/ifunc-strcpy.h       |   8 +-
> >  sysdeps/x86_64/multiarch/ifunc-wcslen.h       |   8 +-
> >  sysdeps/x86_64/multiarch/stpncpy-c.c          |   2 +-
> >  sysdeps/x86_64/multiarch/stpncpy.c            |   1 +
> >  sysdeps/x86_64/multiarch/strcspn-c-sse4.c     | 163 ++++++++++++++++++
> >  sysdeps/x86_64/multiarch/strcspn-c.c          | 151 +---------------
> >  sysdeps/x86_64/multiarch/strcspn-sse2.c       |  28 ---
> >  sysdeps/x86_64/multiarch/strncat-c.c          |   2 +-
> >  sysdeps/x86_64/multiarch/strncat.c            |   1 +
> >  sysdeps/x86_64/multiarch/strncpy-c.c          |   2 +-
> >  sysdeps/x86_64/multiarch/strncpy.c            |   1 +
> >  .../{strspn-sse2.c => strpbrk-c-sse4.c}       |  18 +-
> >  sysdeps/x86_64/multiarch/strpbrk-c.c          |  18 +-
> >  sysdeps/x86_64/multiarch/strpbrk-sse2.c       |  28 ---
> >  sysdeps/x86_64/multiarch/strspn-c-sse4.c      | 136 +++++++++++++++
> >  sysdeps/x86_64/multiarch/strspn-c.c           | 126 +-------------
> >  sysdeps/x86_64/multiarch/wcscpy-c.c           |   2 +-
> >  sysdeps/x86_64/multiarch/wcscpy.c             |   4 +-
> >  sysdeps/x86_64/multiarch/wcsnlen-c.c          |   4 +-
> >  sysdeps/x86_64/multiarch/wcsnlen.c            |   1 +
> >  23 files changed, 376 insertions(+), 363 deletions(-)
> >  create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> >  delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c
> >  rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%)
> >  delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c
> >  create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c
> >
> > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> > index 3d153cac35..86c6ecdfc1 100644
> > --- a/sysdeps/x86_64/multiarch/Makefile
> > +++ b/sysdeps/x86_64/multiarch/Makefile
> > @@ -77,7 +77,7 @@ sysdep_routines += \
> >    strcpy-sse2 \
> >    strcpy-sse2-unaligned \
> >    strcspn-c \
> > -  strcspn-sse2 \
> > +  strcspn-c-sse4 \
> >    strlen-avx2 \
> >    strlen-avx2-rtm \
> >    strlen-evex \
> > @@ -109,21 +109,22 @@ sysdep_routines += \
> >    strnlen-evex512 \
> >    strnlen-sse2 \
> >    strpbrk-c \
> > -  strpbrk-sse2 \
> > +  strpbrk-c-sse4 \
> >    strrchr-avx2 \
> >    strrchr-avx2-rtm \
> >    strrchr-evex \
> >    strrchr-sse2 \
> >    strspn-c \
> > -  strspn-sse2 \
> > +  strspn-c-sse4 \
> >    strstr-avx512 \
> >    strstr-sse2-unaligned \
> >    varshift \
> >  # sysdep_routines
> > -CFLAGS-varshift.c += -msse4
> > -CFLAGS-strcspn-c.c += -msse4
> > -CFLAGS-strpbrk-c.c += -msse4
> > -CFLAGS-strspn-c.c += -msse4
> > +
> > +CFLAGS-strcspn-c-sse4.c += -msse4
> > +CFLAGS-strpbrk-c-sse4.c += -msse4
> > +CFLAGS-strspn-c-sse4.c += -msse4
> > +
> >  CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
> >  endif
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index 58f3ec8306..4cbd200d39 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                               __stpncpy_evex)
> >               IFUNC_IMPL_ADD (array, i, stpncpy, 1,
> >                               __stpncpy_sse2_unaligned)
> > -             IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
> > +             IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
> >
> >    /* Support sysdeps/x86_64/multiarch/stpcpy.c.  */
> >    IFUNC_IMPL (i, name, stpcpy,
> > @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >    IFUNC_IMPL (i, name, strcspn,
> >               IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
> >                               __strcspn_sse42)
> > -             IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> > +             IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
> >
> >    /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
> >    IFUNC_IMPL (i, name, strncasecmp,
> > @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                               __strncat_evex)
> >               IFUNC_IMPL_ADD (array, i, strncat, 1,
> >                               __strncat_sse2_unaligned)
> > -             IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> > +             IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
> >
> >    /* Support sysdeps/x86_64/multiarch/strncpy.c.  */
> >    IFUNC_IMPL (i, name, strncpy,
> > @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >                               __strncpy_evex)
> >               IFUNC_IMPL_ADD (array, i, strncpy, 1,
> >                               __strncpy_sse2_unaligned)
> > -             IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
> > +             IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
> >
> >    /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
> >    IFUNC_IMPL (i, name, strpbrk,
> >               IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
> >                               __strpbrk_sse42)
> > -             IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> > +             IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
> >
> >
> >    /* Support sysdeps/x86_64/multiarch/strspn.c.  */
> >    IFUNC_IMPL (i, name, strspn,
> >               IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
> >                               __strspn_sse42)
> > -             IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> > +             IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
> >
> >    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
> >    IFUNC_IMPL (i, name, strstr,
> > @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >    IFUNC_IMPL (i, name, wcscpy,
> >               IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
> >                               __wcscpy_ssse3)
> > -             IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
> > +             IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
> >
> >    /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
> >    IFUNC_IMPL (i, name, wcslen,
> > @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >               IFUNC_IMPL_ADD (array, i, wcsnlen,
> >                               CPU_FEATURE_USABLE (SSE4_1),
> >                               __wcsnlen_sse4_1)
> > -             IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
> > +             IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
> >
> >    /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
> >    IFUNC_IMPL (i, name, wmemchr,
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > index b555ff2fac..ee36525bcf 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > @@ -19,7 +19,7 @@
> >
> >  #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
> >
> >  static inline void *
> > @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
> >    if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
> >      return OPTIMIZE (sse42);
> >
> > -  return OPTIMIZE (sse2);
> > +  return OPTIMIZE (generic);
> >  }
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > index a15afa44e9..80529458d1 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > @@ -20,7 +20,11 @@
> >
> >  #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +#ifndef GENERIC
> > +# define GENERIC sse2
> > +#endif
> > +
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
> >    attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> > @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
> >    if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> >      return OPTIMIZE (sse2_unaligned);
> >
> > -  return OPTIMIZE (sse2);
> > +  return OPTIMIZE (GENERIC);
> >  }
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > index 2b29e7608a..88c1c502af 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > @@ -19,7 +19,11 @@
> >
> >  #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +#ifndef GENERIC
> > +# define GENERIC sse2
> > +#endif
> > +
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> > @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
> >    if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
> >      return OPTIMIZE (sse4_1);
> >
> > -  return OPTIMIZE (sse2);
> > +  return OPTIMIZE (GENERIC);
> >  }
> > diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
> > index b016e487e1..eb62fcf388 100644
> > --- a/sysdeps/x86_64/multiarch/stpncpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
> > @@ -1,4 +1,4 @@
> > -#define STPNCPY __stpncpy_sse2
> > +#define STPNCPY __stpncpy_generic
> >  #undef weak_alias
> >  #define weak_alias(ignored1, ignored2)
> >  #undef libc_hidden_def
> > diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
> > index 82fa53957d..879bc83f0b 100644
> > --- a/sysdeps/x86_64/multiarch/stpncpy.c
> > +++ b/sysdeps/x86_64/multiarch/stpncpy.c
> > @@ -25,6 +25,7 @@
> >  # undef stpncpy
> >  # undef __stpncpy
> >
> > +# define GENERIC generic
> >  # define SYMBOL_NAME stpncpy
> >  # include "ifunc-strcpy.h"
> >
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > new file mode 100644
> > index 0000000000..59f64f9fe8
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > @@ -0,0 +1,163 @@
> > +/* strcspn with SSE4.2 intrinsics
> > +   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <nmmintrin.h>
> > +#include <string.h>
> > +#include "varshift.h"
> > +
> > +/* We use 0x2:
> > +       _SIDD_SBYTE_OPS
> > +       | _SIDD_CMP_EQUAL_ANY
> > +       | _SIDD_POSITIVE_POLARITY
> > +       | _SIDD_LEAST_SIGNIFICANT
> > +   on pcmpistri to compare xmm/mem128
> > +
> > +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > +   X X X X X X X X X X X X X X X X
> > +
> > +   against xmm
> > +
> > +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > +   A A A A A A A A A A A A A A A A
> > +
> > +   to find out if the first 16byte data element has any byte A and
> > +   the offset of the first byte.  There are 3 cases:
> > +
> > +   1. The first 16byte data element has the byte A at the offset X.
> > +   2. The first 16byte data element has EOS and doesn't have the byte A.
> > +   3. The first 16byte data element is valid and doesn't have the byte A.
> > +
> > +   Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
> > +
> > +    1           X        1      0/1      0
> > +    2          16        0       1       0
> > +    3          16        0       0       0
> > +
> > +   We exit from the loop for cases 1 and 2 with jbe which branches
> > +   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
> > +   X for case 1.  */
> > +
> > +#ifndef STRCSPN_GENERIC
> > +# define STRCSPN_GENERIC __strcspn_generic
> > +# define STRCSPN_SSE42 __strcspn_sse42
> > +#endif
> > +
> > +#ifdef USE_AS_STRPBRK
> > +# define RETURN(val1, val2) return val1
> > +#else
> > +# define RETURN(val1, val2) return val2
> > +#endif
> > +
> > +extern
> > +#ifdef USE_AS_STRPBRK
> > +char *
> > +#else
> > +size_t
> > +#endif
> > +STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
> > +
> > +
> > +#ifdef USE_AS_STRPBRK
> > +char *
> > +#else
> > +size_t
> > +#endif
> > +__attribute__ ((section (".text.sse4.2")))
> > +STRCSPN_SSE42 (const char *s, const char *a)
> > +{
> > +  if (*a == 0)
> > +    RETURN (NULL, strlen (s));
> > +
> > +  const char *aligned;
> > +  __m128i mask, maskz, zero;
> > +  unsigned int maskz_bits;
> > +  unsigned int offset = (unsigned int) ((size_t) a & 15);
> > +  zero = _mm_set1_epi8 (0);
> > +  if (offset != 0)
> > +    {
> > +      /* Load masks.  */
> > +      aligned = (const char *) ((size_t) a & -16L);
> > +      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > +      maskz = _mm_cmpeq_epi8 (mask0, zero);
> > +
> > +      /* Find where the NULL terminator is.  */
> > +      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > +      if (maskz_bits != 0)
> > +        {
> > +          mask = __m128i_shift_right (mask0, offset);
> > +          offset = (unsigned int) ((size_t) s & 15);
> > +          if (offset)
> > +            goto start_unaligned;
> > +
> > +          aligned = s;
> > +          goto start_loop;
> > +        }
> > +    }
> > +
> > +  /* A is aligned.  */
> > +  mask = _mm_loadu_si128 ((__m128i *) a);
> > +  /* Find where the NULL terminator is.  */
> > +  maskz = _mm_cmpeq_epi8 (mask, zero);
> > +  maskz_bits = _mm_movemask_epi8 (maskz);
> > +  if (maskz_bits == 0)
> > +    {
> > +      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> > +         of A > 16.  */
> > +      if (a[16] != 0)
> > +        return STRCSPN_GENERIC (s, a);
> > +    }
> > +
> > +  aligned = s;
> > +  offset = (unsigned int) ((size_t) s & 15);
> > +  if (offset != 0)
> > +    {
> > +    start_unaligned:
> > +      /* Check partial string.  */
> > +      aligned = (const char *) ((size_t) s & -16L);
> > +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > +
> > +      value = __m128i_shift_right (value, offset);
> > +
> > +      unsigned int length = _mm_cmpistri (mask, value, 0x2);
> > +      /* No need to check ZFlag since ZFlag is always 1.  */
> > +      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > +      if (cflag)
> > +       RETURN ((char *) (s + length), length);
> > +      /* Find where the NULL terminator is.  */
> > +      unsigned int index = _mm_cmpistri (value, value, 0x3a);
> > +      if (index < 16 - offset)
> > +       RETURN (NULL, index);
> > +      aligned += 16;
> > +    }
> > +
> > +start_loop:
> > +  while (1)
> > +    {
> > +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > +      unsigned int index = _mm_cmpistri (mask, value, 0x2);
> > +      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > +      unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> > +      if (cflag)
> > +       RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> > +      if (zflag)
> > +       RETURN (NULL,
> > +               /* Find where the NULL terminator is.  */
> > +               (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> > +      aligned += 16;
> > +    }
> > +}
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
> > index c312fab8b1..423de2e2b2 100644
> > --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> > +++ b/sysdeps/x86_64/multiarch/strcspn-c.c
> > @@ -1,5 +1,5 @@
> > -/* strcspn with SSE4.2 intrinsics
> > -   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +/* strcspn.
> > +   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> >     This file is part of the GNU C Library.
> >
> >     The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,148 +16,13 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <nmmintrin.h>
> > -#include <string.h>
> > -#include "varshift.h"
> > +#if IS_IN (libc)
> >
> > -/* We use 0x2:
> > -       _SIDD_SBYTE_OPS
> > -       | _SIDD_CMP_EQUAL_ANY
> > -       | _SIDD_POSITIVE_POLARITY
> > -       | _SIDD_LEAST_SIGNIFICANT
> > -   on pcmpistri to compare xmm/mem128
> > +# include <sysdep.h>
> > +# define STRCSPN __strcspn_generic
> >
> > -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > -   X X X X X X X X X X X X X X X X
> > -
> > -   against xmm
> > -
> > -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > -   A A A A A A A A A A A A A A A A
> > -
> > -   to find out if the first 16byte data element has any byte A and
> > -   the offset of the first byte.  There are 3 cases:
> > -
> > -   1. The first 16byte data element has the byte A at the offset X.
> > -   2. The first 16byte data element has EOS and doesn't have the byte A.
> > -   3. The first 16byte data element is valid and doesn't have the byte A.
> > -
> > -   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > -
> > -    1           X        1      0/1      0
> > -    2          16        0       1       0
> > -    3          16        0       0       0
> > -
> > -   We exit from the loop for cases 1 and 2 with jbe which branches
> > -   when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
> > -   X for case 1.  */
> > -
> > -#ifndef STRCSPN_SSE2
> > -# define STRCSPN_SSE2 __strcspn_sse2
> > -# define STRCSPN_SSE42 __strcspn_sse42
> > -#endif
> > -
> > -#ifdef USE_AS_STRPBRK
> > -# define RETURN(val1, val2) return val1
> > -#else
> > -# define RETURN(val1, val2) return val2
> > -#endif
> > -
> > -extern
> > -#ifdef USE_AS_STRPBRK
> > -char *
> > -#else
> > -size_t
> > -#endif
> > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> > -
> > -
> > -#ifdef USE_AS_STRPBRK
> > -char *
> > -#else
> > -size_t
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRCSPN)
> >  #endif
> > -__attribute__ ((section (".text.sse4.2")))
> > -STRCSPN_SSE42 (const char *s, const char *a)
> > -{
> > -  if (*a == 0)
> > -    RETURN (NULL, strlen (s));
> > -
> > -  const char *aligned;
> > -  __m128i mask, maskz, zero;
> > -  unsigned int maskz_bits;
> > -  unsigned int offset = (unsigned int) ((size_t) a & 15);
> > -  zero = _mm_set1_epi8 (0);
> > -  if (offset != 0)
> > -    {
> > -      /* Load masks.  */
> > -      aligned = (const char *) ((size_t) a & -16L);
> > -      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > -      maskz = _mm_cmpeq_epi8 (mask0, zero);
> > -
> > -      /* Find where the NULL terminator is.  */
> > -      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > -      if (maskz_bits != 0)
> > -        {
> > -          mask = __m128i_shift_right (mask0, offset);
> > -          offset = (unsigned int) ((size_t) s & 15);
> > -          if (offset)
> > -            goto start_unaligned;
> > -
> > -          aligned = s;
> > -          goto start_loop;
> > -        }
> > -    }
> > -
> > -  /* A is aligned.  */
> > -  mask = _mm_loadu_si128 ((__m128i *) a);
> > -  /* Find where the NULL terminator is.  */
> > -  maskz = _mm_cmpeq_epi8 (mask, zero);
> > -  maskz_bits = _mm_movemask_epi8 (maskz);
> > -  if (maskz_bits == 0)
> > -    {
> > -      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> > -         of A > 16.  */
> > -      if (a[16] != 0)
> > -        return STRCSPN_SSE2 (s, a);
> > -    }
> > -
> > -  aligned = s;
> > -  offset = (unsigned int) ((size_t) s & 15);
> > -  if (offset != 0)
> > -    {
> > -    start_unaligned:
> > -      /* Check partial string.  */
> > -      aligned = (const char *) ((size_t) s & -16L);
> > -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > -
> > -      value = __m128i_shift_right (value, offset);
> > -
> > -      unsigned int length = _mm_cmpistri (mask, value, 0x2);
> > -      /* No need to check ZFlag since ZFlag is always 1.  */
> > -      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > -      if (cflag)
> > -       RETURN ((char *) (s + length), length);
> > -      /* Find where the NULL terminator is.  */
> > -      unsigned int index = _mm_cmpistri (value, value, 0x3a);
> > -      if (index < 16 - offset)
> > -       RETURN (NULL, index);
> > -      aligned += 16;
> > -    }
> >
> > -start_loop:
> > -  while (1)
> > -    {
> > -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > -      unsigned int index = _mm_cmpistri (mask, value, 0x2);
> > -      unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > -      unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> > -      if (cflag)
> > -       RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> > -      if (zflag)
> > -       RETURN (NULL,
> > -               /* Find where the NULL terminator is.  */
> > -               (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> > -      aligned += 16;
> > -    }
> > -}
> > +#include <string/strcspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c
> > deleted file mode 100644
> > index 3a04bb39fc..0000000000
> > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
> > +++ /dev/null
> > @@ -1,28 +0,0 @@
> > -/* strcspn.
> > -   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > -   This file is part of the GNU C Library.
> > -
> > -   The GNU C Library is free software; you can redistribute it and/or
> > -   modify it under the terms of the GNU Lesser General Public
> > -   License as published by the Free Software Foundation; either
> > -   version 2.1 of the License, or (at your option) any later version.
> > -
> > -   The GNU C Library is distributed in the hope that it will be useful,
> > -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > -   Lesser General Public License for more details.
> > -
> > -   You should have received a copy of the GNU Lesser General Public
> > -   License along with the GNU C Library; if not, see
> > -   <https://www.gnu.org/licenses/>.  */
> > -
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRCSPN __strcspn_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRCSPN)
> > -#endif
> > -
> > -#include <string/strcspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
> > index 93a7fab7ea..b729c033d9 100644
> > --- a/sysdeps/x86_64/multiarch/strncat-c.c
> > +++ b/sysdeps/x86_64/multiarch/strncat-c.c
> > @@ -1,2 +1,2 @@
> > -#define STRNCAT __strncat_sse2
> > +#define STRNCAT __strncat_generic
> >  #include <string/strncat.c>
> > diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
> > index b649343a97..50fba8a41f 100644
> > --- a/sysdeps/x86_64/multiarch/strncat.c
> > +++ b/sysdeps/x86_64/multiarch/strncat.c
> > @@ -24,6 +24,7 @@
> >  # undef strncat
> >
> >  # define SYMBOL_NAME strncat
> > +# define GENERIC generic
> >  # include "ifunc-strcpy.h"
> >
> >  libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
> > index 57c45ac7ab..183b0b8e0f 100644
> > --- a/sysdeps/x86_64/multiarch/strncpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/strncpy-c.c
> > @@ -1,4 +1,4 @@
> > -#define STRNCPY __strncpy_sse2
> > +#define STRNCPY __strncpy_generic
> >  #undef libc_hidden_builtin_def
> >  #define libc_hidden_builtin_def(strncpy)
> >
> > diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
> > index 2a780a7e16..7fc7d72ec5 100644
> > --- a/sysdeps/x86_64/multiarch/strncpy.c
> > +++ b/sysdeps/x86_64/multiarch/strncpy.c
> > @@ -24,6 +24,7 @@
> >  # undef strncpy
> >
> >  # define SYMBOL_NAME strncpy
> > +# define GENERIC generic
> >  # include "ifunc-strcpy.h"
> >
> >  libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > similarity index 74%
> > rename from sysdeps/x86_64/multiarch/strspn-sse2.c
> > rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > index 61cc6cb0a5..8700276773 100644
> > --- a/sysdeps/x86_64/multiarch/strspn-sse2.c
> > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > @@ -1,5 +1,5 @@
> > -/* strspn.
> > -   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > +/* strpbrk with SSE4.2 intrinsics
> > +   Copyright (C) 2022 Free Software Foundation, Inc.
> >     This file is part of the GNU C Library.
> >
> >     The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,13 +16,7 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRSPN __strspn_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRSPN)
> > -#endif
> > -
> > -#include <string/strspn.c>
> > +#define USE_AS_STRPBRK
> > +#define STRCSPN_GENERIC __strpbrk_generic
> > +#define STRCSPN_SSE42 __strpbrk_sse42
> > +#include "strcspn-c-sse4.c"
> > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
> > index abf4ff7f1a..d31acfe495 100644
> > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> > +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
> > @@ -1,5 +1,5 @@
> > -/* strpbrk with SSE4.2 intrinsics
> > -   Copyright (C) 2022 Free Software Foundation, Inc.
> > +/* strpbrk.
> > +   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> >     This file is part of the GNU C Library.
> >
> >     The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,7 +16,13 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#define USE_AS_STRPBRK
> > -#define STRCSPN_SSE2 __strpbrk_sse2
> > -#define STRCSPN_SSE42 __strpbrk_sse42
> > -#include "strcspn-c.c"
> > +#if IS_IN (libc)
> > +
> > +# include <sysdep.h>
> > +# define STRPBRK __strpbrk_generic
> > +
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRPBRK)
> > +#endif
> > +
> > +#include <string/strpbrk.c>
> > diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > deleted file mode 100644
> > index d03214c4fb..0000000000
> > --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > +++ /dev/null
> > @@ -1,28 +0,0 @@
> > -/* strpbrk.
> > -   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > -   This file is part of the GNU C Library.
> > -
> > -   The GNU C Library is free software; you can redistribute it and/or
> > -   modify it under the terms of the GNU Lesser General Public
> > -   License as published by the Free Software Foundation; either
> > -   version 2.1 of the License, or (at your option) any later version.
> > -
> > -   The GNU C Library is distributed in the hope that it will be useful,
> > -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > -   Lesser General Public License for more details.
> > -
> > -   You should have received a copy of the GNU Lesser General Public
> > -   License along with the GNU C Library; if not, see
> > -   <https://www.gnu.org/licenses/>.  */
> > -
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRPBRK __strpbrk_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRPBRK)
> > -#endif
> > -
> > -#include <string/strpbrk.c>
> > diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> > new file mode 100644
> > index 0000000000..d044916688
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> > @@ -0,0 +1,136 @@
> > +/* strspn with SSE4.2 intrinsics
> > +   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <nmmintrin.h>
> > +#include <string.h>
> > +#include "varshift.h"
> > +
> > +/* We use 0x12:
> > +       _SIDD_SBYTE_OPS
> > +       | _SIDD_CMP_EQUAL_ANY
> > +       | _SIDD_NEGATIVE_POLARITY
> > +       | _SIDD_LEAST_SIGNIFICANT
> > +   on pcmpistri to compare xmm/mem128
> > +
> > +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > +   X X X X X X X X X X X X X X X X
> > +
> > +   against xmm
> > +
> > +   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > +   A A A A A A A A A A A A A A A A
> > +
> > +   to find out if the first 16byte data element has any non-A byte and
> > +   the offset of the first byte.  There are 2 cases:
> > +
> > +   1. The first 16byte data element has the non-A byte, including
> > +      EOS, at the offset X.
> > +   2. The first 16byte data element is valid and doesn't have the non-A
> > +      byte.
> > +
> > +   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > +
> > +   case                ECX     CFlag   ZFlag   SFlag
> > +    1           X        1      0/1      0
> > +    2          16        0       0       0
> > +
> > +   We exit from the loop for case 1.  */
> > +
> > +extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
> > +
> > +
> > +size_t
> > +__attribute__ ((section (".text.sse4.2")))
> > +__strspn_sse42 (const char *s, const char *a)
> > +{
> > +  if (*a == 0)
> > +    return 0;
> > +
> > +  const char *aligned;
> > +  __m128i mask, maskz, zero;
> > +  unsigned int maskz_bits;
> > +  unsigned int offset = (int) ((size_t) a & 15);
> > +  zero = _mm_set1_epi8 (0);
> > +  if (offset != 0)
> > +    {
> > +      /* Load masks.  */
> > +      aligned = (const char *) ((size_t) a & -16L);
> > +      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > +      maskz = _mm_cmpeq_epi8 (mask0, zero);
> > +
> > +      /* Find where the NULL terminator is.  */
> > +      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > +      if (maskz_bits != 0)
> > +        {
> > +          mask = __m128i_shift_right (mask0, offset);
> > +          offset = (unsigned int) ((size_t) s & 15);
> > +          if (offset)
> > +            goto start_unaligned;
> > +
> > +          aligned = s;
> > +          goto start_loop;
> > +        }
> > +    }
> > +
> > +  /* A is aligned.  */
> > +  mask = _mm_loadu_si128 ((__m128i *) a);
> > +
> > +  /* Find where the NULL terminator is.  */
> > +  maskz = _mm_cmpeq_epi8 (mask, zero);
> > +  maskz_bits = _mm_movemask_epi8 (maskz);
> > +  if (maskz_bits == 0)
> > +    {
> > +      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> > +         of A > 16.  */
> > +      if (a[16] != 0)
> > +        return __strspn_generic (s, a);
> > +    }
> > +  aligned = s;
> > +  offset = (unsigned int) ((size_t) s & 15);
> > +
> > +  if (offset != 0)
> > +    {
> > +    start_unaligned:
> > +      /* Check partial string.  */
> > +      aligned = (const char *) ((size_t) s & -16L);
> > +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > +      __m128i adj_value = __m128i_shift_right (value, offset);
> > +
> > +      unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> > +      /* No need to check CFlag since it is always 1.  */
> > +      if (length < 16 - offset)
> > +       return length;
> > +      /* Find where the NULL terminator is.  */
> > +      maskz = _mm_cmpeq_epi8 (value, zero);
> > +      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > +      if (maskz_bits != 0)
> > +       return length;
> > +      aligned += 16;
> > +    }
> > +
> > +start_loop:
> > +  while (1)
> > +    {
> > +      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > +      unsigned int index = _mm_cmpistri (mask, value, 0x12);
> > +      unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> > +      if (cflag)
> > +       return (size_t) (aligned + index - s);
> > +      aligned += 16;
> > +    }
> > +}
> > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
> > index 6124033ceb..6b50c36432 100644
> > --- a/sysdeps/x86_64/multiarch/strspn-c.c
> > +++ b/sysdeps/x86_64/multiarch/strspn-c.c
> > @@ -1,5 +1,5 @@
> > -/* strspn with SSE4.2 intrinsics
> > -   Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +/* strspn.
> > +   Copyright (C) 2017-2022 Free Software Foundation, Inc.
> >     This file is part of the GNU C Library.
> >
> >     The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,121 +16,13 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <nmmintrin.h>
> > -#include <string.h>
> > -#include "varshift.h"
> > +#if IS_IN (libc)
> >
> > -/* We use 0x12:
> > -       _SIDD_SBYTE_OPS
> > -       | _SIDD_CMP_EQUAL_ANY
> > -       | _SIDD_NEGATIVE_POLARITY
> > -       | _SIDD_LEAST_SIGNIFICANT
> > -   on pcmpistri to compare xmm/mem128
> > +# include <sysdep.h>
> > +# define STRSPN __strspn_generic
> >
> > -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > -   X X X X X X X X X X X X X X X X
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRSPN)
> > +#endif
> >
> > -   against xmm
> > -
> > -   0 1 2 3 4 5 6 7 8 9 A B C D E F
> > -   A A A A A A A A A A A A A A A A
> > -
> > -   to find out if the first 16byte data element has any non-A byte and
> > -   the offset of the first byte.  There are 2 cases:
> > -
> > -   1. The first 16byte data element has the non-A byte, including
> > -      EOS, at the offset X.
> > -   2. The first 16byte data element is valid and doesn't have the non-A
> > -      byte.
> > -
> > -   Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > -
> > -   case                ECX     CFlag   ZFlag   SFlag
> > -    1           X        1      0/1      0
> > -    2          16        0       0       0
> > -
> > -   We exit from the loop for case 1.  */
> > -
> > -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
> > -
> > -
> > -size_t
> > -__attribute__ ((section (".text.sse4.2")))
> > -__strspn_sse42 (const char *s, const char *a)
> > -{
> > -  if (*a == 0)
> > -    return 0;
> > -
> > -  const char *aligned;
> > -  __m128i mask, maskz, zero;
> > -  unsigned int maskz_bits;
> > -  unsigned int offset = (int) ((size_t) a & 15);
> > -  zero = _mm_set1_epi8 (0);
> > -  if (offset != 0)
> > -    {
> > -      /* Load masks.  */
> > -      aligned = (const char *) ((size_t) a & -16L);
> > -      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > -      maskz = _mm_cmpeq_epi8 (mask0, zero);
> > -
> > -      /* Find where the NULL terminator is.  */
> > -      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > -      if (maskz_bits != 0)
> > -        {
> > -          mask = __m128i_shift_right (mask0, offset);
> > -          offset = (unsigned int) ((size_t) s & 15);
> > -          if (offset)
> > -            goto start_unaligned;
> > -
> > -          aligned = s;
> > -          goto start_loop;
> > -        }
> > -    }
> > -
> > -  /* A is aligned.  */
> > -  mask = _mm_loadu_si128 ((__m128i *) a);
> > -
> > -  /* Find where the NULL terminator is.  */
> > -  maskz = _mm_cmpeq_epi8 (mask, zero);
> > -  maskz_bits = _mm_movemask_epi8 (maskz);
> > -  if (maskz_bits == 0)
> > -    {
> > -      /* There is no NULL terminator.  Don't use SSE4.2 if the length
> > -         of A > 16.  */
> > -      if (a[16] != 0)
> > -        return __strspn_sse2 (s, a);
> > -    }
> > -  aligned = s;
> > -  offset = (unsigned int) ((size_t) s & 15);
> > -
> > -  if (offset != 0)
> > -    {
> > -    start_unaligned:
> > -      /* Check partial string.  */
> > -      aligned = (const char *) ((size_t) s & -16L);
> > -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > -      __m128i adj_value = __m128i_shift_right (value, offset);
> > -
> > -      unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> > -      /* No need to check CFlag since it is always 1.  */
> > -      if (length < 16 - offset)
> > -       return length;
> > -      /* Find where the NULL terminator is.  */
> > -      maskz = _mm_cmpeq_epi8 (value, zero);
> > -      maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > -      if (maskz_bits != 0)
> > -       return length;
> > -      aligned += 16;
> > -    }
> > -
> > -start_loop:
> > -  while (1)
> > -    {
> > -      __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > -      unsigned int index = _mm_cmpistri (mask, value, 0x12);
> > -      unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> > -      if (cflag)
> > -       return (size_t) (aligned + index - s);
> > -      aligned += 16;
> > -    }
> > -}
> > +#include <string/strspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
> > index 26d6984e9b..fa38dd898d 100644
> > --- a/sysdeps/x86_64/multiarch/wcscpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
> > @@ -1,5 +1,5 @@
> >  #if IS_IN (libc)
> > -# define WCSCPY  __wcscpy_sse2
> > +# define WCSCPY  __wcscpy_generic
> >  #endif
> >
> >  #include <wcsmbs/wcscpy.c>
> > diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> > index 6a2d1421d9..53c3228dc2 100644
> > --- a/sysdeps/x86_64/multiarch/wcscpy.c
> > +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> > @@ -26,7 +26,7 @@
> >  # define SYMBOL_NAME wcscpy
> >  # include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
> >
> >  static inline void *
> > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> >    if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
> >      return OPTIMIZE (ssse3);
> >
> > -  return OPTIMIZE (sse2);
> > +  return OPTIMIZE (generic);
> >  }
> >
> >  libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > index e1ec7cfbb5..1c9c04241a 100644
> > --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > @@ -1,9 +1,9 @@
> >  #if IS_IN (libc)
> >  # include <wchar.h>
> >
> > -# define WCSNLEN __wcsnlen_sse2
> > +# define WCSNLEN __wcsnlen_generic
> >
> > -extern __typeof (wcsnlen) __wcsnlen_sse2;
> > +extern __typeof (wcsnlen) __wcsnlen_generic;
> >  #endif
> >
> >  #include "wcsmbs/wcsnlen.c"
> > diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
> > index baa26666a8..05b7a211de 100644
> > --- a/sysdeps/x86_64/multiarch/wcsnlen.c
> > +++ b/sysdeps/x86_64/multiarch/wcsnlen.c
> > @@ -24,6 +24,7 @@
> >  # undef __wcsnlen
> >
> >  # define SYMBOL_NAME wcsnlen
> > +# define GENERIC generic
> >  # include "ifunc-wcslen.h"
> >
> >  libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
> > --
> > 2.34.1
> >
>
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v3] x86: Rename generic functions with unique postfix for clarity
  2022-06-09  4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
  2022-06-10  0:58   ` [PATCH v2] " Noah Goldstein
@ 2022-06-16 22:11   ` Noah Goldstein
  2022-06-16 22:43     ` H.J. Lu
  1 sibling, 1 reply; 11+ messages in thread
From: Noah Goldstein @ 2022-06-16 22:11 UTC (permalink / raw)
  To: libc-alpha

No functions are changed. It just renames generic implementations from
'{func}_sse2' to '{func}_generic'. This is just because the postfix
"_sse2" was overloaded and was used for files that had hand-optimized
sse2 assembly implementations and files that just redirected back
to the generic implementation.

Full xcheck passed on x86_64.
---
Note this change is in preparation for further changes to the file
organization in the multiarch directory.
 sysdeps/x86_64/multiarch/Makefile             | 33 ++++++++++---------
 sysdeps/x86_64/multiarch/ifunc-avx2.h         |  8 +++--
 sysdeps/x86_64/multiarch/ifunc-impl-list.c    | 18 +++++-----
 sysdeps/x86_64/multiarch/ifunc-sse4_2.h       |  4 +--
 sysdeps/x86_64/multiarch/ifunc-strcpy.h       |  8 +++--
 sysdeps/x86_64/multiarch/ifunc-wcslen.h       |  8 +++--
 sysdeps/x86_64/multiarch/stpncpy-c.c          |  7 ----
 sysdeps/x86_64/multiarch/stpncpy-generic.c    | 26 +++++++++++++++
 sysdeps/x86_64/multiarch/stpncpy.c            |  1 +
 .../{strcspn-sse2.c => strcspn-generic.c}     |  2 +-
 .../multiarch/{strcspn-c.c => strcspn-sse4.c} |  8 ++---
 sysdeps/x86_64/multiarch/strncat-c.c          |  2 --
 sysdeps/x86_64/multiarch/strncat-generic.c    | 21 ++++++++++++
 sysdeps/x86_64/multiarch/strncat.c            |  1 +
 sysdeps/x86_64/multiarch/strncpy-c.c          |  5 ---
 sysdeps/x86_64/multiarch/strncpy-generic.c    | 24 ++++++++++++++
 sysdeps/x86_64/multiarch/strncpy.c            |  1 +
 .../{strpbrk-sse2.c => strpbrk-generic.c}     |  2 +-
 .../multiarch/{strpbrk-c.c => strpbrk-sse4.c} |  4 +--
 .../{strspn-sse2.c => strspn-generic.c}       |  2 +-
 .../multiarch/{strspn-c.c => strspn-sse4.c}   |  4 +--
 sysdeps/x86_64/multiarch/wcscpy-c.c           |  5 ---
 sysdeps/x86_64/multiarch/wcscpy-generic.c     | 24 ++++++++++++++
 sysdeps/x86_64/multiarch/wcscpy.c             |  4 +--
 .../{wcsncmp-sse2.c => wcsncmp-generic.c}     |  4 +--
 sysdeps/x86_64/multiarch/wcsncmp.c            |  2 ++
 sysdeps/x86_64/multiarch/wcsnlen-c.c          |  9 -----
 sysdeps/x86_64/multiarch/wcsnlen-generic.c    | 28 ++++++++++++++++
 sysdeps/x86_64/multiarch/wcsnlen.c            |  1 +
 29 files changed, 190 insertions(+), 76 deletions(-)
 delete mode 100644 sysdeps/x86_64/multiarch/stpncpy-c.c
 create mode 100644 sysdeps/x86_64/multiarch/stpncpy-generic.c
 rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-generic.c} (96%)
 rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-sse4.c} (96%)
 delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
 create mode 100644 sysdeps/x86_64/multiarch/strncat-generic.c
 delete mode 100644 sysdeps/x86_64/multiarch/strncpy-c.c
 create mode 100644 sysdeps/x86_64/multiarch/strncpy-generic.c
 rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-generic.c} (96%)
 rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-sse4.c} (92%)
 rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-generic.c} (96%)
 rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-sse4.c} (97%)
 delete mode 100644 sysdeps/x86_64/multiarch/wcscpy-c.c
 create mode 100644 sysdeps/x86_64/multiarch/wcscpy-generic.c
 rename sysdeps/x86_64/multiarch/{wcsncmp-sse2.c => wcsncmp-generic.c} (92%)
 delete mode 100644 sysdeps/x86_64/multiarch/wcsnlen-c.c
 create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-generic.c

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3d153cac35..666ee4d5d6 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -43,7 +43,7 @@ sysdep_routines += \
   stpcpy-sse2-unaligned \
   stpncpy-avx2 \
   stpncpy-avx2-rtm \
-  stpncpy-c \
+  stpncpy-generic \
   stpncpy-evex \
   stpncpy-sse2-unaligned \
   strcasecmp_l-avx2 \
@@ -76,8 +76,8 @@ sysdep_routines += \
   strcpy-evex \
   strcpy-sse2 \
   strcpy-sse2-unaligned \
-  strcspn-c \
-  strcspn-sse2 \
+  strcspn-generic \
+  strcspn-sse4 \
   strlen-avx2 \
   strlen-avx2-rtm \
   strlen-evex \
@@ -90,7 +90,7 @@ sysdep_routines += \
   strncase_l-sse4_2 \
   strncat-avx2 \
   strncat-avx2-rtm \
-  strncat-c \
+  strncat-generic \
   strncat-evex \
   strncat-sse2-unaligned \
   strncmp-avx2 \
@@ -100,7 +100,7 @@ sysdep_routines += \
   strncmp-sse4_2 \
   strncpy-avx2 \
   strncpy-avx2-rtm \
-  strncpy-c \
+  strncpy-generic \
   strncpy-evex \
   strncpy-sse2-unaligned \
   strnlen-avx2 \
@@ -108,22 +108,23 @@ sysdep_routines += \
   strnlen-evex \
   strnlen-evex512 \
   strnlen-sse2 \
-  strpbrk-c \
-  strpbrk-sse2 \
+  strpbrk-generic \
+  strpbrk-sse4 \
   strrchr-avx2 \
   strrchr-avx2-rtm \
   strrchr-evex \
   strrchr-sse2 \
-  strspn-c \
-  strspn-sse2 \
+  strspn-generic \
+  strspn-sse4 \
   strstr-avx512 \
   strstr-sse2-unaligned \
   varshift \
 # sysdep_routines
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+
+CFLAGS-strcspn-sse4.c += -msse4
+CFLAGS-strpbrk-sse4.c += -msse4
+CFLAGS-strspn-sse4.c += -msse4
+
 CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
 endif
 
@@ -137,7 +138,7 @@ sysdep_routines += \
   wcscmp-avx2-rtm \
   wcscmp-evex \
   wcscmp-sse2 \
-  wcscpy-c \
+  wcscpy-generic \
   wcscpy-ssse3 \
   wcslen-avx2 \
   wcslen-avx2-rtm \
@@ -147,11 +148,11 @@ sysdep_routines += \
   wcslen-sse4_1 \
   wcsncmp-avx2 \
   wcsncmp-avx2-rtm \
+  wcsncmp-generic \
   wcsncmp-evex \
-  wcsncmp-sse2 \
   wcsnlen-avx2 \
   wcsnlen-avx2-rtm \
-  wcsnlen-c \
+  wcsnlen-generic \
   wcsnlen-evex \
   wcsnlen-evex512 \
   wcsnlen-sse4_1 \
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index 4289df29ec..1d9cdfcfec 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -19,7 +19,11 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
@@ -44,5 +48,5 @@ IFUNC_SELECTOR (void)
 	return OPTIMIZE (avx2);
     }
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (GENERIC);
 }
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index dc595752e0..883362f63d 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -367,7 +367,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __stpncpy_evex)
 	      IFUNC_IMPL_ADD (array, i, stpncpy, 1,
 			      __stpncpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/stpcpy.c.  */
   IFUNC_IMPL (i, name, stpcpy,
@@ -526,7 +526,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcspn,
 	      IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strcspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
+	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
 
   /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
   IFUNC_IMPL (i, name, strncasecmp,
@@ -580,7 +580,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncat_evex)
 	      IFUNC_IMPL_ADD (array, i, strncat, 1,
 			      __strncat_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
+	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
 
   /* Support sysdeps/x86_64/multiarch/strncpy.c.  */
   IFUNC_IMPL (i, name, strncpy,
@@ -596,20 +596,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncpy_evex)
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1,
 			      __strncpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
   IFUNC_IMPL (i, name, strpbrk,
 	      IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
 			      __strpbrk_sse42)
-	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
+	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
 
 
   /* Support sysdeps/x86_64/multiarch/strspn.c.  */
   IFUNC_IMPL (i, name, strspn,
 	      IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
 			      __strspn_sse42)
-	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
+	      IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
 
   /* Support sysdeps/x86_64/multiarch/strstr.c.  */
   IFUNC_IMPL (i, name, strstr,
@@ -686,13 +686,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			       && CPU_FEATURE_USABLE (AVX512BW)
 			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsncmp_evex)
-	      IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
+	      IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
 
   /* Support sysdeps/x86_64/multiarch/wcscpy.c.  */
   IFUNC_IMPL (i, name, wcscpy,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
 			      __wcscpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
 
   /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
   IFUNC_IMPL (i, name, wcslen,
@@ -744,7 +744,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcsnlen,
 			      CPU_FEATURE_USABLE (SSE4_1),
 			      __wcsnlen_sse4_1)
-	      IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+	      IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
 
   /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
   IFUNC_IMPL (i, name, wmemchr,
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
index b555ff2fac..ee36525bcf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
@@ -19,7 +19,7 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
 
 static inline void *
@@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
     return OPTIMIZE (sse42);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (generic);
 }
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
index a15afa44e9..80529458d1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
@@ -20,7 +20,11 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
@@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return OPTIMIZE (sse2_unaligned);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (GENERIC);
 }
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
index 2b29e7608a..88c1c502af 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
@@ -19,7 +19,11 @@
 
 #include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
@@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
     return OPTIMIZE (sse4_1);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (GENERIC);
 }
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
deleted file mode 100644
index b016e487e1..0000000000
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#define STPNCPY __stpncpy_sse2
-#undef weak_alias
-#define weak_alias(ignored1, ignored2)
-#undef libc_hidden_def
-#define libc_hidden_def(stpncpy)
-
-#include <string/stpncpy.c>
diff --git a/sysdeps/x86_64/multiarch/stpncpy-generic.c b/sysdeps/x86_64/multiarch/stpncpy-generic.c
new file mode 100644
index 0000000000..87826845b0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpncpy-generic.c
@@ -0,0 +1,26 @@
+/* stpncpy.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#define STPNCPY __stpncpy_generic
+#undef weak_alias
+#define weak_alias(ignored1, ignored2)
+#undef libc_hidden_def
+#define libc_hidden_def(stpncpy)
+
+#include <string/stpncpy.c>
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
index 82fa53957d..879bc83f0b 100644
--- a/sysdeps/x86_64/multiarch/stpncpy.c
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -25,6 +25,7 @@
 # undef stpncpy
 # undef __stpncpy
 
+# define GENERIC generic
 # define SYMBOL_NAME stpncpy
 # include "ifunc-strcpy.h"
 
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-generic.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strcspn-sse2.c
rename to sysdeps/x86_64/multiarch/strcspn-generic.c
index 3a04bb39fc..423de2e2b2 100644
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strcspn-generic.c
@@ -19,7 +19,7 @@
 #if IS_IN (libc)
 
 # include <sysdep.h>
-# define STRCSPN __strcspn_sse2
+# define STRCSPN __strcspn_generic
 
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(STRCSPN)
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-sse4.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strcspn-c.c
rename to sysdeps/x86_64/multiarch/strcspn-sse4.c
index c312fab8b1..59f64f9fe8 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-sse4.c
@@ -52,8 +52,8 @@
    when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
    X for case 1.  */
 
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
+#ifndef STRCSPN_GENERIC
+# define STRCSPN_GENERIC __strcspn_generic
 # define STRCSPN_SSE42 __strcspn_sse42
 #endif
 
@@ -69,7 +69,7 @@ char *
 #else
 size_t
 #endif
-STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
+STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
 
 
 #ifdef USE_AS_STRPBRK
@@ -119,7 +119,7 @@ STRCSPN_SSE42 (const char *s, const char *a)
       /* There is no NULL terminator.  Don't use SSE4.2 if the length
          of A > 16.  */
       if (a[16] != 0)
-        return STRCSPN_SSE2 (s, a);
+        return STRCSPN_GENERIC (s, a);
     }
 
   aligned = s;
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
deleted file mode 100644
index 93a7fab7ea..0000000000
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define STRNCAT __strncat_sse2
-#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat-generic.c b/sysdeps/x86_64/multiarch/strncat-generic.c
new file mode 100644
index 0000000000..0090669cd1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat-generic.c
@@ -0,0 +1,21 @@
+/* strncat.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#define STRNCAT __strncat_generic
+#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
index b649343a97..50fba8a41f 100644
--- a/sysdeps/x86_64/multiarch/strncat.c
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -24,6 +24,7 @@
 # undef strncat
 
 # define SYMBOL_NAME strncat
+# define GENERIC generic
 # include "ifunc-strcpy.h"
 
 libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
deleted file mode 100644
index 57c45ac7ab..0000000000
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#define STRNCPY __strncpy_sse2
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(strncpy)
-
-#include <string/strncpy.c>
diff --git a/sysdeps/x86_64/multiarch/strncpy-generic.c b/sysdeps/x86_64/multiarch/strncpy-generic.c
new file mode 100644
index 0000000000..9916153dd5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncpy-generic.c
@@ -0,0 +1,24 @@
+/* strncpy.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#define STRNCPY __strncpy_generic
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(strncpy)
+
+#include <string/strncpy.c>
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
index 2a780a7e16..7fc7d72ec5 100644
--- a/sysdeps/x86_64/multiarch/strncpy.c
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -24,6 +24,7 @@
 # undef strncpy
 
 # define SYMBOL_NAME strncpy
+# define GENERIC generic
 # include "ifunc-strcpy.h"
 
 libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-generic.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c
rename to sysdeps/x86_64/multiarch/strpbrk-generic.c
index d03214c4fb..d31acfe495 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-generic.c
@@ -19,7 +19,7 @@
 #if IS_IN (libc)
 
 # include <sysdep.h>
-# define STRPBRK __strpbrk_sse2
+# define STRPBRK __strpbrk_generic
 
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(STRPBRK)
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
similarity index 92%
rename from sysdeps/x86_64/multiarch/strpbrk-c.c
rename to sysdeps/x86_64/multiarch/strpbrk-sse4.c
index abf4ff7f1a..bf74d660d5 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
@@ -17,6 +17,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define USE_AS_STRPBRK
-#define STRCSPN_SSE2 __strpbrk_sse2
+#define STRCSPN_GENERIC __strpbrk_generic
 #define STRCSPN_SSE42 __strpbrk_sse42
-#include "strcspn-c.c"
+#include "strcspn-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-generic.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strspn-sse2.c
rename to sysdeps/x86_64/multiarch/strspn-generic.c
index 61cc6cb0a5..6b50c36432 100644
--- a/sysdeps/x86_64/multiarch/strspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strspn-generic.c
@@ -19,7 +19,7 @@
 #if IS_IN (libc)
 
 # include <sysdep.h>
-# define STRSPN __strspn_sse2
+# define STRSPN __strspn_generic
 
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(STRSPN)
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-sse4.c
similarity index 97%
rename from sysdeps/x86_64/multiarch/strspn-c.c
rename to sysdeps/x86_64/multiarch/strspn-sse4.c
index 6124033ceb..d044916688 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-sse4.c
@@ -51,7 +51,7 @@
 
    We exit from the loop for case 1.  */
 
-extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
+extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
 
 
 size_t
@@ -98,7 +98,7 @@ __strspn_sse42 (const char *s, const char *a)
       /* There is no NULL terminator.  Don't use SSE4.2 if the length
          of A > 16.  */
       if (a[16] != 0)
-        return __strspn_sse2 (s, a);
+        return __strspn_generic (s, a);
     }
   aligned = s;
   offset = (unsigned int) ((size_t) s & 15);
diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
deleted file mode 100644
index 26d6984e9b..0000000000
--- a/sysdeps/x86_64/multiarch/wcscpy-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#if IS_IN (libc)
-# define WCSCPY  __wcscpy_sse2
-#endif
-
-#include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy-generic.c b/sysdeps/x86_64/multiarch/wcscpy-generic.c
new file mode 100644
index 0000000000..5ea905f33f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscpy-generic.c
@@ -0,0 +1,24 @@
+/* wcscpy.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#if IS_IN (libc)
+# define WCSCPY  __wcscpy_generic
+#endif
+
+#include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
index 6a2d1421d9..53c3228dc2 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.c
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -26,7 +26,7 @@
 # define SYMBOL_NAME wcscpy
 # include <init-arch.h>
 
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
 
 static inline void *
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
     return OPTIMIZE (ssse3);
 
-  return OPTIMIZE (sse2);
+  return OPTIMIZE (generic);
 }
 
 libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
similarity index 92%
rename from sysdeps/x86_64/multiarch/wcsncmp-sse2.c
rename to sysdeps/x86_64/multiarch/wcsncmp-generic.c
index 8d9cbbb900..658d541886 100644
--- a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
+++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
@@ -1,4 +1,4 @@
-/* wcsncmp optimized with SSE2.
+/* wcsncmp.
    Copyright (C) 2018-2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,5 +16,5 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define WCSNCMP __wcsncmp_sse2
+#define WCSNCMP __wcsncmp_generic
 #include <wcsmbs/wcsncmp.c>
diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c
index 5e00af2ca5..1836f794dd 100644
--- a/sysdeps/x86_64/multiarch/wcsncmp.c
+++ b/sysdeps/x86_64/multiarch/wcsncmp.c
@@ -24,6 +24,8 @@
 # undef wcsncmp
 # undef __wcsncmp
 
+# define GENERIC generic
+
 # define SYMBOL_NAME wcsncmp
 # include "ifunc-avx2.h"
 
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
deleted file mode 100644
index e1ec7cfbb5..0000000000
--- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#if IS_IN (libc)
-# include <wchar.h>
-
-# define WCSNLEN __wcsnlen_sse2
-
-extern __typeof (wcsnlen) __wcsnlen_sse2;
-#endif
-
-#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
new file mode 100644
index 0000000000..2d75da7709
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
@@ -0,0 +1,28 @@
+/* wcsnlen.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#if IS_IN (libc)
+# include <wchar.h>
+
+# define WCSNLEN __wcsnlen_generic
+
+extern __typeof (wcsnlen) __wcsnlen_generic;
+#endif
+
+#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index baa26666a8..05b7a211de 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -24,6 +24,7 @@
 # undef __wcsnlen
 
 # define SYMBOL_NAME wcsnlen
+# define GENERIC generic
 # include "ifunc-wcslen.h"
 
 libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
-- 
2.34.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3] x86: Rename generic functions with unique postfix for clarity
  2022-06-16 22:11   ` [PATCH v3] " Noah Goldstein
@ 2022-06-16 22:43     ` H.J. Lu
  0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2022-06-16 22:43 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Thu, Jun 16, 2022 at 3:12 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> No functions are changed. It just renames generic implementations from
> '{func}_sse2' to '{func}_generic'. This is just because the postfix
> "_sse2" was overloaded and was used for files that had hand-optimized
> sse2 assembly implementations and files that just redirected back
> to the generic implementation.
>
> Full xcheck passed on x86_64.
> ---
> Note this change is in preparation for further changes to the file
> organization in the multiarch directory.
>  sysdeps/x86_64/multiarch/Makefile             | 33 ++++++++++---------
>  sysdeps/x86_64/multiarch/ifunc-avx2.h         |  8 +++--
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c    | 18 +++++-----
>  sysdeps/x86_64/multiarch/ifunc-sse4_2.h       |  4 +--
>  sysdeps/x86_64/multiarch/ifunc-strcpy.h       |  8 +++--
>  sysdeps/x86_64/multiarch/ifunc-wcslen.h       |  8 +++--
>  sysdeps/x86_64/multiarch/stpncpy-c.c          |  7 ----
>  sysdeps/x86_64/multiarch/stpncpy-generic.c    | 26 +++++++++++++++
>  sysdeps/x86_64/multiarch/stpncpy.c            |  1 +
>  .../{strcspn-sse2.c => strcspn-generic.c}     |  2 +-
>  .../multiarch/{strcspn-c.c => strcspn-sse4.c} |  8 ++---
>  sysdeps/x86_64/multiarch/strncat-c.c          |  2 --
>  sysdeps/x86_64/multiarch/strncat-generic.c    | 21 ++++++++++++
>  sysdeps/x86_64/multiarch/strncat.c            |  1 +
>  sysdeps/x86_64/multiarch/strncpy-c.c          |  5 ---
>  sysdeps/x86_64/multiarch/strncpy-generic.c    | 24 ++++++++++++++
>  sysdeps/x86_64/multiarch/strncpy.c            |  1 +
>  .../{strpbrk-sse2.c => strpbrk-generic.c}     |  2 +-
>  .../multiarch/{strpbrk-c.c => strpbrk-sse4.c} |  4 +--
>  .../{strspn-sse2.c => strspn-generic.c}       |  2 +-
>  .../multiarch/{strspn-c.c => strspn-sse4.c}   |  4 +--
>  sysdeps/x86_64/multiarch/wcscpy-c.c           |  5 ---
>  sysdeps/x86_64/multiarch/wcscpy-generic.c     | 24 ++++++++++++++
>  sysdeps/x86_64/multiarch/wcscpy.c             |  4 +--
>  .../{wcsncmp-sse2.c => wcsncmp-generic.c}     |  4 +--
>  sysdeps/x86_64/multiarch/wcsncmp.c            |  2 ++
>  sysdeps/x86_64/multiarch/wcsnlen-c.c          |  9 -----
>  sysdeps/x86_64/multiarch/wcsnlen-generic.c    | 28 ++++++++++++++++
>  sysdeps/x86_64/multiarch/wcsnlen.c            |  1 +
>  29 files changed, 190 insertions(+), 76 deletions(-)
>  delete mode 100644 sysdeps/x86_64/multiarch/stpncpy-c.c
>  create mode 100644 sysdeps/x86_64/multiarch/stpncpy-generic.c
>  rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-generic.c} (96%)
>  rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-sse4.c} (96%)
>  delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
>  create mode 100644 sysdeps/x86_64/multiarch/strncat-generic.c
>  delete mode 100644 sysdeps/x86_64/multiarch/strncpy-c.c
>  create mode 100644 sysdeps/x86_64/multiarch/strncpy-generic.c
>  rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-generic.c} (96%)
>  rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-sse4.c} (92%)
>  rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-generic.c} (96%)
>  rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-sse4.c} (97%)
>  delete mode 100644 sysdeps/x86_64/multiarch/wcscpy-c.c
>  create mode 100644 sysdeps/x86_64/multiarch/wcscpy-generic.c
>  rename sysdeps/x86_64/multiarch/{wcsncmp-sse2.c => wcsncmp-generic.c} (92%)
>  delete mode 100644 sysdeps/x86_64/multiarch/wcsnlen-c.c
>  create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-generic.c
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 3d153cac35..666ee4d5d6 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -43,7 +43,7 @@ sysdep_routines += \
>    stpcpy-sse2-unaligned \
>    stpncpy-avx2 \
>    stpncpy-avx2-rtm \
> -  stpncpy-c \
> +  stpncpy-generic \
>    stpncpy-evex \
>    stpncpy-sse2-unaligned \
>    strcasecmp_l-avx2 \
> @@ -76,8 +76,8 @@ sysdep_routines += \
>    strcpy-evex \
>    strcpy-sse2 \
>    strcpy-sse2-unaligned \
> -  strcspn-c \
> -  strcspn-sse2 \
> +  strcspn-generic \
> +  strcspn-sse4 \
>    strlen-avx2 \
>    strlen-avx2-rtm \
>    strlen-evex \
> @@ -90,7 +90,7 @@ sysdep_routines += \
>    strncase_l-sse4_2 \
>    strncat-avx2 \
>    strncat-avx2-rtm \
> -  strncat-c \
> +  strncat-generic \
>    strncat-evex \
>    strncat-sse2-unaligned \
>    strncmp-avx2 \
> @@ -100,7 +100,7 @@ sysdep_routines += \
>    strncmp-sse4_2 \
>    strncpy-avx2 \
>    strncpy-avx2-rtm \
> -  strncpy-c \
> +  strncpy-generic \
>    strncpy-evex \
>    strncpy-sse2-unaligned \
>    strnlen-avx2 \
> @@ -108,22 +108,23 @@ sysdep_routines += \
>    strnlen-evex \
>    strnlen-evex512 \
>    strnlen-sse2 \
> -  strpbrk-c \
> -  strpbrk-sse2 \
> +  strpbrk-generic \
> +  strpbrk-sse4 \
>    strrchr-avx2 \
>    strrchr-avx2-rtm \
>    strrchr-evex \
>    strrchr-sse2 \
> -  strspn-c \
> -  strspn-sse2 \
> +  strspn-generic \
> +  strspn-sse4 \
>    strstr-avx512 \
>    strstr-sse2-unaligned \
>    varshift \
>  # sysdep_routines
> -CFLAGS-varshift.c += -msse4
> -CFLAGS-strcspn-c.c += -msse4
> -CFLAGS-strpbrk-c.c += -msse4
> -CFLAGS-strspn-c.c += -msse4
> +
> +CFLAGS-strcspn-sse4.c += -msse4
> +CFLAGS-strpbrk-sse4.c += -msse4
> +CFLAGS-strspn-sse4.c += -msse4
> +
>  CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
>  endif
>
> @@ -137,7 +138,7 @@ sysdep_routines += \
>    wcscmp-avx2-rtm \
>    wcscmp-evex \
>    wcscmp-sse2 \
> -  wcscpy-c \
> +  wcscpy-generic \
>    wcscpy-ssse3 \
>    wcslen-avx2 \
>    wcslen-avx2-rtm \
> @@ -147,11 +148,11 @@ sysdep_routines += \
>    wcslen-sse4_1 \
>    wcsncmp-avx2 \
>    wcsncmp-avx2-rtm \
> +  wcsncmp-generic \
>    wcsncmp-evex \
> -  wcsncmp-sse2 \
>    wcsnlen-avx2 \
>    wcsnlen-avx2-rtm \
> -  wcsnlen-c \
> +  wcsnlen-generic \
>    wcsnlen-evex \
>    wcsnlen-evex512 \
>    wcsnlen-sse4_1 \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> index 4289df29ec..1d9cdfcfec 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> @@ -19,7 +19,11 @@
>
>  #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
> @@ -44,5 +48,5 @@ IFUNC_SELECTOR (void)
>         return OPTIMIZE (avx2);
>      }
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (GENERIC);
>  }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index dc595752e0..883362f63d 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -367,7 +367,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               __stpncpy_evex)
>               IFUNC_IMPL_ADD (array, i, stpncpy, 1,
>                               __stpncpy_sse2_unaligned)
> -             IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
> +             IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
>
>    /* Support sysdeps/x86_64/multiarch/stpcpy.c.  */
>    IFUNC_IMPL (i, name, stpcpy,
> @@ -526,7 +526,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    IFUNC_IMPL (i, name, strcspn,
>               IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
>                               __strcspn_sse42)
> -             IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> +             IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
>    IFUNC_IMPL (i, name, strncasecmp,
> @@ -580,7 +580,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               __strncat_evex)
>               IFUNC_IMPL_ADD (array, i, strncat, 1,
>                               __strncat_sse2_unaligned)
> -             IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> +             IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strncpy.c.  */
>    IFUNC_IMPL (i, name, strncpy,
> @@ -596,20 +596,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                               __strncpy_evex)
>               IFUNC_IMPL_ADD (array, i, strncpy, 1,
>                               __strncpy_sse2_unaligned)
> -             IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
> +             IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
>    IFUNC_IMPL (i, name, strpbrk,
>               IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
>                               __strpbrk_sse42)
> -             IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> +             IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
>
>
>    /* Support sysdeps/x86_64/multiarch/strspn.c.  */
>    IFUNC_IMPL (i, name, strspn,
>               IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
>                               __strspn_sse42)
> -             IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> +             IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
>
>    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
>    IFUNC_IMPL (i, name, strstr,
> @@ -686,13 +686,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>                                && CPU_FEATURE_USABLE (AVX512BW)
>                                && CPU_FEATURE_USABLE (BMI2)),
>                               __wcsncmp_evex)
> -             IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
> +             IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
>
>    /* Support sysdeps/x86_64/multiarch/wcscpy.c.  */
>    IFUNC_IMPL (i, name, wcscpy,
>               IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
>                               __wcscpy_ssse3)
> -             IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
> +             IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
>
>    /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
>    IFUNC_IMPL (i, name, wcslen,
> @@ -744,7 +744,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>               IFUNC_IMPL_ADD (array, i, wcsnlen,
>                               CPU_FEATURE_USABLE (SSE4_1),
>                               __wcsnlen_sse4_1)
> -             IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
> +             IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
>
>    /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
>    IFUNC_IMPL (i, name, wmemchr,
> diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> index b555ff2fac..ee36525bcf 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> @@ -19,7 +19,7 @@
>
>  #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
>
>  static inline void *
> @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
>      return OPTIMIZE (sse42);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (generic);
>  }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> index a15afa44e9..80529458d1 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> @@ -20,7 +20,11 @@
>
>  #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
>    attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
>      return OPTIMIZE (sse2_unaligned);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (GENERIC);
>  }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> index 2b29e7608a..88c1c502af 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> @@ -19,7 +19,11 @@
>
>  #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
>      return OPTIMIZE (sse4_1);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (GENERIC);
>  }
> diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
> deleted file mode 100644
> index b016e487e1..0000000000
> --- a/sysdeps/x86_64/multiarch/stpncpy-c.c
> +++ /dev/null
> @@ -1,7 +0,0 @@
> -#define STPNCPY __stpncpy_sse2
> -#undef weak_alias
> -#define weak_alias(ignored1, ignored2)
> -#undef libc_hidden_def
> -#define libc_hidden_def(stpncpy)
> -
> -#include <string/stpncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/stpncpy-generic.c b/sysdeps/x86_64/multiarch/stpncpy-generic.c
> new file mode 100644
> index 0000000000..87826845b0
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/stpncpy-generic.c
> @@ -0,0 +1,26 @@
> +/* stpncpy.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +
> +#define STPNCPY __stpncpy_generic
> +#undef weak_alias
> +#define weak_alias(ignored1, ignored2)
> +#undef libc_hidden_def
> +#define libc_hidden_def(stpncpy)
> +
> +#include <string/stpncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
> index 82fa53957d..879bc83f0b 100644
> --- a/sysdeps/x86_64/multiarch/stpncpy.c
> +++ b/sysdeps/x86_64/multiarch/stpncpy.c
> @@ -25,6 +25,7 @@
>  # undef stpncpy
>  # undef __stpncpy
>
> +# define GENERIC generic
>  # define SYMBOL_NAME stpncpy
>  # include "ifunc-strcpy.h"
>
> diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-generic.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strcspn-sse2.c
> rename to sysdeps/x86_64/multiarch/strcspn-generic.c
> index 3a04bb39fc..423de2e2b2 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-generic.c
> @@ -19,7 +19,7 @@
>  #if IS_IN (libc)
>
>  # include <sysdep.h>
> -# define STRCSPN __strcspn_sse2
> +# define STRCSPN __strcspn_generic
>
>  # undef libc_hidden_builtin_def
>  # define libc_hidden_builtin_def(STRCSPN)
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-sse4.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strcspn-c.c
> rename to sysdeps/x86_64/multiarch/strcspn-sse4.c
> index c312fab8b1..59f64f9fe8 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-sse4.c
> @@ -52,8 +52,8 @@
>     when either CFlag or ZFlag is 1.  If CFlag == 1, ECX has the offset
>     X for case 1.  */
>
> -#ifndef STRCSPN_SSE2
> -# define STRCSPN_SSE2 __strcspn_sse2
> +#ifndef STRCSPN_GENERIC
> +# define STRCSPN_GENERIC __strcspn_generic
>  # define STRCSPN_SSE42 __strcspn_sse42
>  #endif
>
> @@ -69,7 +69,7 @@ char *
>  #else
>  size_t
>  #endif
> -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> +STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
>
>
>  #ifdef USE_AS_STRPBRK
> @@ -119,7 +119,7 @@ STRCSPN_SSE42 (const char *s, const char *a)
>        /* There is no NULL terminator.  Don't use SSE4.2 if the length
>           of A > 16.  */
>        if (a[16] != 0)
> -        return STRCSPN_SSE2 (s, a);
> +        return STRCSPN_GENERIC (s, a);
>      }
>
>    aligned = s;
> diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
> deleted file mode 100644
> index 93a7fab7ea..0000000000
> --- a/sysdeps/x86_64/multiarch/strncat-c.c
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -#define STRNCAT __strncat_sse2
> -#include <string/strncat.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat-generic.c b/sysdeps/x86_64/multiarch/strncat-generic.c
> new file mode 100644
> index 0000000000..0090669cd1
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strncat-generic.c
> @@ -0,0 +1,21 @@
> +/* strncat.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +
> +#define STRNCAT __strncat_generic
> +#include <string/strncat.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
> index b649343a97..50fba8a41f 100644
> --- a/sysdeps/x86_64/multiarch/strncat.c
> +++ b/sysdeps/x86_64/multiarch/strncat.c
> @@ -24,6 +24,7 @@
>  # undef strncat
>
>  # define SYMBOL_NAME strncat
> +# define GENERIC generic
>  # include "ifunc-strcpy.h"
>
>  libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
> deleted file mode 100644
> index 57c45ac7ab..0000000000
> --- a/sysdeps/x86_64/multiarch/strncpy-c.c
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#define STRNCPY __strncpy_sse2
> -#undef libc_hidden_builtin_def
> -#define libc_hidden_builtin_def(strncpy)
> -
> -#include <string/strncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/strncpy-generic.c b/sysdeps/x86_64/multiarch/strncpy-generic.c
> new file mode 100644
> index 0000000000..9916153dd5
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strncpy-generic.c
> @@ -0,0 +1,24 @@
> +/* strncpy.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +
> +#define STRNCPY __strncpy_generic
> +#undef libc_hidden_builtin_def
> +#define libc_hidden_builtin_def(strncpy)
> +
> +#include <string/strncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
> index 2a780a7e16..7fc7d72ec5 100644
> --- a/sysdeps/x86_64/multiarch/strncpy.c
> +++ b/sysdeps/x86_64/multiarch/strncpy.c
> @@ -24,6 +24,7 @@
>  # undef strncpy
>
>  # define SYMBOL_NAME strncpy
> +# define GENERIC generic
>  # include "ifunc-strcpy.h"
>
>  libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-generic.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c
> rename to sysdeps/x86_64/multiarch/strpbrk-generic.c
> index d03214c4fb..d31acfe495 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-generic.c
> @@ -19,7 +19,7 @@
>  #if IS_IN (libc)
>
>  # include <sysdep.h>
> -# define STRPBRK __strpbrk_sse2
> +# define STRPBRK __strpbrk_generic
>
>  # undef libc_hidden_builtin_def
>  # define libc_hidden_builtin_def(STRPBRK)
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
> similarity index 92%
> rename from sysdeps/x86_64/multiarch/strpbrk-c.c
> rename to sysdeps/x86_64/multiarch/strpbrk-sse4.c
> index abf4ff7f1a..bf74d660d5 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
> @@ -17,6 +17,6 @@
>     <https://www.gnu.org/licenses/>.  */
>
>  #define USE_AS_STRPBRK
> -#define STRCSPN_SSE2 __strpbrk_sse2
> +#define STRCSPN_GENERIC __strpbrk_generic
>  #define STRCSPN_SSE42 __strpbrk_sse42
> -#include "strcspn-c.c"
> +#include "strcspn-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-generic.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strspn-sse2.c
> rename to sysdeps/x86_64/multiarch/strspn-generic.c
> index 61cc6cb0a5..6b50c36432 100644
> --- a/sysdeps/x86_64/multiarch/strspn-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strspn-generic.c
> @@ -19,7 +19,7 @@
>  #if IS_IN (libc)
>
>  # include <sysdep.h>
> -# define STRSPN __strspn_sse2
> +# define STRSPN __strspn_generic
>
>  # undef libc_hidden_builtin_def
>  # define libc_hidden_builtin_def(STRSPN)
> diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-sse4.c
> similarity index 97%
> rename from sysdeps/x86_64/multiarch/strspn-c.c
> rename to sysdeps/x86_64/multiarch/strspn-sse4.c
> index 6124033ceb..d044916688 100644
> --- a/sysdeps/x86_64/multiarch/strspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strspn-sse4.c
> @@ -51,7 +51,7 @@
>
>     We exit from the loop for case 1.  */
>
> -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
> +extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
>
>
>  size_t
> @@ -98,7 +98,7 @@ __strspn_sse42 (const char *s, const char *a)
>        /* There is no NULL terminator.  Don't use SSE4.2 if the length
>           of A > 16.  */
>        if (a[16] != 0)
> -        return __strspn_sse2 (s, a);
> +        return __strspn_generic (s, a);
>      }
>    aligned = s;
>    offset = (unsigned int) ((size_t) s & 15);
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
> deleted file mode 100644
> index 26d6984e9b..0000000000
> --- a/sysdeps/x86_64/multiarch/wcscpy-c.c
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#if IS_IN (libc)
> -# define WCSCPY  __wcscpy_sse2
> -#endif
> -
> -#include <wcsmbs/wcscpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-generic.c b/sysdeps/x86_64/multiarch/wcscpy-generic.c
> new file mode 100644
> index 0000000000..5ea905f33f
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcscpy-generic.c
> @@ -0,0 +1,24 @@
> +/* wcscpy.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +
> +#if IS_IN (libc)
> +# define WCSCPY  __wcscpy_generic
> +#endif
> +
> +#include <wcsmbs/wcscpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> index 6a2d1421d9..53c3228dc2 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> @@ -26,7 +26,7 @@
>  # define SYMBOL_NAME wcscpy
>  # include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
>
>  static inline void *
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
>    if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
>      return OPTIMIZE (ssse3);
>
> -  return OPTIMIZE (sse2);
> +  return OPTIMIZE (generic);
>  }
>
>  libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
> similarity index 92%
> rename from sysdeps/x86_64/multiarch/wcsncmp-sse2.c
> rename to sysdeps/x86_64/multiarch/wcsncmp-generic.c
> index 8d9cbbb900..658d541886 100644
> --- a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
> +++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
> @@ -1,4 +1,4 @@
> -/* wcsncmp optimized with SSE2.
> +/* wcsncmp.
>     Copyright (C) 2018-2022 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>
> @@ -16,5 +16,5 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#define WCSNCMP __wcsncmp_sse2
> +#define WCSNCMP __wcsncmp_generic
>  #include <wcsmbs/wcsncmp.c>
> diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c
> index 5e00af2ca5..1836f794dd 100644
> --- a/sysdeps/x86_64/multiarch/wcsncmp.c
> +++ b/sysdeps/x86_64/multiarch/wcsncmp.c
> @@ -24,6 +24,8 @@
>  # undef wcsncmp
>  # undef __wcsncmp
>
> +# define GENERIC generic
> +
>  # define SYMBOL_NAME wcsncmp
>  # include "ifunc-avx2.h"
>
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> deleted file mode 100644
> index e1ec7cfbb5..0000000000
> --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
> +++ /dev/null
> @@ -1,9 +0,0 @@
> -#if IS_IN (libc)
> -# include <wchar.h>
> -
> -# define WCSNLEN __wcsnlen_sse2
> -
> -extern __typeof (wcsnlen) __wcsnlen_sse2;
> -#endif
> -
> -#include "wcsmbs/wcsnlen.c"
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
> new file mode 100644
> index 0000000000..2d75da7709
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
> @@ -0,0 +1,28 @@
> +/* wcsnlen.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +
> +#if IS_IN (libc)
> +# include <wchar.h>
> +
> +# define WCSNLEN __wcsnlen_generic
> +
> +extern __typeof (wcsnlen) __wcsnlen_generic;
> +#endif
> +
> +#include "wcsmbs/wcsnlen.c"
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
> index baa26666a8..05b7a211de 100644
> --- a/sysdeps/x86_64/multiarch/wcsnlen.c
> +++ b/sysdeps/x86_64/multiarch/wcsnlen.c
> @@ -24,6 +24,7 @@
>  # undef __wcsnlen
>
>  # define SYMBOL_NAME wcsnlen
> +# define GENERIC generic
>  # include "ifunc-wcslen.h"
>
>  libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v1 1/3] x86: Align varshift table to 32-bytes
  2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
@ 2022-07-14  2:51   ` Sunil Pandey
  0 siblings, 0 replies; 11+ messages in thread
From: Sunil Pandey @ 2022-07-14  2:51 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Noah Goldstein, GNU C Library

On Thu, Jun 9, 2022 at 8:15 AM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > This ensures the load will never split a cache line.
> > ---
> >  sysdeps/x86_64/multiarch/varshift.c | 5 +++--
> >  sysdeps/x86_64/multiarch/varshift.h | 3 ++-
> >  2 files changed, 5 insertions(+), 3 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
> > index c8210f0546..d27767520a 100644
> > --- a/sysdeps/x86_64/multiarch/varshift.c
> > +++ b/sysdeps/x86_64/multiarch/varshift.c
> > @@ -16,9 +16,10 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include "varshift.h"
> > +#include <stdint.h>
> >
> > -const int8_t ___m128i_shift_right[31] attribute_hidden =
> > +const int8_t ___m128i_shift_right[31] attribute_hidden
> > +    __attribute__((aligned(32))) =
> >    {
> >      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
> >      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
> > diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
> > index af30694488..ffd12d79e4 100644
> > --- a/sysdeps/x86_64/multiarch/varshift.h
> > +++ b/sysdeps/x86_64/multiarch/varshift.h
> > @@ -19,7 +19,8 @@
> >  #include <stdint.h>
> >  #include <tmmintrin.h>
> >
> > -extern const int8_t ___m128i_shift_right[31] attribute_hidden;
> > +extern const int8_t ___m128i_shift_right[31] attribute_hidden
> > +    __attribute__ ((aligned (32)));
> >
> >  static __inline__ __m128i
> >  __m128i_shift_right (__m128i value, unsigned long int offset)
> > --
> > 2.34.1
> >
>
> LGTM.
>
> Thanks.
>
> --
> H.J.

I would like to backport this patch to release branches.
Any comments or objections?

--Sunil

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-07-14  2:52 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-09  4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
2022-06-09  4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
2022-06-09 15:28   ` H.J. Lu
2022-06-09  4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
2022-06-10  0:58   ` [PATCH v2] " Noah Goldstein
2022-06-10  1:19     ` H.J. Lu
2022-06-10  1:26       ` Noah Goldstein
2022-06-16 22:11   ` [PATCH v3] " Noah Goldstein
2022-06-16 22:43     ` H.J. Lu
2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
2022-07-14  2:51   ` Sunil Pandey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).