* [PATCH v1 1/3] x86: Align varshift table to 32-bytes
@ 2022-06-09 4:16 Noah Goldstein
2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Noah Goldstein @ 2022-06-09 4:16 UTC (permalink / raw)
To: libc-alpha
This ensures the load will never split a cache line.
---
sysdeps/x86_64/multiarch/varshift.c | 5 +++--
sysdeps/x86_64/multiarch/varshift.h | 3 ++-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
index c8210f0546..d27767520a 100644
--- a/sysdeps/x86_64/multiarch/varshift.c
+++ b/sysdeps/x86_64/multiarch/varshift.c
@@ -16,9 +16,10 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include "varshift.h"
+#include <stdint.h>
-const int8_t ___m128i_shift_right[31] attribute_hidden =
+const int8_t ___m128i_shift_right[31] attribute_hidden
+ __attribute__((aligned(32))) =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
index af30694488..ffd12d79e4 100644
--- a/sysdeps/x86_64/multiarch/varshift.h
+++ b/sysdeps/x86_64/multiarch/varshift.h
@@ -19,7 +19,8 @@
#include <stdint.h>
#include <tmmintrin.h>
-extern const int8_t ___m128i_shift_right[31] attribute_hidden;
+extern const int8_t ___m128i_shift_right[31] attribute_hidden
+ __attribute__ ((aligned (32)));
static __inline__ __m128i
__m128i_shift_right (__m128i value, unsigned long int offset)
--
2.34.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk
2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
@ 2022-06-09 4:16 ` Noah Goldstein
2022-06-09 15:28 ` H.J. Lu
2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
2 siblings, 1 reply; 11+ messages in thread
From: Noah Goldstein @ 2022-06-09 4:16 UTC (permalink / raw)
To: libc-alpha
No change to the actual logic of the functions. The goal is for
avx/avx2 machines to rely less on sse instructions.
Full xcheck passes on x86_64.
---
sysdeps/x86_64/multiarch/Makefile | 21 ++++++++++-----
.../multiarch/{ifunc-sse4_2.h => ifunc-avx.h} | 4 +++
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 +++++
sysdeps/x86_64/multiarch/strcspn-c-avx.c | 21 +++++++++++++++
.../{strcspn-c.c => strcspn-c-sse4.c} | 26 ++++++++++++-------
sysdeps/x86_64/multiarch/strcspn.c | 2 +-
sysdeps/x86_64/multiarch/strpbrk-c-avx.c | 23 ++++++++++++++++
.../{strpbrk-c.c => strpbrk-c-sse4.c} | 6 ++---
sysdeps/x86_64/multiarch/strpbrk.c | 2 +-
sysdeps/x86_64/multiarch/strspn-c-avx.c | 21 +++++++++++++++
.../multiarch/{strspn-c.c => strspn-c-sse4.c} | 15 ++++++++---
sysdeps/x86_64/multiarch/strspn.c | 2 +-
12 files changed, 122 insertions(+), 27 deletions(-)
rename sysdeps/x86_64/multiarch/{ifunc-sse4_2.h => ifunc-avx.h} (89%)
create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-avx.c
rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-c-sse4.c} (90%)
create mode 100644 sysdeps/x86_64/multiarch/strpbrk-c-avx.c
rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-c-sse4.c} (89%)
create mode 100644 sysdeps/x86_64/multiarch/strspn-c-avx.c
rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-c-sse4.c} (92%)
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3d153cac35..27f306c7c8 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -76,7 +76,8 @@ sysdep_routines += \
strcpy-evex \
strcpy-sse2 \
strcpy-sse2-unaligned \
- strcspn-c \
+ strcspn-c-avx \
+ strcspn-c-sse4 \
strcspn-sse2 \
strlen-avx2 \
strlen-avx2-rtm \
@@ -108,22 +109,28 @@ sysdep_routines += \
strnlen-evex \
strnlen-evex512 \
strnlen-sse2 \
- strpbrk-c \
+ strpbrk-c-avx \
+ strpbrk-c-sse4 \
strpbrk-sse2 \
strrchr-avx2 \
strrchr-avx2-rtm \
strrchr-evex \
strrchr-sse2 \
- strspn-c \
+ strspn-c-avx \
+ strspn-c-sse4 \
strspn-sse2 \
strstr-avx512 \
strstr-sse2-unaligned \
varshift \
# sysdep_routines
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+
+CFLAGS-strcspn-c-avx.c += -mavx
+CFLAGS-strcspn-c-sse4.c += -msse4
+CFLAGS-strpbrk-c-avx.c += -mavx
+CFLAGS-strpbrk-c-sse4.c += -msse4
+CFLAGS-strspn-c-avx.c += -mavx
+CFLAGS-strspn-c-sse4.c += -msse4
+
CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h
similarity index 89%
rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h
rename to sysdeps/x86_64/multiarch/ifunc-avx.h
index b555ff2fac..891f3ddcac 100644
--- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx.h
@@ -21,12 +21,16 @@
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ return OPTIMIZE (avx);
+
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
return OPTIMIZE (sse42);
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 58f3ec8306..507c563669 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strcspn.c. */
IFUNC_IMPL (i, name, strcspn,
+ IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX),
+ __strcspn_avx)
IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
__strcspn_sse42)
IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
@@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strpbrk.c. */
IFUNC_IMPL (i, name, strpbrk,
+ IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX),
+ __strpbrk_avx)
IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
__strpbrk_sse42)
IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
@@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strspn.c. */
IFUNC_IMPL (i, name, strspn,
+ IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX),
+ __strspn_avx)
IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
__strspn_sse42)
IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
new file mode 100644
index 0000000000..b8d983f79f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
@@ -0,0 +1,21 @@
+/* strcspn with AVX intrinsics
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define STRCSPN __strcspn_avx
+#define SECTION "avx"
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
similarity index 90%
rename from sysdeps/x86_64/multiarch/strcspn-c.c
rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c
index c312fab8b1..848c3cfb14 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
@@ -52,9 +52,16 @@
when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
X for case 1. */
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
-# define STRCSPN_SSE42 __strcspn_sse42
+#ifndef STRCSPN_FALLBACK
+# define STRCSPN_FALLBACK __strcspn_sse2
+#endif
+
+#ifndef STRCSPN
+# define STRCSPN __strcspn_sse42
+#endif
+
+#ifndef SECTION
+# define SECTION "sse4.2"
#endif
#ifdef USE_AS_STRPBRK
@@ -69,16 +76,15 @@ char *
#else
size_t
#endif
-STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
-
+STRCSPN_FALLBACK (const char *, const char *) attribute_hidden;
#ifdef USE_AS_STRPBRK
char *
#else
size_t
#endif
-__attribute__ ((section (".text.sse4.2")))
-STRCSPN_SSE42 (const char *s, const char *a)
+__attribute__ ((section (".text." SECTION)))
+STRCSPN (const char *s, const char *a)
{
if (*a == 0)
RETURN (NULL, strlen (s));
@@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a)
maskz_bits = _mm_movemask_epi8 (maskz);
if (maskz_bits == 0)
{
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
+ /* There is no NULL terminator. Don't use pcmpstri based approach if the
+ length of A > 16. */
if (a[16] != 0)
- return STRCSPN_SSE2 (s, a);
+ return STRCSPN_FALLBACK (s, a);
}
aligned = s;
diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c
index 4848fa8677..63e1cf052e 100644
--- a/sysdeps/x86_64/multiarch/strcspn.c
+++ b/sysdeps/x86_64/multiarch/strcspn.c
@@ -24,7 +24,7 @@
# undef strcspn
# define SYMBOL_NAME strcspn
-# include "ifunc-sse4_2.h"
+# include "ifunc-avx.h"
libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
new file mode 100644
index 0000000000..2918013994
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
@@ -0,0 +1,23 @@
+/* strpbrk with AVX intrinsics
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define USE_AS_STRPBRK
+#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN __strpbrk_avx
+#define SECTION "avx"
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
similarity index 89%
rename from sysdeps/x86_64/multiarch/strpbrk-c.c
rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
index abf4ff7f1a..2efd38d809 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
@@ -17,6 +17,6 @@
<https://www.gnu.org/licenses/>. */
#define USE_AS_STRPBRK
-#define STRCSPN_SSE2 __strpbrk_sse2
-#define STRCSPN_SSE42 __strpbrk_sse42
-#include "strcspn-c.c"
+#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN __strpbrk_sse42
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c
index 04e300ea71..ab5b04a482 100644
--- a/sysdeps/x86_64/multiarch/strpbrk.c
+++ b/sysdeps/x86_64/multiarch/strpbrk.c
@@ -24,7 +24,7 @@
# undef strpbrk
# define SYMBOL_NAME strpbrk
-# include "ifunc-sse4_2.h"
+# include "ifunc-avx.h"
libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c
new file mode 100644
index 0000000000..9d5fdb9550
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c
@@ -0,0 +1,21 @@
+/* strspn with AVX intrinsics
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define STRSPN __strspn_avx
+#define SECTION "avx"
+#include "strspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
similarity index 92%
rename from sysdeps/x86_64/multiarch/strspn-c.c
rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c
index 6124033ceb..6a91def2e0 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
@@ -53,10 +53,17 @@
extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
+#ifndef STRSPN
+# define STRSPN __strspn_sse42
+#endif
+
+#ifndef SECTION
+# define SECTION "sse4.2"
+#endif
size_t
-__attribute__ ((section (".text.sse4.2")))
-__strspn_sse42 (const char *s, const char *a)
+__attribute__ ((section (".text." SECTION)))
+STRSPN (const char *s, const char *a)
{
if (*a == 0)
return 0;
@@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a)
maskz_bits = _mm_movemask_epi8 (maskz);
if (maskz_bits == 0)
{
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
+ /* There is no NULL terminator. Don't use pcmpstri based approach if the
+ length of A > 16. */
if (a[16] != 0)
return __strspn_sse2 (s, a);
}
diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c
index 07f5def155..c3c5e7a3cc 100644
--- a/sysdeps/x86_64/multiarch/strspn.c
+++ b/sysdeps/x86_64/multiarch/strspn.c
@@ -24,7 +24,7 @@
# undef strspn
# define SYMBOL_NAME strspn
-# include "ifunc-sse4_2.h"
+# include "ifunc-avx.h"
libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ());
--
2.34.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity
2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
@ 2022-06-09 4:16 ` Noah Goldstein
2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein
2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein
2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
2 siblings, 2 replies; 11+ messages in thread
From: Noah Goldstein @ 2022-06-09 4:16 UTC (permalink / raw)
To: libc-alpha
No functions are changed. It just renames generic implementations from
'{func}_sse2' to '{func}_generic'. This is because the postfix
"_sse2" was overloaded: it was used both for files that had hand-optimized
sse2 assembly implementations and for files that just redirected back
to the generic implementation.
Full xcheck passed on x86_64.
---
sysdeps/x86_64/multiarch/Makefile | 6 +++---
sysdeps/x86_64/multiarch/ifunc-avx.h | 4 ++--
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 ++++++++--------
sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 ++++++--
sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 ++++++--
sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +-
sysdeps/x86_64/multiarch/stpncpy.c | 1 +
sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 2 +-
.../multiarch/{strcspn-sse2.c => strcspn-c.c} | 2 +-
sysdeps/x86_64/multiarch/strncat-c.c | 2 +-
sysdeps/x86_64/multiarch/strncat.c | 1 +
sysdeps/x86_64/multiarch/strncpy-c.c | 2 +-
sysdeps/x86_64/multiarch/strncpy.c | 1 +
sysdeps/x86_64/multiarch/strpbrk-c-avx.c | 2 +-
sysdeps/x86_64/multiarch/strpbrk-c-sse4.c | 2 +-
.../multiarch/{strpbrk-sse2.c => strpbrk-c.c} | 2 +-
sysdeps/x86_64/multiarch/strspn-c-sse4.c | 4 ++--
.../multiarch/{strspn-sse2.c => strspn-c.c} | 2 +-
sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +-
sysdeps/x86_64/multiarch/wcscpy.c | 4 ++--
sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 ++--
sysdeps/x86_64/multiarch/wcsnlen.c | 1 +
22 files changed, 45 insertions(+), 33 deletions(-)
rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-c.c} (96%)
rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-c.c} (96%)
rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-c.c} (96%)
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 27f306c7c8..9b1e0add1a 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -76,9 +76,9 @@ sysdep_routines += \
strcpy-evex \
strcpy-sse2 \
strcpy-sse2-unaligned \
+ strcspn-c \
strcspn-c-avx \
strcspn-c-sse4 \
- strcspn-sse2 \
strlen-avx2 \
strlen-avx2-rtm \
strlen-evex \
@@ -109,16 +109,16 @@ sysdep_routines += \
strnlen-evex \
strnlen-evex512 \
strnlen-sse2 \
+ strpbrk-c \
strpbrk-c-avx \
strpbrk-c-sse4 \
- strpbrk-sse2 \
strrchr-avx2 \
strrchr-avx2-rtm \
strrchr-evex \
strrchr-sse2 \
+ strspn-c \
strspn-c-avx \
strspn-c-sse4 \
- strspn-sse2 \
strstr-avx512 \
strstr-sse2-unaligned \
varshift \
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx.h b/sysdeps/x86_64/multiarch/ifunc-avx.h
index 891f3ddcac..30efbd29d0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx.h
@@ -19,7 +19,7 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
@@ -34,5 +34,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
return OPTIMIZE (sse42);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 507c563669..23a2d7114d 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__stpncpy_evex)
IFUNC_IMPL_ADD (array, i, stpncpy, 1,
__stpncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
/* Support sysdeps/x86_64/multiarch/stpcpy.c. */
IFUNC_IMPL (i, name, stpcpy,
@@ -533,7 +533,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strcspn_avx)
IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
__strcspn_sse42)
- IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
@@ -587,7 +587,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncat_evex)
IFUNC_IMPL_ADD (array, i, strncat, 1,
__strncat_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
+ IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
/* Support sysdeps/x86_64/multiarch/strncpy.c. */
IFUNC_IMPL (i, name, strncpy,
@@ -603,7 +603,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncpy_evex)
IFUNC_IMPL_ADD (array, i, strncpy, 1,
__strncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
/* Support sysdeps/x86_64/multiarch/strpbrk.c. */
IFUNC_IMPL (i, name, strpbrk,
@@ -611,7 +611,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strpbrk_avx)
IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
__strpbrk_sse42)
- IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
+ IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
/* Support sysdeps/x86_64/multiarch/strspn.c. */
@@ -620,7 +620,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strspn_avx)
IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
__strspn_sse42)
- IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
/* Support sysdeps/x86_64/multiarch/strstr.c. */
IFUNC_IMPL (i, name, strstr,
@@ -703,7 +703,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, wcscpy,
IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
__wcscpy_ssse3)
- IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
+ IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
IFUNC_IMPL (i, name, wcslen,
@@ -755,7 +755,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wcsnlen,
CPU_FEATURE_USABLE (SSE4_1),
__wcsnlen_sse4_1)
- IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+ IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
index a15afa44e9..80529458d1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
@@ -20,7 +20,11 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
@@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
return OPTIMIZE (sse2_unaligned);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
index 2b29e7608a..88c1c502af 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
@@ -19,7 +19,11 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
@@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
index b016e487e1..eb62fcf388 100644
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
@@ -1,4 +1,4 @@
-#define STPNCPY __stpncpy_sse2
+#define STPNCPY __stpncpy_generic
#undef weak_alias
#define weak_alias(ignored1, ignored2)
#undef libc_hidden_def
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
index 82fa53957d..879bc83f0b 100644
--- a/sysdeps/x86_64/multiarch/stpncpy.c
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -25,6 +25,7 @@
# undef stpncpy
# undef __stpncpy
+# define GENERIC generic
# define SYMBOL_NAME stpncpy
# include "ifunc-strcpy.h"
diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
index 848c3cfb14..8541035ccb 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
@@ -53,7 +53,7 @@
X for case 1. */
#ifndef STRCSPN_FALLBACK
-# define STRCSPN_FALLBACK __strcspn_sse2
+# define STRCSPN_FALLBACK __strcspn_generic
#endif
#ifndef STRCSPN
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-c.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strcspn-sse2.c
rename to sysdeps/x86_64/multiarch/strcspn-c.c
index 3a04bb39fc..423de2e2b2 100644
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -19,7 +19,7 @@
#if IS_IN (libc)
# include <sysdep.h>
-# define STRCSPN __strcspn_sse2
+# define STRCSPN __strcspn_generic
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(STRCSPN)
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
index 93a7fab7ea..b729c033d9 100644
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -1,2 +1,2 @@
-#define STRNCAT __strncat_sse2
+#define STRNCAT __strncat_generic
#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
index b649343a97..50fba8a41f 100644
--- a/sysdeps/x86_64/multiarch/strncat.c
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -24,6 +24,7 @@
# undef strncat
# define SYMBOL_NAME strncat
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
index 57c45ac7ab..183b0b8e0f 100644
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ b/sysdeps/x86_64/multiarch/strncpy-c.c
@@ -1,4 +1,4 @@
-#define STRNCPY __strncpy_sse2
+#define STRNCPY __strncpy_generic
#undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(strncpy)
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
index 2a780a7e16..7fc7d72ec5 100644
--- a/sysdeps/x86_64/multiarch/strncpy.c
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -24,6 +24,7 @@
# undef strncpy
# define SYMBOL_NAME strncpy
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
index 2918013994..363daebd9e 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
@@ -17,7 +17,7 @@
<https://www.gnu.org/licenses/>. */
#define USE_AS_STRPBRK
-#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN_FALLBACK __strpbrk_generic
#define STRCSPN __strpbrk_avx
#define SECTION "avx"
#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
index 2efd38d809..a02c951dfd 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
@@ -17,6 +17,6 @@
<https://www.gnu.org/licenses/>. */
#define USE_AS_STRPBRK
-#define STRCSPN_FALLBACK __strpbrk_sse2
+#define STRCSPN_FALLBACK __strpbrk_generic
#define STRCSPN __strpbrk_sse42
#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c
rename to sysdeps/x86_64/multiarch/strpbrk-c.c
index d03214c4fb..d31acfe495 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
@@ -19,7 +19,7 @@
#if IS_IN (libc)
# include <sysdep.h>
-# define STRPBRK __strpbrk_sse2
+# define STRPBRK __strpbrk_generic
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(STRPBRK)
diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
index 6a91def2e0..9323a117ab 100644
--- a/sysdeps/x86_64/multiarch/strspn-c-sse4.c
+++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
@@ -51,7 +51,7 @@
We exit from the loop for case 1. */
-extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
+extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
#ifndef STRSPN
# define STRSPN __strspn_sse42
@@ -105,7 +105,7 @@ STRSPN (const char *s, const char *a)
/* There is no NULL terminator. Don't use pcmpstri based approach if the
length of A > 16. */
if (a[16] != 0)
- return __strspn_sse2 (s, a);
+ return __strspn_generic (s, a);
}
aligned = s;
offset = (unsigned int) ((size_t) s & 15);
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-c.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strspn-sse2.c
rename to sysdeps/x86_64/multiarch/strspn-c.c
index 61cc6cb0a5..6b50c36432 100644
--- a/sysdeps/x86_64/multiarch/strspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -19,7 +19,7 @@
#if IS_IN (libc)
# include <sysdep.h>
-# define STRSPN __strspn_sse2
+# define STRSPN __strspn_generic
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(STRSPN)
diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
index 26d6984e9b..fa38dd898d 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-c.c
+++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
@@ -1,5 +1,5 @@
#if IS_IN (libc)
-# define WCSCPY __wcscpy_sse2
+# define WCSCPY __wcscpy_generic
#endif
#include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
index 6a2d1421d9..53c3228dc2 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.c
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -26,7 +26,7 @@
# define SYMBOL_NAME wcscpy
# include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
static inline void *
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
return OPTIMIZE (ssse3);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
index e1ec7cfbb5..1c9c04241a 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
@@ -1,9 +1,9 @@
#if IS_IN (libc)
# include <wchar.h>
-# define WCSNLEN __wcsnlen_sse2
+# define WCSNLEN __wcsnlen_generic
-extern __typeof (wcsnlen) __wcsnlen_sse2;
+extern __typeof (wcsnlen) __wcsnlen_generic;
#endif
#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index baa26666a8..05b7a211de 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -24,6 +24,7 @@
# undef __wcsnlen
# define SYMBOL_NAME wcsnlen
+# define GENERIC generic
# include "ifunc-wcslen.h"
libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
--
2.34.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v1 1/3] x86: Align varshift table to 32-bytes
2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
@ 2022-06-09 15:14 ` H.J. Lu
2022-07-14 2:51 ` Sunil Pandey
2 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2022-06-09 15:14 UTC (permalink / raw)
To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell
On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> This ensures the load will never split a cache line.
> ---
> sysdeps/x86_64/multiarch/varshift.c | 5 +++--
> sysdeps/x86_64/multiarch/varshift.h | 3 ++-
> 2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
> index c8210f0546..d27767520a 100644
> --- a/sysdeps/x86_64/multiarch/varshift.c
> +++ b/sysdeps/x86_64/multiarch/varshift.c
> @@ -16,9 +16,10 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#include "varshift.h"
> +#include <stdint.h>
>
> -const int8_t ___m128i_shift_right[31] attribute_hidden =
> +const int8_t ___m128i_shift_right[31] attribute_hidden
> + __attribute__((aligned(32))) =
> {
> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
> -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
> diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
> index af30694488..ffd12d79e4 100644
> --- a/sysdeps/x86_64/multiarch/varshift.h
> +++ b/sysdeps/x86_64/multiarch/varshift.h
> @@ -19,7 +19,8 @@
> #include <stdint.h>
> #include <tmmintrin.h>
>
> -extern const int8_t ___m128i_shift_right[31] attribute_hidden;
> +extern const int8_t ___m128i_shift_right[31] attribute_hidden
> + __attribute__ ((aligned (32)));
>
> static __inline__ __m128i
> __m128i_shift_right (__m128i value, unsigned long int offset)
> --
> 2.34.1
>
LGTM.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk
2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
@ 2022-06-09 15:28 ` H.J. Lu
0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2022-06-09 15:28 UTC (permalink / raw)
To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell
On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> No change to the actual logic of the functions. The goal is for
> avx/avx2 machines to rely less on sse instructions.
These aren't the only SSE2 functions. The rest of glibc may still be compiled
with SSE2. A different approach is to compile the whole of glibc with x86-64
ISA level 3.
> Full xcheck passes on x86_64.
> ---
> sysdeps/x86_64/multiarch/Makefile | 21 ++++++++++-----
> .../multiarch/{ifunc-sse4_2.h => ifunc-avx.h} | 4 +++
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 +++++
> sysdeps/x86_64/multiarch/strcspn-c-avx.c | 21 +++++++++++++++
> .../{strcspn-c.c => strcspn-c-sse4.c} | 26 ++++++++++++-------
> sysdeps/x86_64/multiarch/strcspn.c | 2 +-
> sysdeps/x86_64/multiarch/strpbrk-c-avx.c | 23 ++++++++++++++++
> .../{strpbrk-c.c => strpbrk-c-sse4.c} | 6 ++---
> sysdeps/x86_64/multiarch/strpbrk.c | 2 +-
> sysdeps/x86_64/multiarch/strspn-c-avx.c | 21 +++++++++++++++
> .../multiarch/{strspn-c.c => strspn-c-sse4.c} | 15 ++++++++---
> sysdeps/x86_64/multiarch/strspn.c | 2 +-
> 12 files changed, 122 insertions(+), 27 deletions(-)
> rename sysdeps/x86_64/multiarch/{ifunc-sse4_2.h => ifunc-avx.h} (89%)
> create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-avx.c
> rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-c-sse4.c} (90%)
> create mode 100644 sysdeps/x86_64/multiarch/strpbrk-c-avx.c
> rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-c-sse4.c} (89%)
> create mode 100644 sysdeps/x86_64/multiarch/strspn-c-avx.c
> rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-c-sse4.c} (92%)
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 3d153cac35..27f306c7c8 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -76,7 +76,8 @@ sysdep_routines += \
> strcpy-evex \
> strcpy-sse2 \
> strcpy-sse2-unaligned \
> - strcspn-c \
> + strcspn-c-avx \
> + strcspn-c-sse4 \
> strcspn-sse2 \
> strlen-avx2 \
> strlen-avx2-rtm \
> @@ -108,22 +109,28 @@ sysdep_routines += \
> strnlen-evex \
> strnlen-evex512 \
> strnlen-sse2 \
> - strpbrk-c \
> + strpbrk-c-avx \
> + strpbrk-c-sse4 \
> strpbrk-sse2 \
> strrchr-avx2 \
> strrchr-avx2-rtm \
> strrchr-evex \
> strrchr-sse2 \
> - strspn-c \
> + strspn-c-avx \
> + strspn-c-sse4 \
> strspn-sse2 \
> strstr-avx512 \
> strstr-sse2-unaligned \
> varshift \
> # sysdep_routines
> -CFLAGS-varshift.c += -msse4
> -CFLAGS-strcspn-c.c += -msse4
> -CFLAGS-strpbrk-c.c += -msse4
> -CFLAGS-strspn-c.c += -msse4
> +
> +CFLAGS-strcspn-c-avx.c += -mavx
> +CFLAGS-strcspn-c-sse4.c += -msse4
> +CFLAGS-strpbrk-c-avx.c += -mavx
> +CFLAGS-strpbrk-c-sse4.c += -msse4
> +CFLAGS-strspn-c-avx.c += -mavx
> +CFLAGS-strspn-c-sse4.c += -msse4
> +
> CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
> endif
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-avx.h
> similarity index 89%
> rename from sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> rename to sysdeps/x86_64/multiarch/ifunc-avx.h
> index b555ff2fac..891f3ddcac 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-avx.h
> @@ -21,12 +21,16 @@
>
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
>
> static inline void *
> IFUNC_SELECTOR (void)
> {
> const struct cpu_features* cpu_features = __get_cpu_features ();
>
> + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
> + return OPTIMIZE (avx);
> +
> if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
> return OPTIMIZE (sse42);
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 58f3ec8306..507c563669 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -529,6 +529,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
> /* Support sysdeps/x86_64/multiarch/strcspn.c. */
> IFUNC_IMPL (i, name, strcspn,
> + IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (AVX),
> + __strcspn_avx)
> IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
> __strcspn_sse42)
> IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> @@ -605,6 +607,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
> /* Support sysdeps/x86_64/multiarch/strpbrk.c. */
> IFUNC_IMPL (i, name, strpbrk,
> + IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (AVX),
> + __strpbrk_avx)
> IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
> __strpbrk_sse42)
> IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> @@ -612,6 +616,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
> /* Support sysdeps/x86_64/multiarch/strspn.c. */
> IFUNC_IMPL (i, name, strspn,
> + IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (AVX),
> + __strspn_avx)
> IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
> __strspn_sse42)
> IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c-avx.c b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
> new file mode 100644
> index 0000000000..b8d983f79f
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strcspn-c-avx.c
> @@ -0,0 +1,21 @@
> +/* strcspn with AVX intrinsics
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define STRCSPN __strcspn_avx
> +#define SECTION "avx"
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> similarity index 90%
> rename from sysdeps/x86_64/multiarch/strcspn-c.c
> rename to sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> index c312fab8b1..848c3cfb14 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> @@ -52,9 +52,16 @@
> when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> X for case 1. */
>
> -#ifndef STRCSPN_SSE2
> -# define STRCSPN_SSE2 __strcspn_sse2
> -# define STRCSPN_SSE42 __strcspn_sse42
> +#ifndef STRCSPN_FALLBACK
> +# define STRCSPN_FALLBACK __strcspn_sse2
> +#endif
> +
> +#ifndef STRCSPN
> +# define STRCSPN __strcspn_sse42
> +#endif
> +
> +#ifndef SECTION
> +# define SECTION "sse4.2"
> #endif
>
> #ifdef USE_AS_STRPBRK
> @@ -69,16 +76,15 @@ char *
> #else
> size_t
> #endif
> -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> -
> +STRCSPN_FALLBACK (const char *, const char *) attribute_hidden;
>
> #ifdef USE_AS_STRPBRK
> char *
> #else
> size_t
> #endif
> -__attribute__ ((section (".text.sse4.2")))
> -STRCSPN_SSE42 (const char *s, const char *a)
> +__attribute__ ((section (".text." SECTION)))
> +STRCSPN (const char *s, const char *a)
> {
> if (*a == 0)
> RETURN (NULL, strlen (s));
> @@ -116,10 +122,10 @@ STRCSPN_SSE42 (const char *s, const char *a)
> maskz_bits = _mm_movemask_epi8 (maskz);
> if (maskz_bits == 0)
> {
> - /* There is no NULL terminator. Don't use SSE4.2 if the length
> - of A > 16. */
> + /* There is no NULL terminator. Don't use pcmpstri based approach if the
> + length of A > 16. */
> if (a[16] != 0)
> - return STRCSPN_SSE2 (s, a);
> + return STRCSPN_FALLBACK (s, a);
> }
>
> aligned = s;
> diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c
> index 4848fa8677..63e1cf052e 100644
> --- a/sysdeps/x86_64/multiarch/strcspn.c
> +++ b/sysdeps/x86_64/multiarch/strcspn.c
> @@ -24,7 +24,7 @@
> # undef strcspn
>
> # define SYMBOL_NAME strcspn
> -# include "ifunc-sse4_2.h"
> +# include "ifunc-avx.h"
>
> libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ());
>
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c-avx.c b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
> new file mode 100644
> index 0000000000..2918013994
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c-avx.c
> @@ -0,0 +1,23 @@
> +/* strpbrk with AVX intrinsics
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define USE_AS_STRPBRK
> +#define STRCSPN_FALLBACK __strpbrk_sse2
> +#define STRCSPN __strpbrk_avx
> +#define SECTION "avx"
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> similarity index 89%
> rename from sysdeps/x86_64/multiarch/strpbrk-c.c
> rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> index abf4ff7f1a..2efd38d809 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> @@ -17,6 +17,6 @@
> <https://www.gnu.org/licenses/>. */
>
> #define USE_AS_STRPBRK
> -#define STRCSPN_SSE2 __strpbrk_sse2
> -#define STRCSPN_SSE42 __strpbrk_sse42
> -#include "strcspn-c.c"
> +#define STRCSPN_FALLBACK __strpbrk_sse2
> +#define STRCSPN __strpbrk_sse42
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c
> index 04e300ea71..ab5b04a482 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk.c
> @@ -24,7 +24,7 @@
> # undef strpbrk
>
> # define SYMBOL_NAME strpbrk
> -# include "ifunc-sse4_2.h"
> +# include "ifunc-avx.h"
>
> libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ());
>
> diff --git a/sysdeps/x86_64/multiarch/strspn-c-avx.c b/sysdeps/x86_64/multiarch/strspn-c-avx.c
> new file mode 100644
> index 0000000000..9d5fdb9550
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strspn-c-avx.c
> @@ -0,0 +1,21 @@
> +/* strspn with AVX intrinsics
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#define STRSPN __strspn_avx
> +#define SECTION "avx"
> +#include "strspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> similarity index 92%
> rename from sysdeps/x86_64/multiarch/strspn-c.c
> rename to sysdeps/x86_64/multiarch/strspn-c-sse4.c
> index 6124033ceb..6a91def2e0 100644
> --- a/sysdeps/x86_64/multiarch/strspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> @@ -53,10 +53,17 @@
>
> extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
>
> +#ifndef STRSPN
> +# define STRSPN __strspn_sse42
> +#endif
> +
> +#ifndef SECTION
> +# define SECTION "sse4.2"
> +#endif
>
> size_t
> -__attribute__ ((section (".text.sse4.2")))
> -__strspn_sse42 (const char *s, const char *a)
> +__attribute__ ((section (".text." SECTION)))
> +STRSPN (const char *s, const char *a)
> {
> if (*a == 0)
> return 0;
> @@ -95,8 +102,8 @@ __strspn_sse42 (const char *s, const char *a)
> maskz_bits = _mm_movemask_epi8 (maskz);
> if (maskz_bits == 0)
> {
> - /* There is no NULL terminator. Don't use SSE4.2 if the length
> - of A > 16. */
> + /* There is no NULL terminator. Don't use pcmpstri based approach if the
> + length of A > 16. */
> if (a[16] != 0)
> return __strspn_sse2 (s, a);
> }
> diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c
> index 07f5def155..c3c5e7a3cc 100644
> --- a/sysdeps/x86_64/multiarch/strspn.c
> +++ b/sysdeps/x86_64/multiarch/strspn.c
> @@ -24,7 +24,7 @@
> # undef strspn
>
> # define SYMBOL_NAME strspn
> -# include "ifunc-sse4_2.h"
> +# include "ifunc-avx.h"
>
> libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ());
>
> --
> 2.34.1
>
--
H.J.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2] x86: Rename generic functions with unique postfix for clarity
2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
@ 2022-06-10 0:58 ` Noah Goldstein
2022-06-10 1:19 ` H.J. Lu
2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein
1 sibling, 1 reply; 11+ messages in thread
From: Noah Goldstein @ 2022-06-10 0:58 UTC (permalink / raw)
To: libc-alpha
No functions are changed. This only renames generic implementations from
'{func}_sse2' to '{func}_generic'. The postfix "_sse2" was overloaded: it
was used both for files that had hand-optimized sse2 assembly
implementations and for files that just redirected back to the generic
implementation.
Full xcheck passed on x86_64.
---
sysdeps/x86_64/multiarch/Makefile | 15 +-
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +-
sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +-
sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +-
sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +-
sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +-
sysdeps/x86_64/multiarch/stpncpy.c | 1 +
sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 163 ++++++++++++++++++
sysdeps/x86_64/multiarch/strcspn-c.c | 151 +---------------
sysdeps/x86_64/multiarch/strcspn-sse2.c | 28 ---
sysdeps/x86_64/multiarch/strncat-c.c | 2 +-
sysdeps/x86_64/multiarch/strncat.c | 1 +
sysdeps/x86_64/multiarch/strncpy-c.c | 2 +-
sysdeps/x86_64/multiarch/strncpy.c | 1 +
.../{strspn-sse2.c => strpbrk-c-sse4.c} | 18 +-
sysdeps/x86_64/multiarch/strpbrk-c.c | 18 +-
sysdeps/x86_64/multiarch/strpbrk-sse2.c | 28 ---
sysdeps/x86_64/multiarch/strspn-c-sse4.c | 136 +++++++++++++++
sysdeps/x86_64/multiarch/strspn-c.c | 126 +-------------
sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +-
sysdeps/x86_64/multiarch/wcscpy.c | 4 +-
sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 +-
sysdeps/x86_64/multiarch/wcsnlen.c | 1 +
23 files changed, 376 insertions(+), 363 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c
delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c
rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%)
delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c
create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3d153cac35..86c6ecdfc1 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -77,7 +77,7 @@ sysdep_routines += \
strcpy-sse2 \
strcpy-sse2-unaligned \
strcspn-c \
- strcspn-sse2 \
+ strcspn-c-sse4 \
strlen-avx2 \
strlen-avx2-rtm \
strlen-evex \
@@ -109,21 +109,22 @@ sysdep_routines += \
strnlen-evex512 \
strnlen-sse2 \
strpbrk-c \
- strpbrk-sse2 \
+ strpbrk-c-sse4 \
strrchr-avx2 \
strrchr-avx2-rtm \
strrchr-evex \
strrchr-sse2 \
strspn-c \
- strspn-sse2 \
+ strspn-c-sse4 \
strstr-avx512 \
strstr-sse2-unaligned \
varshift \
# sysdep_routines
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+
+CFLAGS-strcspn-c-sse4.c += -msse4
+CFLAGS-strpbrk-c-sse4.c += -msse4
+CFLAGS-strspn-c-sse4.c += -msse4
+
CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 58f3ec8306..4cbd200d39 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__stpncpy_evex)
IFUNC_IMPL_ADD (array, i, stpncpy, 1,
__stpncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
/* Support sysdeps/x86_64/multiarch/stpcpy.c. */
IFUNC_IMPL (i, name, stpcpy,
@@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strcspn,
IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
__strcspn_sse42)
- IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
@@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncat_evex)
IFUNC_IMPL_ADD (array, i, strncat, 1,
__strncat_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
+ IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
/* Support sysdeps/x86_64/multiarch/strncpy.c. */
IFUNC_IMPL (i, name, strncpy,
@@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncpy_evex)
IFUNC_IMPL_ADD (array, i, strncpy, 1,
__strncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
/* Support sysdeps/x86_64/multiarch/strpbrk.c. */
IFUNC_IMPL (i, name, strpbrk,
IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
__strpbrk_sse42)
- IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
+ IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
/* Support sysdeps/x86_64/multiarch/strspn.c. */
IFUNC_IMPL (i, name, strspn,
IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
__strspn_sse42)
- IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
/* Support sysdeps/x86_64/multiarch/strstr.c. */
IFUNC_IMPL (i, name, strstr,
@@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, wcscpy,
IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
__wcscpy_ssse3)
- IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
+ IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
IFUNC_IMPL (i, name, wcslen,
@@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wcsnlen,
CPU_FEATURE_USABLE (SSE4_1),
__wcsnlen_sse4_1)
- IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+ IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
index b555ff2fac..ee36525bcf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
@@ -19,7 +19,7 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
static inline void *
@@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
return OPTIMIZE (sse42);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
index a15afa44e9..80529458d1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
@@ -20,7 +20,11 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
@@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
return OPTIMIZE (sse2_unaligned);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
index 2b29e7608a..88c1c502af 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
@@ -19,7 +19,11 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
@@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
index b016e487e1..eb62fcf388 100644
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
@@ -1,4 +1,4 @@
-#define STPNCPY __stpncpy_sse2
+#define STPNCPY __stpncpy_generic
#undef weak_alias
#define weak_alias(ignored1, ignored2)
#undef libc_hidden_def
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
index 82fa53957d..879bc83f0b 100644
--- a/sysdeps/x86_64/multiarch/stpncpy.c
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -25,6 +25,7 @@
# undef stpncpy
# undef __stpncpy
+# define GENERIC generic
# define SYMBOL_NAME stpncpy
# include "ifunc-strcpy.h"
diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
new file mode 100644
index 0000000000..59f64f9fe8
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
@@ -0,0 +1,163 @@
+/* strcspn with SSE4.2 intrinsics
+ Copyright (C) 2009-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <nmmintrin.h>
+#include <string.h>
+#include "varshift.h"
+
+/* We use 0x2:
+ _SIDD_SBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_POSITIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT
+ on pcmpistri to compare xmm/mem128
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ X X X X X X X X X X X X X X X X
+
+ against xmm
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ A A A A A A A A A A A A A A A A
+
+ to find out if the first 16byte data element has any byte A and
+ the offset of the first byte. There are 3 cases:
+
+ 1. The first 16byte data element has the byte A at the offset X.
+ 2. The first 16byte data element has EOS and doesn't have the byte A.
+ 3. The first 16byte data element is valid and doesn't have the byte A.
+
+ Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
+
+ 1 X 1 0/1 0
+ 2 16 0 1 0
+ 3 16 0 0 0
+
+ We exit from the loop for cases 1 and 2 with jbe which branches
+ when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
+ X for case 1. */
+
+#ifndef STRCSPN_GENERIC
+# define STRCSPN_GENERIC __strcspn_generic
+# define STRCSPN_SSE42 __strcspn_sse42
+#endif
+
+#ifdef USE_AS_STRPBRK
+# define RETURN(val1, val2) return val1
+#else
+# define RETURN(val1, val2) return val2
+#endif
+
+extern
+#ifdef USE_AS_STRPBRK
+char *
+#else
+size_t
+#endif
+STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
+
+
+#ifdef USE_AS_STRPBRK
+char *
+#else
+size_t
+#endif
+__attribute__ ((section (".text.sse4.2")))
+STRCSPN_SSE42 (const char *s, const char *a)
+{
+ if (*a == 0)
+ RETURN (NULL, strlen (s));
+
+ const char *aligned;
+ __m128i mask, maskz, zero;
+ unsigned int maskz_bits;
+ unsigned int offset = (unsigned int) ((size_t) a & 15);
+ zero = _mm_set1_epi8 (0);
+ if (offset != 0)
+ {
+ /* Load masks. */
+ aligned = (const char *) ((size_t) a & -16L);
+ __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
+ maskz = _mm_cmpeq_epi8 (mask0, zero);
+
+ /* Find where the NULL terminator is. */
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ {
+ mask = __m128i_shift_right (mask0, offset);
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset)
+ goto start_unaligned;
+
+ aligned = s;
+ goto start_loop;
+ }
+ }
+
+ /* A is aligned. */
+ mask = _mm_loadu_si128 ((__m128i *) a);
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (mask, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz);
+ if (maskz_bits == 0)
+ {
+ /* There is no NULL terminator. Don't use SSE4.2 if the length
+ of A > 16. */
+ if (a[16] != 0)
+ return STRCSPN_GENERIC (s, a);
+ }
+
+ aligned = s;
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset != 0)
+ {
+ start_unaligned:
+ /* Check partial string. */
+ aligned = (const char *) ((size_t) s & -16L);
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+
+ value = __m128i_shift_right (value, offset);
+
+ unsigned int length = _mm_cmpistri (mask, value, 0x2);
+ /* No need to check ZFlag since ZFlag is always 1. */
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+ if (cflag)
+ RETURN ((char *) (s + length), length);
+ /* Find where the NULL terminator is. */
+ unsigned int index = _mm_cmpistri (value, value, 0x3a);
+ if (index < 16 - offset)
+ RETURN (NULL, index);
+ aligned += 16;
+ }
+
+start_loop:
+ while (1)
+ {
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+ unsigned int index = _mm_cmpistri (mask, value, 0x2);
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+ unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
+ if (cflag)
+ RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
+ if (zflag)
+ RETURN (NULL,
+ /* Find where the NULL terminator is. */
+ (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
+ aligned += 16;
+ }
+}
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index c312fab8b1..423de2e2b2 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -1,5 +1,5 @@
-/* strcspn with SSE4.2 intrinsics
- Copyright (C) 2009-2022 Free Software Foundation, Inc.
+/* strcspn.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,148 +16,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
+#if IS_IN (libc)
-/* We use 0x2:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_ANY
- | _SIDD_POSITIVE_POLARITY
- | _SIDD_LEAST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
+# include <sysdep.h>
+# define STRCSPN __strcspn_generic
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- A A A A A A A A A A A A A A A A
-
- to find out if the first 16byte data element has any byte A and
- the offset of the first byte. There are 3 cases:
-
- 1. The first 16byte data element has the byte A at the offset X.
- 2. The first 16byte data element has EOS and doesn't have the byte A.
- 3. The first 16byte data element is valid and doesn't have the byte A.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
- 1 X 1 0/1 0
- 2 16 0 1 0
- 3 16 0 0 0
-
- We exit from the loop for cases 1 and 2 with jbe which branches
- when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
- X for case 1. */
-
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
-# define STRCSPN_SSE42 __strcspn_sse42
-#endif
-
-#ifdef USE_AS_STRPBRK
-# define RETURN(val1, val2) return val1
-#else
-# define RETURN(val1, val2) return val2
-#endif
-
-extern
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
-#endif
-STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
-
-
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRCSPN)
#endif
-__attribute__ ((section (".text.sse4.2")))
-STRCSPN_SSE42 (const char *s, const char *a)
-{
- if (*a == 0)
- RETURN (NULL, strlen (s));
-
- const char *aligned;
- __m128i mask, maskz, zero;
- unsigned int maskz_bits;
- unsigned int offset = (unsigned int) ((size_t) a & 15);
- zero = _mm_set1_epi8 (0);
- if (offset != 0)
- {
- /* Load masks. */
- aligned = (const char *) ((size_t) a & -16L);
- __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
- maskz = _mm_cmpeq_epi8 (mask0, zero);
-
- /* Find where the NULL terminator is. */
- maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
- if (maskz_bits != 0)
- {
- mask = __m128i_shift_right (mask0, offset);
- offset = (unsigned int) ((size_t) s & 15);
- if (offset)
- goto start_unaligned;
-
- aligned = s;
- goto start_loop;
- }
- }
-
- /* A is aligned. */
- mask = _mm_loadu_si128 ((__m128i *) a);
- /* Find where the NULL terminator is. */
- maskz = _mm_cmpeq_epi8 (mask, zero);
- maskz_bits = _mm_movemask_epi8 (maskz);
- if (maskz_bits == 0)
- {
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
- if (a[16] != 0)
- return STRCSPN_SSE2 (s, a);
- }
-
- aligned = s;
- offset = (unsigned int) ((size_t) s & 15);
- if (offset != 0)
- {
- start_unaligned:
- /* Check partial string. */
- aligned = (const char *) ((size_t) s & -16L);
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
-
- value = __m128i_shift_right (value, offset);
-
- unsigned int length = _mm_cmpistri (mask, value, 0x2);
- /* No need to check ZFlag since ZFlag is always 1. */
- unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
- if (cflag)
- RETURN ((char *) (s + length), length);
- /* Find where the NULL terminator is. */
- unsigned int index = _mm_cmpistri (value, value, 0x3a);
- if (index < 16 - offset)
- RETURN (NULL, index);
- aligned += 16;
- }
-start_loop:
- while (1)
- {
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
- unsigned int index = _mm_cmpistri (mask, value, 0x2);
- unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
- unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
- if (cflag)
- RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
- if (zflag)
- RETURN (NULL,
- /* Find where the NULL terminator is. */
- (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
- aligned += 16;
- }
-}
+#include <string/strcspn.c>
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c
deleted file mode 100644
index 3a04bb39fc..0000000000
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* strcspn.
- Copyright (C) 2017-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRCSPN __strcspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRCSPN)
-#endif
-
-#include <string/strcspn.c>
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
index 93a7fab7ea..b729c033d9 100644
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -1,2 +1,2 @@
-#define STRNCAT __strncat_sse2
+#define STRNCAT __strncat_generic
#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
index b649343a97..50fba8a41f 100644
--- a/sysdeps/x86_64/multiarch/strncat.c
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -24,6 +24,7 @@
# undef strncat
# define SYMBOL_NAME strncat
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
index 57c45ac7ab..183b0b8e0f 100644
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ b/sysdeps/x86_64/multiarch/strncpy-c.c
@@ -1,4 +1,4 @@
-#define STRNCPY __strncpy_sse2
+#define STRNCPY __strncpy_generic
#undef libc_hidden_builtin_def
#define libc_hidden_builtin_def(strncpy)
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
index 2a780a7e16..7fc7d72ec5 100644
--- a/sysdeps/x86_64/multiarch/strncpy.c
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -24,6 +24,7 @@
# undef strncpy
# define SYMBOL_NAME strncpy
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
similarity index 74%
rename from sysdeps/x86_64/multiarch/strspn-sse2.c
rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
index 61cc6cb0a5..8700276773 100644
--- a/sysdeps/x86_64/multiarch/strspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
@@ -1,5 +1,5 @@
-/* strspn.
- Copyright (C) 2017-2022 Free Software Foundation, Inc.
+/* strpbrk with SSE4.2 intrinsics
+ Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,7 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRSPN __strspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRSPN)
-#endif
-
-#include <string/strspn.c>
+#define USE_AS_STRPBRK
+#define STRCSPN_GENERIC __strpbrk_generic
+#define STRCSPN_SSE42 __strpbrk_sse42
+#include "strcspn-c-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
index abf4ff7f1a..d31acfe495 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
@@ -1,5 +1,5 @@
-/* strpbrk with SSE4.2 intrinsics
- Copyright (C) 2022 Free Software Foundation, Inc.
+/* strpbrk.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define USE_AS_STRPBRK
-#define STRCSPN_SSE2 __strpbrk_sse2
-#define STRCSPN_SSE42 __strpbrk_sse42
-#include "strcspn-c.c"
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRPBRK __strpbrk_generic
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRPBRK)
+#endif
+
+#include <string/strpbrk.c>
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c
deleted file mode 100644
index d03214c4fb..0000000000
--- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* strpbrk.
- Copyright (C) 2017-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRPBRK __strpbrk_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRPBRK)
-#endif
-
-#include <string/strpbrk.c>
diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
new file mode 100644
index 0000000000..d044916688
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
@@ -0,0 +1,136 @@
+/* strspn with SSE4.2 intrinsics
+ Copyright (C) 2009-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <nmmintrin.h>
+#include <string.h>
+#include "varshift.h"
+
+/* We use 0x12:
+ _SIDD_SBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_NEGATIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT
+ on pcmpistri to compare xmm/mem128
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ X X X X X X X X X X X X X X X X
+
+ against xmm
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ A A A A A A A A A A A A A A A A
+
+ to find out if the first 16byte data element has any non-A byte and
+ the offset of the first byte. There are 2 cases:
+
+ 1. The first 16byte data element has the non-A byte, including
+ EOS, at the offset X.
+ 2. The first 16byte data element is valid and doesn't have the non-A
+ byte.
+
+ Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
+
+ case ECX CFlag ZFlag SFlag
+ 1 X 1 0/1 0
+ 2 16 0 0 0
+
+ We exit from the loop for case 1. */
+
+extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
+
+
+size_t
+__attribute__ ((section (".text.sse4.2")))
+__strspn_sse42 (const char *s, const char *a)
+{
+ if (*a == 0)
+ return 0;
+
+ const char *aligned;
+ __m128i mask, maskz, zero;
+ unsigned int maskz_bits;
+ unsigned int offset = (int) ((size_t) a & 15);
+ zero = _mm_set1_epi8 (0);
+ if (offset != 0)
+ {
+ /* Load masks. */
+ aligned = (const char *) ((size_t) a & -16L);
+ __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
+ maskz = _mm_cmpeq_epi8 (mask0, zero);
+
+ /* Find where the NULL terminator is. */
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ {
+ mask = __m128i_shift_right (mask0, offset);
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset)
+ goto start_unaligned;
+
+ aligned = s;
+ goto start_loop;
+ }
+ }
+
+ /* A is aligned. */
+ mask = _mm_loadu_si128 ((__m128i *) a);
+
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (mask, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz);
+ if (maskz_bits == 0)
+ {
+ /* There is no NULL terminator. Don't use SSE4.2 if the length
+ of A > 16. */
+ if (a[16] != 0)
+ return __strspn_generic (s, a);
+ }
+ aligned = s;
+ offset = (unsigned int) ((size_t) s & 15);
+
+ if (offset != 0)
+ {
+ start_unaligned:
+ /* Check partial string. */
+ aligned = (const char *) ((size_t) s & -16L);
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+ __m128i adj_value = __m128i_shift_right (value, offset);
+
+ unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
+ /* No need to check CFlag since it is always 1. */
+ if (length < 16 - offset)
+ return length;
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (value, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ return length;
+ aligned += 16;
+ }
+
+start_loop:
+ while (1)
+ {
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+ unsigned int index = _mm_cmpistri (mask, value, 0x12);
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
+ if (cflag)
+ return (size_t) (aligned + index - s);
+ aligned += 16;
+ }
+}
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 6124033ceb..6b50c36432 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -1,5 +1,5 @@
-/* strspn with SSE4.2 intrinsics
- Copyright (C) 2009-2022 Free Software Foundation, Inc.
+/* strspn.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,121 +16,13 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
+#if IS_IN (libc)
-/* We use 0x12:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_ANY
- | _SIDD_NEGATIVE_POLARITY
- | _SIDD_LEAST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
+# include <sysdep.h>
+# define STRSPN __strspn_generic
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRSPN)
+#endif
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- A A A A A A A A A A A A A A A A
-
- to find out if the first 16byte data element has any non-A byte and
- the offset of the first byte. There are 2 cases:
-
- 1. The first 16byte data element has the non-A byte, including
- EOS, at the offset X.
- 2. The first 16byte data element is valid and doesn't have the non-A
- byte.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
- case ECX CFlag ZFlag SFlag
- 1 X 1 0/1 0
- 2 16 0 0 0
-
- We exit from the loop for case 1. */
-
-extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
-
-
-size_t
-__attribute__ ((section (".text.sse4.2")))
-__strspn_sse42 (const char *s, const char *a)
-{
- if (*a == 0)
- return 0;
-
- const char *aligned;
- __m128i mask, maskz, zero;
- unsigned int maskz_bits;
- unsigned int offset = (int) ((size_t) a & 15);
- zero = _mm_set1_epi8 (0);
- if (offset != 0)
- {
- /* Load masks. */
- aligned = (const char *) ((size_t) a & -16L);
- __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
- maskz = _mm_cmpeq_epi8 (mask0, zero);
-
- /* Find where the NULL terminator is. */
- maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
- if (maskz_bits != 0)
- {
- mask = __m128i_shift_right (mask0, offset);
- offset = (unsigned int) ((size_t) s & 15);
- if (offset)
- goto start_unaligned;
-
- aligned = s;
- goto start_loop;
- }
- }
-
- /* A is aligned. */
- mask = _mm_loadu_si128 ((__m128i *) a);
-
- /* Find where the NULL terminator is. */
- maskz = _mm_cmpeq_epi8 (mask, zero);
- maskz_bits = _mm_movemask_epi8 (maskz);
- if (maskz_bits == 0)
- {
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
- if (a[16] != 0)
- return __strspn_sse2 (s, a);
- }
- aligned = s;
- offset = (unsigned int) ((size_t) s & 15);
-
- if (offset != 0)
- {
- start_unaligned:
- /* Check partial string. */
- aligned = (const char *) ((size_t) s & -16L);
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
- __m128i adj_value = __m128i_shift_right (value, offset);
-
- unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
- /* No need to check CFlag since it is always 1. */
- if (length < 16 - offset)
- return length;
- /* Find where the NULL terminator is. */
- maskz = _mm_cmpeq_epi8 (value, zero);
- maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
- if (maskz_bits != 0)
- return length;
- aligned += 16;
- }
-
-start_loop:
- while (1)
- {
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
- unsigned int index = _mm_cmpistri (mask, value, 0x12);
- unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
- if (cflag)
- return (size_t) (aligned + index - s);
- aligned += 16;
- }
-}
+#include <string/strspn.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
index 26d6984e9b..fa38dd898d 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-c.c
+++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
@@ -1,5 +1,5 @@
#if IS_IN (libc)
-# define WCSCPY __wcscpy_sse2
+# define WCSCPY __wcscpy_generic
#endif
#include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
index 6a2d1421d9..53c3228dc2 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.c
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -26,7 +26,7 @@
# define SYMBOL_NAME wcscpy
# include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
static inline void *
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
return OPTIMIZE (ssse3);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
index e1ec7cfbb5..1c9c04241a 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
@@ -1,9 +1,9 @@
#if IS_IN (libc)
# include <wchar.h>
-# define WCSNLEN __wcsnlen_sse2
+# define WCSNLEN __wcsnlen_generic
-extern __typeof (wcsnlen) __wcsnlen_sse2;
+extern __typeof (wcsnlen) __wcsnlen_generic;
#endif
#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index baa26666a8..05b7a211de 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -24,6 +24,7 @@
# undef __wcsnlen
# define SYMBOL_NAME wcsnlen
+# define GENERIC generic
# include "ifunc-wcslen.h"
libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
--
2.34.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] x86: Rename generic functions with unique postfix for clarity
2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein
@ 2022-06-10 1:19 ` H.J. Lu
2022-06-10 1:26 ` Noah Goldstein
0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2022-06-10 1:19 UTC (permalink / raw)
To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell
On Thu, Jun 9, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> No functions are changed. It just renames generic implementations from
> '{func}_sse2' to '{func}_generic'. This is just because the postfix
> "_sse2" was overloaded and was used for files that had hand-optimized
> sse2 assembly implementations and files that just redirected back
> to the generic implementation.
This change isn't small and its benefit is very small. Can it be part of
a bigger change to support building glibc with
-march=x86-64-vN?
> Full xcheck passed on x86_64.
> ---
> sysdeps/x86_64/multiarch/Makefile | 15 +-
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +-
> sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +-
> sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +-
> sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +-
> sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +-
> sysdeps/x86_64/multiarch/stpncpy.c | 1 +
> sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 163 ++++++++++++++++++
> sysdeps/x86_64/multiarch/strcspn-c.c | 151 +---------------
> sysdeps/x86_64/multiarch/strcspn-sse2.c | 28 ---
> sysdeps/x86_64/multiarch/strncat-c.c | 2 +-
> sysdeps/x86_64/multiarch/strncat.c | 1 +
> sysdeps/x86_64/multiarch/strncpy-c.c | 2 +-
> sysdeps/x86_64/multiarch/strncpy.c | 1 +
> .../{strspn-sse2.c => strpbrk-c-sse4.c} | 18 +-
> sysdeps/x86_64/multiarch/strpbrk-c.c | 18 +-
> sysdeps/x86_64/multiarch/strpbrk-sse2.c | 28 ---
> sysdeps/x86_64/multiarch/strspn-c-sse4.c | 136 +++++++++++++++
> sysdeps/x86_64/multiarch/strspn-c.c | 126 +-------------
> sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +-
> sysdeps/x86_64/multiarch/wcscpy.c | 4 +-
> sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 +-
> sysdeps/x86_64/multiarch/wcsnlen.c | 1 +
> 23 files changed, 376 insertions(+), 363 deletions(-)
> create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c
> rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%)
> delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c
> create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 3d153cac35..86c6ecdfc1 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -77,7 +77,7 @@ sysdep_routines += \
> strcpy-sse2 \
> strcpy-sse2-unaligned \
> strcspn-c \
> - strcspn-sse2 \
> + strcspn-c-sse4 \
> strlen-avx2 \
> strlen-avx2-rtm \
> strlen-evex \
> @@ -109,21 +109,22 @@ sysdep_routines += \
> strnlen-evex512 \
> strnlen-sse2 \
> strpbrk-c \
> - strpbrk-sse2 \
> + strpbrk-c-sse4 \
> strrchr-avx2 \
> strrchr-avx2-rtm \
> strrchr-evex \
> strrchr-sse2 \
> strspn-c \
> - strspn-sse2 \
> + strspn-c-sse4 \
> strstr-avx512 \
> strstr-sse2-unaligned \
> varshift \
> # sysdep_routines
> -CFLAGS-varshift.c += -msse4
> -CFLAGS-strcspn-c.c += -msse4
> -CFLAGS-strpbrk-c.c += -msse4
> -CFLAGS-strspn-c.c += -msse4
> +
> +CFLAGS-strcspn-c-sse4.c += -msse4
> +CFLAGS-strpbrk-c-sse4.c += -msse4
> +CFLAGS-strspn-c-sse4.c += -msse4
> +
> CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
> endif
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 58f3ec8306..4cbd200d39 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> __stpncpy_evex)
> IFUNC_IMPL_ADD (array, i, stpncpy, 1,
> __stpncpy_sse2_unaligned)
> - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
> + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
>
> /* Support sysdeps/x86_64/multiarch/stpcpy.c. */
> IFUNC_IMPL (i, name, stpcpy,
> @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, strcspn,
> IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
> __strcspn_sse42)
> - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
>
> /* Support sysdeps/x86_64/multiarch/strncase_l.c. */
> IFUNC_IMPL (i, name, strncasecmp,
> @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> __strncat_evex)
> IFUNC_IMPL_ADD (array, i, strncat, 1,
> __strncat_sse2_unaligned)
> - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
>
> /* Support sysdeps/x86_64/multiarch/strncpy.c. */
> IFUNC_IMPL (i, name, strncpy,
> @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> __strncpy_evex)
> IFUNC_IMPL_ADD (array, i, strncpy, 1,
> __strncpy_sse2_unaligned)
> - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
> + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
>
> /* Support sysdeps/x86_64/multiarch/strpbrk.c. */
> IFUNC_IMPL (i, name, strpbrk,
> IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
> __strpbrk_sse42)
> - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
>
>
> /* Support sysdeps/x86_64/multiarch/strspn.c. */
> IFUNC_IMPL (i, name, strspn,
> IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
> __strspn_sse42)
> - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
>
> /* Support sysdeps/x86_64/multiarch/strstr.c. */
> IFUNC_IMPL (i, name, strstr,
> @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, wcscpy,
> IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
> __wcscpy_ssse3)
> - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
> + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
>
> /* Support sysdeps/x86_64/multiarch/wcslen.c. */
> IFUNC_IMPL (i, name, wcslen,
> @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL_ADD (array, i, wcsnlen,
> CPU_FEATURE_USABLE (SSE4_1),
> __wcsnlen_sse4_1)
> - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
> + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
>
> /* Support sysdeps/x86_64/multiarch/wmemchr.c. */
> IFUNC_IMPL (i, name, wmemchr,
> diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> index b555ff2fac..ee36525bcf 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> @@ -19,7 +19,7 @@
>
> #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
>
> static inline void *
> @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
> return OPTIMIZE (sse42);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (generic);
> }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> index a15afa44e9..80529458d1 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> @@ -20,7 +20,11 @@
>
> #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
> attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> return OPTIMIZE (sse2_unaligned);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (GENERIC);
> }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> index 2b29e7608a..88c1c502af 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> @@ -19,7 +19,11 @@
>
> #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
> return OPTIMIZE (sse4_1);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (GENERIC);
> }
> diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
> index b016e487e1..eb62fcf388 100644
> --- a/sysdeps/x86_64/multiarch/stpncpy-c.c
> +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
> @@ -1,4 +1,4 @@
> -#define STPNCPY __stpncpy_sse2
> +#define STPNCPY __stpncpy_generic
> #undef weak_alias
> #define weak_alias(ignored1, ignored2)
> #undef libc_hidden_def
> diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
> index 82fa53957d..879bc83f0b 100644
> --- a/sysdeps/x86_64/multiarch/stpncpy.c
> +++ b/sysdeps/x86_64/multiarch/stpncpy.c
> @@ -25,6 +25,7 @@
> # undef stpncpy
> # undef __stpncpy
>
> +# define GENERIC generic
> # define SYMBOL_NAME stpncpy
> # include "ifunc-strcpy.h"
>
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> new file mode 100644
> index 0000000000..59f64f9fe8
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> @@ -0,0 +1,163 @@
> +/* strcspn with SSE4.2 intrinsics
> + Copyright (C) 2009-2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <nmmintrin.h>
> +#include <string.h>
> +#include "varshift.h"
> +
> +/* We use 0x2:
> + _SIDD_SBYTE_OPS
> + | _SIDD_CMP_EQUAL_ANY
> + | _SIDD_POSITIVE_POLARITY
> + | _SIDD_LEAST_SIGNIFICANT
> + on pcmpistri to compare xmm/mem128
> +
> + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> + X X X X X X X X X X X X X X X X
> +
> + against xmm
> +
> + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> + A A A A A A A A A A A A A A A A
> +
> + to find out if the first 16byte data element has any byte A and
> + the offset of the first byte. There are 3 cases:
> +
> + 1. The first 16byte data element has the byte A at the offset X.
> + 2. The first 16byte data element has EOS and doesn't have the byte A.
> + 3. The first 16byte data element is valid and doesn't have the byte A.
> +
> + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> +
> + 1 X 1 0/1 0
> + 2 16 0 1 0
> + 3 16 0 0 0
> +
> + We exit from the loop for cases 1 and 2 with jbe which branches
> + when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> + X for case 1. */
> +
> +#ifndef STRCSPN_GENERIC
> +# define STRCSPN_GENERIC __strcspn_generic
> +# define STRCSPN_SSE42 __strcspn_sse42
> +#endif
> +
> +#ifdef USE_AS_STRPBRK
> +# define RETURN(val1, val2) return val1
> +#else
> +# define RETURN(val1, val2) return val2
> +#endif
> +
> +extern
> +#ifdef USE_AS_STRPBRK
> +char *
> +#else
> +size_t
> +#endif
> +STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
> +
> +
> +#ifdef USE_AS_STRPBRK
> +char *
> +#else
> +size_t
> +#endif
> +__attribute__ ((section (".text.sse4.2")))
> +STRCSPN_SSE42 (const char *s, const char *a)
> +{
> + if (*a == 0)
> + RETURN (NULL, strlen (s));
> +
> + const char *aligned;
> + __m128i mask, maskz, zero;
> + unsigned int maskz_bits;
> + unsigned int offset = (unsigned int) ((size_t) a & 15);
> + zero = _mm_set1_epi8 (0);
> + if (offset != 0)
> + {
> + /* Load masks. */
> + aligned = (const char *) ((size_t) a & -16L);
> + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> + maskz = _mm_cmpeq_epi8 (mask0, zero);
> +
> + /* Find where the NULL terminator is. */
> + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> + if (maskz_bits != 0)
> + {
> + mask = __m128i_shift_right (mask0, offset);
> + offset = (unsigned int) ((size_t) s & 15);
> + if (offset)
> + goto start_unaligned;
> +
> + aligned = s;
> + goto start_loop;
> + }
> + }
> +
> + /* A is aligned. */
> + mask = _mm_loadu_si128 ((__m128i *) a);
> + /* Find where the NULL terminator is. */
> + maskz = _mm_cmpeq_epi8 (mask, zero);
> + maskz_bits = _mm_movemask_epi8 (maskz);
> + if (maskz_bits == 0)
> + {
> + /* There is no NULL terminator. Don't use SSE4.2 if the length
> + of A > 16. */
> + if (a[16] != 0)
> + return STRCSPN_GENERIC (s, a);
> + }
> +
> + aligned = s;
> + offset = (unsigned int) ((size_t) s & 15);
> + if (offset != 0)
> + {
> + start_unaligned:
> + /* Check partial string. */
> + aligned = (const char *) ((size_t) s & -16L);
> + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> +
> + value = __m128i_shift_right (value, offset);
> +
> + unsigned int length = _mm_cmpistri (mask, value, 0x2);
> + /* No need to check ZFlag since ZFlag is always 1. */
> + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> + if (cflag)
> + RETURN ((char *) (s + length), length);
> + /* Find where the NULL terminator is. */
> + unsigned int index = _mm_cmpistri (value, value, 0x3a);
> + if (index < 16 - offset)
> + RETURN (NULL, index);
> + aligned += 16;
> + }
> +
> +start_loop:
> + while (1)
> + {
> + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> + unsigned int index = _mm_cmpistri (mask, value, 0x2);
> + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> + unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> + if (cflag)
> + RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> + if (zflag)
> + RETURN (NULL,
> + /* Find where the NULL terminator is. */
> + (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> + aligned += 16;
> + }
> +}
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
> index c312fab8b1..423de2e2b2 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-c.c
> @@ -1,5 +1,5 @@
> -/* strcspn with SSE4.2 intrinsics
> - Copyright (C) 2009-2022 Free Software Foundation, Inc.
> +/* strcspn.
> + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -16,148 +16,13 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#include <nmmintrin.h>
> -#include <string.h>
> -#include "varshift.h"
> +#if IS_IN (libc)
>
> -/* We use 0x2:
> - _SIDD_SBYTE_OPS
> - | _SIDD_CMP_EQUAL_ANY
> - | _SIDD_POSITIVE_POLARITY
> - | _SIDD_LEAST_SIGNIFICANT
> - on pcmpistri to compare xmm/mem128
> +# include <sysdep.h>
> +# define STRCSPN __strcspn_generic
>
> - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> - X X X X X X X X X X X X X X X X
> -
> - against xmm
> -
> - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> - A A A A A A A A A A A A A A A A
> -
> - to find out if the first 16byte data element has any byte A and
> - the offset of the first byte. There are 3 cases:
> -
> - 1. The first 16byte data element has the byte A at the offset X.
> - 2. The first 16byte data element has EOS and doesn't have the byte A.
> - 3. The first 16byte data element is valid and doesn't have the byte A.
> -
> - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> -
> - 1 X 1 0/1 0
> - 2 16 0 1 0
> - 3 16 0 0 0
> -
> - We exit from the loop for cases 1 and 2 with jbe which branches
> - when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> - X for case 1. */
> -
> -#ifndef STRCSPN_SSE2
> -# define STRCSPN_SSE2 __strcspn_sse2
> -# define STRCSPN_SSE42 __strcspn_sse42
> -#endif
> -
> -#ifdef USE_AS_STRPBRK
> -# define RETURN(val1, val2) return val1
> -#else
> -# define RETURN(val1, val2) return val2
> -#endif
> -
> -extern
> -#ifdef USE_AS_STRPBRK
> -char *
> -#else
> -size_t
> -#endif
> -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> -
> -
> -#ifdef USE_AS_STRPBRK
> -char *
> -#else
> -size_t
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(STRCSPN)
> #endif
> -__attribute__ ((section (".text.sse4.2")))
> -STRCSPN_SSE42 (const char *s, const char *a)
> -{
> - if (*a == 0)
> - RETURN (NULL, strlen (s));
> -
> - const char *aligned;
> - __m128i mask, maskz, zero;
> - unsigned int maskz_bits;
> - unsigned int offset = (unsigned int) ((size_t) a & 15);
> - zero = _mm_set1_epi8 (0);
> - if (offset != 0)
> - {
> - /* Load masks. */
> - aligned = (const char *) ((size_t) a & -16L);
> - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> - maskz = _mm_cmpeq_epi8 (mask0, zero);
> -
> - /* Find where the NULL terminator is. */
> - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> - if (maskz_bits != 0)
> - {
> - mask = __m128i_shift_right (mask0, offset);
> - offset = (unsigned int) ((size_t) s & 15);
> - if (offset)
> - goto start_unaligned;
> -
> - aligned = s;
> - goto start_loop;
> - }
> - }
> -
> - /* A is aligned. */
> - mask = _mm_loadu_si128 ((__m128i *) a);
> - /* Find where the NULL terminator is. */
> - maskz = _mm_cmpeq_epi8 (mask, zero);
> - maskz_bits = _mm_movemask_epi8 (maskz);
> - if (maskz_bits == 0)
> - {
> - /* There is no NULL terminator. Don't use SSE4.2 if the length
> - of A > 16. */
> - if (a[16] != 0)
> - return STRCSPN_SSE2 (s, a);
> - }
> -
> - aligned = s;
> - offset = (unsigned int) ((size_t) s & 15);
> - if (offset != 0)
> - {
> - start_unaligned:
> - /* Check partial string. */
> - aligned = (const char *) ((size_t) s & -16L);
> - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> -
> - value = __m128i_shift_right (value, offset);
> -
> - unsigned int length = _mm_cmpistri (mask, value, 0x2);
> - /* No need to check ZFlag since ZFlag is always 1. */
> - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> - if (cflag)
> - RETURN ((char *) (s + length), length);
> - /* Find where the NULL terminator is. */
> - unsigned int index = _mm_cmpistri (value, value, 0x3a);
> - if (index < 16 - offset)
> - RETURN (NULL, index);
> - aligned += 16;
> - }
>
> -start_loop:
> - while (1)
> - {
> - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> - unsigned int index = _mm_cmpistri (mask, value, 0x2);
> - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> - unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> - if (cflag)
> - RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> - if (zflag)
> - RETURN (NULL,
> - /* Find where the NULL terminator is. */
> - (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> - aligned += 16;
> - }
> -}
> +#include <string/strcspn.c>
> diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c
> deleted file mode 100644
> index 3a04bb39fc..0000000000
> --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
> +++ /dev/null
> @@ -1,28 +0,0 @@
> -/* strcspn.
> - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <https://www.gnu.org/licenses/>. */
> -
> -#if IS_IN (libc)
> -
> -# include <sysdep.h>
> -# define STRCSPN __strcspn_sse2
> -
> -# undef libc_hidden_builtin_def
> -# define libc_hidden_builtin_def(STRCSPN)
> -#endif
> -
> -#include <string/strcspn.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
> index 93a7fab7ea..b729c033d9 100644
> --- a/sysdeps/x86_64/multiarch/strncat-c.c
> +++ b/sysdeps/x86_64/multiarch/strncat-c.c
> @@ -1,2 +1,2 @@
> -#define STRNCAT __strncat_sse2
> +#define STRNCAT __strncat_generic
> #include <string/strncat.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
> index b649343a97..50fba8a41f 100644
> --- a/sysdeps/x86_64/multiarch/strncat.c
> +++ b/sysdeps/x86_64/multiarch/strncat.c
> @@ -24,6 +24,7 @@
> # undef strncat
>
> # define SYMBOL_NAME strncat
> +# define GENERIC generic
> # include "ifunc-strcpy.h"
>
> libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
> index 57c45ac7ab..183b0b8e0f 100644
> --- a/sysdeps/x86_64/multiarch/strncpy-c.c
> +++ b/sysdeps/x86_64/multiarch/strncpy-c.c
> @@ -1,4 +1,4 @@
> -#define STRNCPY __strncpy_sse2
> +#define STRNCPY __strncpy_generic
> #undef libc_hidden_builtin_def
> #define libc_hidden_builtin_def(strncpy)
>
> diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
> index 2a780a7e16..7fc7d72ec5 100644
> --- a/sysdeps/x86_64/multiarch/strncpy.c
> +++ b/sysdeps/x86_64/multiarch/strncpy.c
> @@ -24,6 +24,7 @@
> # undef strncpy
>
> # define SYMBOL_NAME strncpy
> +# define GENERIC generic
> # include "ifunc-strcpy.h"
>
> libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> similarity index 74%
> rename from sysdeps/x86_64/multiarch/strspn-sse2.c
> rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> index 61cc6cb0a5..8700276773 100644
> --- a/sysdeps/x86_64/multiarch/strspn-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> @@ -1,5 +1,5 @@
> -/* strspn.
> - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> +/* strpbrk with SSE4.2 intrinsics
> + Copyright (C) 2022 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -16,13 +16,7 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#if IS_IN (libc)
> -
> -# include <sysdep.h>
> -# define STRSPN __strspn_sse2
> -
> -# undef libc_hidden_builtin_def
> -# define libc_hidden_builtin_def(STRSPN)
> -#endif
> -
> -#include <string/strspn.c>
> +#define USE_AS_STRPBRK
> +#define STRCSPN_GENERIC __strpbrk_generic
> +#define STRCSPN_SSE42 __strpbrk_sse42
> +#include "strcspn-c-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
> index abf4ff7f1a..d31acfe495 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
> @@ -1,5 +1,5 @@
> -/* strpbrk with SSE4.2 intrinsics
> - Copyright (C) 2022 Free Software Foundation, Inc.
> +/* strpbrk.
> + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -16,7 +16,13 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#define USE_AS_STRPBRK
> -#define STRCSPN_SSE2 __strpbrk_sse2
> -#define STRCSPN_SSE42 __strpbrk_sse42
> -#include "strcspn-c.c"
> +#if IS_IN (libc)
> +
> +# include <sysdep.h>
> +# define STRPBRK __strpbrk_generic
> +
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(STRPBRK)
> +#endif
> +
> +#include <string/strpbrk.c>
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> deleted file mode 100644
> index d03214c4fb..0000000000
> --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> +++ /dev/null
> @@ -1,28 +0,0 @@
> -/* strpbrk.
> - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <https://www.gnu.org/licenses/>. */
> -
> -#if IS_IN (libc)
> -
> -# include <sysdep.h>
> -# define STRPBRK __strpbrk_sse2
> -
> -# undef libc_hidden_builtin_def
> -# define libc_hidden_builtin_def(STRPBRK)
> -#endif
> -
> -#include <string/strpbrk.c>
> diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> new file mode 100644
> index 0000000000..d044916688
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> @@ -0,0 +1,136 @@
> +/* strspn with SSE4.2 intrinsics
> + Copyright (C) 2009-2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <nmmintrin.h>
> +#include <string.h>
> +#include "varshift.h"
> +
> +/* We use 0x12:
> + _SIDD_SBYTE_OPS
> + | _SIDD_CMP_EQUAL_ANY
> + | _SIDD_NEGATIVE_POLARITY
> + | _SIDD_LEAST_SIGNIFICANT
> + on pcmpistri to compare xmm/mem128
> +
> + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> + X X X X X X X X X X X X X X X X
> +
> + against xmm
> +
> + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> + A A A A A A A A A A A A A A A A
> +
> + to find out if the first 16byte data element has any non-A byte and
> + the offset of the first byte. There are 2 cases:
> +
> + 1. The first 16byte data element has the non-A byte, including
> + EOS, at the offset X.
> + 2. The first 16byte data element is valid and doesn't have the non-A
> + byte.
> +
> + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> +
> + case ECX CFlag ZFlag SFlag
> + 1 X 1 0/1 0
> + 2 16 0 0 0
> +
> + We exit from the loop for case 1. */
> +
> +extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
> +
> +
> +size_t
> +__attribute__ ((section (".text.sse4.2")))
> +__strspn_sse42 (const char *s, const char *a)
> +{
> + if (*a == 0)
> + return 0;
> +
> + const char *aligned;
> + __m128i mask, maskz, zero;
> + unsigned int maskz_bits;
> + unsigned int offset = (int) ((size_t) a & 15);
> + zero = _mm_set1_epi8 (0);
> + if (offset != 0)
> + {
> + /* Load masks. */
> + aligned = (const char *) ((size_t) a & -16L);
> + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> + maskz = _mm_cmpeq_epi8 (mask0, zero);
> +
> + /* Find where the NULL terminator is. */
> + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> + if (maskz_bits != 0)
> + {
> + mask = __m128i_shift_right (mask0, offset);
> + offset = (unsigned int) ((size_t) s & 15);
> + if (offset)
> + goto start_unaligned;
> +
> + aligned = s;
> + goto start_loop;
> + }
> + }
> +
> + /* A is aligned. */
> + mask = _mm_loadu_si128 ((__m128i *) a);
> +
> + /* Find where the NULL terminator is. */
> + maskz = _mm_cmpeq_epi8 (mask, zero);
> + maskz_bits = _mm_movemask_epi8 (maskz);
> + if (maskz_bits == 0)
> + {
> + /* There is no NULL terminator. Don't use SSE4.2 if the length
> + of A > 16. */
> + if (a[16] != 0)
> + return __strspn_generic (s, a);
> + }
> + aligned = s;
> + offset = (unsigned int) ((size_t) s & 15);
> +
> + if (offset != 0)
> + {
> + start_unaligned:
> + /* Check partial string. */
> + aligned = (const char *) ((size_t) s & -16L);
> + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> + __m128i adj_value = __m128i_shift_right (value, offset);
> +
> + unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> + /* No need to check CFlag since it is always 1. */
> + if (length < 16 - offset)
> + return length;
> + /* Find where the NULL terminator is. */
> + maskz = _mm_cmpeq_epi8 (value, zero);
> + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> + if (maskz_bits != 0)
> + return length;
> + aligned += 16;
> + }
> +
> +start_loop:
> + while (1)
> + {
> + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> + unsigned int index = _mm_cmpistri (mask, value, 0x12);
> + unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> + if (cflag)
> + return (size_t) (aligned + index - s);
> + aligned += 16;
> + }
> +}
> diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
> index 6124033ceb..6b50c36432 100644
> --- a/sysdeps/x86_64/multiarch/strspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strspn-c.c
> @@ -1,5 +1,5 @@
> -/* strspn with SSE4.2 intrinsics
> - Copyright (C) 2009-2022 Free Software Foundation, Inc.
> +/* strspn.
> + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -16,121 +16,13 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#include <nmmintrin.h>
> -#include <string.h>
> -#include "varshift.h"
> +#if IS_IN (libc)
>
> -/* We use 0x12:
> - _SIDD_SBYTE_OPS
> - | _SIDD_CMP_EQUAL_ANY
> - | _SIDD_NEGATIVE_POLARITY
> - | _SIDD_LEAST_SIGNIFICANT
> - on pcmpistri to compare xmm/mem128
> +# include <sysdep.h>
> +# define STRSPN __strspn_generic
>
> - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> - X X X X X X X X X X X X X X X X
> +# undef libc_hidden_builtin_def
> +# define libc_hidden_builtin_def(STRSPN)
> +#endif
>
> - against xmm
> -
> - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> - A A A A A A A A A A A A A A A A
> -
> - to find out if the first 16byte data element has any non-A byte and
> - the offset of the first byte. There are 2 cases:
> -
> - 1. The first 16byte data element has the non-A byte, including
> - EOS, at the offset X.
> - 2. The first 16byte data element is valid and doesn't have the non-A
> - byte.
> -
> - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> -
> - case ECX CFlag ZFlag SFlag
> - 1 X 1 0/1 0
> - 2 16 0 0 0
> -
> - We exit from the loop for case 1. */
> -
> -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
> -
> -
> -size_t
> -__attribute__ ((section (".text.sse4.2")))
> -__strspn_sse42 (const char *s, const char *a)
> -{
> - if (*a == 0)
> - return 0;
> -
> - const char *aligned;
> - __m128i mask, maskz, zero;
> - unsigned int maskz_bits;
> - unsigned int offset = (int) ((size_t) a & 15);
> - zero = _mm_set1_epi8 (0);
> - if (offset != 0)
> - {
> - /* Load masks. */
> - aligned = (const char *) ((size_t) a & -16L);
> - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> - maskz = _mm_cmpeq_epi8 (mask0, zero);
> -
> - /* Find where the NULL terminator is. */
> - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> - if (maskz_bits != 0)
> - {
> - mask = __m128i_shift_right (mask0, offset);
> - offset = (unsigned int) ((size_t) s & 15);
> - if (offset)
> - goto start_unaligned;
> -
> - aligned = s;
> - goto start_loop;
> - }
> - }
> -
> - /* A is aligned. */
> - mask = _mm_loadu_si128 ((__m128i *) a);
> -
> - /* Find where the NULL terminator is. */
> - maskz = _mm_cmpeq_epi8 (mask, zero);
> - maskz_bits = _mm_movemask_epi8 (maskz);
> - if (maskz_bits == 0)
> - {
> - /* There is no NULL terminator. Don't use SSE4.2 if the length
> - of A > 16. */
> - if (a[16] != 0)
> - return __strspn_sse2 (s, a);
> - }
> - aligned = s;
> - offset = (unsigned int) ((size_t) s & 15);
> -
> - if (offset != 0)
> - {
> - start_unaligned:
> - /* Check partial string. */
> - aligned = (const char *) ((size_t) s & -16L);
> - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> - __m128i adj_value = __m128i_shift_right (value, offset);
> -
> - unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> - /* No need to check CFlag since it is always 1. */
> - if (length < 16 - offset)
> - return length;
> - /* Find where the NULL terminator is. */
> - maskz = _mm_cmpeq_epi8 (value, zero);
> - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> - if (maskz_bits != 0)
> - return length;
> - aligned += 16;
> - }
> -
> -start_loop:
> - while (1)
> - {
> - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> - unsigned int index = _mm_cmpistri (mask, value, 0x12);
> - unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> - if (cflag)
> - return (size_t) (aligned + index - s);
> - aligned += 16;
> - }
> -}
> +#include <string/strspn.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
> index 26d6984e9b..fa38dd898d 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy-c.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
> @@ -1,5 +1,5 @@
> #if IS_IN (libc)
> -# define WCSCPY __wcscpy_sse2
> +# define WCSCPY __wcscpy_generic
> #endif
>
> #include <wcsmbs/wcscpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> index 6a2d1421d9..53c3228dc2 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> @@ -26,7 +26,7 @@
> # define SYMBOL_NAME wcscpy
> # include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
>
> static inline void *
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
> return OPTIMIZE (ssse3);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (generic);
> }
>
> libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> index e1ec7cfbb5..1c9c04241a 100644
> --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
> +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> @@ -1,9 +1,9 @@
> #if IS_IN (libc)
> # include <wchar.h>
>
> -# define WCSNLEN __wcsnlen_sse2
> +# define WCSNLEN __wcsnlen_generic
>
> -extern __typeof (wcsnlen) __wcsnlen_sse2;
> +extern __typeof (wcsnlen) __wcsnlen_generic;
> #endif
>
> #include "wcsmbs/wcsnlen.c"
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
> index baa26666a8..05b7a211de 100644
> --- a/sysdeps/x86_64/multiarch/wcsnlen.c
> +++ b/sysdeps/x86_64/multiarch/wcsnlen.c
> @@ -24,6 +24,7 @@
> # undef __wcsnlen
>
> # define SYMBOL_NAME wcsnlen
> +# define GENERIC generic
> # include "ifunc-wcslen.h"
>
> libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
> --
> 2.34.1
>
--
H.J.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2] x86: Rename generic functions with unique postfix for clarity
2022-06-10 1:19 ` H.J. Lu
@ 2022-06-10 1:26 ` Noah Goldstein
0 siblings, 0 replies; 11+ messages in thread
From: Noah Goldstein @ 2022-06-10 1:26 UTC (permalink / raw)
To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell
On Thu, Jun 9, 2022 at 6:20 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Jun 9, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > No functions are changed. This just renames generic implementations from
> > '{func}_sse2' to '{func}_generic'. The reason is that the postfix
> > "_sse2" was overloaded: it was used both for files that had hand-optimized
> > sse2 assembly implementations and for files that just redirected back
> > to the generic implementation.
>
> This change isn't small and its benefit is very small. Can it be part of
> a larger change to support building glibc with
>
> -march=x86-64-vN
OK.
>
> > Full xcheck passed on x86_64.
> > ---
> > sysdeps/x86_64/multiarch/Makefile | 15 +-
> > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 16 +-
> > sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +-
> > sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +-
> > sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +-
> > sysdeps/x86_64/multiarch/stpncpy-c.c | 2 +-
> > sysdeps/x86_64/multiarch/stpncpy.c | 1 +
> > sysdeps/x86_64/multiarch/strcspn-c-sse4.c | 163 ++++++++++++++++++
> > sysdeps/x86_64/multiarch/strcspn-c.c | 151 +---------------
> > sysdeps/x86_64/multiarch/strcspn-sse2.c | 28 ---
> > sysdeps/x86_64/multiarch/strncat-c.c | 2 +-
> > sysdeps/x86_64/multiarch/strncat.c | 1 +
> > sysdeps/x86_64/multiarch/strncpy-c.c | 2 +-
> > sysdeps/x86_64/multiarch/strncpy.c | 1 +
> > .../{strspn-sse2.c => strpbrk-c-sse4.c} | 18 +-
> > sysdeps/x86_64/multiarch/strpbrk-c.c | 18 +-
> > sysdeps/x86_64/multiarch/strpbrk-sse2.c | 28 ---
> > sysdeps/x86_64/multiarch/strspn-c-sse4.c | 136 +++++++++++++++
> > sysdeps/x86_64/multiarch/strspn-c.c | 126 +-------------
> > sysdeps/x86_64/multiarch/wcscpy-c.c | 2 +-
> > sysdeps/x86_64/multiarch/wcscpy.c | 4 +-
> > sysdeps/x86_64/multiarch/wcsnlen-c.c | 4 +-
> > sysdeps/x86_64/multiarch/wcsnlen.c | 1 +
> > 23 files changed, 376 insertions(+), 363 deletions(-)
> > create mode 100644 sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > delete mode 100644 sysdeps/x86_64/multiarch/strcspn-sse2.c
> > rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strpbrk-c-sse4.c} (74%)
> > delete mode 100644 sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > create mode 100644 sysdeps/x86_64/multiarch/strspn-c-sse4.c
> >
> > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> > index 3d153cac35..86c6ecdfc1 100644
> > --- a/sysdeps/x86_64/multiarch/Makefile
> > +++ b/sysdeps/x86_64/multiarch/Makefile
> > @@ -77,7 +77,7 @@ sysdep_routines += \
> > strcpy-sse2 \
> > strcpy-sse2-unaligned \
> > strcspn-c \
> > - strcspn-sse2 \
> > + strcspn-c-sse4 \
> > strlen-avx2 \
> > strlen-avx2-rtm \
> > strlen-evex \
> > @@ -109,21 +109,22 @@ sysdep_routines += \
> > strnlen-evex512 \
> > strnlen-sse2 \
> > strpbrk-c \
> > - strpbrk-sse2 \
> > + strpbrk-c-sse4 \
> > strrchr-avx2 \
> > strrchr-avx2-rtm \
> > strrchr-evex \
> > strrchr-sse2 \
> > strspn-c \
> > - strspn-sse2 \
> > + strspn-c-sse4 \
> > strstr-avx512 \
> > strstr-sse2-unaligned \
> > varshift \
> > # sysdep_routines
> > -CFLAGS-varshift.c += -msse4
> > -CFLAGS-strcspn-c.c += -msse4
> > -CFLAGS-strpbrk-c.c += -msse4
> > -CFLAGS-strspn-c.c += -msse4
> > +
> > +CFLAGS-strcspn-c-sse4.c += -msse4
> > +CFLAGS-strpbrk-c-sse4.c += -msse4
> > +CFLAGS-strspn-c-sse4.c += -msse4
> > +
> > CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
> > endif
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index 58f3ec8306..4cbd200d39 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -372,7 +372,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > __stpncpy_evex)
> > IFUNC_IMPL_ADD (array, i, stpncpy, 1,
> > __stpncpy_sse2_unaligned)
> > - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
> > + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/stpcpy.c. */
> > IFUNC_IMPL (i, name, stpcpy,
> > @@ -531,7 +531,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, strcspn,
> > IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
> > __strcspn_sse42)
> > - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> > + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strncase_l.c. */
> > IFUNC_IMPL (i, name, strncasecmp,
> > @@ -585,7 +585,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > __strncat_evex)
> > IFUNC_IMPL_ADD (array, i, strncat, 1,
> > __strncat_sse2_unaligned)
> > - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> > + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strncpy.c. */
> > IFUNC_IMPL (i, name, strncpy,
> > @@ -601,20 +601,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > __strncpy_evex)
> > IFUNC_IMPL_ADD (array, i, strncpy, 1,
> > __strncpy_sse2_unaligned)
> > - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
> > + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strpbrk.c. */
> > IFUNC_IMPL (i, name, strpbrk,
> > IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
> > __strpbrk_sse42)
> > - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> > + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
> >
> >
> > /* Support sysdeps/x86_64/multiarch/strspn.c. */
> > IFUNC_IMPL (i, name, strspn,
> > IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
> > __strspn_sse42)
> > - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> > + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/strstr.c. */
> > IFUNC_IMPL (i, name, strstr,
> > @@ -697,7 +697,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, wcscpy,
> > IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
> > __wcscpy_ssse3)
> > - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
> > + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/wcslen.c. */
> > IFUNC_IMPL (i, name, wcslen,
> > @@ -749,7 +749,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL_ADD (array, i, wcsnlen,
> > CPU_FEATURE_USABLE (SSE4_1),
> > __wcsnlen_sse4_1)
> > - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
> > + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
> >
> > /* Support sysdeps/x86_64/multiarch/wmemchr.c. */
> > IFUNC_IMPL (i, name, wmemchr,
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > index b555ff2fac..ee36525bcf 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> > @@ -19,7 +19,7 @@
> >
> > #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
> >
> > static inline void *
> > @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
> > return OPTIMIZE (sse42);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (generic);
> > }
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > index a15afa44e9..80529458d1 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> > @@ -20,7 +20,11 @@
> >
> > #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +#ifndef GENERIC
> > +# define GENERIC sse2
> > +#endif
> > +
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
> > attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> > @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > return OPTIMIZE (sse2_unaligned);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (GENERIC);
> > }
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > index 2b29e7608a..88c1c502af 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> > @@ -19,7 +19,11 @@
> >
> > #include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +#ifndef GENERIC
> > +# define GENERIC sse2
> > +#endif
> > +
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> > @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
> > return OPTIMIZE (sse4_1);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (GENERIC);
> > }
> > diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
> > index b016e487e1..eb62fcf388 100644
> > --- a/sysdeps/x86_64/multiarch/stpncpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
> > @@ -1,4 +1,4 @@
> > -#define STPNCPY __stpncpy_sse2
> > +#define STPNCPY __stpncpy_generic
> > #undef weak_alias
> > #define weak_alias(ignored1, ignored2)
> > #undef libc_hidden_def
> > diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
> > index 82fa53957d..879bc83f0b 100644
> > --- a/sysdeps/x86_64/multiarch/stpncpy.c
> > +++ b/sysdeps/x86_64/multiarch/stpncpy.c
> > @@ -25,6 +25,7 @@
> > # undef stpncpy
> > # undef __stpncpy
> >
> > +# define GENERIC generic
> > # define SYMBOL_NAME stpncpy
> > # include "ifunc-strcpy.h"
> >
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-c-sse4.c b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > new file mode 100644
> > index 0000000000..59f64f9fe8
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/strcspn-c-sse4.c
> > @@ -0,0 +1,163 @@
> > +/* strcspn with SSE4.2 intrinsics
> > + Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#include <nmmintrin.h>
> > +#include <string.h>
> > +#include "varshift.h"
> > +
> > +/* We use 0x2:
> > + _SIDD_SBYTE_OPS
> > + | _SIDD_CMP_EQUAL_ANY
> > + | _SIDD_POSITIVE_POLARITY
> > + | _SIDD_LEAST_SIGNIFICANT
> > + on pcmpistri to compare xmm/mem128
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + X X X X X X X X X X X X X X X X
> > +
> > + against xmm
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + A A A A A A A A A A A A A A A A
> > +
> > + to find out if the first 16byte data element has any byte A and
> > + the offset of the first byte. There are 3 cases:
> > +
> > + 1. The first 16byte data element has the byte A at the offset X.
> > + 2. The first 16byte data element has EOS and doesn't have the byte A.
> > + 3. The first 16byte data element is valid and doesn't have the byte A.
> > +
> > + Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
> > +
> > + 1 X 1 0/1 0
> > + 2 16 0 1 0
> > + 3 16 0 0 0
> > +
> > + We exit from the loop for cases 1 and 2 with jbe which branches
> > + when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> > + X for case 1. */
> > +
> > +#ifndef STRCSPN_GENERIC
> > +# define STRCSPN_GENERIC __strcspn_generic
> > +# define STRCSPN_SSE42 __strcspn_sse42
> > +#endif
> > +
> > +#ifdef USE_AS_STRPBRK
> > +# define RETURN(val1, val2) return val1
> > +#else
> > +# define RETURN(val1, val2) return val2
> > +#endif
> > +
> > +extern
> > +#ifdef USE_AS_STRPBRK
> > +char *
> > +#else
> > +size_t
> > +#endif
> > +STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
> > +
> > +
> > +#ifdef USE_AS_STRPBRK
> > +char *
> > +#else
> > +size_t
> > +#endif
> > +__attribute__ ((section (".text.sse4.2")))
> > +STRCSPN_SSE42 (const char *s, const char *a)
> > +{
> > + if (*a == 0)
> > + RETURN (NULL, strlen (s));
> > +
> > + const char *aligned;
> > + __m128i mask, maskz, zero;
> > + unsigned int maskz_bits;
> > + unsigned int offset = (unsigned int) ((size_t) a & 15);
> > + zero = _mm_set1_epi8 (0);
> > + if (offset != 0)
> > + {
> > + /* Load masks. */
> > + aligned = (const char *) ((size_t) a & -16L);
> > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > + maskz = _mm_cmpeq_epi8 (mask0, zero);
> > +
> > + /* Find where the NULL terminator is. */
> > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > + if (maskz_bits != 0)
> > + {
> > + mask = __m128i_shift_right (mask0, offset);
> > + offset = (unsigned int) ((size_t) s & 15);
> > + if (offset)
> > + goto start_unaligned;
> > +
> > + aligned = s;
> > + goto start_loop;
> > + }
> > + }
> > +
> > + /* A is aligned. */
> > + mask = _mm_loadu_si128 ((__m128i *) a);
> > + /* Find where the NULL terminator is. */
> > + maskz = _mm_cmpeq_epi8 (mask, zero);
> > + maskz_bits = _mm_movemask_epi8 (maskz);
> > + if (maskz_bits == 0)
> > + {
> > + /* There is no NULL terminator. Don't use SSE4.2 if the length
> > + of A > 16. */
> > + if (a[16] != 0)
> > + return STRCSPN_GENERIC (s, a);
> > + }
> > +
> > + aligned = s;
> > + offset = (unsigned int) ((size_t) s & 15);
> > + if (offset != 0)
> > + {
> > + start_unaligned:
> > + /* Check partial string. */
> > + aligned = (const char *) ((size_t) s & -16L);
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > +
> > + value = __m128i_shift_right (value, offset);
> > +
> > + unsigned int length = _mm_cmpistri (mask, value, 0x2);
> > + /* No need to check ZFlag since ZFlag is always 1. */
> > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > + if (cflag)
> > + RETURN ((char *) (s + length), length);
> > + /* Find where the NULL terminator is. */
> > + unsigned int index = _mm_cmpistri (value, value, 0x3a);
> > + if (index < 16 - offset)
> > + RETURN (NULL, index);
> > + aligned += 16;
> > + }
> > +
> > +start_loop:
> > + while (1)
> > + {
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > + unsigned int index = _mm_cmpistri (mask, value, 0x2);
> > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > + unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> > + if (cflag)
> > + RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> > + if (zflag)
> > + RETURN (NULL,
> > + /* Find where the NULL terminator is. */
> > + (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> > + aligned += 16;
> > + }
> > +}
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
> > index c312fab8b1..423de2e2b2 100644
> > --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> > +++ b/sysdeps/x86_64/multiarch/strcspn-c.c
> > @@ -1,5 +1,5 @@
> > -/* strcspn with SSE4.2 intrinsics
> > - Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +/* strcspn.
> > + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,148 +16,13 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#include <nmmintrin.h>
> > -#include <string.h>
> > -#include "varshift.h"
> > +#if IS_IN (libc)
> >
> > -/* We use 0x2:
> > - _SIDD_SBYTE_OPS
> > - | _SIDD_CMP_EQUAL_ANY
> > - | _SIDD_POSITIVE_POLARITY
> > - | _SIDD_LEAST_SIGNIFICANT
> > - on pcmpistri to compare xmm/mem128
> > +# include <sysdep.h>
> > +# define STRCSPN __strcspn_generic
> >
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - X X X X X X X X X X X X X X X X
> > -
> > - against xmm
> > -
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - A A A A A A A A A A A A A A A A
> > -
> > - to find out if the first 16byte data element has any byte A and
> > - the offset of the first byte. There are 3 cases:
> > -
> > - 1. The first 16byte data element has the byte A at the offset X.
> > - 2. The first 16byte data element has EOS and doesn't have the byte A.
> > - 3. The first 16byte data element is valid and doesn't have the byte A.
> > -
> > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > -
> > - 1 X 1 0/1 0
> > - 2 16 0 1 0
> > - 3 16 0 0 0
> > -
> > - We exit from the loop for cases 1 and 2 with jbe which branches
> > - when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> > - X for case 1. */
> > -
> > -#ifndef STRCSPN_SSE2
> > -# define STRCSPN_SSE2 __strcspn_sse2
> > -# define STRCSPN_SSE42 __strcspn_sse42
> > -#endif
> > -
> > -#ifdef USE_AS_STRPBRK
> > -# define RETURN(val1, val2) return val1
> > -#else
> > -# define RETURN(val1, val2) return val2
> > -#endif
> > -
> > -extern
> > -#ifdef USE_AS_STRPBRK
> > -char *
> > -#else
> > -size_t
> > -#endif
> > -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> > -
> > -
> > -#ifdef USE_AS_STRPBRK
> > -char *
> > -#else
> > -size_t
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRCSPN)
> > #endif
> > -__attribute__ ((section (".text.sse4.2")))
> > -STRCSPN_SSE42 (const char *s, const char *a)
> > -{
> > - if (*a == 0)
> > - RETURN (NULL, strlen (s));
> > -
> > - const char *aligned;
> > - __m128i mask, maskz, zero;
> > - unsigned int maskz_bits;
> > - unsigned int offset = (unsigned int) ((size_t) a & 15);
> > - zero = _mm_set1_epi8 (0);
> > - if (offset != 0)
> > - {
> > - /* Load masks. */
> > - aligned = (const char *) ((size_t) a & -16L);
> > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > - maskz = _mm_cmpeq_epi8 (mask0, zero);
> > -
> > - /* Find where the NULL terminator is. */
> > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > - if (maskz_bits != 0)
> > - {
> > - mask = __m128i_shift_right (mask0, offset);
> > - offset = (unsigned int) ((size_t) s & 15);
> > - if (offset)
> > - goto start_unaligned;
> > -
> > - aligned = s;
> > - goto start_loop;
> > - }
> > - }
> > -
> > - /* A is aligned. */
> > - mask = _mm_loadu_si128 ((__m128i *) a);
> > - /* Find where the NULL terminator is. */
> > - maskz = _mm_cmpeq_epi8 (mask, zero);
> > - maskz_bits = _mm_movemask_epi8 (maskz);
> > - if (maskz_bits == 0)
> > - {
> > - /* There is no NULL terminator. Don't use SSE4.2 if the length
> > - of A > 16. */
> > - if (a[16] != 0)
> > - return STRCSPN_SSE2 (s, a);
> > - }
> > -
> > - aligned = s;
> > - offset = (unsigned int) ((size_t) s & 15);
> > - if (offset != 0)
> > - {
> > - start_unaligned:
> > - /* Check partial string. */
> > - aligned = (const char *) ((size_t) s & -16L);
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > -
> > - value = __m128i_shift_right (value, offset);
> > -
> > - unsigned int length = _mm_cmpistri (mask, value, 0x2);
> > - /* No need to check ZFlag since ZFlag is always 1. */
> > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > - if (cflag)
> > - RETURN ((char *) (s + length), length);
> > - /* Find where the NULL terminator is. */
> > - unsigned int index = _mm_cmpistri (value, value, 0x3a);
> > - if (index < 16 - offset)
> > - RETURN (NULL, index);
> > - aligned += 16;
> > - }
> >
> > -start_loop:
> > - while (1)
> > - {
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > - unsigned int index = _mm_cmpistri (mask, value, 0x2);
> > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
> > - unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
> > - if (cflag)
> > - RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
> > - if (zflag)
> > - RETURN (NULL,
> > - /* Find where the NULL terminator is. */
> > - (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
> > - aligned += 16;
> > - }
> > -}
> > +#include <string/strcspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-sse2.c
> > deleted file mode 100644
> > index 3a04bb39fc..0000000000
> > --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
> > +++ /dev/null
> > @@ -1,28 +0,0 @@
> > -/* strcspn.
> > - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > - This file is part of the GNU C Library.
> > -
> > - The GNU C Library is free software; you can redistribute it and/or
> > - modify it under the terms of the GNU Lesser General Public
> > - License as published by the Free Software Foundation; either
> > - version 2.1 of the License, or (at your option) any later version.
> > -
> > - The GNU C Library is distributed in the hope that it will be useful,
> > - but WITHOUT ANY WARRANTY; without even the implied warranty of
> > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > - Lesser General Public License for more details.
> > -
> > - You should have received a copy of the GNU Lesser General Public
> > - License along with the GNU C Library; if not, see
> > - <https://www.gnu.org/licenses/>. */
> > -
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRCSPN __strcspn_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRCSPN)
> > -#endif
> > -
> > -#include <string/strcspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
> > index 93a7fab7ea..b729c033d9 100644
> > --- a/sysdeps/x86_64/multiarch/strncat-c.c
> > +++ b/sysdeps/x86_64/multiarch/strncat-c.c
> > @@ -1,2 +1,2 @@
> > -#define STRNCAT __strncat_sse2
> > +#define STRNCAT __strncat_generic
> > #include <string/strncat.c>
> > diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
> > index b649343a97..50fba8a41f 100644
> > --- a/sysdeps/x86_64/multiarch/strncat.c
> > +++ b/sysdeps/x86_64/multiarch/strncat.c
> > @@ -24,6 +24,7 @@
> > # undef strncat
> >
> > # define SYMBOL_NAME strncat
> > +# define GENERIC generic
> > # include "ifunc-strcpy.h"
> >
> > libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
> > index 57c45ac7ab..183b0b8e0f 100644
> > --- a/sysdeps/x86_64/multiarch/strncpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/strncpy-c.c
> > @@ -1,4 +1,4 @@
> > -#define STRNCPY __strncpy_sse2
> > +#define STRNCPY __strncpy_generic
> > #undef libc_hidden_builtin_def
> > #define libc_hidden_builtin_def(strncpy)
> >
> > diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
> > index 2a780a7e16..7fc7d72ec5 100644
> > --- a/sysdeps/x86_64/multiarch/strncpy.c
> > +++ b/sysdeps/x86_64/multiarch/strncpy.c
> > @@ -24,6 +24,7 @@
> > # undef strncpy
> >
> > # define SYMBOL_NAME strncpy
> > +# define GENERIC generic
> > # include "ifunc-strcpy.h"
> >
> > libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > similarity index 74%
> > rename from sysdeps/x86_64/multiarch/strspn-sse2.c
> > rename to sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > index 61cc6cb0a5..8700276773 100644
> > --- a/sysdeps/x86_64/multiarch/strspn-sse2.c
> > +++ b/sysdeps/x86_64/multiarch/strpbrk-c-sse4.c
> > @@ -1,5 +1,5 @@
> > -/* strspn.
> > - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > +/* strpbrk with SSE4.2 intrinsics
> > + Copyright (C) 2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,13 +16,7 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRSPN __strspn_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRSPN)
> > -#endif
> > -
> > -#include <string/strspn.c>
> > +#define USE_AS_STRPBRK
> > +#define STRCSPN_GENERIC __strpbrk_generic
> > +#define STRCSPN_SSE42 __strpbrk_sse42
> > +#include "strcspn-c-sse4.c"
> > diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
> > index abf4ff7f1a..d31acfe495 100644
> > --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> > +++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
> > @@ -1,5 +1,5 @@
> > -/* strpbrk with SSE4.2 intrinsics
> > - Copyright (C) 2022 Free Software Foundation, Inc.
> > +/* strpbrk.
> > + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,7 +16,13 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#define USE_AS_STRPBRK
> > -#define STRCSPN_SSE2 __strpbrk_sse2
> > -#define STRCSPN_SSE42 __strpbrk_sse42
> > -#include "strcspn-c.c"
> > +#if IS_IN (libc)
> > +
> > +# include <sysdep.h>
> > +# define STRPBRK __strpbrk_generic
> > +
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRPBRK)
> > +#endif
> > +
> > +#include <string/strpbrk.c>
> > diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > deleted file mode 100644
> > index d03214c4fb..0000000000
> > --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> > +++ /dev/null
> > @@ -1,28 +0,0 @@
> > -/* strpbrk.
> > - Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > - This file is part of the GNU C Library.
> > -
> > - The GNU C Library is free software; you can redistribute it and/or
> > - modify it under the terms of the GNU Lesser General Public
> > - License as published by the Free Software Foundation; either
> > - version 2.1 of the License, or (at your option) any later version.
> > -
> > - The GNU C Library is distributed in the hope that it will be useful,
> > - but WITHOUT ANY WARRANTY; without even the implied warranty of
> > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > - Lesser General Public License for more details.
> > -
> > - You should have received a copy of the GNU Lesser General Public
> > - License along with the GNU C Library; if not, see
> > - <https://www.gnu.org/licenses/>. */
> > -
> > -#if IS_IN (libc)
> > -
> > -# include <sysdep.h>
> > -# define STRPBRK __strpbrk_sse2
> > -
> > -# undef libc_hidden_builtin_def
> > -# define libc_hidden_builtin_def(STRPBRK)
> > -#endif
> > -
> > -#include <string/strpbrk.c>
> > diff --git a/sysdeps/x86_64/multiarch/strspn-c-sse4.c b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> > new file mode 100644
> > index 0000000000..d044916688
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/strspn-c-sse4.c
> > @@ -0,0 +1,136 @@
> > +/* strspn with SSE4.2 intrinsics
> > + Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#include <nmmintrin.h>
> > +#include <string.h>
> > +#include "varshift.h"
> > +
> > +/* We use 0x12:
> > + _SIDD_SBYTE_OPS
> > + | _SIDD_CMP_EQUAL_ANY
> > + | _SIDD_NEGATIVE_POLARITY
> > + | _SIDD_LEAST_SIGNIFICANT
> > + on pcmpistri to compare xmm/mem128
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + X X X X X X X X X X X X X X X X
> > +
> > + against xmm
> > +
> > + 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > + A A A A A A A A A A A A A A A A
> > +
> > + to find out if the first 16byte data element has any non-A byte and
> > + the offset of the first byte. There are 2 cases:
> > +
> > + 1. The first 16byte data element has the non-A byte, including
> > + EOS, at the offset X.
> > + 2. The first 16byte data element is valid and doesn't have the non-A
> > + byte.
> > +
> > + Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > +
> > + case ECX CFlag ZFlag SFlag
> > + 1 X 1 0/1 0
> > + 2 16 0 0 0
> > +
> > + We exit from the loop for case 1. */
> > +
> > +extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
> > +
> > +
> > +size_t
> > +__attribute__ ((section (".text.sse4.2")))
> > +__strspn_sse42 (const char *s, const char *a)
> > +{
> > + if (*a == 0)
> > + return 0;
> > +
> > + const char *aligned;
> > + __m128i mask, maskz, zero;
> > + unsigned int maskz_bits;
> > + unsigned int offset = (int) ((size_t) a & 15);
> > + zero = _mm_set1_epi8 (0);
> > + if (offset != 0)
> > + {
> > + /* Load masks. */
> > + aligned = (const char *) ((size_t) a & -16L);
> > + __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > + maskz = _mm_cmpeq_epi8 (mask0, zero);
> > +
> > + /* Find where the NULL terminator is. */
> > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > + if (maskz_bits != 0)
> > + {
> > + mask = __m128i_shift_right (mask0, offset);
> > + offset = (unsigned int) ((size_t) s & 15);
> > + if (offset)
> > + goto start_unaligned;
> > +
> > + aligned = s;
> > + goto start_loop;
> > + }
> > + }
> > +
> > + /* A is aligned. */
> > + mask = _mm_loadu_si128 ((__m128i *) a);
> > +
> > + /* Find where the NULL terminator is. */
> > + maskz = _mm_cmpeq_epi8 (mask, zero);
> > + maskz_bits = _mm_movemask_epi8 (maskz);
> > + if (maskz_bits == 0)
> > + {
> > + /* There is no NULL terminator. Don't use SSE4.2 if the length
> > + of A > 16. */
> > + if (a[16] != 0)
> > + return __strspn_generic (s, a);
> > + }
> > + aligned = s;
> > + offset = (unsigned int) ((size_t) s & 15);
> > +
> > + if (offset != 0)
> > + {
> > + start_unaligned:
> > + /* Check partial string. */
> > + aligned = (const char *) ((size_t) s & -16L);
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > + __m128i adj_value = __m128i_shift_right (value, offset);
> > +
> > + unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> > + /* No need to check CFlag since it is always 1. */
> > + if (length < 16 - offset)
> > + return length;
> > + /* Find where the NULL terminator is. */
> > + maskz = _mm_cmpeq_epi8 (value, zero);
> > + maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > + if (maskz_bits != 0)
> > + return length;
> > + aligned += 16;
> > + }
> > +
> > +start_loop:
> > + while (1)
> > + {
> > + __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > + unsigned int index = _mm_cmpistri (mask, value, 0x12);
> > + unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> > + if (cflag)
> > + return (size_t) (aligned + index - s);
> > + aligned += 16;
> > + }
> > +}
> > diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
> > index 6124033ceb..6b50c36432 100644
> > --- a/sysdeps/x86_64/multiarch/strspn-c.c
> > +++ b/sysdeps/x86_64/multiarch/strspn-c.c
> > @@ -1,5 +1,5 @@
> > -/* strspn with SSE4.2 intrinsics
> > - Copyright (C) 2009-2022 Free Software Foundation, Inc.
> > +/* strspn.
> > + Copyright (C) 2017-2022 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -16,121 +16,13 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#include <nmmintrin.h>
> > -#include <string.h>
> > -#include "varshift.h"
> > +#if IS_IN (libc)
> >
> > -/* We use 0x12:
> > - _SIDD_SBYTE_OPS
> > - | _SIDD_CMP_EQUAL_ANY
> > - | _SIDD_NEGATIVE_POLARITY
> > - | _SIDD_LEAST_SIGNIFICANT
> > - on pcmpistri to compare xmm/mem128
> > +# include <sysdep.h>
> > +# define STRSPN __strspn_generic
> >
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - X X X X X X X X X X X X X X X X
> > +# undef libc_hidden_builtin_def
> > +# define libc_hidden_builtin_def(STRSPN)
> > +#endif
> >
> > - against xmm
> > -
> > - 0 1 2 3 4 5 6 7 8 9 A B C D E F
> > - A A A A A A A A A A A A A A A A
> > -
> > - to find out if the first 16byte data element has any non-A byte and
> > - the offset of the first byte. There are 2 cases:
> > -
> > - 1. The first 16byte data element has the non-A byte, including
> > - EOS, at the offset X.
> > - 2. The first 16byte data element is valid and doesn't have the non-A
> > - byte.
> > -
> > - Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
> > -
> > - case ECX CFlag ZFlag SFlag
> > - 1 X 1 0/1 0
> > - 2 16 0 0 0
> > -
> > - We exit from the loop for case 1. */
> > -
> > -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
> > -
> > -
> > -size_t
> > -__attribute__ ((section (".text.sse4.2")))
> > -__strspn_sse42 (const char *s, const char *a)
> > -{
> > - if (*a == 0)
> > - return 0;
> > -
> > - const char *aligned;
> > - __m128i mask, maskz, zero;
> > - unsigned int maskz_bits;
> > - unsigned int offset = (int) ((size_t) a & 15);
> > - zero = _mm_set1_epi8 (0);
> > - if (offset != 0)
> > - {
> > - /* Load masks. */
> > - aligned = (const char *) ((size_t) a & -16L);
> > - __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
> > - maskz = _mm_cmpeq_epi8 (mask0, zero);
> > -
> > - /* Find where the NULL terminator is. */
> > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > - if (maskz_bits != 0)
> > - {
> > - mask = __m128i_shift_right (mask0, offset);
> > - offset = (unsigned int) ((size_t) s & 15);
> > - if (offset)
> > - goto start_unaligned;
> > -
> > - aligned = s;
> > - goto start_loop;
> > - }
> > - }
> > -
> > - /* A is aligned. */
> > - mask = _mm_loadu_si128 ((__m128i *) a);
> > -
> > - /* Find where the NULL terminator is. */
> > - maskz = _mm_cmpeq_epi8 (mask, zero);
> > - maskz_bits = _mm_movemask_epi8 (maskz);
> > - if (maskz_bits == 0)
> > - {
> > - /* There is no NULL terminator. Don't use SSE4.2 if the length
> > - of A > 16. */
> > - if (a[16] != 0)
> > - return __strspn_sse2 (s, a);
> > - }
> > - aligned = s;
> > - offset = (unsigned int) ((size_t) s & 15);
> > -
> > - if (offset != 0)
> > - {
> > - start_unaligned:
> > - /* Check partial string. */
> > - aligned = (const char *) ((size_t) s & -16L);
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > - __m128i adj_value = __m128i_shift_right (value, offset);
> > -
> > - unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
> > - /* No need to check CFlag since it is always 1. */
> > - if (length < 16 - offset)
> > - return length;
> > - /* Find where the NULL terminator is. */
> > - maskz = _mm_cmpeq_epi8 (value, zero);
> > - maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
> > - if (maskz_bits != 0)
> > - return length;
> > - aligned += 16;
> > - }
> > -
> > -start_loop:
> > - while (1)
> > - {
> > - __m128i value = _mm_load_si128 ((__m128i *) aligned);
> > - unsigned int index = _mm_cmpistri (mask, value, 0x12);
> > - unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
> > - if (cflag)
> > - return (size_t) (aligned + index - s);
> > - aligned += 16;
> > - }
> > -}
> > +#include <string/strspn.c>
> > diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
> > index 26d6984e9b..fa38dd898d 100644
> > --- a/sysdeps/x86_64/multiarch/wcscpy-c.c
> > +++ b/sysdeps/x86_64/multiarch/wcscpy-c.c
> > @@ -1,5 +1,5 @@
> > #if IS_IN (libc)
> > -# define WCSCPY __wcscpy_sse2
> > +# define WCSCPY __wcscpy_generic
> > #endif
> >
> > #include <wcsmbs/wcscpy.c>
> > diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> > index 6a2d1421d9..53c3228dc2 100644
> > --- a/sysdeps/x86_64/multiarch/wcscpy.c
> > +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> > @@ -26,7 +26,7 @@
> > # define SYMBOL_NAME wcscpy
> > # include <init-arch.h>
> >
> > -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> > extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
> >
> > static inline void *
> > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> > if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
> > return OPTIMIZE (ssse3);
> >
> > - return OPTIMIZE (sse2);
> > + return OPTIMIZE (generic);
> > }
> >
> > libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
> > diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > index e1ec7cfbb5..1c9c04241a 100644
> > --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > +++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> > @@ -1,9 +1,9 @@
> > #if IS_IN (libc)
> > # include <wchar.h>
> >
> > -# define WCSNLEN __wcsnlen_sse2
> > +# define WCSNLEN __wcsnlen_generic
> >
> > -extern __typeof (wcsnlen) __wcsnlen_sse2;
> > +extern __typeof (wcsnlen) __wcsnlen_generic;
> > #endif
> >
> > #include "wcsmbs/wcsnlen.c"
> > diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
> > index baa26666a8..05b7a211de 100644
> > --- a/sysdeps/x86_64/multiarch/wcsnlen.c
> > +++ b/sysdeps/x86_64/multiarch/wcsnlen.c
> > @@ -24,6 +24,7 @@
> > # undef __wcsnlen
> >
> > # define SYMBOL_NAME wcsnlen
> > +# define GENERIC generic
> > # include "ifunc-wcslen.h"
> >
> > libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v3] x86: Rename generic functions with unique postfix for clarity
2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein
@ 2022-06-16 22:11 ` Noah Goldstein
2022-06-16 22:43 ` H.J. Lu
1 sibling, 1 reply; 11+ messages in thread
From: Noah Goldstein @ 2022-06-16 22:11 UTC (permalink / raw)
To: libc-alpha
No functions are changed. This just renames the generic implementations
from '{func}_sse2' to '{func}_generic'. The postfix "_sse2" was
overloaded: it was used both for files that had hand-optimized sse2
assembly implementations and for files that just redirected back to the
generic implementation.
Full xcheck passed on x86_64.
---
Note this change is in preparation for further changes to the file
organization in the multiarch directory.
sysdeps/x86_64/multiarch/Makefile | 33 ++++++++++---------
sysdeps/x86_64/multiarch/ifunc-avx2.h | 8 +++--
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 18 +++++-----
sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +--
sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +++--
sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +++--
sysdeps/x86_64/multiarch/stpncpy-c.c | 7 ----
sysdeps/x86_64/multiarch/stpncpy-generic.c | 26 +++++++++++++++
sysdeps/x86_64/multiarch/stpncpy.c | 1 +
.../{strcspn-sse2.c => strcspn-generic.c} | 2 +-
.../multiarch/{strcspn-c.c => strcspn-sse4.c} | 8 ++---
sysdeps/x86_64/multiarch/strncat-c.c | 2 --
sysdeps/x86_64/multiarch/strncat-generic.c | 21 ++++++++++++
sysdeps/x86_64/multiarch/strncat.c | 1 +
sysdeps/x86_64/multiarch/strncpy-c.c | 5 ---
sysdeps/x86_64/multiarch/strncpy-generic.c | 24 ++++++++++++++
sysdeps/x86_64/multiarch/strncpy.c | 1 +
.../{strpbrk-sse2.c => strpbrk-generic.c} | 2 +-
.../multiarch/{strpbrk-c.c => strpbrk-sse4.c} | 4 +--
.../{strspn-sse2.c => strspn-generic.c} | 2 +-
.../multiarch/{strspn-c.c => strspn-sse4.c} | 4 +--
sysdeps/x86_64/multiarch/wcscpy-c.c | 5 ---
sysdeps/x86_64/multiarch/wcscpy-generic.c | 24 ++++++++++++++
sysdeps/x86_64/multiarch/wcscpy.c | 4 +--
.../{wcsncmp-sse2.c => wcsncmp-generic.c} | 4 +--
sysdeps/x86_64/multiarch/wcsncmp.c | 2 ++
sysdeps/x86_64/multiarch/wcsnlen-c.c | 9 -----
sysdeps/x86_64/multiarch/wcsnlen-generic.c | 28 ++++++++++++++++
sysdeps/x86_64/multiarch/wcsnlen.c | 1 +
29 files changed, 190 insertions(+), 76 deletions(-)
delete mode 100644 sysdeps/x86_64/multiarch/stpncpy-c.c
create mode 100644 sysdeps/x86_64/multiarch/stpncpy-generic.c
rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-generic.c} (96%)
rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-sse4.c} (96%)
delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
create mode 100644 sysdeps/x86_64/multiarch/strncat-generic.c
delete mode 100644 sysdeps/x86_64/multiarch/strncpy-c.c
create mode 100644 sysdeps/x86_64/multiarch/strncpy-generic.c
rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-generic.c} (96%)
rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-sse4.c} (92%)
rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-generic.c} (96%)
rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-sse4.c} (97%)
delete mode 100644 sysdeps/x86_64/multiarch/wcscpy-c.c
create mode 100644 sysdeps/x86_64/multiarch/wcscpy-generic.c
rename sysdeps/x86_64/multiarch/{wcsncmp-sse2.c => wcsncmp-generic.c} (92%)
delete mode 100644 sysdeps/x86_64/multiarch/wcsnlen-c.c
create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-generic.c
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 3d153cac35..666ee4d5d6 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -43,7 +43,7 @@ sysdep_routines += \
stpcpy-sse2-unaligned \
stpncpy-avx2 \
stpncpy-avx2-rtm \
- stpncpy-c \
+ stpncpy-generic \
stpncpy-evex \
stpncpy-sse2-unaligned \
strcasecmp_l-avx2 \
@@ -76,8 +76,8 @@ sysdep_routines += \
strcpy-evex \
strcpy-sse2 \
strcpy-sse2-unaligned \
- strcspn-c \
- strcspn-sse2 \
+ strcspn-generic \
+ strcspn-sse4 \
strlen-avx2 \
strlen-avx2-rtm \
strlen-evex \
@@ -90,7 +90,7 @@ sysdep_routines += \
strncase_l-sse4_2 \
strncat-avx2 \
strncat-avx2-rtm \
- strncat-c \
+ strncat-generic \
strncat-evex \
strncat-sse2-unaligned \
strncmp-avx2 \
@@ -100,7 +100,7 @@ sysdep_routines += \
strncmp-sse4_2 \
strncpy-avx2 \
strncpy-avx2-rtm \
- strncpy-c \
+ strncpy-generic \
strncpy-evex \
strncpy-sse2-unaligned \
strnlen-avx2 \
@@ -108,22 +108,23 @@ sysdep_routines += \
strnlen-evex \
strnlen-evex512 \
strnlen-sse2 \
- strpbrk-c \
- strpbrk-sse2 \
+ strpbrk-generic \
+ strpbrk-sse4 \
strrchr-avx2 \
strrchr-avx2-rtm \
strrchr-evex \
strrchr-sse2 \
- strspn-c \
- strspn-sse2 \
+ strspn-generic \
+ strspn-sse4 \
strstr-avx512 \
strstr-sse2-unaligned \
varshift \
# sysdep_routines
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+
+CFLAGS-strcspn-sse4.c += -msse4
+CFLAGS-strpbrk-sse4.c += -msse4
+CFLAGS-strspn-sse4.c += -msse4
+
CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
endif
@@ -137,7 +138,7 @@ sysdep_routines += \
wcscmp-avx2-rtm \
wcscmp-evex \
wcscmp-sse2 \
- wcscpy-c \
+ wcscpy-generic \
wcscpy-ssse3 \
wcslen-avx2 \
wcslen-avx2-rtm \
@@ -147,11 +148,11 @@ sysdep_routines += \
wcslen-sse4_1 \
wcsncmp-avx2 \
wcsncmp-avx2-rtm \
+ wcsncmp-generic \
wcsncmp-evex \
- wcsncmp-sse2 \
wcsnlen-avx2 \
wcsnlen-avx2-rtm \
- wcsnlen-c \
+ wcsnlen-generic \
wcsnlen-evex \
wcsnlen-evex512 \
wcsnlen-sse4_1 \
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index 4289df29ec..1d9cdfcfec 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -19,7 +19,11 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
@@ -44,5 +48,5 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx2);
}
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index dc595752e0..883362f63d 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -367,7 +367,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__stpncpy_evex)
IFUNC_IMPL_ADD (array, i, stpncpy, 1,
__stpncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
/* Support sysdeps/x86_64/multiarch/stpcpy.c. */
IFUNC_IMPL (i, name, stpcpy,
@@ -526,7 +526,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strcspn,
IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
__strcspn_sse42)
- IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
@@ -580,7 +580,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncat_evex)
IFUNC_IMPL_ADD (array, i, strncat, 1,
__strncat_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
+ IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
/* Support sysdeps/x86_64/multiarch/strncpy.c. */
IFUNC_IMPL (i, name, strncpy,
@@ -596,20 +596,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strncpy_evex)
IFUNC_IMPL_ADD (array, i, strncpy, 1,
__strncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
/* Support sysdeps/x86_64/multiarch/strpbrk.c. */
IFUNC_IMPL (i, name, strpbrk,
IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
__strpbrk_sse42)
- IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
+ IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
/* Support sysdeps/x86_64/multiarch/strspn.c. */
IFUNC_IMPL (i, name, strspn,
IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
__strspn_sse42)
- IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
/* Support sysdeps/x86_64/multiarch/strstr.c. */
IFUNC_IMPL (i, name, strstr,
@@ -686,13 +686,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_evex)
- IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
+ IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
/* Support sysdeps/x86_64/multiarch/wcscpy.c. */
IFUNC_IMPL (i, name, wcscpy,
IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
__wcscpy_ssse3)
- IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
+ IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
IFUNC_IMPL (i, name, wcslen,
@@ -744,7 +744,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wcsnlen,
CPU_FEATURE_USABLE (SSE4_1),
__wcsnlen_sse4_1)
- IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+ IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
index b555ff2fac..ee36525bcf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
@@ -19,7 +19,7 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
static inline void *
@@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
return OPTIMIZE (sse42);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
index a15afa44e9..80529458d1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
@@ -20,7 +20,11 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
@@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
return OPTIMIZE (sse2_unaligned);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
index 2b29e7608a..88c1c502af 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
@@ -19,7 +19,11 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
@@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
deleted file mode 100644
index b016e487e1..0000000000
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#define STPNCPY __stpncpy_sse2
-#undef weak_alias
-#define weak_alias(ignored1, ignored2)
-#undef libc_hidden_def
-#define libc_hidden_def(stpncpy)
-
-#include <string/stpncpy.c>
diff --git a/sysdeps/x86_64/multiarch/stpncpy-generic.c b/sysdeps/x86_64/multiarch/stpncpy-generic.c
new file mode 100644
index 0000000000..87826845b0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpncpy-generic.c
@@ -0,0 +1,26 @@
+/* stpncpy.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#define STPNCPY __stpncpy_generic
+#undef weak_alias
+#define weak_alias(ignored1, ignored2)
+#undef libc_hidden_def
+#define libc_hidden_def(stpncpy)
+
+#include <string/stpncpy.c>
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
index 82fa53957d..879bc83f0b 100644
--- a/sysdeps/x86_64/multiarch/stpncpy.c
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -25,6 +25,7 @@
# undef stpncpy
# undef __stpncpy
+# define GENERIC generic
# define SYMBOL_NAME stpncpy
# include "ifunc-strcpy.h"
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-generic.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strcspn-sse2.c
rename to sysdeps/x86_64/multiarch/strcspn-generic.c
index 3a04bb39fc..423de2e2b2 100644
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strcspn-generic.c
@@ -19,7 +19,7 @@
#if IS_IN (libc)
# include <sysdep.h>
-# define STRCSPN __strcspn_sse2
+# define STRCSPN __strcspn_generic
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(STRCSPN)
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-sse4.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strcspn-c.c
rename to sysdeps/x86_64/multiarch/strcspn-sse4.c
index c312fab8b1..59f64f9fe8 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-sse4.c
@@ -52,8 +52,8 @@
when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
X for case 1. */
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
+#ifndef STRCSPN_GENERIC
+# define STRCSPN_GENERIC __strcspn_generic
# define STRCSPN_SSE42 __strcspn_sse42
#endif
@@ -69,7 +69,7 @@ char *
#else
size_t
#endif
-STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
+STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
#ifdef USE_AS_STRPBRK
@@ -119,7 +119,7 @@ STRCSPN_SSE42 (const char *s, const char *a)
/* There is no NULL terminator. Don't use SSE4.2 if the length
of A > 16. */
if (a[16] != 0)
- return STRCSPN_SSE2 (s, a);
+ return STRCSPN_GENERIC (s, a);
}
aligned = s;
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
deleted file mode 100644
index 93a7fab7ea..0000000000
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define STRNCAT __strncat_sse2
-#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat-generic.c b/sysdeps/x86_64/multiarch/strncat-generic.c
new file mode 100644
index 0000000000..0090669cd1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat-generic.c
@@ -0,0 +1,21 @@
+/* strncat.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#define STRNCAT __strncat_generic
+#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
index b649343a97..50fba8a41f 100644
--- a/sysdeps/x86_64/multiarch/strncat.c
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -24,6 +24,7 @@
# undef strncat
# define SYMBOL_NAME strncat
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
deleted file mode 100644
index 57c45ac7ab..0000000000
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#define STRNCPY __strncpy_sse2
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(strncpy)
-
-#include <string/strncpy.c>
diff --git a/sysdeps/x86_64/multiarch/strncpy-generic.c b/sysdeps/x86_64/multiarch/strncpy-generic.c
new file mode 100644
index 0000000000..9916153dd5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncpy-generic.c
@@ -0,0 +1,24 @@
+/* strncpy.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#define STRNCPY __strncpy_generic
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(strncpy)
+
+#include <string/strncpy.c>
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
index 2a780a7e16..7fc7d72ec5 100644
--- a/sysdeps/x86_64/multiarch/strncpy.c
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -24,6 +24,7 @@
# undef strncpy
# define SYMBOL_NAME strncpy
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-generic.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c
rename to sysdeps/x86_64/multiarch/strpbrk-generic.c
index d03214c4fb..d31acfe495 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-generic.c
@@ -19,7 +19,7 @@
#if IS_IN (libc)
# include <sysdep.h>
-# define STRPBRK __strpbrk_sse2
+# define STRPBRK __strpbrk_generic
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(STRPBRK)
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
similarity index 92%
rename from sysdeps/x86_64/multiarch/strpbrk-c.c
rename to sysdeps/x86_64/multiarch/strpbrk-sse4.c
index abf4ff7f1a..bf74d660d5 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
@@ -17,6 +17,6 @@
<https://www.gnu.org/licenses/>. */
#define USE_AS_STRPBRK
-#define STRCSPN_SSE2 __strpbrk_sse2
+#define STRCSPN_GENERIC __strpbrk_generic
#define STRCSPN_SSE42 __strpbrk_sse42
-#include "strcspn-c.c"
+#include "strcspn-sse4.c"
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-generic.c
similarity index 96%
rename from sysdeps/x86_64/multiarch/strspn-sse2.c
rename to sysdeps/x86_64/multiarch/strspn-generic.c
index 61cc6cb0a5..6b50c36432 100644
--- a/sysdeps/x86_64/multiarch/strspn-sse2.c
+++ b/sysdeps/x86_64/multiarch/strspn-generic.c
@@ -19,7 +19,7 @@
#if IS_IN (libc)
# include <sysdep.h>
-# define STRSPN __strspn_sse2
+# define STRSPN __strspn_generic
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(STRSPN)
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-sse4.c
similarity index 97%
rename from sysdeps/x86_64/multiarch/strspn-c.c
rename to sysdeps/x86_64/multiarch/strspn-sse4.c
index 6124033ceb..d044916688 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-sse4.c
@@ -51,7 +51,7 @@
We exit from the loop for case 1. */
-extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
+extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
size_t
@@ -98,7 +98,7 @@ __strspn_sse42 (const char *s, const char *a)
/* There is no NULL terminator. Don't use SSE4.2 if the length
of A > 16. */
if (a[16] != 0)
- return __strspn_sse2 (s, a);
+ return __strspn_generic (s, a);
}
aligned = s;
offset = (unsigned int) ((size_t) s & 15);
diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
deleted file mode 100644
index 26d6984e9b..0000000000
--- a/sysdeps/x86_64/multiarch/wcscpy-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#if IS_IN (libc)
-# define WCSCPY __wcscpy_sse2
-#endif
-
-#include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy-generic.c b/sysdeps/x86_64/multiarch/wcscpy-generic.c
new file mode 100644
index 0000000000..5ea905f33f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscpy-generic.c
@@ -0,0 +1,24 @@
+/* wcscpy.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+# define WCSCPY __wcscpy_generic
+#endif
+
+#include <wcsmbs/wcscpy.c>
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
index 6a2d1421d9..53c3228dc2 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.c
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -26,7 +26,7 @@
# define SYMBOL_NAME wcscpy
# include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
static inline void *
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
return OPTIMIZE (ssse3);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
similarity index 92%
rename from sysdeps/x86_64/multiarch/wcsncmp-sse2.c
rename to sysdeps/x86_64/multiarch/wcsncmp-generic.c
index 8d9cbbb900..658d541886 100644
--- a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
+++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
@@ -1,4 +1,4 @@
-/* wcsncmp optimized with SSE2.
+/* wcsncmp.
Copyright (C) 2018-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,5 +16,5 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define WCSNCMP __wcsncmp_sse2
+#define WCSNCMP __wcsncmp_generic
#include <wcsmbs/wcsncmp.c>
diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c
index 5e00af2ca5..1836f794dd 100644
--- a/sysdeps/x86_64/multiarch/wcsncmp.c
+++ b/sysdeps/x86_64/multiarch/wcsncmp.c
@@ -24,6 +24,8 @@
# undef wcsncmp
# undef __wcsncmp
+# define GENERIC generic
+
# define SYMBOL_NAME wcsncmp
# include "ifunc-avx2.h"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
deleted file mode 100644
index e1ec7cfbb5..0000000000
--- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#if IS_IN (libc)
-# include <wchar.h>
-
-# define WCSNLEN __wcsnlen_sse2
-
-extern __typeof (wcsnlen) __wcsnlen_sse2;
-#endif
-
-#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
new file mode 100644
index 0000000000..2d75da7709
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
@@ -0,0 +1,28 @@
+/* wcsnlen.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+# include <wchar.h>
+
+# define WCSNLEN __wcsnlen_generic
+
+extern __typeof (wcsnlen) __wcsnlen_generic;
+#endif
+
+#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
index baa26666a8..05b7a211de 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -24,6 +24,7 @@
# undef __wcsnlen
# define SYMBOL_NAME wcsnlen
+# define GENERIC generic
# include "ifunc-wcslen.h"
libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
--
2.34.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v3] x86: Rename generic functions with unique postfix for clarity
2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein
@ 2022-06-16 22:43 ` H.J. Lu
0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2022-06-16 22:43 UTC (permalink / raw)
To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell
On Thu, Jun 16, 2022 at 3:12 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> No functions are changed. It just renames generic implementations from
> '{func}_sse2' to '{func}_generic'. This is just because the postfix
> "_sse2" was overloaded and was used for files that had hand-optimized
> sse2 assembly implementations and files that just redirected back
> to the generic implementation.
>
> Full xcheck passed on x86_64.
> ---
> Note this change is in preparation for further changes to the file
> organization in the multiarch directory.
> sysdeps/x86_64/multiarch/Makefile | 33 ++++++++++---------
> sysdeps/x86_64/multiarch/ifunc-avx2.h | 8 +++--
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 18 +++++-----
> sysdeps/x86_64/multiarch/ifunc-sse4_2.h | 4 +--
> sysdeps/x86_64/multiarch/ifunc-strcpy.h | 8 +++--
> sysdeps/x86_64/multiarch/ifunc-wcslen.h | 8 +++--
> sysdeps/x86_64/multiarch/stpncpy-c.c | 7 ----
> sysdeps/x86_64/multiarch/stpncpy-generic.c | 26 +++++++++++++++
> sysdeps/x86_64/multiarch/stpncpy.c | 1 +
> .../{strcspn-sse2.c => strcspn-generic.c} | 2 +-
> .../multiarch/{strcspn-c.c => strcspn-sse4.c} | 8 ++---
> sysdeps/x86_64/multiarch/strncat-c.c | 2 --
> sysdeps/x86_64/multiarch/strncat-generic.c | 21 ++++++++++++
> sysdeps/x86_64/multiarch/strncat.c | 1 +
> sysdeps/x86_64/multiarch/strncpy-c.c | 5 ---
> sysdeps/x86_64/multiarch/strncpy-generic.c | 24 ++++++++++++++
> sysdeps/x86_64/multiarch/strncpy.c | 1 +
> .../{strpbrk-sse2.c => strpbrk-generic.c} | 2 +-
> .../multiarch/{strpbrk-c.c => strpbrk-sse4.c} | 4 +--
> .../{strspn-sse2.c => strspn-generic.c} | 2 +-
> .../multiarch/{strspn-c.c => strspn-sse4.c} | 4 +--
> sysdeps/x86_64/multiarch/wcscpy-c.c | 5 ---
> sysdeps/x86_64/multiarch/wcscpy-generic.c | 24 ++++++++++++++
> sysdeps/x86_64/multiarch/wcscpy.c | 4 +--
> .../{wcsncmp-sse2.c => wcsncmp-generic.c} | 4 +--
> sysdeps/x86_64/multiarch/wcsncmp.c | 2 ++
> sysdeps/x86_64/multiarch/wcsnlen-c.c | 9 -----
> sysdeps/x86_64/multiarch/wcsnlen-generic.c | 28 ++++++++++++++++
> sysdeps/x86_64/multiarch/wcsnlen.c | 1 +
> 29 files changed, 190 insertions(+), 76 deletions(-)
> delete mode 100644 sysdeps/x86_64/multiarch/stpncpy-c.c
> create mode 100644 sysdeps/x86_64/multiarch/stpncpy-generic.c
> rename sysdeps/x86_64/multiarch/{strcspn-sse2.c => strcspn-generic.c} (96%)
> rename sysdeps/x86_64/multiarch/{strcspn-c.c => strcspn-sse4.c} (96%)
> delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
> create mode 100644 sysdeps/x86_64/multiarch/strncat-generic.c
> delete mode 100644 sysdeps/x86_64/multiarch/strncpy-c.c
> create mode 100644 sysdeps/x86_64/multiarch/strncpy-generic.c
> rename sysdeps/x86_64/multiarch/{strpbrk-sse2.c => strpbrk-generic.c} (96%)
> rename sysdeps/x86_64/multiarch/{strpbrk-c.c => strpbrk-sse4.c} (92%)
> rename sysdeps/x86_64/multiarch/{strspn-sse2.c => strspn-generic.c} (96%)
> rename sysdeps/x86_64/multiarch/{strspn-c.c => strspn-sse4.c} (97%)
> delete mode 100644 sysdeps/x86_64/multiarch/wcscpy-c.c
> create mode 100644 sysdeps/x86_64/multiarch/wcscpy-generic.c
> rename sysdeps/x86_64/multiarch/{wcsncmp-sse2.c => wcsncmp-generic.c} (92%)
> delete mode 100644 sysdeps/x86_64/multiarch/wcsnlen-c.c
> create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-generic.c
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 3d153cac35..666ee4d5d6 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -43,7 +43,7 @@ sysdep_routines += \
> stpcpy-sse2-unaligned \
> stpncpy-avx2 \
> stpncpy-avx2-rtm \
> - stpncpy-c \
> + stpncpy-generic \
> stpncpy-evex \
> stpncpy-sse2-unaligned \
> strcasecmp_l-avx2 \
> @@ -76,8 +76,8 @@ sysdep_routines += \
> strcpy-evex \
> strcpy-sse2 \
> strcpy-sse2-unaligned \
> - strcspn-c \
> - strcspn-sse2 \
> + strcspn-generic \
> + strcspn-sse4 \
> strlen-avx2 \
> strlen-avx2-rtm \
> strlen-evex \
> @@ -90,7 +90,7 @@ sysdep_routines += \
> strncase_l-sse4_2 \
> strncat-avx2 \
> strncat-avx2-rtm \
> - strncat-c \
> + strncat-generic \
> strncat-evex \
> strncat-sse2-unaligned \
> strncmp-avx2 \
> @@ -100,7 +100,7 @@ sysdep_routines += \
> strncmp-sse4_2 \
> strncpy-avx2 \
> strncpy-avx2-rtm \
> - strncpy-c \
> + strncpy-generic \
> strncpy-evex \
> strncpy-sse2-unaligned \
> strnlen-avx2 \
> @@ -108,22 +108,23 @@ sysdep_routines += \
> strnlen-evex \
> strnlen-evex512 \
> strnlen-sse2 \
> - strpbrk-c \
> - strpbrk-sse2 \
> + strpbrk-generic \
> + strpbrk-sse4 \
> strrchr-avx2 \
> strrchr-avx2-rtm \
> strrchr-evex \
> strrchr-sse2 \
> - strspn-c \
> - strspn-sse2 \
> + strspn-generic \
> + strspn-sse4 \
> strstr-avx512 \
> strstr-sse2-unaligned \
> varshift \
> # sysdep_routines
> -CFLAGS-varshift.c += -msse4
> -CFLAGS-strcspn-c.c += -msse4
> -CFLAGS-strpbrk-c.c += -msse4
> -CFLAGS-strspn-c.c += -msse4
> +
> +CFLAGS-strcspn-sse4.c += -msse4
> +CFLAGS-strpbrk-sse4.c += -msse4
> +CFLAGS-strspn-sse4.c += -msse4
> +
> CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
> endif
>
> @@ -137,7 +138,7 @@ sysdep_routines += \
> wcscmp-avx2-rtm \
> wcscmp-evex \
> wcscmp-sse2 \
> - wcscpy-c \
> + wcscpy-generic \
> wcscpy-ssse3 \
> wcslen-avx2 \
> wcslen-avx2-rtm \
> @@ -147,11 +148,11 @@ sysdep_routines += \
> wcslen-sse4_1 \
> wcsncmp-avx2 \
> wcsncmp-avx2-rtm \
> + wcsncmp-generic \
> wcsncmp-evex \
> - wcsncmp-sse2 \
> wcsnlen-avx2 \
> wcsnlen-avx2-rtm \
> - wcsnlen-c \
> + wcsnlen-generic \
> wcsnlen-evex \
> wcsnlen-evex512 \
> wcsnlen-sse4_1 \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> index 4289df29ec..1d9cdfcfec 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> @@ -19,7 +19,11 @@
>
> #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
> @@ -44,5 +48,5 @@ IFUNC_SELECTOR (void)
> return OPTIMIZE (avx2);
> }
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (GENERIC);
> }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index dc595752e0..883362f63d 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -367,7 +367,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> __stpncpy_evex)
> IFUNC_IMPL_ADD (array, i, stpncpy, 1,
> __stpncpy_sse2_unaligned)
> - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
> + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
>
> /* Support sysdeps/x86_64/multiarch/stpcpy.c. */
> IFUNC_IMPL (i, name, stpcpy,
> @@ -526,7 +526,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, strcspn,
> IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
> __strcspn_sse42)
> - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
> + IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
>
> /* Support sysdeps/x86_64/multiarch/strncase_l.c. */
> IFUNC_IMPL (i, name, strncasecmp,
> @@ -580,7 +580,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> __strncat_evex)
> IFUNC_IMPL_ADD (array, i, strncat, 1,
> __strncat_sse2_unaligned)
> - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
> + IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
>
> /* Support sysdeps/x86_64/multiarch/strncpy.c. */
> IFUNC_IMPL (i, name, strncpy,
> @@ -596,20 +596,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> __strncpy_evex)
> IFUNC_IMPL_ADD (array, i, strncpy, 1,
> __strncpy_sse2_unaligned)
> - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
> + IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
>
> /* Support sysdeps/x86_64/multiarch/strpbrk.c. */
> IFUNC_IMPL (i, name, strpbrk,
> IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
> __strpbrk_sse42)
> - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
> + IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
>
>
> /* Support sysdeps/x86_64/multiarch/strspn.c. */
> IFUNC_IMPL (i, name, strspn,
> IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
> __strspn_sse42)
> - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
> + IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
>
> /* Support sysdeps/x86_64/multiarch/strstr.c. */
> IFUNC_IMPL (i, name, strstr,
> @@ -686,13 +686,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> && CPU_FEATURE_USABLE (AVX512BW)
> && CPU_FEATURE_USABLE (BMI2)),
> __wcsncmp_evex)
> - IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
> + IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
>
> /* Support sysdeps/x86_64/multiarch/wcscpy.c. */
> IFUNC_IMPL (i, name, wcscpy,
> IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
> __wcscpy_ssse3)
> - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
> + IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
>
> /* Support sysdeps/x86_64/multiarch/wcslen.c. */
> IFUNC_IMPL (i, name, wcslen,
> @@ -744,7 +744,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL_ADD (array, i, wcsnlen,
> CPU_FEATURE_USABLE (SSE4_1),
> __wcsnlen_sse4_1)
> - IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
> + IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
>
> /* Support sysdeps/x86_64/multiarch/wmemchr.c. */
> IFUNC_IMPL (i, name, wmemchr,
> diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> index b555ff2fac..ee36525bcf 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
> @@ -19,7 +19,7 @@
>
> #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
>
> static inline void *
> @@ -30,5 +30,5 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
> return OPTIMIZE (sse42);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (generic);
> }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> index a15afa44e9..80529458d1 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
> @@ -20,7 +20,11 @@
>
> #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
> attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> @@ -49,5 +53,5 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> return OPTIMIZE (sse2_unaligned);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (GENERIC);
> }
> diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> index 2b29e7608a..88c1c502af 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
> @@ -19,7 +19,11 @@
>
> #include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +#ifndef GENERIC
> +# define GENERIC sse2
> +#endif
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> @@ -48,5 +52,5 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
> return OPTIMIZE (sse4_1);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (GENERIC);
> }
> diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
> deleted file mode 100644
> index b016e487e1..0000000000
> --- a/sysdeps/x86_64/multiarch/stpncpy-c.c
> +++ /dev/null
> @@ -1,7 +0,0 @@
> -#define STPNCPY __stpncpy_sse2
> -#undef weak_alias
> -#define weak_alias(ignored1, ignored2)
> -#undef libc_hidden_def
> -#define libc_hidden_def(stpncpy)
> -
> -#include <string/stpncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/stpncpy-generic.c b/sysdeps/x86_64/multiarch/stpncpy-generic.c
> new file mode 100644
> index 0000000000..87826845b0
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/stpncpy-generic.c
> @@ -0,0 +1,26 @@
> +/* stpncpy.
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#define STPNCPY __stpncpy_generic
> +#undef weak_alias
> +#define weak_alias(ignored1, ignored2)
> +#undef libc_hidden_def
> +#define libc_hidden_def(stpncpy)
> +
> +#include <string/stpncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
> index 82fa53957d..879bc83f0b 100644
> --- a/sysdeps/x86_64/multiarch/stpncpy.c
> +++ b/sysdeps/x86_64/multiarch/stpncpy.c
> @@ -25,6 +25,7 @@
> # undef stpncpy
> # undef __stpncpy
>
> +# define GENERIC generic
> # define SYMBOL_NAME stpncpy
> # include "ifunc-strcpy.h"
>
> diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.c b/sysdeps/x86_64/multiarch/strcspn-generic.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strcspn-sse2.c
> rename to sysdeps/x86_64/multiarch/strcspn-generic.c
> index 3a04bb39fc..423de2e2b2 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-generic.c
> @@ -19,7 +19,7 @@
> #if IS_IN (libc)
>
> # include <sysdep.h>
> -# define STRCSPN __strcspn_sse2
> +# define STRCSPN __strcspn_generic
>
> # undef libc_hidden_builtin_def
> # define libc_hidden_builtin_def(STRCSPN)
> diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-sse4.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strcspn-c.c
> rename to sysdeps/x86_64/multiarch/strcspn-sse4.c
> index c312fab8b1..59f64f9fe8 100644
> --- a/sysdeps/x86_64/multiarch/strcspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strcspn-sse4.c
> @@ -52,8 +52,8 @@
> when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
> X for case 1. */
>
> -#ifndef STRCSPN_SSE2
> -# define STRCSPN_SSE2 __strcspn_sse2
> +#ifndef STRCSPN_GENERIC
> +# define STRCSPN_GENERIC __strcspn_generic
> # define STRCSPN_SSE42 __strcspn_sse42
> #endif
>
> @@ -69,7 +69,7 @@ char *
> #else
> size_t
> #endif
> -STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
> +STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
>
>
> #ifdef USE_AS_STRPBRK
> @@ -119,7 +119,7 @@ STRCSPN_SSE42 (const char *s, const char *a)
> /* There is no NULL terminator. Don't use SSE4.2 if the length
> of A > 16. */
> if (a[16] != 0)
> - return STRCSPN_SSE2 (s, a);
> + return STRCSPN_GENERIC (s, a);
> }
>
> aligned = s;
> diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
> deleted file mode 100644
> index 93a7fab7ea..0000000000
> --- a/sysdeps/x86_64/multiarch/strncat-c.c
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -#define STRNCAT __strncat_sse2
> -#include <string/strncat.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat-generic.c b/sysdeps/x86_64/multiarch/strncat-generic.c
> new file mode 100644
> index 0000000000..0090669cd1
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strncat-generic.c
> @@ -0,0 +1,21 @@
> +/* strncat.
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#define STRNCAT __strncat_generic
> +#include <string/strncat.c>
> diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
> index b649343a97..50fba8a41f 100644
> --- a/sysdeps/x86_64/multiarch/strncat.c
> +++ b/sysdeps/x86_64/multiarch/strncat.c
> @@ -24,6 +24,7 @@
> # undef strncat
>
> # define SYMBOL_NAME strncat
> +# define GENERIC generic
> # include "ifunc-strcpy.h"
>
> libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
> deleted file mode 100644
> index 57c45ac7ab..0000000000
> --- a/sysdeps/x86_64/multiarch/strncpy-c.c
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#define STRNCPY __strncpy_sse2
> -#undef libc_hidden_builtin_def
> -#define libc_hidden_builtin_def(strncpy)
> -
> -#include <string/strncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/strncpy-generic.c b/sysdeps/x86_64/multiarch/strncpy-generic.c
> new file mode 100644
> index 0000000000..9916153dd5
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strncpy-generic.c
> @@ -0,0 +1,24 @@
> +/* strncpy.
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#define STRNCPY __strncpy_generic
> +#undef libc_hidden_builtin_def
> +#define libc_hidden_builtin_def(strncpy)
> +
> +#include <string/strncpy.c>
> diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
> index 2a780a7e16..7fc7d72ec5 100644
> --- a/sysdeps/x86_64/multiarch/strncpy.c
> +++ b/sysdeps/x86_64/multiarch/strncpy.c
> @@ -24,6 +24,7 @@
> # undef strncpy
>
> # define SYMBOL_NAME strncpy
> +# define GENERIC generic
> # include "ifunc-strcpy.h"
>
> libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.c b/sysdeps/x86_64/multiarch/strpbrk-generic.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strpbrk-sse2.c
> rename to sysdeps/x86_64/multiarch/strpbrk-generic.c
> index d03214c4fb..d31acfe495 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-generic.c
> @@ -19,7 +19,7 @@
> #if IS_IN (libc)
>
> # include <sysdep.h>
> -# define STRPBRK __strpbrk_sse2
> +# define STRPBRK __strpbrk_generic
>
> # undef libc_hidden_builtin_def
> # define libc_hidden_builtin_def(STRPBRK)
> diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
> similarity index 92%
> rename from sysdeps/x86_64/multiarch/strpbrk-c.c
> rename to sysdeps/x86_64/multiarch/strpbrk-sse4.c
> index abf4ff7f1a..bf74d660d5 100644
> --- a/sysdeps/x86_64/multiarch/strpbrk-c.c
> +++ b/sysdeps/x86_64/multiarch/strpbrk-sse4.c
> @@ -17,6 +17,6 @@
> <https://www.gnu.org/licenses/>. */
>
> #define USE_AS_STRPBRK
> -#define STRCSPN_SSE2 __strpbrk_sse2
> +#define STRCSPN_GENERIC __strpbrk_generic
> #define STRCSPN_SSE42 __strpbrk_sse42
> -#include "strcspn-c.c"
> +#include "strcspn-sse4.c"
> diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.c b/sysdeps/x86_64/multiarch/strspn-generic.c
> similarity index 96%
> rename from sysdeps/x86_64/multiarch/strspn-sse2.c
> rename to sysdeps/x86_64/multiarch/strspn-generic.c
> index 61cc6cb0a5..6b50c36432 100644
> --- a/sysdeps/x86_64/multiarch/strspn-sse2.c
> +++ b/sysdeps/x86_64/multiarch/strspn-generic.c
> @@ -19,7 +19,7 @@
> #if IS_IN (libc)
>
> # include <sysdep.h>
> -# define STRSPN __strspn_sse2
> +# define STRSPN __strspn_generic
>
> # undef libc_hidden_builtin_def
> # define libc_hidden_builtin_def(STRSPN)
> diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-sse4.c
> similarity index 97%
> rename from sysdeps/x86_64/multiarch/strspn-c.c
> rename to sysdeps/x86_64/multiarch/strspn-sse4.c
> index 6124033ceb..d044916688 100644
> --- a/sysdeps/x86_64/multiarch/strspn-c.c
> +++ b/sysdeps/x86_64/multiarch/strspn-sse4.c
> @@ -51,7 +51,7 @@
>
> We exit from the loop for case 1. */
>
> -extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
> +extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
>
>
> size_t
> @@ -98,7 +98,7 @@ __strspn_sse42 (const char *s, const char *a)
> /* There is no NULL terminator. Don't use SSE4.2 if the length
> of A > 16. */
> if (a[16] != 0)
> - return __strspn_sse2 (s, a);
> + return __strspn_generic (s, a);
> }
> aligned = s;
> offset = (unsigned int) ((size_t) s & 15);
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-c.c b/sysdeps/x86_64/multiarch/wcscpy-c.c
> deleted file mode 100644
> index 26d6984e9b..0000000000
> --- a/sysdeps/x86_64/multiarch/wcscpy-c.c
> +++ /dev/null
> @@ -1,5 +0,0 @@
> -#if IS_IN (libc)
> -# define WCSCPY __wcscpy_sse2
> -#endif
> -
> -#include <wcsmbs/wcscpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy-generic.c b/sysdeps/x86_64/multiarch/wcscpy-generic.c
> new file mode 100644
> index 0000000000..5ea905f33f
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcscpy-generic.c
> @@ -0,0 +1,24 @@
> +/* wcscpy.
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#if IS_IN (libc)
> +# define WCSCPY __wcscpy_generic
> +#endif
> +
> +#include <wcsmbs/wcscpy.c>
> diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
> index 6a2d1421d9..53c3228dc2 100644
> --- a/sysdeps/x86_64/multiarch/wcscpy.c
> +++ b/sysdeps/x86_64/multiarch/wcscpy.c
> @@ -26,7 +26,7 @@
> # define SYMBOL_NAME wcscpy
> # include <init-arch.h>
>
> -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
> extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
>
> static inline void *
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
> return OPTIMIZE (ssse3);
>
> - return OPTIMIZE (sse2);
> + return OPTIMIZE (generic);
> }
>
> libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
> similarity index 92%
> rename from sysdeps/x86_64/multiarch/wcsncmp-sse2.c
> rename to sysdeps/x86_64/multiarch/wcsncmp-generic.c
> index 8d9cbbb900..658d541886 100644
> --- a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
> +++ b/sysdeps/x86_64/multiarch/wcsncmp-generic.c
> @@ -1,4 +1,4 @@
> -/* wcsncmp optimized with SSE2.
> +/* wcsncmp.
> Copyright (C) 2018-2022 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> @@ -16,5 +16,5 @@
> License along with the GNU C Library; if not, see
> <https://www.gnu.org/licenses/>. */
>
> -#define WCSNCMP __wcsncmp_sse2
> +#define WCSNCMP __wcsncmp_generic
> #include <wcsmbs/wcsncmp.c>
> diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c
> index 5e00af2ca5..1836f794dd 100644
> --- a/sysdeps/x86_64/multiarch/wcsncmp.c
> +++ b/sysdeps/x86_64/multiarch/wcsncmp.c
> @@ -24,6 +24,8 @@
> # undef wcsncmp
> # undef __wcsncmp
>
> +# define GENERIC generic
> +
> # define SYMBOL_NAME wcsncmp
> # include "ifunc-avx2.h"
>
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
> deleted file mode 100644
> index e1ec7cfbb5..0000000000
> --- a/sysdeps/x86_64/multiarch/wcsnlen-c.c
> +++ /dev/null
> @@ -1,9 +0,0 @@
> -#if IS_IN (libc)
> -# include <wchar.h>
> -
> -# define WCSNLEN __wcsnlen_sse2
> -
> -extern __typeof (wcsnlen) __wcsnlen_sse2;
> -#endif
> -
> -#include "wcsmbs/wcsnlen.c"
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen-generic.c b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
> new file mode 100644
> index 0000000000..2d75da7709
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/wcsnlen-generic.c
> @@ -0,0 +1,28 @@
> +/* wcsnlen.
> + Copyright (C) 2022 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#if IS_IN (libc)
> +# include <wchar.h>
> +
> +# define WCSNLEN __wcsnlen_generic
> +
> +extern __typeof (wcsnlen) __wcsnlen_generic;
> +#endif
> +
> +#include "wcsmbs/wcsnlen.c"
> diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
> index baa26666a8..05b7a211de 100644
> --- a/sysdeps/x86_64/multiarch/wcsnlen.c
> +++ b/sysdeps/x86_64/multiarch/wcsnlen.c
> @@ -24,6 +24,7 @@
> # undef __wcsnlen
>
> # define SYMBOL_NAME wcsnlen
> +# define GENERIC generic
> # include "ifunc-wcslen.h"
>
> libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
> --
> 2.34.1
>
LGTM.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v1 1/3] x86: Align varshift table to 32-bytes
2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
@ 2022-07-14 2:51 ` Sunil Pandey
0 siblings, 0 replies; 11+ messages in thread
From: Sunil Pandey @ 2022-07-14 2:51 UTC (permalink / raw)
To: H.J. Lu; +Cc: Noah Goldstein, GNU C Library
On Thu, Jun 9, 2022 at 8:15 AM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Wed, Jun 8, 2022 at 9:16 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > This ensures the load will never split a cache line.
> > ---
> > sysdeps/x86_64/multiarch/varshift.c | 5 +++--
> > sysdeps/x86_64/multiarch/varshift.h | 3 ++-
> > 2 files changed, 5 insertions(+), 3 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
> > index c8210f0546..d27767520a 100644
> > --- a/sysdeps/x86_64/multiarch/varshift.c
> > +++ b/sysdeps/x86_64/multiarch/varshift.c
> > @@ -16,9 +16,10 @@
> > License along with the GNU C Library; if not, see
> > <https://www.gnu.org/licenses/>. */
> >
> > -#include "varshift.h"
> > +#include <stdint.h>
> >
> > -const int8_t ___m128i_shift_right[31] attribute_hidden =
> > +const int8_t ___m128i_shift_right[31] attribute_hidden
> > + __attribute__((aligned(32))) =
> > {
> > 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
> > -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
> > diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
> > index af30694488..ffd12d79e4 100644
> > --- a/sysdeps/x86_64/multiarch/varshift.h
> > +++ b/sysdeps/x86_64/multiarch/varshift.h
> > @@ -19,7 +19,8 @@
> > #include <stdint.h>
> > #include <tmmintrin.h>
> >
> > -extern const int8_t ___m128i_shift_right[31] attribute_hidden;
> > +extern const int8_t ___m128i_shift_right[31] attribute_hidden
> > + __attribute__ ((aligned (32)));
> >
> > static __inline__ __m128i
> > __m128i_shift_right (__m128i value, unsigned long int offset)
> > --
> > 2.34.1
> >
>
> LGTM.
>
> Thanks.
>
> --
> H.J.
I would like to backport this patch to release branches.
Any comments or objections?
--Sunil
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2022-07-14 2:52 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-09 4:16 [PATCH v1 1/3] x86: Align varshift table to 32-bytes Noah Goldstein
2022-06-09 4:16 ` [PATCH v1 2/3] x86: Add avx compiled version for strspn, strcspn, and strpbrk Noah Goldstein
2022-06-09 15:28 ` H.J. Lu
2022-06-09 4:16 ` [PATCH v1 3/3] x86: Rename generic functions with unique postfix for clarity Noah Goldstein
2022-06-10 0:58 ` [PATCH v2] " Noah Goldstein
2022-06-10 1:19 ` H.J. Lu
2022-06-10 1:26 ` Noah Goldstein
2022-06-16 22:11 ` [PATCH v3] " Noah Goldstein
2022-06-16 22:43 ` H.J. Lu
2022-06-09 15:14 ` [PATCH v1 1/3] x86: Align varshift table to 32-bytes H.J. Lu
2022-07-14 2:51 ` Sunil Pandey
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).