* [PATCH v1] x86: Fix type of `Slow_SSE4_2` def in isa-level.h
@ 2022-07-18 10:38 Noah Goldstein
2022-07-18 10:38 ` [PATCH v1] x86: Continue building memmove-ssse3.S as ISA level V3 Noah Goldstein
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Noah Goldstein @ 2022-07-18 10:38 UTC (permalink / raw)
To: libc-alpha
Change from `Slow_SSE42_X86_ISA_LEVEL` to
`Slow_SSE4_2_X86_ISA_LEVEL`. Currently the def is unused so no
need to change anything else.
---
sysdeps/x86/isa-level.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 3c4480aba7..fe56af7e2b 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -104,7 +104,7 @@
/* NB: This feature is disable when ISA level >= 3. All CPUs with
this feature don't run on glibc built with ISA level >= 3. */
-#define Slow_SSE42_X86_ISA_LEVEL 3
+#define Slow_SSE4_2_X86_ISA_LEVEL 3
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1] x86: Continue building memmove-ssse3.S as ISA level V3
2022-07-18 10:38 [PATCH v1] x86: Fix type of `Slow_SSE4_2` def in isa-level.h Noah Goldstein
@ 2022-07-18 10:38 ` Noah Goldstein
2022-07-18 11:54 ` [PATCH v2] x86: Fix typo of `Slow_SSE4_2` def in isa-level.h Noah Goldstein
2022-07-18 13:25 ` [PATCH v3 1/2] " Noah Goldstein
2 siblings, 0 replies; 5+ messages in thread
From: Noah Goldstein @ 2022-07-18 10:38 UTC (permalink / raw)
To: libc-alpha
Some V3 processors still strongly prefer memmove-ssse3.S because it is
heavily optimized to avoid unaligned memory accesses.
Tested builds for x86-64 v1, v2, v3, and v4 with and without
multiarch.
---
sysdeps/x86/isa-level.h | 15 +++++++++++
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 30 +++++++++++++---------
sysdeps/x86_64/multiarch/ifunc-memmove.h | 14 +++++-----
sysdeps/x86_64/multiarch/memmove-ssse3.S | 4 ++-
4 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index fe56af7e2b..f49336acf3 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -90,6 +90,14 @@
/* For X86_ISA_CPU_FEATURES_ARCH_P. */
+
+/* NB: This is just an alias to `AVX_Fast_Unaligned_Load` that will
+ continue doing runtime check up to ISA level >= 4. This is for
+ some Zhaoxin CPUs which build at ISA level V3 but still have a
+ strong preference for avoiding unaligned `ymm` loads. */
+#define V4_AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 4
+#define V4_AVX_Fast_Unaligned_Load AVX_Fast_Unaligned_Load
+
/* NB: This feature is enabled when ISA level >= 3, which was disabled
for the following CPUs:
- AMD Excavator
@@ -106,6 +114,13 @@
this feature don't run on glibc built with ISA level >= 3. */
#define Slow_SSE4_2_X86_ISA_LEVEL 3
+/* NB: This is just an alias to `Fast_Unaligned_Copy` that will
+ continue doing runtime check up to ISA level >= 3. This is for
+ some Zhaoxin CPUs which build at ISA level V3 but still have a
+ strong preference for avoiding unaligned `ymm` loads. */
+#define V3_Fast_Unaligned_Copy_X86_ISA_LEVEL 3
+#define V3_Fast_Unaligned_Copy Fast_Unaligned_Copy
+
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a71444eccb..427f127427 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -143,8 +143,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memmove_chk_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
CPU_FEATURE_USABLE (SSSE3),
__memmove_chk_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -190,8 +191,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memmove_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, memmove,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
CPU_FEATURE_USABLE (SSSE3),
__memmove_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1004,8 +1006,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memcpy_chk_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
CPU_FEATURE_USABLE (SSSE3),
__memcpy_chk_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1051,8 +1054,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memcpy_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
CPU_FEATURE_USABLE (SSSE3),
__memcpy_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1098,8 +1102,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__mempcpy_chk_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
CPU_FEATURE_USABLE (SSSE3),
__mempcpy_chk_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1145,8 +1150,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__mempcpy_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
CPU_FEATURE_USABLE (SSSE3),
__mempcpy_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
index 1643d32887..be0c758783 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -72,7 +72,7 @@ IFUNC_SELECTOR (void)
}
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
- AVX_Fast_Unaligned_Load, ))
+ V4_AVX_Fast_Unaligned_Load, ))
{
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
{
@@ -101,11 +101,13 @@ IFUNC_SELECTOR (void)
}
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
- /* Leave this as runtime check. The SSSE3 is optimized almost
- exclusively for avoiding unaligned memory access during the
- copy and by and large is not better than the sse2
- implementation as a general purpose memmove. */
- && !CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+ /* Leave this as runtime check for V2. By V3 assume it must be
+ set. The SSSE3 is optimized almost exclusively for avoiding
+ unaligned memory access during the copy and by and large is
+ not better than the sse2 implementation as a general purpose
+ memmove. */
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ V3_Fast_Unaligned_Copy, !))
{
return OPTIMIZE (ssse3);
}
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S
index 57599752c7..15cafee766 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S
@@ -20,7 +20,9 @@
#include <isa-level.h>
-#if ISA_SHOULD_BUILD (2)
+/* Continue building up to ISA level V3 as some V3 CPUs strongly
+ prefer this implementation. */
+#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>
# ifndef MEMMOVE
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2] x86: Fix typo of `Slow_SSE4_2` def in isa-level.h
2022-07-18 10:38 [PATCH v1] x86: Fix type of `Slow_SSE4_2` def in isa-level.h Noah Goldstein
2022-07-18 10:38 ` [PATCH v1] x86: Continue building memmove-ssse3.S as ISA level V3 Noah Goldstein
@ 2022-07-18 11:54 ` Noah Goldstein
2022-07-18 13:25 ` [PATCH v3 1/2] " Noah Goldstein
2 siblings, 0 replies; 5+ messages in thread
From: Noah Goldstein @ 2022-07-18 11:54 UTC (permalink / raw)
To: libc-alpha
Change from `Slow_SSE42_X86_ISA_LEVEL` to
`Slow_SSE4_2_X86_ISA_LEVEL`. Currently the def is unused so no
need to change anything else.
---
sysdeps/x86/isa-level.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 3c4480aba7..fe56af7e2b 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -104,7 +104,7 @@
/* NB: This feature is disable when ISA level >= 3. All CPUs with
this feature don't run on glibc built with ISA level >= 3. */
-#define Slow_SSE42_X86_ISA_LEVEL 3
+#define Slow_SSE4_2_X86_ISA_LEVEL 3
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v3 1/2] x86: Fix typo of `Slow_SSE4_2` def in isa-level.h
2022-07-18 10:38 [PATCH v1] x86: Fix type of `Slow_SSE4_2` def in isa-level.h Noah Goldstein
2022-07-18 10:38 ` [PATCH v1] x86: Continue building memmove-ssse3.S as ISA level V3 Noah Goldstein
2022-07-18 11:54 ` [PATCH v2] x86: Fix typo of `Slow_SSE4_2` def in isa-level.h Noah Goldstein
@ 2022-07-18 13:25 ` Noah Goldstein
2022-07-18 13:25 ` [PATCH v3 2/2] x86: Continue building memmove-ssse3.S as ISA level V3 Noah Goldstein
2 siblings, 1 reply; 5+ messages in thread
From: Noah Goldstein @ 2022-07-18 13:25 UTC (permalink / raw)
To: libc-alpha
Change from `Slow_SSE42_X86_ISA_LEVEL` to
`Slow_SSE4_2_X86_ISA_LEVEL`. Currently the def is unused so no
need to change anything else.
---
sysdeps/x86/isa-level.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 3c4480aba7..fe56af7e2b 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -104,7 +104,7 @@
/* NB: This feature is disable when ISA level >= 3. All CPUs with
this feature don't run on glibc built with ISA level >= 3. */
-#define Slow_SSE42_X86_ISA_LEVEL 3
+#define Slow_SSE4_2_X86_ISA_LEVEL 3
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v3 2/2] x86: Continue building memmove-ssse3.S as ISA level V3
2022-07-18 13:25 ` [PATCH v3 1/2] " Noah Goldstein
@ 2022-07-18 13:25 ` Noah Goldstein
0 siblings, 0 replies; 5+ messages in thread
From: Noah Goldstein @ 2022-07-18 13:25 UTC (permalink / raw)
To: libc-alpha
Some V3 processors still strongly prefer memmove-ssse3.S because it is
heavily optimized to avoid unaligned memory accesses.
Tested builds for x86-64 v1, v2, v3, and v4 with and without
multiarch.
---
sysdeps/x86/isa-level.h | 15 +++++++++++
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 30 +++++++++++++---------
sysdeps/x86_64/multiarch/ifunc-memmove.h | 14 +++++-----
sysdeps/x86_64/multiarch/memmove-ssse3.S | 4 ++-
4 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index fe56af7e2b..f49336acf3 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -90,6 +90,14 @@
/* For X86_ISA_CPU_FEATURES_ARCH_P. */
+
+/* NB: This is just an alias to `AVX_Fast_Unaligned_Load` that will
+ continue doing runtime check up to ISA level >= 4. This is for
+ some Zhaoxin CPUs which build at ISA level V3 but still have a
+ strong preference for avoiding unaligned `ymm` loads. */
+#define V4_AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 4
+#define V4_AVX_Fast_Unaligned_Load AVX_Fast_Unaligned_Load
+
/* NB: This feature is enabled when ISA level >= 3, which was disabled
for the following CPUs:
- AMD Excavator
@@ -106,6 +114,13 @@
this feature don't run on glibc built with ISA level >= 3. */
#define Slow_SSE4_2_X86_ISA_LEVEL 3
+/* NB: This is just an alias to `Fast_Unaligned_Copy` that will
+ continue doing runtime check up to ISA level >= 3. This is for
+ some Zhaoxin CPUs which build at ISA level V3 but still have a
+ strong preference for avoiding unaligned `ymm` loads. */
+#define V3_Fast_Unaligned_Copy_X86_ISA_LEVEL 3
+#define V3_Fast_Unaligned_Copy Fast_Unaligned_Copy
+
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a71444eccb..427f127427 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -143,8 +143,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memmove_chk_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, __memmove_chk,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memmove_chk,
CPU_FEATURE_USABLE (SSSE3),
__memmove_chk_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -190,8 +191,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memmove_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, memmove,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memmove,
CPU_FEATURE_USABLE (SSSE3),
__memmove_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1004,8 +1006,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memcpy_chk_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, __memcpy_chk,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memcpy_chk,
CPU_FEATURE_USABLE (SSSE3),
__memcpy_chk_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1051,8 +1054,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__memcpy_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, memcpy,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memcpy,
CPU_FEATURE_USABLE (SSSE3),
__memcpy_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1098,8 +1102,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__mempcpy_chk_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, __mempcpy_chk,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __mempcpy_chk,
CPU_FEATURE_USABLE (SSSE3),
__mempcpy_chk_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -1145,8 +1150,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
(CPU_FEATURE_USABLE (AVX)
&& CPU_FEATURE_USABLE (RTM)),
__mempcpy_avx_unaligned_erms_rtm)
- /* By V3 we assume fast aligned copy. */
- X86_IFUNC_IMPL_ADD_V2 (array, i, mempcpy,
+ /* Some V3 implementations still heavily prefer aligned
+ loads so keep SSSE3 implementation around. */
+ X86_IFUNC_IMPL_ADD_V3 (array, i, mempcpy,
CPU_FEATURE_USABLE (SSSE3),
__mempcpy_ssse3)
/* ISA V2 wrapper for SSE2 implementation because the SSE2
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
index 1643d32887..be0c758783 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -72,7 +72,7 @@ IFUNC_SELECTOR (void)
}
if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
- AVX_Fast_Unaligned_Load, ))
+ V4_AVX_Fast_Unaligned_Load, ))
{
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
{
@@ -101,11 +101,13 @@ IFUNC_SELECTOR (void)
}
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
- /* Leave this as runtime check. The SSSE3 is optimized almost
- exclusively for avoiding unaligned memory access during the
- copy and by and large is not better than the sse2
- implementation as a general purpose memmove. */
- && !CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+ /* Leave this as runtime check for V2. By V3 assume it must be
+ set. The SSSE3 is optimized almost exclusively for avoiding
+ unaligned memory access during the copy and by and large is
+ not better than the sse2 implementation as a general purpose
+ memmove. */
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ V3_Fast_Unaligned_Copy, !))
{
return OPTIMIZE (ssse3);
}
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S
index 57599752c7..15cafee766 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S
@@ -20,7 +20,9 @@
#include <isa-level.h>
-#if ISA_SHOULD_BUILD (2)
+/* Continue building up to ISA level V3 as some V3 CPUs strongly
+ prefer this implementation. */
+#if ISA_SHOULD_BUILD (3)
# include <sysdep.h>
# ifndef MEMMOVE
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-07-18 13:25 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-18 10:38 [PATCH v1] x86: Fix type of `Slow_SSE4_2` def in isa-level.h Noah Goldstein
2022-07-18 10:38 ` [PATCH v1] x86: Continue building memmove-ssse3.S as ISA level V3 Noah Goldstein
2022-07-18 11:54 ` [PATCH v2] x86: Fix typo of `Slow_SSE4_2` def in isa-level.h Noah Goldstein
2022-07-18 13:25 ` [PATCH v3 1/2] " Noah Goldstein
2022-07-18 13:25 ` [PATCH v3 2/2] x86: Continue building memmove-ssse3.S as ISA level V3 Noah Goldstein
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).