public inbox for libc-stable@sourceware.org
 help / color / mirror / Atom feed
* [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level
@ 2022-10-03 22:02 Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The "System V Application Binary Interface AMD64 Architecture Processor
Supplement" mandates the BMI1 and BMI2 CPU features for the x86-64-v3
level.

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit b80f16adbd979831bf25ea491e1261e81885c2b6)
---
 sysdeps/x86/get-isa-level.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sysdeps/x86/get-isa-level.h b/sysdeps/x86/get-isa-level.h
index aa80f56ca6..785c25a835 100644
--- a/sysdeps/x86/get-isa-level.h
+++ b/sysdeps/x86/get-isa-level.h
@@ -47,6 +47,8 @@ get_isa_level (const struct cpu_features *cpu_features)
 	  isa_level |= GNU_PROPERTY_X86_ISA_1_V2;
 	  if (CPU_FEATURE_USABLE_P (cpu_features, AVX)
 	      && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+	      && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
+	      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
 	      && CPU_FEATURE_USABLE_P (cpu_features, F16C)
 	      && CPU_FEATURE_USABLE_P (cpu_features, FMA)
 	      && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.34 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations
  2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
@ 2022-10-03 22:02 ` Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 str(n)casecmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 10f79d3670b036925da63dc532b122d27ce65ff8)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c  | 28 +++++++++++++++------
 sysdeps/x86_64/multiarch/ifunc-strcasecmp.h |  1 +
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 8d649e263e..ca64b34c14 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -386,13 +386,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_evex)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strcasecmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
@@ -407,13 +410,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp_l,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_l_evex)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_l_avx2)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strcasecmp_l_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
@@ -542,13 +548,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_evex)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strncasecmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
@@ -564,13 +573,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp_l,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       & CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_l_evex)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_l_avx2)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strncasecmp_l_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
index 40819caf5a..e61d6e9497 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
@@ -32,6 +32,7 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.34 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation
  2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
@ 2022-10-03 22:02 ` Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strcmp implementation uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 4d64c6445735e9b34e2ac8e369312cbfc2f88e17)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 4 +++-
 sysdeps/x86_64/multiarch/strcmp.c          | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index ca64b34c14..70931f1598 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -503,10 +503,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/strcmp.c.  */
   IFUNC_IMPL (i, name, strcmp,
 	      IFUNC_IMPL_ADD (array, i, strcmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strcmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strcmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcmp,
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index b457fb4c15..0c0cd20a03 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -40,11 +40,11 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.34 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation
  2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
@ 2022-10-03 22:02 ` Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strncmp implementations uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit fc7de1d9b99ae1676bc626ddca422d7abee0eb48)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
 sysdeps/x86_64/multiarch/strncmp.c         | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 70931f1598..34d5f6efe5 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -1022,15 +1022,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/strncmp.c.  */
   IFUNC_IMPL (i, name, strncmp,
 	      IFUNC_IMPL_ADD (array, i, strncmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strncmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strncmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncmp_evex)
 	      IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2),
 			      __strncmp_sse42)
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
index f94a421784..7632d7b2dd 100644
--- a/sysdeps/x86_64/multiarch/strncmp.c
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -39,11 +39,11 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.34 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations
  2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (2 preceding siblings ...)
  2022-10-03 22:02 ` [COMMITTED 2.34 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
@ 2022-10-03 22:02 ` Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 wcs(n)cmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit f31a5a884ed84bd37032729d4d1eb9d06c9f3c29)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 34d5f6efe5..e76a991dc6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -693,10 +693,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wcscmp.c.  */
   IFUNC_IMPL (i, name, wcscmp,
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcscmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wcscmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
@@ -709,10 +711,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wcsncmp.c.  */
   IFUNC_IMPL (i, name, wcsncmp,
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsncmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wcsncmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.34 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations
  2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (3 preceding siblings ...)
  2022-10-03 22:02 ` [COMMITTED 2.34 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
@ 2022-10-03 22:02 ` Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 memchr, rawmemchr and wmemchr implementations use the 'bzhi'
and 'sarx' instructions, which belongs to the BMI2 CPU feature.

Fixes: acfd088a1963 ("x86: Optimize memchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit e3e7fab7fe5186d18ca2046d99ba321c27db30ad)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index e76a991dc6..81640cf006 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -41,10 +41,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/memchr.c.  */
   IFUNC_IMPL (i, name, memchr,
 	      IFUNC_IMPL_ADD (array, i, memchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __memchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, memchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __memchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, memchr,
@@ -283,10 +285,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/rawmemchr.c.  */
   IFUNC_IMPL (i, name, rawmemchr,
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __rawmemchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __rawmemchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
@@ -787,10 +791,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
   IFUNC_IMPL (i, name, wmemchr,
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wmemchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wmemchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.34 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation
  2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (4 preceding siblings ...)
  2022-10-03 22:02 ` [COMMITTED 2.34 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
@ 2022-10-03 22:02 ` Aurelien Jarno
  2022-10-03 22:02 ` [COMMITTED 2.34 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 memrchr implementation uses the 'shlxl' instruction, which
belongs to the BMI2 CPU feature and uses the 'lzcnt' instruction, which
belongs to the LZCNT CPU feature.

Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 3c0c78afabfed4b6fc161c159e628fbf14ff370b)
---
 sysdeps/x86_64/multiarch/ifunc-avx2.h      |  1 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index 6de72f7272..52bd00ea5c 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -31,6 +31,7 @@ IFUNC_SELECTOR (void)
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+      && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 81640cf006..d1fc1e75d6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -174,15 +174,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/memrchr.c.  */
   IFUNC_IMPL (i, name, memrchr,
 	      IFUNC_IMPL_ADD (array, i, memrchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
+			       && CPU_FEATURE_USABLE (LZCNT)),
 			      __memrchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, memrchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
+			       && CPU_FEATURE_USABLE (LZCNT)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __memrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, memrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)
+			       && CPU_FEATURE_USABLE (LZCNT)),
 			      __memrchr_evex)
 
 	      IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.34 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations
  2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (5 preceding siblings ...)
  2022-10-03 22:02 ` [COMMITTED 2.34 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
@ 2022-10-03 22:02 ` Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 22:02 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strrchr and wcsrchr implementation uses the 'blsmsk'
instruction which belongs to the BMI1 CPU feature and the 'shrx'
instruction, which belongs to the BMI2 CPU feature.

Fixes: df7e295d18ff ("x86: Optimize {str|wcs}rchr-avx2")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 7e8283170c5d6805b609a040801d819e362a6292)
---
 sysdeps/x86_64/multiarch/ifunc-avx2.h      |  1 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index 52bd00ea5c..877f007dd6 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -30,6 +30,7 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
       && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d1fc1e75d6..84f9e73e2b 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -498,15 +498,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/strrchr.c.  */
   IFUNC_IMPL (i, name, strrchr,
 	      IFUNC_IMPL_ADD (array, i, strrchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strrchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, strrchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strrchr_evex)
 	      IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
 
@@ -687,15 +693,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wcsrchr.c.  */
   IFUNC_IMPL (i, name, wcsrchr,
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsrchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wcsrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI1)
 			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsrchr_evex)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-10-03 22:02 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-03 22:02 [COMMITTED 2.34 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
2022-10-03 22:02 ` [COMMITTED 2.34 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
2022-10-03 22:02 ` [COMMITTED 2.34 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
2022-10-03 22:02 ` [COMMITTED 2.34 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
2022-10-03 22:02 ` [COMMITTED 2.34 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
2022-10-03 22:02 ` [COMMITTED 2.34 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
2022-10-03 22:02 ` [COMMITTED 2.34 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
2022-10-03 22:02 ` [COMMITTED 2.34 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).