public inbox for libc-stable@sourceware.org
 help / color / mirror / Atom feed
* [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level
@ 2022-10-03 21:56 Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The "System V Application Binary Interface AMD64 Architecture Processor
Supplement" mandates the BMI1 and BMI2 CPU features for the x86-64-v3
level.

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit b80f16adbd979831bf25ea491e1261e81885c2b6)
---
 sysdeps/x86/get-isa-level.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sysdeps/x86/get-isa-level.h b/sysdeps/x86/get-isa-level.h
index 1ade78ab73..5b4dd5f062 100644
--- a/sysdeps/x86/get-isa-level.h
+++ b/sysdeps/x86/get-isa-level.h
@@ -47,6 +47,8 @@ get_isa_level (const struct cpu_features *cpu_features)
 	  isa_level |= GNU_PROPERTY_X86_ISA_1_V2;
 	  if (CPU_FEATURE_USABLE_P (cpu_features, AVX)
 	      && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+	      && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
+	      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
 	      && CPU_FEATURE_USABLE_P (cpu_features, F16C)
 	      && CPU_FEATURE_USABLE_P (cpu_features, FMA)
 	      && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.35 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations
  2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
@ 2022-10-03 21:56 ` Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 str(n)casecmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 10f79d3670b036925da63dc532b122d27ce65ff8)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c  | 28 +++++++++++++++------
 sysdeps/x86_64/multiarch/ifunc-strcasecmp.h |  1 +
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index e97218f624..2d8c87a42e 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -404,13 +404,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_evex)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strcasecmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
@@ -425,13 +428,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp_l,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_l_evex)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcasecmp_l_avx2)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strcasecmp_l_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
@@ -560,13 +566,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_evex)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strncasecmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
@@ -582,13 +591,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp_l,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       & CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_l_evex)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncasecmp_l_avx2)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strncasecmp_l_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
index 766539c241..e201f6e4d8 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
@@ -32,6 +32,7 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.35 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation
  2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
@ 2022-10-03 21:56 ` Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strcmp implementation uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 4d64c6445735e9b34e2ac8e369312cbfc2f88e17)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 4 +++-
 sysdeps/x86_64/multiarch/strcmp.c          | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 2d8c87a42e..136e3b8103 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -521,10 +521,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/strcmp.c.  */
   IFUNC_IMPL (i, name, strcmp,
 	      IFUNC_IMPL_ADD (array, i, strcmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strcmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strcmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strcmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcmp,
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index 08638e2632..b2bc619585 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -40,11 +40,11 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.35 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation
  2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
@ 2022-10-03 21:56 ` Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strncmp implementations uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit fc7de1d9b99ae1676bc626ddca422d7abee0eb48)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
 sysdeps/x86_64/multiarch/strncmp.c         | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 136e3b8103..b27ae975e4 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -1040,15 +1040,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/strncmp.c.  */
   IFUNC_IMPL (i, name, strncmp,
 	      IFUNC_IMPL_ADD (array, i, strncmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strncmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strncmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strncmp_evex)
 	      IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2),
 			      __strncmp_sse42)
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
index fca74199d8..4241da5bdf 100644
--- a/sysdeps/x86_64/multiarch/strncmp.c
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -39,11 +39,11 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.35 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations
  2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (2 preceding siblings ...)
  2022-10-03 21:56 ` [COMMITTED 2.35 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
@ 2022-10-03 21:56 ` Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 wcs(n)cmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
as BSF if the CPU doesn't support TZCNT, and produces the same result
for non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit f31a5a884ed84bd37032729d4d1eb9d06c9f3c29)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index b27ae975e4..5824554b4e 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -711,10 +711,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wcscmp.c.  */
   IFUNC_IMPL (i, name, wcscmp,
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcscmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wcscmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
@@ -727,10 +729,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wcsncmp.c.  */
   IFUNC_IMPL (i, name, wcsncmp,
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsncmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wcsncmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.35 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations
  2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (3 preceding siblings ...)
  2022-10-03 21:56 ` [COMMITTED 2.35 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
@ 2022-10-03 21:56 ` Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 memchr, rawmemchr and wmemchr implementations use the 'bzhi'
and 'sarx' instructions, which belongs to the BMI2 CPU feature.

Fixes: acfd088a1963 ("x86: Optimize memchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit e3e7fab7fe5186d18ca2046d99ba321c27db30ad)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 5824554b4e..9959f7b228 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -59,10 +59,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/memchr.c.  */
   IFUNC_IMPL (i, name, memchr,
 	      IFUNC_IMPL_ADD (array, i, memchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __memchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, memchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __memchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, memchr,
@@ -301,10 +303,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/rawmemchr.c.  */
   IFUNC_IMPL (i, name, rawmemchr,
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __rawmemchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __rawmemchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
@@ -805,10 +809,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
   IFUNC_IMPL (i, name, wmemchr,
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wmemchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wmemchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.35 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation
  2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (4 preceding siblings ...)
  2022-10-03 21:56 ` [COMMITTED 2.35 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
@ 2022-10-03 21:56 ` Aurelien Jarno
  2022-10-03 21:56 ` [COMMITTED 2.35 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 memrchr implementation uses the 'shlxl' instruction, which
belongs to the BMI2 CPU feature and uses the 'lzcnt' instruction, which
belongs to the LZCNT CPU feature.

Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 3c0c78afabfed4b6fc161c159e628fbf14ff370b)
---
 sysdeps/x86_64/multiarch/ifunc-avx2.h      |  1 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index 4289df29ec..5cc1e4b0bc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -31,6 +31,7 @@ IFUNC_SELECTOR (void)
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+      && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
     {
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 9959f7b228..a7c8ebbe85 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -192,15 +192,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/memrchr.c.  */
   IFUNC_IMPL (i, name, memrchr,
 	      IFUNC_IMPL_ADD (array, i, memrchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
+			       && CPU_FEATURE_USABLE (LZCNT)),
 			      __memrchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, memrchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI2)
+			       && CPU_FEATURE_USABLE (LZCNT)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __memrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, memrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI2)
+			       && CPU_FEATURE_USABLE (LZCNT)),
 			      __memrchr_evex)
 
 	      IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.35 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations
  2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (5 preceding siblings ...)
  2022-10-03 21:56 ` [COMMITTED 2.35 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
@ 2022-10-03 21:56 ` Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:56 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strrchr and wcsrchr implementation uses the 'blsmsk'
instruction which belongs to the BMI1 CPU feature and the 'shrx'
instruction, which belongs to the BMI2 CPU feature.

Fixes: df7e295d18ff ("x86: Optimize {str|wcs}rchr-avx2")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 7e8283170c5d6805b609a040801d819e362a6292)
---
 sysdeps/x86_64/multiarch/ifunc-avx2.h      |  1 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index 5cc1e4b0bc..0456fa6efe 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -30,6 +30,7 @@ IFUNC_SELECTOR (void)
   const struct cpu_features* cpu_features = __get_cpu_features ();
 
   if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
       && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a7c8ebbe85..b224e6a644 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -516,15 +516,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/strrchr.c.  */
   IFUNC_IMPL (i, name, strrchr,
 	      IFUNC_IMPL_ADD (array, i, strrchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strrchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, strrchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __strrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
-			       && CPU_FEATURE_USABLE (AVX512BW)),
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __strrchr_evex)
 	      IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
 
@@ -705,15 +711,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/wcsrchr.c.  */
   IFUNC_IMPL (i, name, wcsrchr,
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
-			      CPU_FEATURE_USABLE (AVX2),
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsrchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
 			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (BMI1)
+			       && CPU_FEATURE_USABLE (BMI2)
 			       && CPU_FEATURE_USABLE (RTM)),
 			      __wcsrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (BMI1)
 			       && CPU_FEATURE_USABLE (BMI2)),
 			      __wcsrchr_evex)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-10-03 21:56 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-03 21:56 [COMMITTED 2.35 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
2022-10-03 21:56 ` [COMMITTED 2.35 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
2022-10-03 21:56 ` [COMMITTED 2.35 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
2022-10-03 21:56 ` [COMMITTED 2.35 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
2022-10-03 21:56 ` [COMMITTED 2.35 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
2022-10-03 21:56 ` [COMMITTED 2.35 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
2022-10-03 21:56 ` [COMMITTED 2.35 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
2022-10-03 21:56 ` [COMMITTED 2.35 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).