public inbox for libc-stable@sourceware.org
 help / color / mirror / Atom feed
* [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level
@ 2022-10-03 21:50 Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:50 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The "System V Application Binary Interface AMD64 Architecture Processor
Supplement" mandates the BMI1 and BMI2 CPU features for the x86-64-v3
level.

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit b80f16adbd979831bf25ea491e1261e81885c2b6)
---
 sysdeps/x86/get-isa-level.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sysdeps/x86/get-isa-level.h b/sysdeps/x86/get-isa-level.h
index 1ade78ab73..5b4dd5f062 100644
--- a/sysdeps/x86/get-isa-level.h
+++ b/sysdeps/x86/get-isa-level.h
@@ -47,6 +47,8 @@ get_isa_level (const struct cpu_features *cpu_features)
 	  isa_level |= GNU_PROPERTY_X86_ISA_1_V2;
 	  if (CPU_FEATURE_USABLE_P (cpu_features, AVX)
 	      && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+	      && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
+	      && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
 	      && CPU_FEATURE_USABLE_P (cpu_features, F16C)
 	      && CPU_FEATURE_USABLE_P (cpu_features, FMA)
 	      && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.36 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations
  2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
@ 2022-10-03 21:50 ` Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:50 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 str(n)casecmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
if the CPU doesn't support TZCNT, and produces the same result for
non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 10f79d3670b036925da63dc532b122d27ce65ff8)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c  | 28 +++++++++++++++------
 sysdeps/x86_64/multiarch/ifunc-strcasecmp.h |  1 +
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a71444eccb..d208fae4bf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -448,13 +448,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strcasecmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp,
@@ -470,13 +473,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcasecmp_l,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_l_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcasecmp_l_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strcasecmp_l_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strcasecmp_l,
@@ -638,13 +644,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strncasecmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
@@ -660,13 +669,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncasecmp_l,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_l_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncasecmp_l_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strncasecmp_l_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
index 68646ef199..7622af259c 100644
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
@@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				      AVX_Fast_Unaligned_Load, ))
     {
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.36 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation
  2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
@ 2022-10-03 21:50 ` Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:50 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strcmp implementation uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
if the CPU doesn't support TZCNT, and produces the same result for
non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 4d64c6445735e9b34e2ac8e369312cbfc2f88e17)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 4 +++-
 sysdeps/x86_64/multiarch/strcmp.c          | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d208fae4bf..a42b0a4620 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -591,10 +591,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strcmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strcmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strcmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strcmp,
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index fdd5afe3af..9d6c9f66ba 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -45,12 +45,12 @@ IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.36 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation
  2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
@ 2022-10-03 21:50 ` Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:50 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strncmp implementation uses the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
if the CPU doesn't support TZCNT, and produces the same result for
non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit fc7de1d9b99ae1676bc626ddca422d7abee0eb48)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
 sysdeps/x86_64/multiarch/strncmp.c         | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index a42b0a4620..aebef3daaf 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -1176,13 +1176,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncmp,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strncmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strncmp_avx2_rtm)
 	      X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
index 4ebe4bde30..c4f8b6bbb5 100644
--- a/sysdeps/x86_64/multiarch/strncmp.c
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex);
 
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.36 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations
  2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (2 preceding siblings ...)
  2022-10-03 21:50 ` [COMMITTED 2.36 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
@ 2022-10-03 21:50 ` Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:50 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 wcs(n)cmp implementations use the 'bzhi' instruction, which
belongs to the BMI2 CPU feature.

NB: It also uses the 'tzcnt' BMI1 instruction, but it is executed as BSF
if the CPU doesn't support TZCNT, and produces the same result for
non-zero input.

Partially fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit f31a5a884ed84bd37032729d4d1eb9d06c9f3c29)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index aebef3daaf..fec8790c11 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -810,10 +810,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcscmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcscmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcscmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __wcscmp_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -830,10 +832,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsncmp_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsncmp_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __wcsncmp_avx2_rtm)
 	      /* ISA V2 wrapper for GENERIC implementation because the
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.36 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations
  2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (3 preceding siblings ...)
  2022-10-03 21:50 ` [COMMITTED 2.36 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
@ 2022-10-03 21:50 ` Aurelien Jarno
  2022-10-03 21:50 ` [COMMITTED 2.36 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
  2022-10-03 21:51 ` [COMMITTED 2.36 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:50 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 memchr, rawmemchr and wmemchr implementations use the 'bzhi'
and 'sarx' instructions, which belong to the BMI2 CPU feature.

Fixes: acfd088a1963 ("x86: Optimize memchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit e3e7fab7fe5186d18ca2046d99ba321c27db30ad)
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index fec8790c11..7c84963d92 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -69,10 +69,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __memchr_evex_rtm)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __memchr_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, memchr,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __memchr_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -335,10 +337,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __rawmemchr_evex_rtm)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __rawmemchr_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, rawmemchr,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __rawmemchr_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -927,10 +931,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wmemchr_evex_rtm)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wmemchr_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wmemchr,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __wmemchr_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.36 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation
  2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (4 preceding siblings ...)
  2022-10-03 21:50 ` [COMMITTED 2.36 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
@ 2022-10-03 21:50 ` Aurelien Jarno
  2022-10-03 21:51 ` [COMMITTED 2.36 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:50 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 memrchr implementation uses the 'shlxl' instruction, which
belongs to the BMI2 CPU feature, and the 'lzcnt' instruction, which
belongs to the LZCNT CPU feature.

Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 3c0c78afabfed4b6fc161c159e628fbf14ff370b)
---
 sysdeps/x86/isa-level.h                    |  1 +
 sysdeps/x86_64/multiarch/ifunc-avx2.h      |  1 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 10 ++++++++--
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 3c4480aba7..bbb90f5c5e 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -80,6 +80,7 @@
 #define AVX_X86_ISA_LEVEL 3
 #define AVX2_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
+#define LZCNT_X86_ISA_LEVEL 3
 #define MOVBE_X86_ISA_LEVEL 3
 
 /* ISA level >= 2 guaranteed includes.  */
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index a57a9952f3..f1741083fd 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
 				      AVX_Fast_Unaligned_Load, ))
     {
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 7c84963d92..ec1c5b55fb 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -209,13 +209,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, memrchr,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI2)
+				      && CPU_FEATURE_USABLE (LZCNT)),
 				     __memrchr_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
+				      && CPU_FEATURE_USABLE (LZCNT)),
 				     __memrchr_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI2)
+				      && CPU_FEATURE_USABLE (LZCNT)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __memrchr_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [COMMITTED 2.36 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations
  2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
                   ` (5 preceding siblings ...)
  2022-10-03 21:50 ` [COMMITTED 2.36 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
@ 2022-10-03 21:51 ` Aurelien Jarno
  6 siblings, 0 replies; 8+ messages in thread
From: Aurelien Jarno @ 2022-10-03 21:51 UTC (permalink / raw)
  To: libc-stable; +Cc: Aurelien Jarno, Noah Goldstein

The AVX2 strrchr and wcsrchr implementations use the 'blsmsk'
instruction, which belongs to the BMI1 CPU feature, and the 'shrx'
instruction, which belongs to the BMI2 CPU feature.

Fixes: df7e295d18ff ("x86: Optimize {str|wcs}rchr-avx2")
Partially resolves: BZ #29611

Reviewed-by: Noah Goldstein  <goldstein.w.n@gmail.com>
(cherry picked from commit 7e8283170c5d6805b609a040801d819e362a6292)
---
 sysdeps/x86/isa-level.h                    |  1 +
 sysdeps/x86_64/multiarch/ifunc-avx2.h      |  1 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 17 ++++++++++++++---
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index bbb90f5c5e..06f6c9663e 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -79,6 +79,7 @@
 /* ISA level >= 3 guaranteed includes.  */
 #define AVX_X86_ISA_LEVEL 3
 #define AVX2_X86_ISA_LEVEL 3
+#define BMI1_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
 #define LZCNT_X86_ISA_LEVEL 3
 #define MOVBE_X86_ISA_LEVEL 3
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index f1741083fd..f2f5e8a211 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -36,6 +36,7 @@ IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI1)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index ec1c5b55fb..00a91123d3 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -578,13 +578,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strrchr,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, strrchr,
 				     (CPU_FEATURE_USABLE (AVX512VL)
-				      && CPU_FEATURE_USABLE (AVX512BW)),
+				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI1)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strrchr_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI1)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __strrchr_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, strrchr,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI1)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __strrchr_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
@@ -797,13 +803,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      X86_IFUNC_IMPL_ADD_V4 (array, i, wcsrchr,
 				     (CPU_FEATURE_USABLE (AVX512VL)
 				      && CPU_FEATURE_USABLE (AVX512BW)
+				      && CPU_FEATURE_USABLE (BMI1)
 				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsrchr_evex)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
-				     CPU_FEATURE_USABLE (AVX2),
+				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI1)
+				      && CPU_FEATURE_USABLE (BMI2)),
 				     __wcsrchr_avx2)
 	      X86_IFUNC_IMPL_ADD_V3 (array, i, wcsrchr,
 				     (CPU_FEATURE_USABLE (AVX2)
+				      && CPU_FEATURE_USABLE (BMI1)
+				      && CPU_FEATURE_USABLE (BMI2)
 				      && CPU_FEATURE_USABLE (RTM)),
 				     __wcsrchr_avx2_rtm)
 	      /* ISA V2 wrapper for SSE2 implementation because the SSE2
-- 
2.35.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-10-03 21:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-03 21:50 [COMMITTED 2.36 1/8] x86: include BMI1 and BMI2 in x86-64-v3 level Aurelien Jarno
2022-10-03 21:50 ` [COMMITTED 2.36 2/8] x86-64: Require BMI2 for AVX2 str(n)casecmp implementations Aurelien Jarno
2022-10-03 21:50 ` [COMMITTED 2.36 3/8] x86-64: Require BMI2 for AVX2 strcmp implementation Aurelien Jarno
2022-10-03 21:50 ` [COMMITTED 2.36 4/8] x86-64: Require BMI2 for AVX2 strncmp implementation Aurelien Jarno
2022-10-03 21:50 ` [COMMITTED 2.36 5/8] x86-64: Require BMI2 for AVX2 wcs(n)cmp implementations Aurelien Jarno
2022-10-03 21:50 ` [COMMITTED 2.36 6/8] x86-64: Require BMI2 for AVX2 (raw|w)memchr implementations Aurelien Jarno
2022-10-03 21:50 ` [COMMITTED 2.36 7/8] x86-64: Require BMI2 and LZCNT for AVX2 memrchr implementation Aurelien Jarno
2022-10-03 21:51 ` [COMMITTED 2.36 8/8] x86-64: Require BMI1/BMI2 for AVX2 strrchr and wcsrchr implementations Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).