public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes.
@ 2022-06-24  6:36 Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 2/7] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
                   ` (5 more replies)
  0 siblings, 6 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24  6:36 UTC (permalink / raw)
  To: libc-alpha

The function was tuned around 64-byte entry alignment and performs
better for all sizes with it.

As well different code boths where explicitly written to touch the
minimum number of cache line i.e sizes <= 32 touch only the entry
cache line.
---
Tested with:

for march in "--disable-multi-arch" ""; do for ISA in "-march=x86-64-v4" "-march=x86-64-v3" "" "-march=x86-64-v2"; do echo "START: ${ISA} - $march"; rm -rf glibc-dev/build; mkdir -p glibc-dev/build/glibc/; (cd glibc-dev/build/glibc/; unset LD_LIBRARY_PATH; glibc-dev/src/glibc/configure --prefix=/usr CC="gcc ${ISA}" $march; make -j7 --silent; make -r -C glibc-dev/src/glibc/string/ objdir=`pwd` check; make -r -C glibc-dev/src/glibc/wcsmbs/ objdir=`pwd` check;); echo "DONE: ${ISA} - $march"; objdump -d build/glibc/string/memchr.o;  objdump -d build/glibc/string/rawmemchr.o; objdump -d build/glibc/wcsmbs/wmemchr.o;  cat build/glibc/string/test-memchr.out; cat build/glibc/string/test-memchr.test-result; cat build/glibc/string/test-rawmemchr.out; cat build/glibc/string/test-rawmemchr.test-result; cat build/glibc/wcsmbs/test-wmemchr.out; cat build/glibc/wcsmbs/test-wmemchr.test-result; done; done

for march in "" "--disable-multi-arch"; do rm -rf build; mkdir -p build/glibc; (cd glibc-dev/build/glibc/; unset LD_LIBRARY_PATH; glibc-dev/src/glibc/configure CC="gcc -m32" CXX="g++ -m32" --prefix=/usr --build=i686-pc-linux-gnu --host=i686-pc-linux-gnu $march; make -j7 --silent;) done


(cd glibc-dev/build/glibc/; unset LD_LIBRARY_PATH; glibc-dev/src/glibc/configure --prefix=/usr; make -j7 --silent; make -j7 check --silent);

   5155 PASS
     19 UNSUPPORTED
     18 XFAIL
      4 XPASS
    
 sysdeps/x86_64/multiarch/memrchr-avx2.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
index 9c83c76d3c..f300d7daf4 100644
--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
@@ -35,7 +35,7 @@
 # define VEC_SIZE			32
 # define PAGE_SIZE			4096
 	.section SECTION(.text), "ax", @progbits
-ENTRY(MEMRCHR)
+ENTRY_P2ALIGN(MEMRCHR, 6)
 # ifdef __ILP32__
 	/* Clear upper bits.  */
 	and	%RDX_LP, %RDX_LP
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v1 2/7] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c
  2022-06-24  6:36 [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes Noah Goldstein
@ 2022-06-24  6:36 ` Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24  6:36 UTC (permalink / raw)
  To: libc-alpha

This is in accordance with other files in the multiarch directory.
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c       | 2 +-
 sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S | 2 +-
 sysdeps/x86_64/multiarch/strstr.c                | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index bf52cf96d0..0d28319905 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -627,7 +627,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                                && CPU_FEATURE_USABLE (BMI2)),
                               __strstr_avx512)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
+	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
 
   /* Support sysdeps/x86_64/multiarch/wcschr.c.  */
   IFUNC_IMPL (i, name, wcschr,
diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
index 3d12ffdf1e..c6aa8f45a6 100644
--- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
@@ -267,7 +267,7 @@ L(next_pair3):
 	.p2align 4
 L(switch_strstr):
 	movq	%rdi, %rdi
-	jmp	__strstr_sse2
+	jmp	__strstr_generic
 
 	.p2align 4
 L(cross_page):
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 2fb8b169b6..2b83199245 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -24,17 +24,17 @@
 #include <string.h>
 #undef  strstr
 
-#define STRSTR __strstr_sse2
+#define STRSTR __strstr_generic
 #ifdef SHARED
 # undef libc_hidden_builtin_def
 # define libc_hidden_builtin_def(name) \
-  __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
+  __hidden_ver1 (__strstr_generic, __GI_strstr, __strstr_generic);
 #endif
 
 #include "string/strstr.c"
 
 extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
-extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
+extern __typeof (__redirect_strstr) __strstr_generic attribute_hidden;
 extern __typeof (__redirect_strstr) __strstr_avx512 attribute_hidden;
 
 #include "init-arch.h"
@@ -58,7 +58,7 @@ IFUNC_SELECTOR (void)
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return __strstr_sse2_unaligned;
 
-  return __strstr_sse2;
+  return __strstr_generic;
 }
 
 libc_ifunc_redirected (__redirect_strstr, __libc_strstr, IFUNC_SELECTOR ());
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments
  2022-06-24  6:36 [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 2/7] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
@ 2022-06-24  6:36 ` Noah Goldstein
  2022-06-24 14:32   ` H.J. Lu
                     ` (4 more replies)
  2022-06-24  6:36 ` [PATCH v1 4/7] x86: Add comment with ISA level for all targets support by GCC12.1 Noah Goldstein
                   ` (3 subsequent siblings)
  5 siblings, 5 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24  6:36 UTC (permalink / raw)
  To: libc-alpha

Some ARCH_P features such as Prefer_No_VZEROUPPER used to disable
implementations if true as opposed fo the majority of features
such as AVX2 which are used to enabled features.

Different ISA build levels want override certain disabling
features. For example ISA build level >= 3 should ignore
Prefer_No_VZEROUPPER which means converting the check to
false (as opposed to true for a feature like AVX2).
---
 sysdeps/x86/isa-ifunc-macros.h | 4 ++++
 sysdeps/x86/isa-level.h        | 7 ++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
index ba6826d518..e229e612a4 100644
--- a/sysdeps/x86/isa-ifunc-macros.h
+++ b/sysdeps/x86/isa-ifunc-macros.h
@@ -67,4 +67,8 @@
   (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
    || CPU_FEATURES_ARCH_P (ptr, name))
 
+#define X86_ISA_CPU_FEATURES_ARCH_P_NOT(ptr, name)                            \
+  (!X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                           \
+     && !CPU_FEATURES_ARCH_P (ptr, name))
+
 #endif
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 7cae11c228..e1a30ed83e 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -66,10 +66,10 @@
 
 
 /*
- * CPU Features that are hard coded as enabled depending on ISA build
- *   level.
+ * CPU Features that are hard coded as enabled/disabled depending on
+ * ISA build level.
  *    - Values > 0 features are always ENABLED if:
- *          Value >= MINIMUM_X86_ISA_LEVEL
+ *          Value <= MINIMUM_X86_ISA_LEVEL
  */
 
 
@@ -92,6 +92,7 @@
 /*
  * KNL (the only cpu that sets this supported in cpu-features.h)
  * builds with ISA V1 so this shouldn't harm any architectures.
+ * NB: Only use this feature with the ARCH_P_NOT macro.
  */
 #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v1 4/7] x86: Add comment with ISA level for all targets support by GCC12.1
  2022-06-24  6:36 [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 2/7] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
@ 2022-06-24  6:36 ` Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 5/7] x86: Use ARCH_P_NOT to check Prefer_No_VZeroupper in ifunc-evex.h Noah Goldstein
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24  6:36 UTC (permalink / raw)
  To: libc-alpha

This is just a quality of life change to make it easier to see how the
ISA level will effect a given build.
---
 sysdeps/x86/isa-level.h | 67 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index e1a30ed83e..f14ae5cc00 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -64,8 +64,71 @@
 #define MINIMUM_X86_ISA_LEVEL                                                 \
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
 
-
-/*
+/* ISA levels for known GCC targets as of GCC12.1:
+ *
+ * amdfam10       -> 1
+ * athlon-fx      -> 1
+ * athlon64       -> 1
+ * athlon64-sse3  -> 1
+ * atom           -> 1
+ * barcelona      -> 1
+ * bonnell        -> 1
+ * btver1         -> 1
+ * core2          -> 1
+ * eden-x2        -> 1
+ * eden-x4        -> 1
+ * k8             -> 1
+ * k8-sse3        -> 1
+ * nano           -> 1
+ * nano-1000      -> 1
+ * nano-2000      -> 1
+ * nano-3000      -> 1
+ * nano-x2        -> 1
+ * nano-x4        -> 1
+ * nocona         -> 1
+ * opteron        -> 1
+ * opteron-sse3   -> 1
+ * x86-64         -> 1
+ * bdver1         -> 2
+ * bdver2         -> 2
+ * bdver3         -> 2
+ * btver2         -> 2
+ * core-avx-i     -> 2
+ * corei7         -> 2
+ * corei7-avx     -> 2
+ * goldmont       -> 2
+ * goldmont-plus  -> 2
+ * ivybridge      -> 2
+ * nehalem        -> 2
+ * sandybridge    -> 2
+ * silvermont     -> 2
+ * slm            -> 2
+ * tremont        -> 2
+ * westmere       -> 2
+ * x86-64-v2      -> 2
+ * alderlake      -> 3
+ * bdver4         -> 3
+ * broadwell      -> 3
+ * core-avx2      -> 3
+ * haswell        -> 3
+ * knl            -> 3
+ * knm            -> 3
+ * skylake        -> 3
+ * x86-64-v3      -> 3
+ * znver1         -> 3
+ * znver2         -> 3
+ * znver3         -> 3
+ * cannonlake     -> 4
+ * cascadelake    -> 4
+ * cooperlake     -> 4
+ * icelake-client -> 4
+ * icelake-server -> 4
+ * rocketlake     -> 4
+ * sapphirerapids -> 4
+ * skylake-avx512 -> 4
+ * tigerlake      -> 4
+ * x86-64-v4      -> 4
+ *
  * CPU Features that are hard coded as enabled/disabled depending on
  * ISA build level.
  *    - Values > 0 features are always ENABLED if:
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v1 5/7] x86: Use ARCH_P_NOT to check Prefer_No_VZeroupper in ifunc-evex.h
  2022-06-24  6:36 [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes Noah Goldstein
                   ` (2 preceding siblings ...)
  2022-06-24  6:36 ` [PATCH v1 4/7] x86: Add comment with ISA level for all targets support by GCC12.1 Noah Goldstein
@ 2022-06-24  6:36 ` Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 6/7] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 7/7] x86: Remove unused file wmemcmp-sse4 Noah Goldstein
  5 siblings, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24  6:36 UTC (permalink / raw)
  To: libc-alpha

We want to Prefer_No_VZEROUPPER feature to always be false (i.e we
want to prefer with vzeroupper) when building with ISA level >= 3.
---
 sysdeps/x86_64/multiarch/ifunc-evex.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
index 856c6261f8..454f3c1eab 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -51,8 +51,8 @@ IFUNC_SELECTOR (void)
       if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
 	return OPTIMIZE (avx2_rtm);
 
-      if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				       Prefer_No_VZEROUPPER))
+      if (X86_ISA_CPU_FEATURES_ARCH_P_NOT (cpu_features,
+					   Prefer_No_VZEROUPPER))
 	return OPTIMIZE (avx2);
     }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v1 6/7] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section
  2022-06-24  6:36 [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes Noah Goldstein
                   ` (3 preceding siblings ...)
  2022-06-24  6:36 ` [PATCH v1 5/7] x86: Use ARCH_P_NOT to check Prefer_No_VZeroupper in ifunc-evex.h Noah Goldstein
@ 2022-06-24  6:36 ` Noah Goldstein
  2022-06-24  6:36 ` [PATCH v1 7/7] x86: Remove unused file wmemcmp-sse4 Noah Goldstein
  5 siblings, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24  6:36 UTC (permalink / raw)
  To: libc-alpha

Previously was missing but the two implementations shouldn't get in
the sse2 (generic) text section.
---
 sysdeps/x86_64/multiarch/strlen-vec.S     | 6 +++++-
 sysdeps/x86_64/multiarch/wcslen-sse4_1.S  | 1 +
 sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
index 42b6124dfd..874123d604 100644
--- a/sysdeps/x86_64/multiarch/strlen-vec.S
+++ b/sysdeps/x86_64/multiarch/strlen-vec.S
@@ -28,6 +28,10 @@
 # define SHIFT_RETURN
 #endif
 
+#ifndef SECTION
+# define SECTION(p)	p
+#endif
+
 /* Long lived register in strlen(s), strnlen(s, n) are:
 
 	%xmm3 - zero
@@ -37,7 +41,7 @@
 */
 
 
-.text
+	.section SECTION(.text),"ax",@progbits
 ENTRY(strlen)
 
 /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx.  */
diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
index 7e62621afc..e306a77f51 100644
--- a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
+++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
@@ -1,4 +1,5 @@
 #define AS_WCSLEN
 #define strlen	__wcslen_sse4_1
+#define SECTION(p)	p##.sse4.1
 
 #include "strlen-vec.S"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
index 5fa51fe07c..d2f7dd6e22 100644
--- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
+++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
@@ -1,5 +1,6 @@
 #define AS_WCSLEN
 #define AS_STRNLEN
 #define strlen	__wcsnlen_sse4_1
+#define SECTION(p)	p##.sse4.1
 
 #include "strlen-vec.S"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v1 7/7] x86: Remove unused file wmemcmp-sse4
  2022-06-24  6:36 [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes Noah Goldstein
                   ` (4 preceding siblings ...)
  2022-06-24  6:36 ` [PATCH v1 6/7] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section Noah Goldstein
@ 2022-06-24  6:36 ` Noah Goldstein
  5 siblings, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24  6:36 UTC (permalink / raw)
  To: libc-alpha

The memcmp-sse4 was removed in:

commit 7cbc03d03091d5664060924789afe46d30a5477e
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Fri Apr 15 12:28:00 2022 -0500

    x86: Remove memcmp-sse4.S

so this file does nothing.
---
 sysdeps/x86_64/multiarch/wmemcmp-sse4.S | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 sysdeps/x86_64/multiarch/wmemcmp-sse4.S

diff --git a/sysdeps/x86_64/multiarch/wmemcmp-sse4.S b/sysdeps/x86_64/multiarch/wmemcmp-sse4.S
deleted file mode 100644
index b07973a4f6..0000000000
--- a/sysdeps/x86_64/multiarch/wmemcmp-sse4.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_WMEMCMP 1
-#define MEMCMP __wmemcmp_sse4_1
-
-#include "memcmp-sse4.S"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments
  2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
@ 2022-06-24 14:32   ` H.J. Lu
  2022-06-24 14:49     ` H.J. Lu
  2022-06-24 16:43     ` Noah Goldstein
  2022-06-24 20:10   ` [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h Noah Goldstein
                     ` (3 subsequent siblings)
  4 siblings, 2 replies; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 14:32 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Thu, Jun 23, 2022 at 11:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Some ARCH_P features such as Prefer_No_VZEROUPPER used to disable
> implementations if true as opposed fo the majority of features
> such as AVX2 which are used to enabled features.
>
> Different ISA build levels want override certain disabling
> features. For example ISA build level >= 3 should ignore
> Prefer_No_VZEROUPPER which means converting the check to
> false (as opposed to true for a feature like AVX2).
> ---
>  sysdeps/x86/isa-ifunc-macros.h | 4 ++++
>  sysdeps/x86/isa-level.h        | 7 ++++---
>  2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> index ba6826d518..e229e612a4 100644
> --- a/sysdeps/x86/isa-ifunc-macros.h
> +++ b/sysdeps/x86/isa-ifunc-macros.h
> @@ -67,4 +67,8 @@
>    (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
>     || CPU_FEATURES_ARCH_P (ptr, name))
>
> +#define X86_ISA_CPU_FEATURES_ARCH_P_NOT(ptr, name)                            \
> +  (!X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                           \
> +     && !CPU_FEATURES_ARCH_P (ptr, name))
> +
>  #endif
> diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> index 7cae11c228..e1a30ed83e 100644
> --- a/sysdeps/x86/isa-level.h
> +++ b/sysdeps/x86/isa-level.h
> @@ -66,10 +66,10 @@
>
>
>  /*
> - * CPU Features that are hard coded as enabled depending on ISA build
> - *   level.
> + * CPU Features that are hard coded as enabled/disabled depending on
> + * ISA build level.
>   *    - Values > 0 features are always ENABLED if:
> - *          Value >= MINIMUM_X86_ISA_LEVEL
> + *          Value <= MINIMUM_X86_ISA_LEVEL
>   */
>
>
> @@ -92,6 +92,7 @@
>  /*
>   * KNL (the only cpu that sets this supported in cpu-features.h)
>   * builds with ISA V1 so this shouldn't harm any architectures.
> + * NB: Only use this feature with the ARCH_P_NOT macro.
>   */
>  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
>
> --
> 2.34.1
>

This is a bug fix.  Please make it a separate patch set.   Please also
send individual patches,
instead of a set, if there are no dependencies between patches.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments
  2022-06-24 14:32   ` H.J. Lu
@ 2022-06-24 14:49     ` H.J. Lu
  2022-06-24 16:43     ` Noah Goldstein
  1 sibling, 0 replies; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 14:49 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 7:32 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Jun 23, 2022 at 11:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Some ARCH_P features such as Prefer_No_VZEROUPPER used to disable
> > implementations if true as opposed fo the majority of features
> > such as AVX2 which are used to enabled features.
> >
> > Different ISA build levels want override certain disabling
> > features. For example ISA build level >= 3 should ignore
> > Prefer_No_VZEROUPPER which means converting the check to
> > false (as opposed to true for a feature like AVX2).
> > ---
> >  sysdeps/x86/isa-ifunc-macros.h | 4 ++++
> >  sysdeps/x86/isa-level.h        | 7 ++++---
> >  2 files changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > index ba6826d518..e229e612a4 100644
> > --- a/sysdeps/x86/isa-ifunc-macros.h
> > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > @@ -67,4 +67,8 @@
> >    (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> >     || CPU_FEATURES_ARCH_P (ptr, name))
> >
> > +#define X86_ISA_CPU_FEATURES_ARCH_P_NOT(ptr, name)                            \
> > +  (!X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                           \
> > +     && !CPU_FEATURES_ARCH_P (ptr, name))
> > +
> >  #endif
> > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > index 7cae11c228..e1a30ed83e 100644
> > --- a/sysdeps/x86/isa-level.h
> > +++ b/sysdeps/x86/isa-level.h
> > @@ -66,10 +66,10 @@
> >
> >
> >  /*
> > - * CPU Features that are hard coded as enabled depending on ISA build
> > - *   level.
> > + * CPU Features that are hard coded as enabled/disabled depending on
> > + * ISA build level.
> >   *    - Values > 0 features are always ENABLED if:
> > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > + *          Value <= MINIMUM_X86_ISA_LEVEL
> >   */
> >
> >
> > @@ -92,6 +92,7 @@
> >  /*
> >   * KNL (the only cpu that sets this supported in cpu-features.h)
> >   * builds with ISA V1 so this shouldn't harm any architectures.
> > + * NB: Only use this feature with the ARCH_P_NOT macro.
> >   */
> >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> >
> > --
> > 2.34.1
> >
>
> This is a bug fix.  Please make it a separate patch set.   Please also
> send individual patches,
> instead of a set, if there are no dependencies between patches.
>

How about something like this?

diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
index ba6826d518..108d9cf023 100644
--- a/sysdeps/x86/isa-ifunc-macros.h
+++ b/sysdeps/x86/isa-ifunc-macros.h
@@ -63,8 +63,8 @@
   (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
    || CPU_FEATURE_USABLE_P (ptr, name))

-#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
-   || CPU_FEATURES_ARCH_P (ptr, name))
+#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
+  (not (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                 \
+       || CPU_FEATURES_ARCH_P (ptr, name)))

 #endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h
b/sysdeps/x86_64/multiarch/ifunc-evex.h
index 856c6261f8..2d59e022f0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-                 AVX_Fast_Unaligned_Load))
+                 AVX_Fast_Unaligned_Load,))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
     && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
@@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
   return OPTIMIZE (avx2_rtm);

       if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-                  Prefer_No_VZEROUPPER))
+                  Prefer_No_VZEROUPPER, !))
   return OPTIMIZE (avx2);
     }


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments
  2022-06-24 14:32   ` H.J. Lu
  2022-06-24 14:49     ` H.J. Lu
@ 2022-06-24 16:43     ` Noah Goldstein
  1 sibling, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 16:43 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 7:33 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Jun 23, 2022 at 11:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Some ARCH_P features such as Prefer_No_VZEROUPPER used to disable
> > implementations if true as opposed fo the majority of features
> > such as AVX2 which are used to enabled features.
> >
> > Different ISA build levels want override certain disabling
> > features. For example ISA build level >= 3 should ignore
> > Prefer_No_VZEROUPPER which means converting the check to
> > false (as opposed to true for a feature like AVX2).
> > ---
> >  sysdeps/x86/isa-ifunc-macros.h | 4 ++++
> >  sysdeps/x86/isa-level.h        | 7 ++++---
> >  2 files changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > index ba6826d518..e229e612a4 100644
> > --- a/sysdeps/x86/isa-ifunc-macros.h
> > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > @@ -67,4 +67,8 @@
> >    (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> >     || CPU_FEATURES_ARCH_P (ptr, name))
> >
> > +#define X86_ISA_CPU_FEATURES_ARCH_P_NOT(ptr, name)                            \
> > +  (!X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                           \
> > +     && !CPU_FEATURES_ARCH_P (ptr, name))
> > +
> >  #endif
> > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > index 7cae11c228..e1a30ed83e 100644
> > --- a/sysdeps/x86/isa-level.h
> > +++ b/sysdeps/x86/isa-level.h
> > @@ -66,10 +66,10 @@
> >
> >
> >  /*
> > - * CPU Features that are hard coded as enabled depending on ISA build
> > - *   level.
> > + * CPU Features that are hard coded as enabled/disabled depending on
> > + * ISA build level.
> >   *    - Values > 0 features are always ENABLED if:
> > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > + *          Value <= MINIMUM_X86_ISA_LEVEL
> >   */
> >
> >
> > @@ -92,6 +92,7 @@
> >  /*
> >   * KNL (the only cpu that sets this supported in cpu-features.h)
> >   * builds with ISA V1 so this shouldn't harm any architectures.
> > + * NB: Only use this feature with the ARCH_P_NOT macro.
> >   */
> >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> >
> > --
> > 2.34.1
> >
>
> This is a bug fix.  Please make it a separate patch set.   Please also
> send individual patches,
> instead of a set, if there are no dependencies between patches.

Split patches. This and the change to ifunc-evex in series. Rest standalone.
>
> Thanks.
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
  2022-06-24 14:32   ` H.J. Lu
@ 2022-06-24 20:10   ` Noah Goldstein
  2022-06-24 20:32     ` H.J. Lu
  2022-06-24 21:46   ` [PATCH v3] " Noah Goldstein
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 20:10 UTC (permalink / raw)
  To: libc-alpha

Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
CPU_FEATURES_ARCH_P check can be inverted if the
MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
the check.

Use this new macro to correct the backwards check in ifunc-evex.h
---
 sysdeps/x86/isa-ifunc-macros.h        | 29 +++++++++++++++++++++------
 sysdeps/x86/isa-level.h               | 26 +++++++++---------------
 sysdeps/x86_64/multiarch/ifunc-evex.h |  4 ++--
 3 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
index ba6826d518..a3c98c841c 100644
--- a/sysdeps/x86/isa-ifunc-macros.h
+++ b/sysdeps/x86/isa-ifunc-macros.h
@@ -56,15 +56,32 @@
 # define X86_IFUNC_IMPL_ADD_V1(...)
 #endif
 
-#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
-  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
+/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
+   should only be used to check if a condition is true. I.e:
+
+        if (X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Good
+        if (!X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Bad
+
+   There should be no need for inverting USABLE_P checks, but there is
+   often need for inverting ARCH_P checks. If you want to get the not
+   of an ARCH_P feature do:
+
+        if (X86_ISA_CPU_FEATURES_ARCH_P (..., !)) // Good
+ */
+
 
 #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
    || CPU_FEATURE_USABLE_P (ptr, name))
 
-#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
-   || CPU_FEATURES_ARCH_P (ptr, name))
+
+/* When using X86_ISA_CPU_FEATURES_ARCH_P a third argument must be
+   provided to optionally invert the runtime CPU_FEATURES_ARCH_P
+   check.  This is so we can consistently constant-evaluate conditions
+   using Feature_X86_ISA_LEVEL <= MINIMUM_X86_ISA_LEVEL.  */
+#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
+   || not CPU_FEATURES_ARCH_P (ptr, name))
+
 
 #endif
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 7cae11c228..bad9aba099 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -65,12 +65,8 @@
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
 
 
-/*
- * CPU Features that are hard coded as enabled depending on ISA build
- *   level.
- *    - Values > 0 features are always ENABLED if:
- *          Value >= MINIMUM_X86_ISA_LEVEL
- */
+/* CPU Features that are default set depending on ISA build level.
+   Feature is assumed set if: Value <= MINIMUM_X86_ISA_LEVEL.  */
 
 
 /* ISA level >= 4 guaranteed includes.  */
@@ -81,18 +77,16 @@
 #define AVX2_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
 
-/*
- * NB: This may not be fully assumable for ISA level >= 3. From
- * looking over the architectures supported in cpu-features.h the
- * following CPUs may have an issue with this being default set:
- *      - AMD Excavator
- */
+/* NB: This feature is enabled when ISA level >= 3, which was disabled
+   for the following CPUs:
+        - AMD Excavator
+   when ISA level < 3.  */
 #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
 
-/*
- * KNL (the only cpu that sets this supported in cpu-features.h)
- * builds with ISA V1 so this shouldn't harm any architectures.
- */
+/* NB: This feature is disabled when ISA level >= 3, which was enabled
+   for the following CPUs:
+        - Intel KNL
+   when ISA level < 3.  */
 #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
 
 #define ISA_SHOULD_BUILD(isa_build_level)                              \
diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
index 856c6261f8..310cfd269f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				      AVX_Fast_Unaligned_Load))
+				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
@@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
 	return OPTIMIZE (avx2_rtm);
 
       if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				       Prefer_No_VZEROUPPER))
+				       Prefer_No_VZEROUPPER, !))
 	return OPTIMIZE (avx2);
     }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 20:10   ` [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h Noah Goldstein
@ 2022-06-24 20:32     ` H.J. Lu
  2022-06-24 21:26       ` Noah Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 20:32 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 1:10 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> CPU_FEATURES_ARCH_P check can be inverted if the
> MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> the check.
>
> Use this new macro to correct the backwards check in ifunc-evex.h
> ---
>  sysdeps/x86/isa-ifunc-macros.h        | 29 +++++++++++++++++++++------
>  sysdeps/x86/isa-level.h               | 26 +++++++++---------------
>  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 ++--
>  3 files changed, 35 insertions(+), 24 deletions(-)
>
> diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> index ba6826d518..a3c98c841c 100644
> --- a/sysdeps/x86/isa-ifunc-macros.h
> +++ b/sysdeps/x86/isa-ifunc-macros.h
> @@ -56,15 +56,32 @@
>  # define X86_IFUNC_IMPL_ADD_V1(...)
>  #endif
>
> -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> +   should only be used to check if a condition is true. I.e:
> +
> +        if (X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Good
> +        if (!X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Bad

If (X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) works,
if (!X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) should also
work.

> +
> +   There should be no need for inverting USABLE_P checks, but there is
> +   often need for inverting ARCH_P checks. If you want to get the not
> +   of an ARCH_P feature do:
> +
> +        if (X86_ISA_CPU_FEATURES_ARCH_P (..., !)) // Good
> + */
> +
>
>  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
>     || CPU_FEATURE_USABLE_P (ptr, name))
>
> -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> -   || CPU_FEATURES_ARCH_P (ptr, name))
> +
> +/* When using X86_ISA_CPU_FEATURES_ARCH_P a third argument must be
> +   provided to optionally invert the runtime CPU_FEATURES_ARCH_P
> +   check.  This is so we can consistently constant-evaluate conditions
> +   using Feature_X86_ISA_LEVEL <= MINIMUM_X86_ISA_LEVEL.  */
> +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> +   || not CPU_FEATURES_ARCH_P (ptr, name))
> +
>
>  #endif
> diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> index 7cae11c228..bad9aba099 100644
> --- a/sysdeps/x86/isa-level.h
> +++ b/sysdeps/x86/isa-level.h
> @@ -65,12 +65,8 @@
>    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
>
>
> -/*
> - * CPU Features that are hard coded as enabled depending on ISA build
> - *   level.
> - *    - Values > 0 features are always ENABLED if:
> - *          Value >= MINIMUM_X86_ISA_LEVEL
> - */
> +/* CPU Features that are default set depending on ISA build level.
> +   Feature is assumed set if: Value <= MINIMUM_X86_ISA_LEVEL.  */

This isn't accurate for Prefer_No_VZEROUPPER_X86_ISA_LEVEL.
I think this should be removed.  Each feature needs a comment to
describe the default.

>
>  /* ISA level >= 4 guaranteed includes.  */
> @@ -81,18 +77,16 @@
>  #define AVX2_X86_ISA_LEVEL 3
>  #define BMI2_X86_ISA_LEVEL 3
>
> -/*
> - * NB: This may not be fully assumable for ISA level >= 3. From
> - * looking over the architectures supported in cpu-features.h the
> - * following CPUs may have an issue with this being default set:
> - *      - AMD Excavator
> - */
> +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> +   for the following CPUs:
> +        - AMD Excavator
> +   when ISA level < 3.  */
>  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
>
> -/*
> - * KNL (the only cpu that sets this supported in cpu-features.h)
> - * builds with ISA V1 so this shouldn't harm any architectures.
> - */
> +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> +   for the following CPUs:
> +        - Intel KNL
> +   when ISA level < 3.  */
>  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
>
>  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> index 856c6261f8..310cfd269f 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
>        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                     AVX_Fast_Unaligned_Load))
> +                                     AVX_Fast_Unaligned_Load, ))
>      {
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
>           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
>         return OPTIMIZE (avx2_rtm);
>
>        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                      Prefer_No_VZEROUPPER))
> +                                      Prefer_No_VZEROUPPER, !))
>         return OPTIMIZE (avx2);
>      }
>
> --
> 2.34.1
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 20:32     ` H.J. Lu
@ 2022-06-24 21:26       ` Noah Goldstein
  2022-06-24 21:36         ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 21:26 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 1:33 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 1:10 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > CPU_FEATURES_ARCH_P check can be inverted if the
> > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > the check.
> >
> > Use this new macro to correct the backwards check in ifunc-evex.h
> > ---
> >  sysdeps/x86/isa-ifunc-macros.h        | 29 +++++++++++++++++++++------
> >  sysdeps/x86/isa-level.h               | 26 +++++++++---------------
> >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 ++--
> >  3 files changed, 35 insertions(+), 24 deletions(-)
> >
> > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > index ba6826d518..a3c98c841c 100644
> > --- a/sysdeps/x86/isa-ifunc-macros.h
> > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > @@ -56,15 +56,32 @@
> >  # define X86_IFUNC_IMPL_ADD_V1(...)
> >  #endif
> >
> > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > +   should only be used to check if a condition is true. I.e:
> > +
> > +        if (X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Good
> > +        if (!X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Bad
>
> If (X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) works,
> if (!X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) should also
> work.
>
> > +
> > +   There should be no need for inverting USABLE_P checks, but there is
> > +   often need for inverting ARCH_P checks. If you want to get the not
> > +   of an ARCH_P feature do:
> > +
> > +        if (X86_ISA_CPU_FEATURES_ARCH_P (..., !)) // Good
> > + */
> > +
> >
> >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> >     || CPU_FEATURE_USABLE_P (ptr, name))
> >
> > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > +
> > +/* When using X86_ISA_CPU_FEATURES_ARCH_P a third argument must be
> > +   provided to optionally invert the runtime CPU_FEATURES_ARCH_P
> > +   check.  This is so we can consistently constant-evaluate conditions
> > +   using Feature_X86_ISA_LEVEL <= MINIMUM_X86_ISA_LEVEL.  */
> > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > +
> >
> >  #endif
> > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > index 7cae11c228..bad9aba099 100644
> > --- a/sysdeps/x86/isa-level.h
> > +++ b/sysdeps/x86/isa-level.h
> > @@ -65,12 +65,8 @@
> >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> >
> >
> > -/*
> > - * CPU Features that are hard coded as enabled depending on ISA build
> > - *   level.
> > - *    - Values > 0 features are always ENABLED if:
> > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > - */
> > +/* CPU Features that are default set depending on ISA build level.
> > +   Feature is assumed set if: Value <= MINIMUM_X86_ISA_LEVEL.  */
>
> This isn't accurate for Prefer_No_VZEROUPPER_X86_ISA_LEVEL.
> I think this should be removed.  Each feature needs a comment to
> describe the default.

How about:

/* Depending on ISA level some feature checks will default evaluate
   to true if the MINIMUM_X86_ISA_LEVEL is high enough. The check
   on a feature will default evaluate to true if:
   Value <= MINIMUM_X86_ISA_LEVEL. */

?

>
> >
> >  /* ISA level >= 4 guaranteed includes.  */
> > @@ -81,18 +77,16 @@
> >  #define AVX2_X86_ISA_LEVEL 3
> >  #define BMI2_X86_ISA_LEVEL 3
> >
> > -/*
> > - * NB: This may not be fully assumable for ISA level >= 3. From
> > - * looking over the architectures supported in cpu-features.h the
> > - * following CPUs may have an issue with this being default set:
> > - *      - AMD Excavator
> > - */
> > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > +   for the following CPUs:
> > +        - AMD Excavator
> > +   when ISA level < 3.  */
> >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> >
> > -/*
> > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > - * builds with ISA V1 so this shouldn't harm any architectures.
> > - */
> > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > +   for the following CPUs:
> > +        - Intel KNL
> > +   when ISA level < 3.  */
> >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> >
> >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > index 856c6261f8..310cfd269f 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                     AVX_Fast_Unaligned_Load))
> > +                                     AVX_Fast_Unaligned_Load, ))
> >      {
> >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> >         return OPTIMIZE (avx2_rtm);
> >
> >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                      Prefer_No_VZEROUPPER))
> > +                                      Prefer_No_VZEROUPPER, !))
> >         return OPTIMIZE (avx2);
> >      }
> >
> > --
> > 2.34.1
> >
>
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 21:26       ` Noah Goldstein
@ 2022-06-24 21:36         ` H.J. Lu
  0 siblings, 0 replies; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 21:36 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 2:26 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 1:33 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Fri, Jun 24, 2022 at 1:10 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > > CPU_FEATURES_ARCH_P check can be inverted if the
> > > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > > the check.
> > >
> > > Use this new macro to correct the backwards check in ifunc-evex.h
> > > ---
> > >  sysdeps/x86/isa-ifunc-macros.h        | 29 +++++++++++++++++++++------
> > >  sysdeps/x86/isa-level.h               | 26 +++++++++---------------
> > >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 ++--
> > >  3 files changed, 35 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > > index ba6826d518..a3c98c841c 100644
> > > --- a/sysdeps/x86/isa-ifunc-macros.h
> > > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > > @@ -56,15 +56,32 @@
> > >  # define X86_IFUNC_IMPL_ADD_V1(...)
> > >  #endif
> > >
> > > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > > +   should only be used to check if a condition is true. I.e:
> > > +
> > > +        if (X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Good
> > > +        if (!X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) // Bad
> >
> > If (X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) works,
> > if (!X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P (...)) should also
> > work.
> >
> > > +
> > > +   There should be no need for inverting USABLE_P checks, but there is
> > > +   often need for inverting ARCH_P checks. If you want to get the not
> > > +   of an ARCH_P feature do:
> > > +
> > > +        if (X86_ISA_CPU_FEATURES_ARCH_P (..., !)) // Good
> > > + */
> > > +
> > >
> > >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > >     || CPU_FEATURE_USABLE_P (ptr, name))
> > >
> > > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > > +
> > > +/* When using X86_ISA_CPU_FEATURES_ARCH_P a third argument must be
> > > +   provided to optionally invert the runtime CPU_FEATURES_ARCH_P
> > > +   check.  This is so we can consistently constant-evaluate conditions
> > > +   using Feature_X86_ISA_LEVEL <= MINIMUM_X86_ISA_LEVEL.  */
> > > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > > +
> > >
> > >  #endif
> > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > > index 7cae11c228..bad9aba099 100644
> > > --- a/sysdeps/x86/isa-level.h
> > > +++ b/sysdeps/x86/isa-level.h
> > > @@ -65,12 +65,8 @@
> > >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> > >
> > >
> > > -/*
> > > - * CPU Features that are hard coded as enabled depending on ISA build
> > > - *   level.
> > > - *    - Values > 0 features are always ENABLED if:
> > > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > > - */
> > > +/* CPU Features that are default set depending on ISA build level.
> > > +   Feature is assumed set if: Value <= MINIMUM_X86_ISA_LEVEL.  */
> >
> > This isn't accurate for Prefer_No_VZEROUPPER_X86_ISA_LEVEL.
> > I think this should be removed.  Each feature needs a comment to
> > describe the default.
>
> How about:
>
> /* Depending on ISA level some feature checks will default evaluate
>    to true if the MINIMUM_X86_ISA_LEVEL is high enough. The check
>    on a feature will default evaluate to true if:
>    Value <= MINIMUM_X86_ISA_LEVEL. */

Depending on the minimum ISA level, a feature check result can be a
compile-time constant.

True or false may be confusing since the meaning of the compile-time
constant depends on the feature.

> ?
>
> >
> > >
> > >  /* ISA level >= 4 guaranteed includes.  */
> > > @@ -81,18 +77,16 @@
> > >  #define AVX2_X86_ISA_LEVEL 3
> > >  #define BMI2_X86_ISA_LEVEL 3
> > >
> > > -/*
> > > - * NB: This may not be fully assumable for ISA level >= 3. From
> > > - * looking over the architectures supported in cpu-features.h the
> > > - * following CPUs may have an issue with this being default set:
> > > - *      - AMD Excavator
> > > - */
> > > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > > +   for the following CPUs:
> > > +        - AMD Excavator
> > > +   when ISA level < 3.  */
> > >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> > >
> > > -/*
> > > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > > - * builds with ISA V1 so this shouldn't harm any architectures.
> > > - */
> > > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > > +   for the following CPUs:
> > > +        - Intel KNL
> > > +   when ISA level < 3.  */
> > >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> > >
> > >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> > > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > index 856c6261f8..310cfd269f 100644
> > > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> > >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > -                                     AVX_Fast_Unaligned_Load))
> > > +                                     AVX_Fast_Unaligned_Load, ))
> > >      {
> > >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> > >         return OPTIMIZE (avx2_rtm);
> > >
> > >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > -                                      Prefer_No_VZEROUPPER))
> > > +                                      Prefer_No_VZEROUPPER, !))
> > >         return OPTIMIZE (avx2);
> > >      }
> > >
> > > --
> > > 2.34.1
> > >
> >
> >
> > --
> > H.J.



-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v3] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
  2022-06-24 14:32   ` H.J. Lu
  2022-06-24 20:10   ` [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h Noah Goldstein
@ 2022-06-24 21:46   ` Noah Goldstein
  2022-06-24 22:15     ` H.J. Lu
  2022-06-24 22:29   ` [PATCH v4] " Noah Goldstein
  2022-06-24 23:15   ` [PATCH v5] " Noah Goldstein
  4 siblings, 1 reply; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 21:46 UTC (permalink / raw)
  To: libc-alpha

Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
CPU_FEATURES_ARCH_P check can be inverted if the
MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
the check.

Use this new macro to correct the backwards check in ifunc-evex.h
---
 sysdeps/x86/isa-ifunc-macros.h        | 50 +++++++++++++++++++++++----
 sysdeps/x86/isa-level.h               | 32 ++++++++---------
 sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
 3 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
index ba6826d518..8ceca29b63 100644
--- a/sysdeps/x86/isa-ifunc-macros.h
+++ b/sysdeps/x86/isa-ifunc-macros.h
@@ -56,15 +56,53 @@
 # define X86_IFUNC_IMPL_ADD_V1(...)
 #endif
 
-#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
-  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
+/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
+   macros are wrappers for the the respective
+   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks. They differ in two
+   ways.
+
+    1. They will first check the feature's default isa value (see
+       isa-level.h for the values) and if the feature's default value
+       is <= MINIMUM_X86_ISA_LEVEL they will constant evaluate to
+       true.  If default check does not evaluate to true they will
+       then do the runtime check.
+
+    2. The ARCH_P version has a third argument `not`. The `not`
+       argument may either be '!' or empty. It will be used to
+       optionally invert the condition of the RUNTIME check.
+
+    i.e
+
+    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is set                                  -> True
+
+    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is set                                  -> True
+
+    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is NOT set                              -> True
+
+    All cases not listed will evaluate to false.
+
+    Only the A) cases will constantly evaluate.
+
+    WARNING: The constant evaluation of certain feature checks is
+    relied upon to allow the compiler to perform deadcode elimination
+    to remove references to non-built functions.  Misuse of these
+    macros will likely result in a link-time error.
+ */
 
 #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
    || CPU_FEATURE_USABLE_P (ptr, name))
 
-#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
-   || CPU_FEATURES_ARCH_P (ptr, name))
+
+#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
+   || not CPU_FEATURES_ARCH_P (ptr, name))
+
 
 #endif
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 7cae11c228..73937fecdc 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -64,14 +64,8 @@
 #define MINIMUM_X86_ISA_LEVEL                                                 \
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
 
-
-/*
- * CPU Features that are hard coded as enabled depending on ISA build
- *   level.
- *    - Values > 0 features are always ENABLED if:
- *          Value >= MINIMUM_X86_ISA_LEVEL
- */
-
+/* Depending on the minimum ISA level, a feature check result can be a
+   compile-time constant.. */
 
 /* ISA level >= 4 guaranteed includes.  */
 #define AVX512VL_X86_ISA_LEVEL 4
@@ -81,20 +75,22 @@
 #define AVX2_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
 
-/*
- * NB: This may not be fully assumable for ISA level >= 3. From
- * looking over the architectures supported in cpu-features.h the
- * following CPUs may have an issue with this being default set:
- *      - AMD Excavator
- */
+/* NB: This feature is enabled when ISA level >= 3, which was disabled
+   for the following CPUs:
+        - AMD Excavator
+   when ISA level < 3.  */
 #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
 
-/*
- * KNL (the only cpu that sets this supported in cpu-features.h)
- * builds with ISA V1 so this shouldn't harm any architectures.
- */
+/* NB: This feature is disabled when ISA level >= 3, which was enabled
+   for the following CPUs:
+        - Intel KNL
+   when ISA level < 3.  */
 #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
 
+
+/* ISA level >= 2 guaranteed includes.  */
+#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
+
 #define ISA_SHOULD_BUILD(isa_build_level)                              \
   (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
    || defined ISA_DEFAULT_IMPL
diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
index 856c6261f8..310cfd269f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				      AVX_Fast_Unaligned_Load))
+				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
@@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
 	return OPTIMIZE (avx2_rtm);
 
       if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				       Prefer_No_VZEROUPPER))
+				       Prefer_No_VZEROUPPER, !))
 	return OPTIMIZE (avx2);
     }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 21:46   ` [PATCH v3] " Noah Goldstein
@ 2022-06-24 22:15     ` H.J. Lu
  2022-06-24 22:29       ` Noah Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 22:15 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

 On Fri, Jun 24, 2022 at 2:46 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> CPU_FEATURES_ARCH_P check can be inverted if the
> MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> the check.
>
> Use this new macro to correct the backwards check in ifunc-evex.h
> ---
>  sysdeps/x86/isa-ifunc-macros.h        | 50 +++++++++++++++++++++++----
>  sysdeps/x86/isa-level.h               | 32 ++++++++---------
>  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
>  3 files changed, 60 insertions(+), 26 deletions(-)
>
> diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> index ba6826d518..8ceca29b63 100644
> --- a/sysdeps/x86/isa-ifunc-macros.h
> +++ b/sysdeps/x86/isa-ifunc-macros.h
> @@ -56,15 +56,53 @@
>  # define X86_IFUNC_IMPL_ADD_V1(...)
>  #endif
>
> -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> +   macros are wrappers for the the respective
> +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks. They differ in two
> +   ways.
> +
> +    1. They will first check the feature's default isa value (see
> +       isa-level.h for the values) and if the feature's default value
> +       is <= MINIMUM_X86_ISA_LEVEL they will constant evaluate to
> +       true.  If default check does not evaluate to true they will
> +       then do the runtime check.
> +
> +    2. The ARCH_P version has a third argument `not`. The `not`
> +       argument may either be '!' or empty. It will be used to
> +       optionally invert the condition of the RUNTIME check.
>

2 spaces after a period.

1. The USABLE_P version is evaluated to true when the feature
is enabled.
2. The ARCH_P version has a third argument `not`.  The `not`
argument can either be '!' or empty.  If the feature is enabled
above an ISA level, the third argument should be empty and
the expression is evaluated to true when the feature is enabled.
If the feature is disabled above an ISA level, the third argument
should be `!` and the expression is evaluated to true when the
feature is disabled.

> +    i.e
> +
> +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is set                                  -> True
> +
> +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is set                                  -> True
> +
> +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is NOT set                              -> True
> +
> +    All cases not listed will evaluate to false.
> +
> +    Only the A) cases will constantly evaluate.
> +
> +    WARNING: The constant evaluation of certain feature checks is
> +    relied upon to allow the compiler to perform deadcode elimination
> +    to remove references to non-built functions.  Misuse of these
> +    macros will likely result in a link-time error.
> + */

No need for the above comments.

>  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
>     || CPU_FEATURE_USABLE_P (ptr, name))
>
> -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> -   || CPU_FEATURES_ARCH_P (ptr, name))
> +
> +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> +   || not CPU_FEATURES_ARCH_P (ptr, name))
> +
>
>  #endif
> diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> index 7cae11c228..73937fecdc 100644
> --- a/sysdeps/x86/isa-level.h
> +++ b/sysdeps/x86/isa-level.h
> @@ -64,14 +64,8 @@
>  #define MINIMUM_X86_ISA_LEVEL                                                 \
>    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
>
> -
> -/*
> - * CPU Features that are hard coded as enabled depending on ISA build
> - *   level.
> - *    - Values > 0 features are always ENABLED if:
> - *          Value >= MINIMUM_X86_ISA_LEVEL
> - */
> -
> +/* Depending on the minimum ISA level, a feature check result can be a
> +   compile-time constant.. */
>
>  /* ISA level >= 4 guaranteed includes.  */
>  #define AVX512VL_X86_ISA_LEVEL 4
> @@ -81,20 +75,22 @@
>  #define AVX2_X86_ISA_LEVEL 3
>  #define BMI2_X86_ISA_LEVEL 3
>
> -/*
> - * NB: This may not be fully assumable for ISA level >= 3. From
> - * looking over the architectures supported in cpu-features.h the
> - * following CPUs may have an issue with this being default set:
> - *      - AMD Excavator
> - */
> +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> +   for the following CPUs:
> +        - AMD Excavator
> +   when ISA level < 3.  */
>  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
>
> -/*
> - * KNL (the only cpu that sets this supported in cpu-features.h)
> - * builds with ISA V1 so this shouldn't harm any architectures.
> - */
> +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> +   for the following CPUs:
> +        - Intel KNL
> +   when ISA level < 3.  */
>  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
>
> +
> +/* ISA level >= 2 guaranteed includes.  */
> +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> +
>  #define ISA_SHOULD_BUILD(isa_build_level)                              \
>    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
>     || defined ISA_DEFAULT_IMPL
> diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> index 856c6261f8..310cfd269f 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
>        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                     AVX_Fast_Unaligned_Load))
> +                                     AVX_Fast_Unaligned_Load, ))
>      {
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
>           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
>         return OPTIMIZE (avx2_rtm);
>
>        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                      Prefer_No_VZEROUPPER))
> +                                      Prefer_No_VZEROUPPER, !))
>         return OPTIMIZE (avx2);
>      }
>
> --
> 2.34.1
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v4] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
                     ` (2 preceding siblings ...)
  2022-06-24 21:46   ` [PATCH v3] " Noah Goldstein
@ 2022-06-24 22:29   ` Noah Goldstein
  2022-06-24 22:41     ` H.J. Lu
  2022-06-24 23:15   ` [PATCH v5] " Noah Goldstein
  4 siblings, 1 reply; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 22:29 UTC (permalink / raw)
  To: libc-alpha

Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
CPU_FEATURES_ARCH_P check can be inverted if the
MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
the check.

Use this new macro to correct the backwards check in ifunc-evex.h
---
 sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
 sysdeps/x86/isa-level.h               | 32 ++++++++-----------
 sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
 3 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
index ba6826d518..4cdc71e40e 100644
--- a/sysdeps/x86/isa-ifunc-macros.h
+++ b/sysdeps/x86/isa-ifunc-macros.h
@@ -56,15 +56,49 @@
 # define X86_IFUNC_IMPL_ADD_V1(...)
 #endif
 
-#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
-  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
+/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
+   macros are wrappers for the the respective
+   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
+   ways.
+
+    1.  The USABLE_P version is evaluated to true when the feature
+        is enabled.
+
+    2.  The ARCH_P version has a third argument `not`.  The `not`
+        argument can either be '!' or empty.  If the feature is
+        enabled above an ISA level, the third argument should be empty
+        and the expression is evaluated to true when the feature is
+        enabled.  If the feature is disabled above an ISA level, the
+        third argument should be `!` and the expression is evaluated
+        to true when the feature is disabled.
+
+    i.e
+
+    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is set                                  -> True
+
+    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is set                                  -> True
+
+    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is NOT set                              -> True
+
+    All cases not listed will evaluate to false.
+
+    Only the A) cases will constantly evaluate.
+ */
 
 #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
    || CPU_FEATURE_USABLE_P (ptr, name))
 
-#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
-   || CPU_FEATURES_ARCH_P (ptr, name))
+
+#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
+   || not CPU_FEATURES_ARCH_P (ptr, name))
+
 
 #endif
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 7cae11c228..73937fecdc 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -64,14 +64,8 @@
 #define MINIMUM_X86_ISA_LEVEL                                                 \
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
 
-
-/*
- * CPU Features that are hard coded as enabled depending on ISA build
- *   level.
- *    - Values > 0 features are always ENABLED if:
- *          Value >= MINIMUM_X86_ISA_LEVEL
- */
-
+/* Depending on the minimum ISA level, a feature check result can be a
+   compile-time constant.. */
 
 /* ISA level >= 4 guaranteed includes.  */
 #define AVX512VL_X86_ISA_LEVEL 4
@@ -81,20 +75,22 @@
 #define AVX2_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
 
-/*
- * NB: This may not be fully assumable for ISA level >= 3. From
- * looking over the architectures supported in cpu-features.h the
- * following CPUs may have an issue with this being default set:
- *      - AMD Excavator
- */
+/* NB: This feature is enabled when ISA level >= 3, which was disabled
+   for the following CPUs:
+        - AMD Excavator
+   when ISA level < 3.  */
 #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
 
-/*
- * KNL (the only cpu that sets this supported in cpu-features.h)
- * builds with ISA V1 so this shouldn't harm any architectures.
- */
+/* NB: This feature is disabled when ISA level >= 3, which was enabled
+   for the following CPUs:
+        - Intel KNL
+   when ISA level < 3.  */
 #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
 
+
+/* ISA level >= 2 guaranteed includes.  */
+#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
+
 #define ISA_SHOULD_BUILD(isa_build_level)                              \
   (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
    || defined ISA_DEFAULT_IMPL
diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
index 856c6261f8..310cfd269f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				      AVX_Fast_Unaligned_Load))
+				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
@@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
 	return OPTIMIZE (avx2_rtm);
 
       if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				       Prefer_No_VZEROUPPER))
+				       Prefer_No_VZEROUPPER, !))
 	return OPTIMIZE (avx2);
     }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v3] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 22:15     ` H.J. Lu
@ 2022-06-24 22:29       ` Noah Goldstein
  0 siblings, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 22:29 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 3:15 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
>  On Fri, Jun 24, 2022 at 2:46 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > CPU_FEATURES_ARCH_P check can be inverted if the
> > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > the check.
> >
> > Use this new macro to correct the backwards check in ifunc-evex.h
> > ---
> >  sysdeps/x86/isa-ifunc-macros.h        | 50 +++++++++++++++++++++++----
> >  sysdeps/x86/isa-level.h               | 32 ++++++++---------
> >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
> >  3 files changed, 60 insertions(+), 26 deletions(-)
> >
> > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > index ba6826d518..8ceca29b63 100644
> > --- a/sysdeps/x86/isa-ifunc-macros.h
> > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > @@ -56,15 +56,53 @@
> >  # define X86_IFUNC_IMPL_ADD_V1(...)
> >  #endif
> >
> > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > +   macros are wrappers for the the respective
> > +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks. They differ in two
> > +   ways.
> > +
> > +    1. They will first check the feature's default isa value (see
> > +       isa-level.h for the values) and if the feature's default value
> > +       is <= MINIMUM_X86_ISA_LEVEL they will constant evaluate to
> > +       true.  If default check does not evaluate to true they will
> > +       then do the runtime check.
> > +
> > +    2. The ARCH_P version has a third argument `not`. The `not`
> > +       argument may either be '!' or empty. It will be used to
> > +       optionally invert the condition of the RUNTIME check.
> >
>
> 2 spaces after a period.

Fixed in V4.
>
> 1. The USABLE_P version is evaluated to true when the feature
> is enabled.
> 2. The ARCH_P version has a third argument `not`.  The `not`
> argument can either be '!' or empty.  If the feature is enabled
> above an ISA level, the third argument should be empty and
> the expression is evaluated to true when the feature is enabled.
> If the feature is disabled above an ISA level, the third argument
> should be `!` and the expression is evaluated to true when the
> feature is disabled.

Okay. Changed in v4.
>
> > +    i.e
> > +
> > +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is set                                  -> True
> > +
> > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is set                                  -> True
> > +
> > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is NOT set                              -> True
> > +
> > +    All cases not listed will evaluate to false.
> > +
> > +    Only the A) cases will constantly evaluate.
> > +
> > +    WARNING: The constant evaluation of certain feature checks is
> > +    relied upon to allow the compiler to perform deadcode elimination
> > +    to remove references to non-built functions.  Misuse of these
> > +    macros will likely result in a link-time error.
> > + */
>
> No need for the above comments.

Removed in V4.
>
> >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> >     || CPU_FEATURE_USABLE_P (ptr, name))
> >
> > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > +
> > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > +
> >
> >  #endif
> > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > index 7cae11c228..73937fecdc 100644
> > --- a/sysdeps/x86/isa-level.h
> > +++ b/sysdeps/x86/isa-level.h
> > @@ -64,14 +64,8 @@
> >  #define MINIMUM_X86_ISA_LEVEL                                                 \
> >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> >
> > -
> > -/*
> > - * CPU Features that are hard coded as enabled depending on ISA build
> > - *   level.
> > - *    - Values > 0 features are always ENABLED if:
> > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > - */
> > -
> > +/* Depending on the minimum ISA level, a feature check result can be a
> > +   compile-time constant.. */
> >
> >  /* ISA level >= 4 guaranteed includes.  */
> >  #define AVX512VL_X86_ISA_LEVEL 4
> > @@ -81,20 +75,22 @@
> >  #define AVX2_X86_ISA_LEVEL 3
> >  #define BMI2_X86_ISA_LEVEL 3
> >
> > -/*
> > - * NB: This may not be fully assumable for ISA level >= 3. From
> > - * looking over the architectures supported in cpu-features.h the
> > - * following CPUs may have an issue with this being default set:
> > - *      - AMD Excavator
> > - */
> > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > +   for the following CPUs:
> > +        - AMD Excavator
> > +   when ISA level < 3.  */
> >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> >
> > -/*
> > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > - * builds with ISA V1 so this shouldn't harm any architectures.
> > - */
> > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > +   for the following CPUs:
> > +        - Intel KNL
> > +   when ISA level < 3.  */
> >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> >
> > +
> > +/* ISA level >= 2 guaranteed includes.  */
> > +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > +
> >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> >    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
> >     || defined ISA_DEFAULT_IMPL
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > index 856c6261f8..310cfd269f 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                     AVX_Fast_Unaligned_Load))
> > +                                     AVX_Fast_Unaligned_Load, ))
> >      {
> >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> >         return OPTIMIZE (avx2_rtm);
> >
> >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                      Prefer_No_VZEROUPPER))
> > +                                      Prefer_No_VZEROUPPER, !))
> >         return OPTIMIZE (avx2);
> >      }
> >
> > --
> > 2.34.1
> >
>
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v4] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 22:29   ` [PATCH v4] " Noah Goldstein
@ 2022-06-24 22:41     ` H.J. Lu
  2022-06-24 22:57       ` Noah Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 22:41 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> CPU_FEATURES_ARCH_P check can be inverted if the
> MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> the check.
>
> Use this new macro to correct the backwards check in ifunc-evex.h
> ---
>  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
>  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
>  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
>  3 files changed, 56 insertions(+), 26 deletions(-)
>
> diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> index ba6826d518..4cdc71e40e 100644
> --- a/sysdeps/x86/isa-ifunc-macros.h
> +++ b/sysdeps/x86/isa-ifunc-macros.h
> @@ -56,15 +56,49 @@
>  # define X86_IFUNC_IMPL_ADD_V1(...)
>  #endif
>
> -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> +   macros are wrappers for the the respective
> +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> +   ways.
> +
> +    1.  The USABLE_P version is evaluated to true when the feature
> +        is enabled.
> +
> +    2.  The ARCH_P version has a third argument `not`.  The `not`
> +        argument can either be '!' or empty.  If the feature is
> +        enabled above an ISA level, the third argument should be empty
> +        and the expression is evaluated to true when the feature is
> +        enabled.  If the feature is disabled above an ISA level, the
> +        third argument should be `!` and the expression is evaluated
> +        to true when the feature is disabled.
>

No need for the comments below.  These macros are self-explaining.

> +    i.e
> +
> +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is set                                  -> True
> +
> +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is set                                  -> True
> +
> +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is NOT set                              -> True
> +
> +    All cases not listed will evaluate to false.
> +
> +    Only the A) cases will constantly evaluate.
> + */
>
>  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
>     || CPU_FEATURE_USABLE_P (ptr, name))
>
> -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> -   || CPU_FEATURES_ARCH_P (ptr, name))
> +
> +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> +   || not CPU_FEATURES_ARCH_P (ptr, name))
> +
>
>  #endif
> diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> index 7cae11c228..73937fecdc 100644
> --- a/sysdeps/x86/isa-level.h
> +++ b/sysdeps/x86/isa-level.h
> @@ -64,14 +64,8 @@
>  #define MINIMUM_X86_ISA_LEVEL                                                 \
>    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
>
> -
> -/*
> - * CPU Features that are hard coded as enabled depending on ISA build
> - *   level.
> - *    - Values > 0 features are always ENABLED if:
> - *          Value >= MINIMUM_X86_ISA_LEVEL
> - */
> -
> +/* Depending on the minimum ISA level, a feature check result can be a
> +   compile-time constant.. */
>
>  /* ISA level >= 4 guaranteed includes.  */
>  #define AVX512VL_X86_ISA_LEVEL 4
> @@ -81,20 +75,22 @@
>  #define AVX2_X86_ISA_LEVEL 3
>  #define BMI2_X86_ISA_LEVEL 3
>
> -/*
> - * NB: This may not be fully assumable for ISA level >= 3. From
> - * looking over the architectures supported in cpu-features.h the
> - * following CPUs may have an issue with this being default set:
> - *      - AMD Excavator
> - */
> +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> +   for the following CPUs:
> +        - AMD Excavator
> +   when ISA level < 3.  */
>  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
>
> -/*
> - * KNL (the only cpu that sets this supported in cpu-features.h)
> - * builds with ISA V1 so this shouldn't harm any architectures.
> - */
> +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> +   for the following CPUs:
> +        - Intel KNL
> +   when ISA level < 3.  */
>  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
>
> +
> +/* ISA level >= 2 guaranteed includes.  */
> +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> +

These belong to a different patch.

>  #define ISA_SHOULD_BUILD(isa_build_level)                              \
>    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
>     || defined ISA_DEFAULT_IMPL
> diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> index 856c6261f8..310cfd269f 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
>        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                     AVX_Fast_Unaligned_Load))
> +                                     AVX_Fast_Unaligned_Load, ))
>      {
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
>           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
>         return OPTIMIZE (avx2_rtm);
>
>        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                      Prefer_No_VZEROUPPER))
> +                                      Prefer_No_VZEROUPPER, !))
>         return OPTIMIZE (avx2);
>      }
>
> --
> 2.34.1
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v4] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 22:41     ` H.J. Lu
@ 2022-06-24 22:57       ` Noah Goldstein
  2022-06-24 23:05         ` H.J. Lu
  0 siblings, 1 reply; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 22:57 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 3:42 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > CPU_FEATURES_ARCH_P check can be inverted if the
> > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > the check.
> >
> > Use this new macro to correct the backwards check in ifunc-evex.h
> > ---
> >  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
> >  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
> >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
> >  3 files changed, 56 insertions(+), 26 deletions(-)
> >
> > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > index ba6826d518..4cdc71e40e 100644
> > --- a/sysdeps/x86/isa-ifunc-macros.h
> > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > @@ -56,15 +56,49 @@
> >  # define X86_IFUNC_IMPL_ADD_V1(...)
> >  #endif
> >
> > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > +   macros are wrappers for the the respective
> > +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> > +   ways.
> > +
> > +    1.  The USABLE_P version is evaluated to true when the feature
> > +        is enabled.
> > +
> > +    2.  The ARCH_P version has a third argument `not`.  The `not`
> > +        argument can either be '!' or empty.  If the feature is
> > +        enabled above an ISA level, the third argument should be empty
> > +        and the expression is evaluated to true when the feature is
> > +        enabled.  If the feature is disabled above an ISA level, the
> > +        third argument should be `!` and the expression is evaluated
> > +        to true when the feature is disabled.
> >
>
> No need for the comments below.  These macros are self-explaining.

Do they harm things at all? I prefer them.
>
> > +    i.e
> > +
> > +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is set                                  -> True
> > +
> > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is set                                  -> True
> > +
> > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is NOT set                              -> True
> > +
> > +    All cases not listed will evaluate to false.
> > +
> > +    Only the A) cases will constantly evaluate.
> > + */
> >
> >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> >     || CPU_FEATURE_USABLE_P (ptr, name))
> >
> > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > +
> > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > +
> >
> >  #endif
> > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > index 7cae11c228..73937fecdc 100644
> > --- a/sysdeps/x86/isa-level.h
> > +++ b/sysdeps/x86/isa-level.h
> > @@ -64,14 +64,8 @@
> >  #define MINIMUM_X86_ISA_LEVEL                                                 \
> >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> >
> > -
> > -/*
> > - * CPU Features that are hard coded as enabled depending on ISA build
> > - *   level.
> > - *    - Values > 0 features are always ENABLED if:
> > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > - */
> > -
> > +/* Depending on the minimum ISA level, a feature check result can be a
> > +   compile-time constant.. */
> >
> >  /* ISA level >= 4 guaranteed includes.  */
> >  #define AVX512VL_X86_ISA_LEVEL 4
> > @@ -81,20 +75,22 @@
> >  #define AVX2_X86_ISA_LEVEL 3
> >  #define BMI2_X86_ISA_LEVEL 3
> >
> > -/*
> > - * NB: This may not be fully assumable for ISA level >= 3. From
> > - * looking over the architectures supported in cpu-features.h the
> > - * following CPUs may have an issue with this being default set:
> > - *      - AMD Excavator
> > - */
> > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > +   for the following CPUs:
> > +        - AMD Excavator
> > +   when ISA level < 3.  */
> >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> >
> > -/*
> > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > - * builds with ISA V1 so this shouldn't harm any architectures.
> > - */
> > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > +   for the following CPUs:
> > +        - Intel KNL
> > +   when ISA level < 3.  */
> >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> >
> > +
> > +/* ISA level >= 2 guaranteed includes.  */
> > +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > +
>
> These belong to a different patch.
>
> >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> >    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
> >     || defined ISA_DEFAULT_IMPL
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > index 856c6261f8..310cfd269f 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                     AVX_Fast_Unaligned_Load))
> > +                                     AVX_Fast_Unaligned_Load, ))
> >      {
> >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> >         return OPTIMIZE (avx2_rtm);
> >
> >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                      Prefer_No_VZEROUPPER))
> > +                                      Prefer_No_VZEROUPPER, !))
> >         return OPTIMIZE (avx2);
> >      }
> >
> > --
> > 2.34.1
> >
>
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v4] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 22:57       ` Noah Goldstein
@ 2022-06-24 23:05         ` H.J. Lu
  2022-06-24 23:16           ` Noah Goldstein
  0 siblings, 1 reply; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 23:05 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 3:57 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 3:42 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > > CPU_FEATURES_ARCH_P check can be inverted if the
> > > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > > the check.
> > >
> > > Use this new macro to correct the backwards check in ifunc-evex.h
> > > ---
> > >  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
> > >  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
> > >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
> > >  3 files changed, 56 insertions(+), 26 deletions(-)
> > >
> > > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > > index ba6826d518..4cdc71e40e 100644
> > > --- a/sysdeps/x86/isa-ifunc-macros.h
> > > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > > @@ -56,15 +56,49 @@
> > >  # define X86_IFUNC_IMPL_ADD_V1(...)
> > >  #endif
> > >
> > > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > > +   macros are wrappers for the the respective
> > > +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> > > +   ways.
> > > +
> > > +    1.  The USABLE_P version is evaluated to true when the feature
> > > +        is enabled.
> > > +
> > > +    2.  The ARCH_P version has a third argument `not`.  The `not`
> > > +        argument can either be '!' or empty.  If the feature is
> > > +        enabled above an ISA level, the third argument should be empty
> > > +        and the expression is evaluated to true when the feature is
> > > +        enabled.  If the feature is disabled above an ISA level, the
> > > +        third argument should be `!` and the expression is evaluated
> > > +        to true when the feature is disabled.
> > >
> >
> > No need for the comments below.  These macros are self-explaining.
>
> Do they harm things at all? I prefer them.

They lead to more questions.  How are macro arguments related
to A and B?  How is the runtime bit checked?

> >
> > > +    i.e
> > > +
> > > +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > +        B) runtime bit is set                                  -> True
> > > +
> > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > +        B) runtime bit is set                                  -> True
> > > +
> > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > +        B) runtime bit is NOT set                              -> True
> > > +
> > > +    All cases not listed will evaluate to false.
> > > +
> > > +    Only the A) cases will constantly evaluate.
> > > + */
> > >
> > >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > >     || CPU_FEATURE_USABLE_P (ptr, name))
> > >
> > > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > > +
> > > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > > +
> > >
> > >  #endif
> > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > > index 7cae11c228..73937fecdc 100644
> > > --- a/sysdeps/x86/isa-level.h
> > > +++ b/sysdeps/x86/isa-level.h
> > > @@ -64,14 +64,8 @@
> > >  #define MINIMUM_X86_ISA_LEVEL                                                 \
> > >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> > >
> > > -
> > > -/*
> > > - * CPU Features that are hard coded as enabled depending on ISA build
> > > - *   level.
> > > - *    - Values > 0 features are always ENABLED if:
> > > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > > - */
> > > -
> > > +/* Depending on the minimum ISA level, a feature check result can be a
> > > +   compile-time constant.. */
> > >
> > >  /* ISA level >= 4 guaranteed includes.  */
> > >  #define AVX512VL_X86_ISA_LEVEL 4
> > > @@ -81,20 +75,22 @@
> > >  #define AVX2_X86_ISA_LEVEL 3
> > >  #define BMI2_X86_ISA_LEVEL 3
> > >
> > > -/*
> > > - * NB: This may not be fully assumable for ISA level >= 3. From
> > > - * looking over the architectures supported in cpu-features.h the
> > > - * following CPUs may have an issue with this being default set:
> > > - *      - AMD Excavator
> > > - */
> > > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > > +   for the following CPUs:
> > > +        - AMD Excavator
> > > +   when ISA level < 3.  */
> > >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> > >
> > > -/*
> > > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > > - * builds with ISA V1 so this shouldn't harm any architectures.
> > > - */
> > > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > > +   for the following CPUs:
> > > +        - Intel KNL
> > > +   when ISA level < 3.  */
> > >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> > >
> > > +
> > > +/* ISA level >= 2 guaranteed includes.  */
> > > +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > > +
> >
> > These belong to a different patch.
> >
> > >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> > >    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
> > >     || defined ISA_DEFAULT_IMPL
> > > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > index 856c6261f8..310cfd269f 100644
> > > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> > >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > -                                     AVX_Fast_Unaligned_Load))
> > > +                                     AVX_Fast_Unaligned_Load, ))
> > >      {
> > >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> > >         return OPTIMIZE (avx2_rtm);
> > >
> > >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > -                                      Prefer_No_VZEROUPPER))
> > > +                                      Prefer_No_VZEROUPPER, !))
> > >         return OPTIMIZE (avx2);
> > >      }
> > >
> > > --
> > > 2.34.1
> > >
> >
> >
> > --
> > H.J.



-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v5] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
                     ` (3 preceding siblings ...)
  2022-06-24 22:29   ` [PATCH v4] " Noah Goldstein
@ 2022-06-24 23:15   ` Noah Goldstein
  2022-06-24 23:20     ` H.J. Lu
  4 siblings, 1 reply; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 23:15 UTC (permalink / raw)
  To: libc-alpha

Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
CPU_FEATURES_ARCH_P check can be inverted if the
MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
the check.

Use this new macro to correct the backwards check in ifunc-evex.h
---
 sysdeps/x86/isa-ifunc-macros.h        | 28 +++++++++++++++++++++------
 sysdeps/x86/isa-level.h               | 28 ++++++++++-----------------
 sysdeps/x86_64/multiarch/ifunc-evex.h |  4 ++--
 3 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
index ba6826d518..d69905689b 100644
--- a/sysdeps/x86/isa-ifunc-macros.h
+++ b/sysdeps/x86/isa-ifunc-macros.h
@@ -56,15 +56,31 @@
 # define X86_IFUNC_IMPL_ADD_V1(...)
 #endif
 
-#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
-  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
+/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
+   macros are wrappers for the the respective
+   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
+   ways.
+
+    1.  The USABLE_P version is evaluated to true when the feature
+        is enabled.
+
+    2.  The ARCH_P version has a third argument `not`.  The `not`
+        argument can either be '!' or empty.  If the feature is
+        enabled above an ISA level, the third argument should be empty
+        and the expression is evaluated to true when the feature is
+        enabled.  If the feature is disabled above an ISA level, the
+        third argument should be `!` and the expression is evaluated
+        to true when the feature is disabled.
+ */
 
 #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
    || CPU_FEATURE_USABLE_P (ptr, name))
 
-#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
-   || CPU_FEATURES_ARCH_P (ptr, name))
+
+#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
+   || not CPU_FEATURES_ARCH_P (ptr, name))
+
 
 #endif
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 7cae11c228..075e7c6ee1 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -64,14 +64,8 @@
 #define MINIMUM_X86_ISA_LEVEL                                                 \
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
 
-
-/*
- * CPU Features that are hard coded as enabled depending on ISA build
- *   level.
- *    - Values > 0 features are always ENABLED if:
- *          Value >= MINIMUM_X86_ISA_LEVEL
- */
-
+/* Depending on the minimum ISA level, a feature check result can be a
+   compile-time constant.. */
 
 /* ISA level >= 4 guaranteed includes.  */
 #define AVX512VL_X86_ISA_LEVEL 4
@@ -81,18 +75,16 @@
 #define AVX2_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
 
-/*
- * NB: This may not be fully assumable for ISA level >= 3. From
- * looking over the architectures supported in cpu-features.h the
- * following CPUs may have an issue with this being default set:
- *      - AMD Excavator
- */
+/* NB: This feature is enabled when ISA level >= 3, which was disabled
+   for the following CPUs:
+        - AMD Excavator
+   when ISA level < 3.  */
 #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
 
-/*
- * KNL (the only cpu that sets this supported in cpu-features.h)
- * builds with ISA V1 so this shouldn't harm any architectures.
- */
+/* NB: This feature is disabled when ISA level >= 3, which was enabled
+   for the following CPUs:
+        - Intel KNL
+   when ISA level < 3.  */
 #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
 
 #define ISA_SHOULD_BUILD(isa_build_level)                              \
diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
index 856c6261f8..310cfd269f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				      AVX_Fast_Unaligned_Load))
+				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
@@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
 	return OPTIMIZE (avx2_rtm);
 
       if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				       Prefer_No_VZEROUPPER))
+				       Prefer_No_VZEROUPPER, !))
 	return OPTIMIZE (avx2);
     }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v4] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 23:05         ` H.J. Lu
@ 2022-06-24 23:16           ` Noah Goldstein
  0 siblings, 0 replies; 24+ messages in thread
From: Noah Goldstein @ 2022-06-24 23:16 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 4:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 3:57 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Fri, Jun 24, 2022 at 3:42 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > > > CPU_FEATURES_ARCH_P check can be inverted if the
> > > > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > > > the check.
> > > >
> > > > Use this new macro to correct the backwards check in ifunc-evex.h
> > > > ---
> > > >  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
> > > >  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
> > > >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
> > > >  3 files changed, 56 insertions(+), 26 deletions(-)
> > > >
> > > > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > > > index ba6826d518..4cdc71e40e 100644
> > > > --- a/sysdeps/x86/isa-ifunc-macros.h
> > > > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > > > @@ -56,15 +56,49 @@
> > > >  # define X86_IFUNC_IMPL_ADD_V1(...)
> > > >  #endif
> > > >
> > > > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > > > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > > > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > > > +   macros are wrappers for the the respective
> > > > +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> > > > +   ways.
> > > > +
> > > > +    1.  The USABLE_P version is evaluated to true when the feature
> > > > +        is enabled.
> > > > +
> > > > +    2.  The ARCH_P version has a third argument `not`.  The `not`
> > > > +        argument can either be '!' or empty.  If the feature is
> > > > +        enabled above an ISA level, the third argument should be empty
> > > > +        and the expression is evaluated to true when the feature is
> > > > +        enabled.  If the feature is disabled above an ISA level, the
> > > > +        third argument should be `!` and the expression is evaluated
> > > > +        to true when the feature is disabled.
> > > >
> > >
> > > No need for the comments below.  These macros are self-explaining.
> >
> > Do they harm things at all? I prefer them.
>
> They lead to more questions.  How are macro arguments related
> to A and B?  How is the runtime bit checked?

Okay. removed in v5.
>
> > >
> > > > +    i.e
> > > > +
> > > > +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> > > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > > +        B) runtime bit is set                                  -> True
> > > > +
> > > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> > > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > > +        B) runtime bit is set                                  -> True
> > > > +
> > > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> > > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > > +        B) runtime bit is NOT set                              -> True
> > > > +
> > > > +    All cases not listed will evaluate to false.
> > > > +
> > > > +    Only the A) cases will constantly evaluate.
> > > > + */
> > > >
> > > >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > > >     || CPU_FEATURE_USABLE_P (ptr, name))
> > > >
> > > > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > > > +
> > > > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > > > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > > > +
> > > >
> > > >  #endif
> > > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > > > index 7cae11c228..73937fecdc 100644
> > > > --- a/sysdeps/x86/isa-level.h
> > > > +++ b/sysdeps/x86/isa-level.h
> > > > @@ -64,14 +64,8 @@
> > > >  #define MINIMUM_X86_ISA_LEVEL                                                 \
> > > >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> > > >
> > > > -
> > > > -/*
> > > > - * CPU Features that are hard coded as enabled depending on ISA build
> > > > - *   level.
> > > > - *    - Values > 0 features are always ENABLED if:
> > > > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > > > - */
> > > > -
> > > > +/* Depending on the minimum ISA level, a feature check result can be a
> > > > +   compile-time constant.. */
> > > >
> > > >  /* ISA level >= 4 guaranteed includes.  */
> > > >  #define AVX512VL_X86_ISA_LEVEL 4
> > > > @@ -81,20 +75,22 @@
> > > >  #define AVX2_X86_ISA_LEVEL 3
> > > >  #define BMI2_X86_ISA_LEVEL 3
> > > >
> > > > -/*
> > > > - * NB: This may not be fully assumable for ISA level >= 3. From
> > > > - * looking over the architectures supported in cpu-features.h the
> > > > - * following CPUs may have an issue with this being default set:
> > > > - *      - AMD Excavator
> > > > - */
> > > > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > > > +   for the following CPUs:
> > > > +        - AMD Excavator
> > > > +   when ISA level < 3.  */
> > > >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> > > >
> > > > -/*
> > > > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > > > - * builds with ISA V1 so this shouldn't harm any architectures.
> > > > - */
> > > > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > > > +   for the following CPUs:
> > > > +        - Intel KNL
> > > > +   when ISA level < 3.  */
> > > >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> > > >
> > > > +
> > > > +/* ISA level >= 2 guaranteed includes.  */
> > > > +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > > > +
> > >
> > > These belong to a different patch.

Removed in v5.
> > >
> > > >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> > > >    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
> > > >     || defined ISA_DEFAULT_IMPL
> > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > > index 856c6261f8..310cfd269f 100644
> > > > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> > > >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > > >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > > >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > > -                                     AVX_Fast_Unaligned_Load))
> > > > +                                     AVX_Fast_Unaligned_Load, ))
> > > >      {
> > > >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > > > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> > > >         return OPTIMIZE (avx2_rtm);
> > > >
> > > >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > > -                                      Prefer_No_VZEROUPPER))
> > > > +                                      Prefer_No_VZEROUPPER, !))
> > > >         return OPTIMIZE (avx2);
> > > >      }
> > > >
> > > > --
> > > > 2.34.1
> > > >
> > >
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v5] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h
  2022-06-24 23:15   ` [PATCH v5] " Noah Goldstein
@ 2022-06-24 23:20     ` H.J. Lu
  0 siblings, 0 replies; 24+ messages in thread
From: H.J. Lu @ 2022-06-24 23:20 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library, Carlos O'Donell

On Fri, Jun 24, 2022 at 4:15 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> CPU_FEATURES_ARCH_P check can be inverted if the
> MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> the check.
>
> Use this new macro to correct the backwards check in ifunc-evex.h
> ---
>  sysdeps/x86/isa-ifunc-macros.h        | 28 +++++++++++++++++++++------
>  sysdeps/x86/isa-level.h               | 28 ++++++++++-----------------
>  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 ++--
>  3 files changed, 34 insertions(+), 26 deletions(-)
>
> diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> index ba6826d518..d69905689b 100644
> --- a/sysdeps/x86/isa-ifunc-macros.h
> +++ b/sysdeps/x86/isa-ifunc-macros.h
> @@ -56,15 +56,31 @@
>  # define X86_IFUNC_IMPL_ADD_V1(...)
>  #endif
>
> -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> +   macros are wrappers for the the respective
> +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> +   ways.
> +
> +    1.  The USABLE_P version is evaluated to true when the feature
> +        is enabled.
> +
> +    2.  The ARCH_P version has a third argument `not`.  The `not`
> +        argument can either be '!' or empty.  If the feature is
> +        enabled above an ISA level, the third argument should be empty
> +        and the expression is evaluated to true when the feature is
> +        enabled.  If the feature is disabled above an ISA level, the
> +        third argument should be `!` and the expression is evaluated
> +        to true when the feature is disabled.
> + */
>
>  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
>     || CPU_FEATURE_USABLE_P (ptr, name))
>
> -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> -   || CPU_FEATURES_ARCH_P (ptr, name))
> +
> +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> +   || not CPU_FEATURES_ARCH_P (ptr, name))
> +
>
>  #endif
> diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> index 7cae11c228..075e7c6ee1 100644
> --- a/sysdeps/x86/isa-level.h
> +++ b/sysdeps/x86/isa-level.h
> @@ -64,14 +64,8 @@
>  #define MINIMUM_X86_ISA_LEVEL                                                 \
>    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
>
> -
> -/*
> - * CPU Features that are hard coded as enabled depending on ISA build
> - *   level.
> - *    - Values > 0 features are always ENABLED if:
> - *          Value >= MINIMUM_X86_ISA_LEVEL
> - */
> -
> +/* Depending on the minimum ISA level, a feature check result can be a
> +   compile-time constant.. */
>
>  /* ISA level >= 4 guaranteed includes.  */
>  #define AVX512VL_X86_ISA_LEVEL 4
> @@ -81,18 +75,16 @@
>  #define AVX2_X86_ISA_LEVEL 3
>  #define BMI2_X86_ISA_LEVEL 3
>
> -/*
> - * NB: This may not be fully assumable for ISA level >= 3. From
> - * looking over the architectures supported in cpu-features.h the
> - * following CPUs may have an issue with this being default set:
> - *      - AMD Excavator
> - */
> +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> +   for the following CPUs:
> +        - AMD Excavator
> +   when ISA level < 3.  */
>  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
>
> -/*
> - * KNL (the only cpu that sets this supported in cpu-features.h)
> - * builds with ISA V1 so this shouldn't harm any architectures.
> - */
> +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> +   for the following CPUs:
> +        - Intel KNL
> +   when ISA level < 3.  */
>  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
>
>  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> index 856c6261f8..310cfd269f 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
>        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                     AVX_Fast_Unaligned_Load))
> +                                     AVX_Fast_Unaligned_Load, ))
>      {
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
>           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
>         return OPTIMIZE (avx2_rtm);
>
>        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                      Prefer_No_VZEROUPPER))
> +                                      Prefer_No_VZEROUPPER, !))
>         return OPTIMIZE (avx2);
>      }
>
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2022-06-24 23:21 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-24  6:36 [PATCH v1 1/7] x86: Align entry for memrchr to 64-bytes Noah Goldstein
2022-06-24  6:36 ` [PATCH v1 2/7] x86: Rename strstr_sse2 to strstr_generic as it uses string/strstr.c Noah Goldstein
2022-06-24  6:36 ` [PATCH v1 3/7] x86: Add macro for NOT of a cpu arch feature and improve comments Noah Goldstein
2022-06-24 14:32   ` H.J. Lu
2022-06-24 14:49     ` H.J. Lu
2022-06-24 16:43     ` Noah Goldstein
2022-06-24 20:10   ` [PATCH v2] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h Noah Goldstein
2022-06-24 20:32     ` H.J. Lu
2022-06-24 21:26       ` Noah Goldstein
2022-06-24 21:36         ` H.J. Lu
2022-06-24 21:46   ` [PATCH v3] " Noah Goldstein
2022-06-24 22:15     ` H.J. Lu
2022-06-24 22:29       ` Noah Goldstein
2022-06-24 22:29   ` [PATCH v4] " Noah Goldstein
2022-06-24 22:41     ` H.J. Lu
2022-06-24 22:57       ` Noah Goldstein
2022-06-24 23:05         ` H.J. Lu
2022-06-24 23:16           ` Noah Goldstein
2022-06-24 23:15   ` [PATCH v5] " Noah Goldstein
2022-06-24 23:20     ` H.J. Lu
2022-06-24  6:36 ` [PATCH v1 4/7] x86: Add comment with ISA level for all targets support by GCC12.1 Noah Goldstein
2022-06-24  6:36 ` [PATCH v1 5/7] x86: Use ARCH_P_NOT to check Prefer_No_VZeroupper in ifunc-evex.h Noah Goldstein
2022-06-24  6:36 ` [PATCH v1 6/7] x86: Put wcs{n}len-sse4.1 in the sse4.1 text section Noah Goldstein
2022-06-24  6:36 ` [PATCH v1 7/7] x86: Remove unused file wmemcmp-sse4 Noah Goldstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).