* [RFC][PATCH] <sys/platform/x86.h>: Add initial AVX10 support
@ 2023-08-16 15:43 H.J. Lu
0 siblings, 0 replies; only message in thread
From: H.J. Lu @ 2023-08-16 15:43 UTC (permalink / raw)
To: GNU C Library
Hi,
AVX10 CPUID enumeration differs from that of earlier features: the AVX10
version is stored as a byte value, but CPU_FEATURE_PRESENT and
CPU_FEATURE_ACTIVE return a boolean value and so cannot report it. This
patch therefore adds the AVX10_VERSION and AVX10_VECTOR_SIZE macros. Any
suggestions?
Thanks.
H.J.
---
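Add initial support for Intel Advanced Vector Extensions 10 (AVX10) to
<sys/platform/x86.h>.
[Note: the link originally given here describes Intel Advanced Performance
Extensions (APX), not AVX10, and appears to be carried over from the APX patch:
https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.html]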
1. Add CPUID_INDEX_24_ECX_0 for CPUID leaf 0x24 to store AVX10 version
and vector size info.
2. Add AVX10_VERSION and AVX10_VECTOR_SIZE for AVX10 version and vector
size.
---
manual/platform.texi | 27 +++++++++++++++++++++++
sysdeps/x86/bits/platform/x86.h | 10 +++++++--
sysdeps/x86/cpu-features.c | 35 ++++++++++++++++++++++++++++++
sysdeps/x86/include/cpu-features.h | 5 ++++-
sysdeps/x86/sys/platform/x86.h | 28 ++++++++++++++++++++++++
sysdeps/x86/tst-get-cpu-features.c | 8 +++++++
6 files changed, 110 insertions(+), 3 deletions(-)
diff --git a/manual/platform.texi b/manual/platform.texi
index 2a2d557067..1567fdf255 100644
--- a/manual/platform.texi
+++ b/manual/platform.texi
@@ -222,6 +222,9 @@ Leaf (EAX = 23H).
@item
@code{AVX} -- The AVX instruction extensions.
+@item
+@code{AVX10} -- The AVX10 instruction extensions.
+
@item
@code{AVX2} -- The AVX2 instruction extensions.
@@ -760,3 +763,27 @@ avx_active (void)
return CPU_FEATURE_ACTIVE (AVX);
@}
@end smallexample
+
+You can query the @code{AVX10} version number with:
+
+@smallexample
+#include <sys/platform/x86.h>
+
+int
+get_avx10_version (void)
+@{
+ return AVX10_VERSION ();
+@}
+@end smallexample
+
+and the @code{AVX10} vector size in bits with:
+
+@smallexample
+#include <sys/platform/x86.h>
+
+int
+get_avx10_vector_size (void)
+@{
+ return AVX10_VECTOR_SIZE ();
+@}
+@end smallexample
diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h
index 88ca071aa7..dbba9c95c3 100644
--- a/sysdeps/x86/bits/platform/x86.h
+++ b/sysdeps/x86/bits/platform/x86.h
@@ -30,7 +30,8 @@ enum
CPUID_INDEX_80000008,
CPUID_INDEX_7_ECX_1,
CPUID_INDEX_19,
- CPUID_INDEX_14_ECX_0
+ CPUID_INDEX_14_ECX_0,
+ CPUID_INDEX_24_ECX_0
};
struct cpuid_feature
@@ -312,6 +313,7 @@ enum
x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5,
x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8,
x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14,
+ x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19,
x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21,
x86_cpu_index_19_ebx
@@ -325,5 +327,9 @@ enum
= (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
- x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4
+ x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4,
+
+ x86_cpu_index_24_ecx_0_ebx
+ = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
};
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index badf088874..8dd8392586 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -120,6 +120,14 @@ update_active (struct cpu_features *cpu_features)
{
unsigned int xcrlow;
unsigned int xcrhigh;
+ enum
+ {
+ xmm = 0,
+ ymm,
+ zmm
+ }
+ vector_size = xmm;
+ CPU_FEATURE_SET (cpu_features, AVX10);
asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
/* Is YMM and XMM state usable? */
if ((xcrlow & (bit_YMM_state | bit_XMM_state))
@@ -128,6 +136,7 @@ update_active (struct cpu_features *cpu_features)
/* Determine if AVX is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX))
{
+ vector_size = ymm;
CPU_FEATURE_SET (cpu_features, AVX);
/* The following features depend on AVX being usable. */
/* Determine if AVX2 is usable. */
@@ -166,6 +175,7 @@ update_active (struct cpu_features *cpu_features)
| bit_ZMM16_31_state))
== (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
{
+ vector_size = zmm;
/* Determine if AVX512F is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
{
@@ -210,6 +220,31 @@ update_active (struct cpu_features *cpu_features)
}
}
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
+ && cpu_features->basic.max_cpuid >= 0x24)
+ {
+ __cpuid_count (0x24, 0,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
+ switch (vector_size)
+ {
+ case zmm:
+ break;
+ case ymm:
+ /* Clear the ZMM bit. */
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx
+ &= ~(1 << 18);
+ break;
+ case xmm:
+ /* Clear the YMM and ZMM bits. */
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx
+ &= ~((1 << 17) | (1 << 18));
+ break;
+ }
+ }
+
/* Are XTILECFG and XTILEDATA states usable? */
if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
== (bit_XTILECFG_state | bit_XTILEDATA_state))
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index eb30d342a6..c1b1811528 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -29,7 +29,7 @@
enum
{
- CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
+ CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
};
enum
@@ -319,6 +319,7 @@ enum
#define bit_cpu_AVX_NE_CONVERT (1u << 5)
#define bit_cpu_AMX_COMPLEX (1u << 8)
#define bit_cpu_PREFETCHI (1u << 14)
+#define bit_cpu_AVX10 (1u << 19)
#define bit_cpu_APX_F (1u << 21)
/* CPUID_INDEX_19. */
@@ -563,6 +564,7 @@ enum
#define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
#define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1
#define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1
+#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1
#define index_cpu_APX_F CPUID_INDEX_7_ECX_1
/* CPUID_INDEX_19. */
@@ -809,6 +811,7 @@ enum
#define reg_AVX_NE_CONVERT edx
#define reg_AMX_COMPLEX edx
#define reg_PREFETCHI edx
+#define reg_AVX10 edx
#define reg_APX_F edx
/* CPUID_INDEX_19. */
diff --git a/sysdeps/x86/sys/platform/x86.h b/sysdeps/x86/sys/platform/x86.h
index 1ea2c5fc0b..11edf4df3e 100644
--- a/sysdeps/x86/sys/platform/x86.h
+++ b/sysdeps/x86/sys/platform/x86.h
@@ -55,10 +55,38 @@ x86_cpu_active (unsigned int __index)
return __ptr->active_array[__reg] & (1 << __bit);
}
+/* Return the raw CPUID register value selected by __INDEX.  __INDEX
+   encodes a CPUID leaf index and a register index within that leaf.  */
+static __inline__ unsigned int
+x86_cpu_get_avx10_info (unsigned int __index)
+{
+  /* Number of index bits covered by one register and by one leaf
+     (four registers).  */
+  const unsigned int __reg_bits = 8 * sizeof (unsigned int);
+  const unsigned int __leaf_bits = __reg_bits * 4;
+  const struct cpuid_feature *__leaf
+    = __x86_get_cpuid_feature_leaf (__index / __leaf_bits);
+  unsigned int __word = (__index & (__leaf_bits - 1)) / __reg_bits;
+
+  return __leaf->cpuid_array[__word];
+}
+
+/* Return the AVX10 vector size in bits, derived from the stored EBX
+   value of CPUID leaf 0x24 (subleaf 0): 512 if bit 18 (ZMM) is set,
+   otherwise 256 if bit 17 (YMM) is set, otherwise 128.  */
+static __inline__ unsigned int
+x86_cpu_get_avx10_vector_size (void)
+{
+  /* This is an installed header, so locals must stay in the reserved
+     namespace like __ptr/__reg/__bit in the neighbouring functions; a
+     plain "ebx" could collide with a user macro.  */
+  unsigned int __ebx = x86_cpu_get_avx10_info (x86_cpu_index_24_ecx_0_ebx);
+  if ((__ebx & (1u << 18)) != 0)
+    return 512;
+  if ((__ebx & (1u << 17)) != 0)
+    return 256;
+  return 128;
+}
+
/* CPU_FEATURE_PRESENT evaluates to true if CPU supports the feature. */
#define CPU_FEATURE_PRESENT(name) x86_cpu_present (x86_cpu_##name)
/* CPU_FEATURE_ACTIVE evaluates to true if the feature is active. */
#define CPU_FEATURE_ACTIVE(name) x86_cpu_active (x86_cpu_##name)
+/* AVX10_VERSION evaluates to the AVX10 version (low byte of leaf 0x24 EBX).  */
+#define AVX10_VERSION() \
+ (x86_cpu_get_avx10_info (x86_cpu_index_24_ecx_0_ebx) & 0xff)
+/* AVX10_VECTOR_SIZE evaluates to the AVX10 vector size in bits.  */
+#define AVX10_VECTOR_SIZE() x86_cpu_get_avx10_vector_size ()
__END_DECLS
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index b27fa7324a..e788f37df2 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -219,6 +219,7 @@ do_test (void)
CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
+ CHECK_CPU_FEATURE_PRESENT (AVX10);
CHECK_CPU_FEATURE_PRESENT (APX_F);
CHECK_CPU_FEATURE_PRESENT (AESKLE);
CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
@@ -391,11 +392,18 @@ do_test (void)
CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
+ CHECK_CPU_FEATURE_ACTIVE (AVX10);
CHECK_CPU_FEATURE_ACTIVE (APX_F);
CHECK_CPU_FEATURE_ACTIVE (AESKLE);
CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
+ if (CPU_FEATURE_ACTIVE (AVX10))
+ {
+ printf ("AVX10 version: %d\n", AVX10_VERSION ());
+ printf ("AVX10 vector size: %d\n", AVX10_VECTOR_SIZE ());
+ }
+
return 0;
}
--
2.41.0
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-08-16 15:43 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-16 15:43 [RFC][PATCH] <sys/platform/x86.h>: Add initial AVX10 support H.J. Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).