From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1895) id 488913858282; Wed, 10 Apr 2024 13:04:52 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 488913858282 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1712754292; bh=sf3R9FZpdHMnxvAbmkMC7qR04QbuI+Mrl24yhssj0Rg=; h=From:To:Subject:Date:From; b=LbUKjcJ6KOwGZ/iGjyNNvdROm7nLJQvK90naHLNZMC93RBJw8lPS7+o2U2LoR7yFX XzbKYof5V86kOziwCjgI4f0spVmsZHrV8csgrSK6xfsjYdtoJtu5mqiw5sS9vOmqFi KlDYR5eJYoPx/1+hLLoxl86vTcVariKo86bwKEoI= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Wilco Dijkstra To: glibc-cvs@sourceware.org Subject: [glibc/release/2.38/master] AArch64: Check kernel version for SVE ifuncs X-Act-Checkin: glibc X-Git-Author: Wilco Dijkstra X-Git-Refname: refs/heads/release/2.38/master X-Git-Oldrev: 1bf17ce978da71431dbd1fc3660cfae3dff0672f X-Git-Newrev: 92da7c2cfeeea36d651142f47e570dd5076bc166 Message-Id: <20240410130452.488913858282@sourceware.org> Date: Wed, 10 Apr 2024 13:04:52 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=92da7c2cfeeea36d651142f47e570dd5076bc166 commit 92da7c2cfeeea36d651142f47e570dd5076bc166 Author: Wilco Dijkstra Date: Thu Mar 21 16:48:33 2024 +0000 AArch64: Check kernel version for SVE ifuncs Old Linux kernels disable SVE after every system call. Calling the SVE-optimized memcpy afterwards will then cause a trap to reenable SVE. As a result, applications that make many system calls may run slower with the SVE memcpy. This affects kernels from 4.15.0 up to, but not including, 6.2.0, except for 5.14.0, which was patched. Avoid this by checking the kernel version and selecting the SVE ifunc only on kernels that are not affected. Parse the kernel version reported by uname() into a 24-bit kernel.major.minor value without calling any library functions. 
If uname() is not supported or if the version format is not recognized, assume the kernel is modern. Tested-by: Florian Weimer Reviewed-by: Szabolcs Nagy (cherry picked from commit 2e94e2f5d2bf2de124c8ad7da85463355e54ccb2) Diff: --- sysdeps/aarch64/multiarch/init-arch.h | 2 ++ sysdeps/aarch64/multiarch/memcpy.c | 2 +- sysdeps/aarch64/multiarch/memmove.c | 2 +- sysdeps/unix/sysv/linux/aarch64/cpu-features.c | 48 ++++++++++++++++++++++++++ sysdeps/unix/sysv/linux/aarch64/cpu-features.h | 1 + 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h index e23e6ff290..daef631e04 100644 --- a/sysdeps/aarch64/multiarch/init-arch.h +++ b/sysdeps/aarch64/multiarch/init-arch.h @@ -36,5 +36,7 @@ MTE_ENABLED (); \ bool __attribute__((unused)) sve = \ GLRO(dl_aarch64_cpu_features).sve; \ + bool __attribute__((unused)) prefer_sve_ifuncs = \ + GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \ bool __attribute__((unused)) mops = \ GLRO(dl_aarch64_cpu_features).mops; diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c index 6471fe82e3..e7c7795db6 100644 --- a/sysdeps/aarch64/multiarch/memcpy.c +++ b/sysdeps/aarch64/multiarch/memcpy.c @@ -47,7 +47,7 @@ select_memcpy_ifunc (void) { if (IS_A64FX (midr)) return __memcpy_a64fx; - return __memcpy_sve; + return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic; } if (IS_THUNDERX (midr)) diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c index 7602a5d57d..6b77166851 100644 --- a/sysdeps/aarch64/multiarch/memmove.c +++ b/sysdeps/aarch64/multiarch/memmove.c @@ -47,7 +47,7 @@ select_memmove_ifunc (void) { if (IS_A64FX (midr)) return __memmove_a64fx; - return __memmove_sve; + return prefer_sve_ifuncs ? 
__memmove_sve : __memmove_generic; } if (IS_THUNDERX (midr)) diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c index a11a86efab..4a205a6b35 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c @@ -20,6 +20,7 @@ #include <sys/auxv.h> #include <elf/dl-hwcaps.h> #include <sys/prctl.h> +#include <sys/utsname.h> #define DCZID_DZP_MASK (1 << 4) #define DCZID_BS_MASK (0xf) @@ -57,6 +58,46 @@ get_midr_from_mcpu (const char *mcpu) return UINT64_MAX; } +#if __LINUX_KERNEL_VERSION < 0x060200 + +/* Return true if we prefer using SVE in string ifuncs. Old kernels disable + SVE after every system call which results in unnecessary traps if memcpy + uses SVE. This is true for kernels between 4.15.0 and before 6.2.0, except + for 5.14.0 which was patched. For these versions return false to avoid using + SVE ifuncs. + Parse the kernel version into a 24-bit kernel.major.minor value without + calling any library functions. If uname() is not supported or if the version + format is not recognized, assume the kernel is modern and return true. */ + +static inline bool +prefer_sve_ifuncs (void) +{ + struct utsname buf; + const char *p = &buf.release[0]; + int kernel = 0; + int val; + + if (__uname (&buf) < 0) + return true; + + for (int shift = 16; shift >= 0; shift -= 8) + { + for (val = 0; *p >= '0' && *p <= '9'; p++) + val = val * 10 + *p - '0'; + kernel |= (val & 255) << shift; + if (*p++ != '.') + break; + } + + if (kernel >= 0x060200 || kernel == 0x050e00) + return true; + if (kernel >= 0x040f00) + return false; + return true; +} + +#endif + static inline void init_cpu_features (struct cpu_features *cpu_features) { @@ -119,6 +160,13 @@ init_cpu_features (struct cpu_features *cpu_features) /* Check if SVE is supported. 
*/ cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; + cpu_features->prefer_sve_ifuncs = cpu_features->sve; + +#if __LINUX_KERNEL_VERSION < 0x060200 + if (cpu_features->sve) + cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs (); +#endif + /* Check if MOPS is supported. */ cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; } diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h index 2cf745cd19..351a619dcb 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h @@ -71,6 +71,7 @@ struct cpu_features /* Currently, the GLIBC memory tagging tunable only defines 8 bits. */ uint8_t mte_state; bool sve; + bool prefer_sve_ifuncs; bool mops; };