public inbox for glibc-cvs@sourceware.org help / color / mirror / Atom feed
From: H.J. Lu <hjl@sourceware.org> To: glibc-cvs@sourceware.org Subject: [glibc/release/2.30/master] x86-64: Use ZMM16-ZMM31 in AVX512 memset family functions Date: Thu, 27 Jan 2022 20:47:23 +0000 (GMT) [thread overview] Message-ID: <20220127204723.8CB23385DC31@sourceware.org> (raw) https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=12e572c1500ec947914328ae454235daa69549b9 commit 12e572c1500ec947914328ae454235daa69549b9 Author: H.J. Lu <hjl.tools@gmail.com> Date: Sun Mar 7 09:44:18 2021 -0800 x86-64: Use ZMM16-ZMM31 in AVX512 memset family functions Update ifunc-memset.h/ifunc-wmemset.h to select the function optimized with AVX512 instructions using ZMM16-ZMM31 registers to avoid RTM abort with usable AVX512VL and AVX512BW since VZEROUPPER isn't needed at function exit. (cherry picked from commit 4e2d8f352774b56078c34648b14a2412c38384f4) Diff: --- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 14 +++++++++----- sysdeps/x86_64/multiarch/ifunc-memset.h | 13 ++++++++----- sysdeps/x86_64/multiarch/ifunc-wmemset.h | 12 ++++++------ sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S | 16 ++++++++-------- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 786b5dbdad..2016072489 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -211,10 +211,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_chk_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), @@ -252,10 +254,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), @@ -719,7 +723,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512VL_Usable), __wmemset_evex_unaligned) IFUNC_IMPL_ADD (array, i, wmemset, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __wmemset_avx512_unaligned)) #ifdef SHARED diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h index 4b0ad8b308..198c8c6ba5 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memset.h +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h @@ -53,13 +53,16 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) { - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx512_no_vzeroupper); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx512_unaligned_erms); - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx512_unaligned_erms); + return OPTIMIZE (avx512_unaligned); + } - return OPTIMIZE (avx512_unaligned); + return OPTIMIZE (avx512_no_vzeroupper); } if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h index 59933b58aa..2913a365d8 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h +++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h @@ -33,13 +33,13 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) { - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) - && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512) - && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx512_unaligned); - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) - return OPTIMIZE (evex_unaligned); + { + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + return OPTIMIZE (avx512_unaligned); + + return OPTIMIZE (evex_unaligned); + } if (CPU_FEATURES_CPU_P (cpu_features, RTM)) return OPTIMIZE (avx2_unaligned_rtm); diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S index 0783979ca5..22e7b187c8 100644 --- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S @@ -1,22 +1,22 @@ #if IS_IN (libc) # define VEC_SIZE 64 -# define VEC(i) zmm##i +# define XMM0 xmm16 +# define YMM0 ymm16 +# define VEC0 zmm16 +# define VEC(i) VEC##i # define VMOVU vmovdqu64 # define VMOVA vmovdqa64 +# define VZEROUPPER # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ - vmovd d, %xmm0; \ movq r, %rax; \ - vpbroadcastb %xmm0, %xmm0; \ - vpbroadcastq %xmm0, %zmm0 + vpbroadcastb d, %VEC0 # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ - vmovd d, %xmm0; \ movq r, %rax; \ - vpbroadcastd %xmm0, %xmm0; \ - vpbroadcastq %xmm0, %zmm0 + vpbroadcastd d, %VEC0 -# define SECTION(p) p##.avx512 +# define SECTION(p) p##.evex512 # define MEMSET_SYMBOL(p,s) p##_avx512_##s # define WMEMSET_SYMBOL(p,s) p##_avx512_##s
reply other threads:[~2022-01-27 20:47 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220127204723.8CB23385DC31@sourceware.org \ --to=hjl@sourceware.org \ --cc=glibc-cvs@sourceware.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).