public inbox for glibc-cvs@sourceware.org help / color / mirror / Atom feed
From: Noah Goldstein <nwg@sourceware.org> To: glibc-cvs@sourceware.org Subject: [glibc] x86: Remove AVX512-VBMI2 instruction from strrchr-evex.S Date: Thu, 20 Oct 2022 19:03:55 +0000 (GMT) [thread overview] Message-ID: <20221020190355.2CD0B384D14D@sourceware.org> (raw) https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c25eb94aed942761aabdcd05239b7e76dcdde098 commit c25eb94aed942761aabdcd05239b7e76dcdde098 Author: Noah Goldstein <goldstein.w.n@gmail.com> Date: Thu Oct 20 10:26:46 2022 -0700 x86: Remove AVX512-VBMI2 instruction from strrchr-evex.S commit b412213eee0afa3b51dfe92b736dfc7c981309f5 Author: Noah Goldstein <goldstein.w.n@gmail.com> Date: Tue Oct 18 17:44:07 2022 -0700 x86: Optimize strrchr-evex.S and implement with VMM headers Added `vpcompress{b|d}` to the page-cross logic, which is an AVX512-VBMI2 instruction. This is not supported on SKX. Since the page-cross logic is relatively cold and the benefit is minimal, revert the page-cross case back to the old logic which is supported on SKX. Tested on x86-64. 
Diff: --- sysdeps/x86_64/multiarch/strrchr-evex.S | 69 ++++++++++++++------------------- 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S index 45487dc87a..26b3457875 100644 --- a/sysdeps/x86_64/multiarch/strrchr-evex.S +++ b/sysdeps/x86_64/multiarch/strrchr-evex.S @@ -29,9 +29,7 @@ # include "x86-evex256-vecs.h" # ifdef USE_AS_WCSRCHR -# define RCX_M cl -# define SHIFT_REG rcx -# define VPCOMPRESS vpcompressd +# define SHIFT_REG rsi # define kunpck_2x kunpckbw # define kmov_2x kmovd # define maskz_2x ecx @@ -46,9 +44,7 @@ # define USE_WIDE_CHAR # else -# define RCX_M ecx # define SHIFT_REG rdi -# define VPCOMPRESS vpcompressb # define kunpck_2x kunpckdq # define kmov_2x kmovq # define maskz_2x rcx @@ -78,7 +74,7 @@ ENTRY_P2ALIGN(STRRCHR, 6) andl $(PAGE_SIZE - 1), %eax cmpl $(PAGE_SIZE - VEC_SIZE), %eax jg L(cross_page_boundary) - +L(page_cross_continue): VMOVU (%rdi), %VMM(1) /* k0 has a 1 for each zero CHAR in VEC(1). */ VPTESTN %VMM(1), %VMM(1), %k0 @@ -86,7 +82,6 @@ ENTRY_P2ALIGN(STRRCHR, 6) test %VRSI, %VRSI jz L(aligned_more) /* fallthrough: zero CHAR in first VEC. */ -L(page_cross_return): /* K1 has a 1 for each search CHAR match in VEC(1). */ VPCMPEQ %VMATCH, %VMM(1), %k1 KMOV %k1, %VRAX @@ -197,7 +192,6 @@ L(first_vec_x2): .p2align 4,, 12 L(aligned_more): -L(page_cross_continue): /* Need to keep original pointer incase VEC(1) has last match. */ movq %rdi, %rsi @@ -353,53 +347,48 @@ L(return_new_match): leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax ret - .p2align 4,, 4 L(cross_page_boundary): + /* eax contains all the page offset bits of src (rdi). `xor rdi, + rax` sets pointer will all page offset bits cleared so + offset of (PAGE_SIZE - VEC_SIZE) will get last aligned VEC + before page cross (guranteed to be safe to read). Doing this + as opposed to `movq %rdi, %rax; andq $-VEC_SIZE, %rax` saves + a bit of code size. 
*/ xorq %rdi, %rax - mov $-1, %VRDX - VMOVU (PAGE_SIZE - VEC_SIZE)(%rax), %VMM(6) - VPTESTN %VMM(6), %VMM(6), %k0 - KMOV %k0, %VRSI - -# ifdef USE_AS_WCSRCHR - movl %edi, %ecx - and $(VEC_SIZE - 1), %ecx - shrl $2, %ecx -# endif - shlx %VGPR(SHIFT_REG), %VRDX, %VRDX + VMOVU (PAGE_SIZE - VEC_SIZE)(%rax), %VMM(1) + VPTESTN %VMM(1), %VMM(1), %k0 + KMOV %k0, %VRCX + /* Shift out zero CHAR matches that are before the begining of + src (rdi). */ # ifdef USE_AS_WCSRCHR - kmovb %edx, %k1 -# else - KMOV %VRDX, %k1 + movl %edi, %esi + andl $(VEC_SIZE - 1), %esi + shrl $2, %esi # endif + shrx %VGPR(SHIFT_REG), %VRCX, %VRCX - /* Need to adjust result to VEC(1) so it can be re-used by - L(return_vec_x0_test). The alternative is to collect VEC(1) - will a page cross load which is far more expensive. */ - VPCOMPRESS %VMM(6), %VMM(1){%k1}{z} - - /* We could technically just jmp back after the vpcompress but - it doesn't save any 16-byte blocks. */ - shrx %VGPR(SHIFT_REG), %VRSI, %VRSI - test %VRSI, %VRSI + test %VRCX, %VRCX jz L(page_cross_continue) - /* Duplicate of return logic from ENTRY. Doesn't cause spill to - next cache line so might as well copy it here. */ - VPCMPEQ %VMATCH, %VMM(1), %k1 + /* Found zero CHAR so need to test for search CHAR. */ + VPCMP $0, %VMATCH, %VMM(1), %k1 KMOV %k1, %VRAX - blsmsk %VRSI, %VRSI - and %VRSI, %VRAX - jz L(ret_page_cross) + /* Shift out search CHAR matches that are before the begining of + src (rdi). */ + shrx %VGPR(SHIFT_REG), %VRAX, %VRAX + + /* Check if any search CHAR match in range. */ + blsmsk %VRCX, %VRCX + and %VRCX, %VRAX + jz L(ret3) bsr %VRAX, %VRAX # ifdef USE_AS_WCSRCHR leaq (%rdi, %rax, CHAR_SIZE), %rax # else addq %rdi, %rax # endif -L(ret_page_cross): +L(ret3): ret - /* 1 byte till next cache line. */ END(STRRCHR) #endif
reply other threads:[~2022-10-20 19:03 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20221020190355.2CD0B384D14D@sourceware.org \ --to=nwg@sourceware.org \ --cc=glibc-cvs@sourceware.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).