public inbox for glibc-cvs@sourceware.org help / color / mirror / Atom feed
From: Noah Goldstein <nwg@sourceware.org> To: glibc-cvs@sourceware.org Subject: [glibc] x86: Optimize str{n}casecmp TOLOWER logic in strcmp.S Date: Fri, 25 Mar 2022 18:19:02 +0000 (GMT) [thread overview] Message-ID: <20220325181902.8E8D43889806@sourceware.org> (raw) https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=670b54bc585ea4a94f3b2e9272ba44aa6b730b73 commit 670b54bc585ea4a94f3b2e9272ba44aa6b730b73 Author: Noah Goldstein <goldstein.w.n@gmail.com> Date: Wed Mar 23 16:57:36 2022 -0500 x86: Optimize str{n}casecmp TOLOWER logic in strcmp.S Slightly faster method of doing TOLOWER that saves an instruction. Also replace the hard coded 5-byte NOP with .p2align 4. On builds with CET enabled this misaligned entry to strcasecmp. geometric_mean(N=40) of all benchmarks New / Original: .894 All string/memory tests pass. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Diff: --- sysdeps/x86_64/strcmp.S | 64 ++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S index e2ab59c555..99d8b36f1d 100644 --- a/sysdeps/x86_64/strcmp.S +++ b/sysdeps/x86_64/strcmp.S @@ -75,9 +75,8 @@ ENTRY2 (__strcasecmp) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax mov %fs:(%rax),%RDX_LP - // XXX 5 byte should be before the function - /* 5-byte NOP. */ - .byte 0x0f,0x1f,0x44,0x00,0x00 + /* Either 1 or 5 bytes (dependeing if CET is enabled). */ + .p2align 4 END2 (__strcasecmp) # ifndef NO_NOLOCALE_ALIAS weak_alias (__strcasecmp, strcasecmp) @@ -94,9 +93,8 @@ ENTRY2 (__strncasecmp) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax mov %fs:(%rax),%RCX_LP - // XXX 5 byte should be before the function - /* 5-byte NOP. */ - .byte 0x0f,0x1f,0x44,0x00,0x00 + /* Either 1 or 5 bytes (dependeing if CET is enabled). 
*/ + .p2align 4 END2 (__strncasecmp) # ifndef NO_NOLOCALE_ALIAS weak_alias (__strncasecmp, strncasecmp) @@ -146,22 +144,22 @@ ENTRY (STRCMP) #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L .section .rodata.cst16,"aM",@progbits,16 .align 16 -.Lbelowupper: - .quad 0x4040404040404040 - .quad 0x4040404040404040 -.Ltopupper: - .quad 0x5b5b5b5b5b5b5b5b - .quad 0x5b5b5b5b5b5b5b5b -.Ltouppermask: +.Llcase_min: + .quad 0x3f3f3f3f3f3f3f3f + .quad 0x3f3f3f3f3f3f3f3f +.Llcase_max: + .quad 0x9999999999999999 + .quad 0x9999999999999999 +.Lcase_add: .quad 0x2020202020202020 .quad 0x2020202020202020 .previous - movdqa .Lbelowupper(%rip), %xmm5 -# define UCLOW_reg %xmm5 - movdqa .Ltopupper(%rip), %xmm6 -# define UCHIGH_reg %xmm6 - movdqa .Ltouppermask(%rip), %xmm7 -# define LCQWORD_reg %xmm7 + movdqa .Llcase_min(%rip), %xmm5 +# define LCASE_MIN_reg %xmm5 + movdqa .Llcase_max(%rip), %xmm6 +# define LCASE_MAX_reg %xmm6 + movdqa .Lcase_add(%rip), %xmm7 +# define CASE_ADD_reg %xmm7 #endif cmp $0x30, %ecx ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */ @@ -172,22 +170,18 @@ ENTRY (STRCMP) movhpd 8(%rdi), %xmm1 movhpd 8(%rsi), %xmm2 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ - movdqa reg1, %xmm8; \ - movdqa UCHIGH_reg, %xmm9; \ - movdqa reg2, %xmm10; \ - movdqa UCHIGH_reg, %xmm11; \ - pcmpgtb UCLOW_reg, %xmm8; \ - pcmpgtb reg1, %xmm9; \ - pcmpgtb UCLOW_reg, %xmm10; \ - pcmpgtb reg2, %xmm11; \ - pand %xmm9, %xmm8; \ - pand %xmm11, %xmm10; \ - pand LCQWORD_reg, %xmm8; \ - pand LCQWORD_reg, %xmm10; \ - por %xmm8, reg1; \ - por %xmm10, reg2 - TOLOWER (%xmm1, %xmm2) +# define TOLOWER(reg1, reg2) \ + movdqa LCASE_MIN_reg, %xmm8; \ + movdqa LCASE_MIN_reg, %xmm9; \ + paddb reg1, %xmm8; \ + paddb reg2, %xmm9; \ + pcmpgtb LCASE_MAX_reg, %xmm8; \ + pcmpgtb LCASE_MAX_reg, %xmm9; \ + pandn CASE_ADD_reg, %xmm8; \ + pandn CASE_ADD_reg, %xmm9; \ + paddb %xmm8, reg1; \ + paddb %xmm9, reg2 + TOLOWER (%xmm1, %xmm2) 
#else # define TOLOWER(reg1, reg2) #endif
reply other threads:[~2022-03-25 18:19 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220325181902.8E8D43889806@sourceware.org \ --to=nwg@sourceware.org \ --cc=glibc-cvs@sourceware.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).