From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7844) id B840A385842B; Wed, 13 Jul 2022 22:54:27 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B840A385842B Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Noah Goldstein To: glibc-cvs@sourceware.org Subject: [glibc] x86: Move wcscmp SSE2 implementation to multiarch/wcscmp-sse2.S X-Act-Checkin: glibc X-Git-Author: Noah Goldstein X-Git-Refname: refs/heads/master X-Git-Oldrev: d561fbb041fe6aa205f652aecefe4bb84fd124a5 X-Git-Newrev: 427eaa2c8547d61e1b1a09be5d58992ed5211c67 Message-Id: <20220713225427.B840A385842B@sourceware.org> Date: Wed, 13 Jul 2022 22:54:27 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 13 Jul 2022 22:54:27 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=427eaa2c8547d61e1b1a09be5d58992ed5211c67 commit 427eaa2c8547d61e1b1a09be5d58992ed5211c67 Author: Noah Goldstein Date: Tue Jul 12 12:28:07 2022 -0700 x86: Move wcscmp SSE2 implementation to multiarch/wcscmp-sse2.S This commit doesn't affect libc.so.6, it's just housekeeping to prepare for adding explicit ISA level support. Tested build on x86_64 and x86_32 with/without multiarch. Diff: --- sysdeps/x86_64/multiarch/wcscmp-sse2.S | 936 ++++++++++++++++++++++++++++++++- sysdeps/x86_64/wcscmp.S | 932 +------------------------------- 2 files changed, 934 insertions(+), 934 deletions(-) diff --git a/sysdeps/x86_64/multiarch/wcscmp-sse2.S b/sysdeps/x86_64/multiarch/wcscmp-sse2.S index 72a19bd64d..6cb7d9faf9 100644 --- a/sysdeps/x86_64/multiarch/wcscmp-sse2.S +++ b/sysdeps/x86_64/multiarch/wcscmp-sse2.S @@ -16,8 +16,936 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. 
*/ -#if IS_IN (libc) -# define __wcscmp __wcscmp_sse2 -#endif +#define USE_AS_WCSCMP +#define STRCMP_ISA _sse2 +#include "strcmp-naming.h" -#include "../wcscmp.S" +#include + +/* Note: wcscmp uses signed comparison, not unsighed as in strcmp function. */ + + .text +ENTRY (STRCMP) +/* + * This implementation uses SSE to compare up to 16 bytes at a time. +*/ + mov %esi, %eax + mov %edi, %edx + pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ + mov %al, %ch + mov %dl, %cl + and $63, %eax /* rsi alignment in cache line */ + and $63, %edx /* rdi alignment in cache line */ + and $15, %cl + jz L(continue_00) + cmp $16, %edx + jb L(continue_0) + cmp $32, %edx + jb L(continue_16) + cmp $48, %edx + jb L(continue_32) + +L(continue_48): + and $15, %ch + jz L(continue_48_00) + cmp $16, %eax + jb L(continue_0_48) + cmp $32, %eax + jb L(continue_16_48) + cmp $48, %eax + jb L(continue_32_48) + + .p2align 4 +L(continue_48_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rdi), %xmm1 + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_48_48) + +L(continue_0): + and $15, %ch + jz L(continue_0_00) + cmp $16, %eax + jb L(continue_0_0) + cmp $32, %eax + jb L(continue_0_16) + cmp $48, %eax + jb L(continue_0_32) + + .p2align 4 +L(continue_0_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + mov 48(%rsi), %ecx + cmp %ecx, 48(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 52(%rsi), %ecx + cmp %ecx, 52(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 56(%rsi), %ecx + cmp %ecx, 56(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 60(%rsi), %ecx + cmp %ecx, 60(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + add $64, %rsi + add $64, %rdi + jmp L(continue_0_48) + + .p2align 4 +L(continue_00): + and $15, %ch + jz L(continue_00_00) + cmp $16, %eax + jb L(continue_00_0) + cmp $32, %eax + jb L(continue_00_16) + cmp $48, %eax + jb L(continue_00_32) + + .p2align 4 +L(continue_00_48): + pcmpeqd (%rdi), %xmm0 + mov (%rdi), %eax + pmovmskb %xmm0, %ecx + test %ecx, %ecx + jnz L(less4_double_words1) + + cmp (%rsi), %eax + jne L(nequal) + + mov 4(%rdi), %eax + cmp 4(%rsi), %eax + jne L(nequal) + + mov 8(%rdi), %eax + cmp 8(%rsi), %eax + jne L(nequal) + + mov 12(%rdi), %eax + cmp 12(%rsi), %eax + jne L(nequal) + + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ + pcmpeqd 48(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_32): + and $15, %ch + jz L(continue_32_00) + cmp $16, %eax + jb L(continue_0_32) + cmp $32, %eax + jb L(continue_16_32) + cmp $48, %eax + jb L(continue_32_32) + + .p2align 4 +L(continue_32_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 16(%rsi), %ecx + cmp %ecx, 16(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 20(%rsi), %ecx + cmp %ecx, 20(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 24(%rsi), %ecx + cmp %ecx, 24(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 28(%rsi), %ecx + cmp %ecx, 28(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rdi), %xmm1 + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_32_48) + + .p2align 4 +L(continue_16): + and $15, %ch + jz L(continue_16_00) + cmp $16, %eax + jb L(continue_0_16) + cmp $32, %eax + jb L(continue_16_16) + cmp $48, %eax + jb L(continue_16_32) + + .p2align 4 +L(continue_16_48): + mov (%rsi), %ecx + cmp %ecx, (%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + mov 32(%rsi), %ecx + cmp %ecx, 32(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 36(%rsi), %ecx + cmp %ecx, 36(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 40(%rsi), %ecx + cmp %ecx, 40(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 44(%rsi), %ecx + cmp %ecx, 44(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + movdqu 48(%rdi), %xmm1 + movdqu 48(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_16_48) + + .p2align 4 +L(continue_00_00): + movdqa (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqa 16(%rdi), %xmm3 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqa 32(%rdi), %xmm5 + pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%rsi), %xmm5 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm5 /* packed sub of comparison results*/ + pmovmskb %xmm5, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqa 48(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_00_00) + + .p2align 4 +L(continue_00_32): + movdqu (%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ + pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_00_16): + movdqu (%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_00_0): + movdqu (%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ + pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm2 /* packed sub of comparison results*/ + pmovmskb %xmm2, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %rsi + add $48, %rdi + jmp L(continue_00_48) + + .p2align 4 +L(continue_48_00): + pcmpeqd (%rsi), %xmm0 + mov (%rdi), %eax + pmovmskb %xmm0, %ecx + test %ecx, %ecx + jnz L(less4_double_words1) + + cmp (%rsi), %eax + jne L(nequal) + + mov 4(%rdi), %eax + cmp 4(%rsi), %eax + jne L(nequal) + + mov 8(%rdi), %eax + cmp 8(%rsi), %eax + jne L(nequal) + + mov 12(%rdi), %eax + cmp 12(%rsi), %eax + jne L(nequal) + + movdqu 16(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + movdqu 48(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_48) + + add $64, %rsi + add $64, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_32_00): + movdqu (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_16_00): + movdqu (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_0_00): + movdqu (%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %rsi + add $48, %rdi + jmp L(continue_48_00) + + .p2align 4 +L(continue_32_32): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_48_48) + + .p2align 4 +L(continue_16_16): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm3 + movdqu 16(%rsi), %xmm4 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_48_48) + + .p2align 4 +L(continue_0_0): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm3 + movdqu 16(%rsi), %xmm4 + pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm3 /* packed sub of comparison results*/ + pmovmskb %xmm3, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + movdqu 32(%rdi), %xmm1 + movdqu 32(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_32) + + add $48, %rsi + add $48, %rdi + jmp L(continue_48_48) + + .p2align 4 +L(continue_0_16): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + movdqu 16(%rdi), %xmm1 + movdqu 16(%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words_16) + + add $32, %rsi + add $32, %rdi + jmp L(continue_32_48) + + .p2align 4 +L(continue_0_32): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_16_48) + + .p2align 4 +L(continue_16_32): + movdqu (%rdi), %xmm1 + movdqu (%rsi), %xmm2 + pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ + pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ + psubb %xmm0, %xmm1 /* packed sub of comparison results*/ + pmovmskb %xmm1, %edx + sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ + jnz L(less4_double_words) + + add $16, %rsi + add $16, %rdi + jmp L(continue_32_48) + + .p2align 4 +L(less4_double_words1): + cmp (%rsi), %eax + jne L(nequal) + test %eax, %eax + jz L(equal) + + mov 4(%rsi), %ecx + cmp %ecx, 4(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 8(%rsi), %ecx + cmp %ecx, 8(%rdi) + jne L(nequal) + test %ecx, %ecx + jz L(equal) + + mov 12(%rsi), %ecx + cmp %ecx, 12(%rdi) + jne L(nequal) + xor %eax, %eax + ret + + .p2align 4 +L(less4_double_words): + xor %eax, %eax + test %dl, %dl + jz L(next_two_double_words) + and $15, %dl + jz L(second_double_word) + mov (%rdi), %eax + cmp (%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(second_double_word): + mov 4(%rdi), %eax + cmp 4(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(next_two_double_words): + and $15, %dh + jz L(fourth_double_word) + mov 8(%rdi), %eax + cmp 8(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(fourth_double_word): + mov 12(%rdi), %eax + cmp 12(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(less4_double_words_16): + xor %eax, %eax + test %dl, %dl + jz L(next_two_double_words_16) + and $15, %dl + jz L(second_double_word_16) + mov 16(%rdi), %eax + cmp 16(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(second_double_word_16): + mov 20(%rdi), %eax + cmp 20(%rsi), %eax + jne 
L(nequal) + ret + + .p2align 4 +L(next_two_double_words_16): + and $15, %dh + jz L(fourth_double_word_16) + mov 24(%rdi), %eax + cmp 24(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(fourth_double_word_16): + mov 28(%rdi), %eax + cmp 28(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(less4_double_words_32): + xor %eax, %eax + test %dl, %dl + jz L(next_two_double_words_32) + and $15, %dl + jz L(second_double_word_32) + mov 32(%rdi), %eax + cmp 32(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(second_double_word_32): + mov 36(%rdi), %eax + cmp 36(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(next_two_double_words_32): + and $15, %dh + jz L(fourth_double_word_32) + mov 40(%rdi), %eax + cmp 40(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(fourth_double_word_32): + mov 44(%rdi), %eax + cmp 44(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(less4_double_words_48): + xor %eax, %eax + test %dl, %dl + jz L(next_two_double_words_48) + and $15, %dl + jz L(second_double_word_48) + mov 48(%rdi), %eax + cmp 48(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(second_double_word_48): + mov 52(%rdi), %eax + cmp 52(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(next_two_double_words_48): + and $15, %dh + jz L(fourth_double_word_48) + mov 56(%rdi), %eax + cmp 56(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(fourth_double_word_48): + mov 60(%rdi), %eax + cmp 60(%rsi), %eax + jne L(nequal) + ret + + .p2align 4 +L(nequal): + mov $1, %eax + jg L(nequal_bigger) + neg %eax + +L(nequal_bigger): + ret + + .p2align 4 +L(equal): + xor %rax, %rax + ret + +END (STRCMP) diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S index 5cb42f47b9..e04cdbf5fe 100644 --- a/sysdeps/x86_64/wcscmp.S +++ b/sysdeps/x86_64/wcscmp.S @@ -16,936 +16,8 @@ License along with the GNU C Library; if not, see . */ -#include +/* Symbol = __wcscmp. */ -/* Note: wcscmp uses signed comparison, not unsighed as in strcmp function. 
*/ - - .text -ENTRY (__wcscmp) -/* - * This implementation uses SSE to compare up to 16 bytes at a time. -*/ - mov %esi, %eax - mov %edi, %edx - pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ - mov %al, %ch - mov %dl, %cl - and $63, %eax /* rsi alignment in cache line */ - and $63, %edx /* rdi alignment in cache line */ - and $15, %cl - jz L(continue_00) - cmp $16, %edx - jb L(continue_0) - cmp $32, %edx - jb L(continue_16) - cmp $48, %edx - jb L(continue_32) - -L(continue_48): - and $15, %ch - jz L(continue_48_00) - cmp $16, %eax - jb L(continue_0_48) - cmp $32, %eax - jb L(continue_16_48) - cmp $48, %eax - jb L(continue_32_48) - - .p2align 4 -L(continue_48_48): - mov (%rsi), %ecx - cmp %ecx, (%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%rsi), %ecx - cmp %ecx, 4(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%rsi), %ecx - cmp %ecx, 8(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%rsi), %ecx - cmp %ecx, 12(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%rdi), %xmm1 - movdqu 16(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%rdi), %xmm1 - movdqu 32(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%rdi), %xmm1 - movdqu 48(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %rsi - add $64, %rdi - jmp L(continue_48_48) - -L(continue_0): - and $15, %ch - jz L(continue_0_00) - cmp $16, %eax - jb L(continue_0_0) - cmp $32, %eax - jb L(continue_0_16) - cmp $48, %eax - jb L(continue_0_32) - - .p2align 4 -L(continue_0_48): - mov (%rsi), %ecx - cmp %ecx, (%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%rsi), %ecx - cmp %ecx, 4(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%rsi), %ecx - cmp %ecx, 8(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%rsi), %ecx - cmp %ecx, 12(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%rdi), %xmm1 - movdqu 16(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%rdi), %xmm1 - movdqu 32(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - mov 48(%rsi), %ecx - cmp %ecx, 48(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 52(%rsi), %ecx - cmp %ecx, 52(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 56(%rsi), %ecx - cmp %ecx, 56(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 60(%rsi), %ecx - cmp %ecx, 60(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - add $64, %rsi - add $64, %rdi - jmp L(continue_0_48) - - .p2align 4 -L(continue_00): - and $15, %ch - jz L(continue_00_00) - cmp $16, %eax - jb L(continue_00_0) - cmp $32, %eax - jb L(continue_00_16) - cmp $48, %eax - jb L(continue_00_32) - - .p2align 4 -L(continue_00_48): - pcmpeqd (%rdi), %xmm0 - mov (%rdi), %eax - pmovmskb %xmm0, %ecx - test %ecx, %ecx - jnz L(less4_double_words1) - - cmp (%rsi), %eax - jne L(nequal) - - mov 4(%rdi), %eax - cmp 4(%rsi), %eax - jne L(nequal) - - mov 8(%rdi), %eax - cmp 8(%rsi), %eax - jne L(nequal) - - mov 12(%rdi), %eax - cmp 12(%rsi), %eax - jne L(nequal) - - movdqu 16(%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 48(%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %rsi - add $64, %rdi - jmp L(continue_00_48) - - .p2align 4 -L(continue_32): - and $15, %ch - jz L(continue_32_00) - cmp $16, %eax - jb L(continue_0_32) - cmp $32, %eax - jb L(continue_16_32) - cmp $48, %eax - jb L(continue_32_32) - - .p2align 4 -L(continue_32_48): - mov (%rsi), %ecx - cmp %ecx, (%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%rsi), %ecx - cmp %ecx, 4(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%rsi), %ecx - cmp %ecx, 8(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%rsi), %ecx - cmp %ecx, 12(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 16(%rsi), %ecx - cmp %ecx, 16(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 20(%rsi), %ecx - cmp %ecx, 20(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 24(%rsi), %ecx - cmp %ecx, 24(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 28(%rsi), %ecx - cmp %ecx, 28(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 32(%rdi), %xmm1 - movdqu 32(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%rdi), %xmm1 - movdqu 48(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %rsi - add $64, %rdi - jmp L(continue_32_48) - - .p2align 4 -L(continue_16): - and $15, %ch - jz L(continue_16_00) - cmp $16, %eax - jb L(continue_0_16) - cmp $32, %eax - jb L(continue_16_16) - cmp $48, %eax - jb L(continue_16_32) - - .p2align 4 -L(continue_16_48): - mov (%rsi), %ecx - cmp %ecx, (%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%rsi), %ecx - cmp %ecx, 4(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%rsi), %ecx - cmp %ecx, 8(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%rsi), %ecx - cmp %ecx, 12(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%rdi), %xmm1 - movdqu 16(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - mov 32(%rsi), %ecx - cmp %ecx, 32(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 36(%rsi), %ecx - cmp %ecx, 36(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 40(%rsi), %ecx - cmp %ecx, 40(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 44(%rsi), %ecx - cmp %ecx, 44(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 48(%rdi), %xmm1 - movdqu 48(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %rsi - add $64, %rdi - jmp L(continue_16_48) - - .p2align 4 -L(continue_00_00): - movdqa (%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqa 16(%rdi), %xmm3 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%rsi), %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqa 32(%rdi), %xmm5 - pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%rsi), %xmm5 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm5 /* packed sub of comparison results*/ - pmovmskb %xmm5, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqa 48(%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %rsi - add $64, %rdi - jmp L(continue_00_00) - - .p2align 4 -L(continue_00_32): - movdqu (%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ - pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %rsi - add $16, %rdi - jmp L(continue_00_48) - - .p2align 4 -L(continue_00_16): - movdqu (%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %rsi - add $32, %rdi - jmp L(continue_00_48) - - .p2align 4 -L(continue_00_0): - movdqu (%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%rsi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %rsi - add $48, %rdi - jmp L(continue_00_48) - - .p2align 4 -L(continue_48_00): - pcmpeqd (%rsi), %xmm0 - mov (%rdi), %eax - pmovmskb %xmm0, %ecx - test %ecx, %ecx - jnz L(less4_double_words1) - - cmp (%rsi), %eax - jne L(nequal) - - mov 4(%rdi), %eax - cmp 4(%rsi), %eax - jne L(nequal) - - mov 8(%rdi), %eax - cmp 8(%rsi), %eax - jne L(nequal) - - mov 12(%rdi), %eax - cmp 12(%rsi), %eax - jne L(nequal) - - movdqu 16(%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %rsi - add $64, %rdi - jmp L(continue_48_00) - - .p2align 4 -L(continue_32_00): - movdqu (%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %rsi - add $16, %rdi - jmp L(continue_48_00) - - .p2align 4 -L(continue_16_00): - movdqu (%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %rsi - add $32, %rdi - jmp L(continue_48_00) - - .p2align 4 -L(continue_0_00): - movdqu (%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%rdi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %rsi - add $48, %rdi - jmp L(continue_48_00) - - .p2align 4 -L(continue_32_32): - movdqu (%rdi), %xmm1 - movdqu (%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %rsi - add $16, %rdi - jmp L(continue_48_48) - - .p2align 4 -L(continue_16_16): - movdqu (%rdi), %xmm1 - movdqu (%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%rdi), %xmm3 - movdqu 16(%rsi), %xmm4 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %rsi - add $32, %rdi - jmp L(continue_48_48) - - .p2align 4 -L(continue_0_0): - movdqu (%rdi), %xmm1 - movdqu (%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%rdi), %xmm3 - movdqu 16(%rsi), %xmm4 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%rdi), %xmm1 - movdqu 32(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %rsi - add $48, %rdi - jmp L(continue_48_48) - - .p2align 4 -L(continue_0_16): - movdqu (%rdi), %xmm1 - movdqu (%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%rdi), %xmm1 - movdqu 16(%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %rsi - add $32, %rdi - jmp L(continue_32_48) - - .p2align 4 -L(continue_0_32): - movdqu (%rdi), %xmm1 - movdqu (%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %rsi - add $16, %rdi - jmp L(continue_16_48) - - .p2align 4 -L(continue_16_32): - movdqu (%rdi), %xmm1 - movdqu (%rsi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %rsi - add $16, %rdi - jmp L(continue_32_48) - - .p2align 4 -L(less4_double_words1): - cmp (%rsi), %eax - jne L(nequal) - test %eax, %eax - jz L(equal) - - mov 4(%rsi), %ecx - cmp %ecx, 4(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%rsi), %ecx - cmp %ecx, 8(%rdi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%rsi), %ecx - cmp %ecx, 12(%rdi) - jne L(nequal) - xor %eax, %eax - ret - - .p2align 4 -L(less4_double_words): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words) - and $15, %dl - jz L(second_double_word) - mov (%rdi), %eax - cmp (%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(second_double_word): - mov 4(%rdi), %eax - cmp 4(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(next_two_double_words): - and $15, %dh - jz L(fourth_double_word) - mov 8(%rdi), %eax - cmp 8(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(fourth_double_word): - mov 12(%rdi), %eax - cmp 12(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(less4_double_words_16): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_16) - and $15, %dl - jz L(second_double_word_16) - mov 16(%rdi), %eax - cmp 16(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(second_double_word_16): - mov 20(%rdi), %eax - cmp 20(%rsi), %eax - jne 
L(nequal) - ret - - .p2align 4 -L(next_two_double_words_16): - and $15, %dh - jz L(fourth_double_word_16) - mov 24(%rdi), %eax - cmp 24(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(fourth_double_word_16): - mov 28(%rdi), %eax - cmp 28(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(less4_double_words_32): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_32) - and $15, %dl - jz L(second_double_word_32) - mov 32(%rdi), %eax - cmp 32(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(second_double_word_32): - mov 36(%rdi), %eax - cmp 36(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(next_two_double_words_32): - and $15, %dh - jz L(fourth_double_word_32) - mov 40(%rdi), %eax - cmp 40(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(fourth_double_word_32): - mov 44(%rdi), %eax - cmp 44(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(less4_double_words_48): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_48) - and $15, %dl - jz L(second_double_word_48) - mov 48(%rdi), %eax - cmp 48(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(second_double_word_48): - mov 52(%rdi), %eax - cmp 52(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(next_two_double_words_48): - and $15, %dh - jz L(fourth_double_word_48) - mov 56(%rdi), %eax - cmp 56(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(fourth_double_word_48): - mov 60(%rdi), %eax - cmp 60(%rsi), %eax - jne L(nequal) - ret - - .p2align 4 -L(nequal): - mov $1, %eax - jg L(nequal_bigger) - neg %eax - -L(nequal_bigger): - ret - - .p2align 4 -L(equal): - xor %rax, %rax - ret - -END (__wcscmp) -#ifndef __wcscmp +#include "multiarch/wcscmp-sse2.S" libc_hidden_def (__wcscmp) weak_alias (__wcscmp, wcscmp) -#endif