From 0b4aae9e15eeb63419dc1df2578b3df50aae7edf Mon Sep 17 00:00:00 2001
From: "H.J. Lu"
Date: Thu, 18 May 2017 12:22:31 -0700
Subject: [PATCH] x86_64: Remove redundant REX bytes from memchr.S

By the x86-64 specification, 32-bit destination registers are
zero-extended to 64 bits.  There is no need to use 64-bit registers
when only the lower 32 bits are non-zero.

	* sysdeps/x86_64/memchr.S (MEMCHR): Use 32-bit registers for
	the lower 32 bits.
---
 sysdeps/x86_64/memchr.S | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 8242f2d..3167cd8 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -31,7 +31,7 @@
 	.text
 ENTRY(MEMCHR)
 	movd	%esi, %xmm1
-	mov	%rdi, %rcx
+	mov	%edi, %ecx

 #ifdef USE_AS_WMEMCHR
 	test	%rdx, %rdx
@@ -44,10 +44,10 @@ ENTRY(MEMCHR)
 	punpcklbw %xmm1, %xmm1
 #endif

-	and	$63, %rcx
+	and	$63, %ecx
 	pshufd	$0, %xmm1, %xmm1

-	cmp	$48, %rcx
+	cmp	$48, %ecx
 	ja	L(crosscache)

 	movdqu	(%rdi), %xmm0
@@ -59,7 +59,7 @@ ENTRY(MEMCHR)
 	sub	$16, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	add	%rcx, %rdx
 	sub	$64, %rdx
@@ -68,7 +68,7 @@ ENTRY(MEMCHR)

 	.p2align 4
 L(crosscache):
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	movdqa	(%rdi), %xmm0

@@ -162,7 +162,7 @@ L(loop_prolog):

 	mov	%rdi, %rcx
 	and	$-64, %rdi
-	and	$63, %rcx
+	and	$63, %ecx
 	add	%rcx, %rdx

 	.p2align 4
@@ -214,7 +214,7 @@ L(align64_loop):

 	.p2align 4
 L(exit_loop):
-	add	$32, %rdx
+	add	$32, %edx
 	jle	L(exit_loop_32)

 	movdqa	(%rdi), %xmm0
@@ -234,32 +234,32 @@ L(exit_loop):
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jle	L(return_null)

 	PCMPEQ	48(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
 	jnz	L(matches48_1)
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret

 	.p2align 4
 L(exit_loop_32):
-	add	$32, %rdx
+	add	$32, %edx
 	movdqa	(%rdi), %xmm0
 	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jbe	L(return_null)

 	PCMPEQ	16(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
 	jnz	L(matches16_1)
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret

 	.p2align 4
@@ -320,7 +320,7 @@ L(matches48_1):

 	.p2align 4
 L(return_null):
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 END(MEMCHR)

--
2.9.4
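
Editor's note, not part of the patch: a minimal sketch of why dropping the 64-bit
forms shrinks the code without changing the result.  The 64-bit register-to-register
move carries a REX.W prefix byte, while the 32-bit form does not, and writing a
32-bit register zero-extends into the full 64-bit register.  The label rex_demo
below is hypothetical; the byte encodings in the comments can be checked by
assembling with gas and disassembling with objdump -d.

	.text
	.globl	rex_demo
rex_demo:
	mov	%rdi, %rcx	# 48 89 f9 - 64-bit form, needs the REX.W prefix (0x48)
	mov	%edi, %ecx	# 89 f9    - 32-bit form, one byte shorter; writing
				#            %ecx clears bits 63:32 of %rcx
	and	$63, %ecx	# same mask the patch applies next; only the low
				# bits are used, so both moves give the same result
	ret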