From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 98314 invoked by alias); 2 Nov 2015 21:31:06 -0000 Mailing-List: contact newlib-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: newlib-cvs-owner@sourceware.org Received: (qmail 98249 invoked by uid 9333); 2 Nov 2015 21:31:05 -0000 Date: Mon, 02 Nov 2015 21:31:00 -0000 Message-ID: <20151102213105.98247.qmail@sourceware.org> Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Steve Ellcey To: newlib-cvs@sourceware.org Subject: [newlib-cygwin] Improve performance of MIPS memcpy. X-Act-Checkin: newlib-cygwin X-Git-Author: Steve Ellcey X-Git-Refname: refs/heads/master X-Git-Oldrev: 7321662bb0db3bb14e9f0214a663cf013330c248 X-Git-Newrev: 50455323229d54a19ffc0a6fa2e1f1f24898b9c4 X-SW-Source: 2015-q4/txt/msg00025.txt.bz2 https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=50455323229d54a19ffc0a6fa2e1f1f24898b9c4 commit 50455323229d54a19ffc0a6fa2e1f1f24898b9c4 Author: Steve Ellcey Date: Mon Nov 2 13:30:19 2015 -0800 Improve performance of MIPS memcpy. * libc/machine/mips/memcpy.S (memcpy): Add word copies for small aligned data. Diff: --- newlib/ChangeLog | 5 +++++ newlib/libc/machine/mips/memcpy.S | 45 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 38726ff..d46870b 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,8 @@ +2015-11-02 Steve Ellcey + + * libc/machine/mips/memcpy.S (memcpy): Add word copies for small + aligned data. + 2015-10-30 Sebastian Huber * libc/include/pthread.h: Include instead of . diff --git a/newlib/libc/machine/mips/memcpy.S b/newlib/libc/machine/mips/memcpy.S index ed0edb4..3130f6e 100644 --- a/newlib/libc/machine/mips/memcpy.S +++ b/newlib/libc/machine/mips/memcpy.S @@ -311,7 +311,7 @@ L(memcpy): * size, copy dst pointer to v0 for the return value. */ slti t2,a2,(2 * NSIZE) - bne t2,zero,L(lastb) + bne t2,zero,L(lasts) #if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH) move v0,zero #else @@ -562,7 +562,7 @@ L(chkw): */ L(chk1w): andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */ - beq a2,t8,L(lastb) + beq a2,t8,L(lastw) PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */ PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */ @@ -574,6 +574,20 @@ L(wordCopy_loop): bne a0,a3,L(wordCopy_loop) C_ST REG3,UNIT(-1)(a0) +/* If we have been copying double words, see if we can copy a single word + before doing byte copies. We can have, at most, one word to copy. */ + +L(lastw): +#ifdef USE_DOUBLE + andi t8,a2,3 /* a2 is the remainder past 4 byte chunks. */ + beq t8,a2,L(lastb) + lw REG3,0(a1) + sw REG3,0(a0) + PTR_ADDIU a0,a0,4 + PTR_ADDIU a1,a1,4 + move a2,t8 +#endif + /* Copy the last 8 (or 16) bytes */ L(lastb): blez a2,L(leave) @@ -588,6 +602,33 @@ L(leave): j ra nop +/* We jump here with a memcpy of less than 8 or 16 bytes, depending on + whether or not USE_DOUBLE is defined. Instead of just doing byte + copies, check the alignment and size and use lw/sw if possible. + Otherwise, do byte copies. */ + +L(lasts): + andi t8,a2,3 + beq t8,a2,L(lastb) + + andi t9,a0,3 + bne t9,zero,L(lastb) + andi t9,a1,3 + bne t9,zero,L(lastb) + + PTR_SUBU a3,a2,t8 + PTR_ADDU a3,a0,a3 + +L(wcopy_loop): + lw REG3,0(a1) + PTR_ADDIU a0,a0,4 + PTR_ADDIU a1,a1,4 + bne a0,a3,L(wcopy_loop) + sw REG3,-4(a0) + + b L(lastb) + move a2,t8 + #ifndef R6_CODE /* * UNALIGNED case, got here with a3 = "negu a0"