public inbox for newlib-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin]     Improve performance of MIPS memcpy.
@ 2015-11-02 21:31 Steve Ellcey
  0 siblings, 0 replies; only message in thread
From: Steve Ellcey @ 2015-11-02 21:31 UTC (permalink / raw)
  To: newlib-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=50455323229d54a19ffc0a6fa2e1f1f24898b9c4

commit 50455323229d54a19ffc0a6fa2e1f1f24898b9c4
Author: Steve Ellcey <Steve.Ellcey@imgtec.com>
Date:   Mon Nov 2 13:30:19 2015 -0800

        Improve performance of MIPS memcpy.
    
    	* libc/machine/mips/memcpy.S (memcpy): Add word copies for small
    	aligned data.

Diff:
---
 newlib/ChangeLog                  |  5 +++++
 newlib/libc/machine/mips/memcpy.S | 45 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index 38726ff..d46870b 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,8 @@
+2015-11-02  Steve Ellcey  <sellcey@imgtec.com>
+
+	* libc/machine/mips/memcpy.S (memcpy): Add word copies for small
+	aligned data.
+
 2015-10-30  Sebastian Huber  <sebastian.huber@embedded-brains.de>
 
 	* libc/include/pthread.h: Include <sched.h> instead of <sys/sched.h>.
diff --git a/newlib/libc/machine/mips/memcpy.S b/newlib/libc/machine/mips/memcpy.S
index ed0edb4..3130f6e 100644
--- a/newlib/libc/machine/mips/memcpy.S
+++ b/newlib/libc/machine/mips/memcpy.S
@@ -311,7 +311,7 @@ L(memcpy):
  * size, copy dst pointer to v0 for the return value.
  */
 	slti	t2,a2,(2 * NSIZE)
-	bne	t2,zero,L(lastb)
+	bne	t2,zero,L(lasts)
 #if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
 	move	v0,zero
 #else
@@ -562,7 +562,7 @@ L(chkw):
  */
 L(chk1w):
 	andi	a2,t8,(NSIZE-1)	/* a2 is the reminder past one (d)word chunks */
-	beq	a2,t8,L(lastb)
+	beq	a2,t8,L(lastw)
 	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
 	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */
 
@@ -574,6 +574,20 @@ L(wordCopy_loop):
 	bne	a0,a3,L(wordCopy_loop)
 	C_ST	REG3,UNIT(-1)(a0)
 
+/* If we have been copying double words, see if we can copy a single word
+   before doing byte copies.  We can have, at most, one word to copy.  */
+
+L(lastw):
+#ifdef USE_DOUBLE
+	andi    t8,a2,3		/* t8 is the remainder past 4 byte chunks.  */
+	beq	t8,a2,L(lastb)	/* No whole word left (a2 < 4): copy bytes.  */
+	move	a2,t8		/* Delay slot; a no-op if taken (t8 == a2).  */
+	lw	REG3,0(a1)	/* Load only once a full word is known to remain.  */
+	sw	REG3,0(a0)
+	PTR_ADDIU a0,a0,4
+	PTR_ADDIU a1,a1,4
+#endif
+
 /* Copy the last 8 (or 16) bytes */
 L(lastb):
 	blez	a2,L(leave)
@@ -588,6 +602,33 @@ L(leave):
 	j	ra
 	nop
 
+/* We jump here with a memcpy of less than 8 or 16 bytes, depending on
+   whether or not USE_DOUBLE is defined.  Instead of just doing byte
+   copies, check the alignment and size and use lw/sw if possible.
+   Otherwise, do byte copies.  */
+
+L(lasts):
+	andi	t8,a2,3		/* t8 = bytes left over after whole words */
+	beq	t8,a2,L(lastb)	/* a2 < 4: no whole word, byte copy only */
+
+	andi	t9,a0,3		/* low two bits of dst */
+	bne	t9,zero,L(lastb)	/* dst not word aligned: byte copy */
+	andi	t9,a1,3		/* low two bits of src */
+	bne	t9,zero,L(lastb)	/* src not word aligned: byte copy */
+
+	PTR_SUBU a3,a2,t8	/* a3 = number of bytes in whole words */
+	PTR_ADDU a3,a0,a3	/* a3 = dst address at which the word loop stops */
+
+L(wcopy_loop):
+	lw	REG3,0(a1)	/* fetch next word from src */
+	PTR_ADDIU a0,a0,4
+	PTR_ADDIU a1,a1,4
+	bne	a0,a3,L(wcopy_loop)
+	sw	REG3,-4(a0)	/* branch delay slot: -4 undoes the a0 bump above */
+
+	b	L(lastb)	/* finish any remaining bytes */
+	move	a2,t8		/* branch delay slot: a2 = leftover byte count */
+
 #ifndef R6_CODE
 /*
  * UNALIGNED case, got here with a3 = "negu a0"


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-11-02 21:31 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-02 21:31 [newlib-cygwin] Improve performance of MIPS memcpy Steve Ellcey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).