public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed
* better memset for arm
@ 2003-09-14 17:15 Philip Blundell
  0 siblings, 0 replies; only message in thread
From: Philip Blundell @ 2003-09-14 17:15 UTC (permalink / raw)
  To: libc-hacker

[-- Attachment #1: Type: text/plain, Size: 95 bytes --]

The existing implementation of memset() on arm is fairly poor.  Here's a
better version.

p.



[-- Attachment #2: memset.diff --]
[-- Type: text/plain, Size: 2081 bytes --]

2003-09-14  Philip Blundell  <philb@gnu.org>

	* sysdeps/arm/memset.S: Rewrite.

Index: sysdeps/arm/memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/arm/memset.S,v
retrieving revision 1.4
diff -u -r1.4 memset.S
--- sysdeps/arm/memset.S	29 Apr 2003 22:47:20 -0000	1.4
+++ sysdeps/arm/memset.S	14 Sep 2003 17:12:45 -0000
@@ -22,47 +22,52 @@
 /* void *memset (dstpp, c, len) */
 
 ENTRY(memset)
-	mov	a4, a1
-	cmp	a3, $8		@ at least 8 bytes to do?
-	blt	2f
-	orr	a2, a2, a2, lsl $8
-	orr	a2, a2, a2, lsl $16
+	@ In:  r0 = dstpp, r1 = c, r2 = len.  r0 is never modified below, so
+	@ it is still dstpp on return; r3 is the advancing store pointer.
+	mov	r3, r0
+	cmp	r2, #8
+	bcc	2f		@ less than 8 bytes to move
+
 1:
-	tst	a4, $3		@ aligned yet?
-	strneb	a2, [a4], $1
-	subne	a3, a3, $1
+	tst	r3, #3		@ aligned yet?
+	strneb	r1, [r3], #1	@ no: store one byte (only r1[7:0] is written)
+	subne	r2, r2, #1
 	bne	1b
-	mov	ip, a2
+
+	@ Replicate the fill byte into all four byte lanes of r1.  Bits 8-31
+	@ of c are cleared first; otherwise they would be ORed into the upper
+	@ lanes (c = 0x100 would store the word 0x01010100 instead of 0).
+	and	r1, r1, #255
+	orr	r1, r1, r1, lsl $8
+	orr	r1, r1, r1, lsl $16
+
 1:
-	cmp	a3, $8		@ 8 bytes still to do?
-	blt	2f
-	stmia	a4!, {a2, ip}
-	sub	a3, a3, $8
-	cmp	a3, $8		@ 8 bytes still to do?
-	blt	2f
-	stmia	a4!, {a2, ip}
-	sub	a3, a3, $8
-	cmp	a3, $8		@ 8 bytes still to do?
-	blt	2f
-	stmia	a4!, {a2, ip}
-	sub	a3, a3, $8
-	cmp	a3, $8		@ 8 bytes still to do?
-	stmgeia	a4!, {a2, ip}
-	subge	a3, a3, $8
-	bge	1b
+	subs	r2, r2, #8	@ C set <=> at least 8 bytes were left
+	strcs	r1, [r3], #4	@ store up to 32 bytes per loop iteration
+	strcs	r1, [r3], #4
+	subcss	r2, r2, #8	@ once C clears, every later cs op is skipped
+	strcs	r1, [r3], #4
+	strcs	r1, [r3], #4
+	subcss	r2, r2, #8
+	strcs	r1, [r3], #4
+	strcs	r1, [r3], #4
+	subcss	r2, r2, #8
+	strcs	r1, [r3], #4
+	strcs	r1, [r3], #4
+	bcs	1b
+
+	and	r2, r2, #7	@ r2 is (remaining - 8) here; recover 0..7
 2:
-	movs	a3, a3		@ anything left?
-	RETINSTR(moveq,pc,lr)	@ nope
-	rsb	a3, a3, $7
-	add	pc, pc, a3, lsl $2
-	mov	r0, r0
-	strb	a2, [a4], $1
-	strb	a2, [a4], $1
-	strb	a2, [a4], $1
-	strb	a2, [a4], $1
-	strb	a2, [a4], $1
-	strb	a2, [a4], $1
-	strb	a2, [a4], $1
-	RETINSTR(mov,pc,lr)
+	subs	r2, r2, #1	@ store up to 4 bytes per loop iteration
+	strcsb	r1, [r3], #1
+	subcss	r2, r2, #1
+	strcsb	r1, [r3], #1
+	subcss	r2, r2, #1
+	strcsb	r1, [r3], #1
+	subcss	r2, r2, #1
+	strcsb	r1, [r3], #1
+	bcs	2b		@ C clear <=> the count ran out
+	
+	DO_RET(lr)
 END(memset)
 libc_hidden_builtin_def (memset)

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2003-09-14 17:15 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-09-14 17:15 better memset for arm Philip Blundell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).