* better memset for arm
@ 2003-09-14 17:15 Philip Blundell
0 siblings, 0 replies; only message in thread
From: Philip Blundell @ 2003-09-14 17:15 UTC (permalink / raw)
To: libc-hacker
[-- Attachment #1: Type: text/plain, Size: 95 bytes --]
The existing implementation of memset() on arm is fairly poor. Here's a
better version.
p.
[-- Attachment #2: memset.diff --]
[-- Type: text/plain, Size: 2081 bytes --]
2003-09-14 Philip Blundell <philb@gnu.org>
* sysdeps/arm/memset.S: Rewrite.
Index: sysdeps/arm/memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/arm/memset.S,v
retrieving revision 1.4
diff -u -r1.4 memset.S
--- sysdeps/arm/memset.S 29 Apr 2003 22:47:20 -0000 1.4
+++ sysdeps/arm/memset.S 14 Sep 2003 17:12:45 -0000
@@ -22,47 +22,47 @@
/* void *memset (dstpp, c, len) */
ENTRY(memset)
- mov a4, a1
- cmp a3, $8 @ at least 8 bytes to do?
- blt 2f
- orr a2, a2, a2, lsl $8
- orr a2, a2, a2, lsl $16
+ mov r3, r0 @ r3 = write pointer; r0 is not modified below
+ cmp r2, #8
+ bcc 2f @ less than 8 bytes to move
+
1:
- tst a4, $3 @ aligned yet?
- strneb a2, [a4], $1
- subne a3, a3, $1
+ tst r3, #3 @ aligned yet?
+ strneb r1, [r3], #1
+ subne r2, r2, #1 @ no S suffix: the bne below still tests the tst result
bne 1b
- mov ip, a2
+
+ and r1, r1, #255 @ keep only the low byte of c (drops sign-extension bits)
+ orr r1, r1, r1, lsl #8
+ orr r1, r1, r1, lsl #16 @ r1 = fill byte replicated into all four bytes
+
1:
- cmp a3, $8 @ 8 bytes still to do?
- blt 2f
- stmia a4!, {a2, ip}
- sub a3, a3, $8
- cmp a3, $8 @ 8 bytes still to do?
- blt 2f
- stmia a4!, {a2, ip}
- sub a3, a3, $8
- cmp a3, $8 @ 8 bytes still to do?
- blt 2f
- stmia a4!, {a2, ip}
- sub a3, a3, $8
- cmp a3, $8 @ 8 bytes still to do?
- stmgeia a4!, {a2, ip}
- subge a3, a3, $8
- bge 1b
+ subs r2, r2, #8 @ carry stays set only if at least 8 bytes remained
+ strcs r1, [r3], #4 @ store up to 32 bytes per loop iteration
+ strcs r1, [r3], #4
+ subcss r2, r2, #8 @ once carry clears, every later cs op is skipped
+ strcs r1, [r3], #4
+ strcs r1, [r3], #4
+ subcss r2, r2, #8
+ strcs r1, [r3], #4
+ strcs r1, [r3], #4
+ subcss r2, r2, #8
+ strcs r1, [r3], #4
+ strcs r1, [r3], #4
+ bcs 1b
+
+ and r2, r2, #7 @ r2 wrapped below zero above; low 3 bits = bytes left
2:
- movs a3, a3 @ anything left?
- RETINSTR(moveq,pc,lr) @ nope
- rsb a3, a3, $7
- add pc, pc, a3, lsl $2
- mov r0, r0
- strb a2, [a4], $1
- strb a2, [a4], $1
- strb a2, [a4], $1
- strb a2, [a4], $1
- strb a2, [a4], $1
- strb a2, [a4], $1
- strb a2, [a4], $1
- RETINSTR(mov,pc,lr)
+ subs r2, r2, #1 @ store up to 4 bytes per loop iteration
+ strcsb r1, [r3], #1
+ subcss r2, r2, #1
+ strcsb r1, [r3], #1
+ subcss r2, r2, #1
+ strcsb r1, [r3], #1
+ subcss r2, r2, #1
+ strcsb r1, [r3], #1
+ bcs 2b
+
+ DO_RET(lr)
END(memset)
libc_hidden_builtin_def (memset)
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2003-09-14 17:15 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-09-14 17:15 better memset for arm Philip Blundell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).