* [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
@ 2013-08-12 7:56 Will Newton
2013-08-27 7:47 ` Will Newton
2013-08-30 0:13 ` Joseph S. Myers
0 siblings, 2 replies; 6+ messages in thread
From: Will Newton @ 2013-08-12 7:56 UTC (permalink / raw)
To: libc-ports; +Cc: patches
This implementation of strlen is faster than the armv6 version for
all string lengths greater than 1 on a Cortex-A15.
ports/ChangeLog.arm:
2013-08-09 Will Newton <will.newton@linaro.org>
* sysdeps/arm/armv6t2/strlen.S: New file.
---
ports/sysdeps/arm/armv6t2/strlen.S | 141 +++++++++++++++++++++++++++++++++++++
1 file changed, 141 insertions(+)
create mode 100644 ports/sysdeps/arm/armv6t2/strlen.S
diff --git a/ports/sysdeps/arm/armv6t2/strlen.S b/ports/sysdeps/arm/armv6t2/strlen.S
new file mode 100644
index 0000000..a52e2e7
--- /dev/null
+++ b/ports/sysdeps/arm/armv6t2/strlen.S
@@ -0,0 +1,141 @@
+/* Copyright (C) 2010-2011,2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ Assumes:
+ ARMv6T2, AArch32
+
+ */
+
+#include <sysdep.h>
+/* Endian-dependent shift mnemonics.  Only S2HI is used in this file (in .Lmisaligned8); S2LO is defined but unused here.  NOTE(review): the names presumably mean "shift towards lower/higher addresses" - confirm.  */
+#ifdef __ARMEB__
+#define S2LO lsl
+#define S2HI lsr
+#else
+#define S2LO lsr
+#define S2HI lsl
+#endif
+
+ /* This code requires Thumb. */
+ .thumb
+ .syntax unified
+
+/* Parameters and result (both live in r0). */
+#define srcin r0 /* Incoming string pointer. */
+#define result r0 /* Length accumulator and return value. */
+
+/* Internal variables. */
+#define src r1 /* Read cursor: srcin rounded down to 8 bytes. */
+#define data1a r2 /* First word of each ldrd pair, then its NUL markers. */
+#define data1b r3 /* Second word of each ldrd pair, then its NUL markers. */
+#define const_m1 r12 /* All ones (0xffffffff). */
+#define const_0 r4 /* Zero; r4 is saved and restored by strlen. */
+#define tmp1 r4 /* Overlaps const_0; holds srcin & 7 during setup. */
+#define tmp2 r5 /* Mask scratch in .Lmisaligned8; r5 is saved and restored by strlen. */
+/* strlen: r0 = srcin on entry, r0 = result (bytes before the first NUL) on return.  Reads 8 bytes per ldrd starting at src = srcin & ~7, four ldrd per loop iteration.  */
+ .text
+ .p2align 6 /* Align the entry point to 64 bytes. */
+ENTRY(strlen)
+ pld [srcin, #0]
+ strd r4, r5, [sp, #-8]! /* Save r4/r5 (const_0/tmp1 and tmp2). */
+ cfi_adjust_cfa_offset (8)
+ cfi_rel_offset (r4, 0)
+ cfi_rel_offset (r5, 4)
+ cfi_remember_state /* Restored at .Lmisaligned8, which follows the epilogue. */
+ bic src, srcin, #7 /* src = srcin rounded down to 8 bytes. */
+ mvn const_m1, #0 /* const_m1 = 0xffffffff. */
+ ands tmp1, srcin, #7 /* tmp1 = srcin & 7; Z clear if misaligned. */
+ pld [src, #32]
+ bne.w .Lmisaligned8
+ mov const_0, #0
+ mov result, #-8 /* srcin is dead; -8 cancels the first add below. */
+.Lloop_aligned:
+ /* Bytes 0-7 of the current 32-byte group. */
+ ldrd data1a, data1b, [src]
+ pld [src, #64]
+ add result, result, #8
+.Lstart_realigned:
+ uadd8 data1a, data1a, const_m1 /* GE<i> = carry of byte i + 0xff, i.e. byte i != 0. */
+ sel data1a, const_0, const_m1 /* Byte i = 0x00 if GE<i>, else 0xff (NUL marker). */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Nonzero iff either word has a NUL; read as markers only if d1a == 0. */
+ cbnz data1b, .Lnull_found
+
+ /* Bytes 8-15. */
+ ldrd data1a, data1b, [src, #8]
+ uadd8 data1a, data1a, const_m1 /* GE<i> set where byte i != 0. */
+ add result, result, #8
+ sel data1a, const_0, const_m1 /* 0xff marks each NUL byte. */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
+ cbnz data1b, .Lnull_found
+
+ /* Bytes 16-23. */
+ ldrd data1a, data1b, [src, #16]
+ uadd8 data1a, data1a, const_m1 /* GE<i> set where byte i != 0. */
+ add result, result, #8
+ sel data1a, const_0, const_m1 /* 0xff marks each NUL byte. */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
+ cbnz data1b, .Lnull_found
+
+ /* Bytes 24-31. */
+ ldrd data1a, data1b, [src, #24]
+ add src, src, #32 /* Advance to the next 32-byte group. */
+ uadd8 data1a, data1a, const_m1 /* GE<i> set where byte i != 0. */
+ add result, result, #8
+ sel data1a, const_0, const_m1 /* 0xff marks each NUL byte. */
+ uadd8 data1b, data1b, const_m1
+ sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
+ cmp data1b, #0
+ beq .Lloop_aligned /* No NUL in these 32 bytes: keep scanning. */
+
+.Lnull_found:
+ cmp data1a, #0 /* EQ if the first word holds no NUL marker. */
+ itt eq
+ addeq result, result, #4 /* Then skip the first word ... */
+ moveq data1a, data1b /* ... and use the second word's markers. */
+#ifndef __ARMEB__
+ rev data1a, data1a /* Little-endian: move the lowest-addressed byte to the top for clz. */
+#endif
+ clz data1a, data1a /* 8 * index of the first NUL marker. */
+ ldrd r4, r5, [sp], #8 /* Restore r4/r5 and pop the frame. */
+ cfi_adjust_cfa_offset (-8)
+ cfi_restore (r4)
+ cfi_restore (r5)
+ add result, result, data1a, lsr #3 /* Bits -> Bytes. */
+ DO_RET(lr)
+
+.Lmisaligned8:
+ cfi_restore_state
+ ldrd data1a, data1b, [src]
+ and tmp2, tmp1, #3 /* Misalignment within a word, in bytes. */
+ rsb result, tmp1, #0 /* result = -(srcin & 7): discount the bytes before srcin. */
+ lsl tmp2, tmp2, #3 /* Bytes -> bits. */
+ tst tmp1, #4 /* NE if srcin lies in the second word. */
+ pld [src, #64]
+ S2HI tmp2, const_m1, tmp2 /* ~tmp2 now covers the bytes before srcin within a word. */
+ orn data1a, data1a, tmp2 /* Force those bytes to 0xff so they cannot match NUL. */
+ itt ne
+ ornne data1b, data1b, tmp2 /* srcin in second word: mask its leading bytes instead ... */
+ movne data1a, const_m1 /* ... and make the whole first word non-NUL. */
+ mov const_0, #0 /* tmp1 (same register) is dead from here. */
+ b .Lstart_realigned
+
+END(strlen)
+libc_hidden_builtin_def (strlen)
--
1.8.1.4
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
2013-08-12 7:56 [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2 Will Newton
@ 2013-08-27 7:47 ` Will Newton
2013-08-30 0:13 ` Joseph S. Myers
1 sibling, 0 replies; 6+ messages in thread
From: Will Newton @ 2013-08-27 7:47 UTC (permalink / raw)
To: libc-ports; +Cc: Patch Tracking
On 12 August 2013 08:56, Will Newton <will.newton@linaro.org> wrote:
>
> This implementation of strlen is faster than the armv6 version for
> all string lengths greater than 1 on a Cortex-A15.
>
> ports/ChangeLog.arm:
>
> 2013-08-09 Will Newton <will.newton@linaro.org>
>
> * sysdeps/arm/armv6t2/strlen.S: New file.
> ---
> ports/sysdeps/arm/armv6t2/strlen.S | 141 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 141 insertions(+)
> create mode 100644 ports/sysdeps/arm/armv6t2/strlen.S
Ping?
--
Will Newton
Toolchain Working Group, Linaro
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
2013-08-12 7:56 [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2 Will Newton
2013-08-27 7:47 ` Will Newton
@ 2013-08-30 0:13 ` Joseph S. Myers
2013-08-30 9:06 ` Will Newton
1 sibling, 1 reply; 6+ messages in thread
From: Joseph S. Myers @ 2013-08-30 0:13 UTC (permalink / raw)
To: Will Newton; +Cc: libc-ports, patches
On Mon, 12 Aug 2013, Will Newton wrote:
> This implementation of strlen is faster than the armv6 version for
> all string lengths greater than 1 on a Cortex-A15.
>
> ports/ChangeLog.arm:
>
> 2013-08-09 Will Newton <will.newton@linaro.org>
>
> * sysdeps/arm/armv6t2/strlen.S: New file.
OK, presuming you've run the full glibc testsuite with this version used.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
2013-08-30 0:13 ` Joseph S. Myers
@ 2013-08-30 9:06 ` Will Newton
2013-08-30 17:36 ` Carlos O'Donell
0 siblings, 1 reply; 6+ messages in thread
From: Will Newton @ 2013-08-30 9:06 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-ports, Patch Tracking
On 30 August 2013 01:13, Joseph S. Myers <joseph@codesourcery.com> wrote:
> On Mon, 12 Aug 2013, Will Newton wrote:
>
>> This implementation of strlen is faster than the armv6 version for
>> all string lengths greater than 1 on a Cortex-A15.
>>
>> ports/ChangeLog.arm:
>>
>> 2013-08-09 Will Newton <will.newton@linaro.org>
>>
>> * sysdeps/arm/armv6t2/strlen.S: New file.
>
> OK, presuming you've run the full glibc testsuite with this version used.
Yes, testsuite is clean. Committed.
--
Will Newton
Toolchain Working Group, Linaro
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
2013-08-30 9:06 ` Will Newton
@ 2013-08-30 17:36 ` Carlos O'Donell
2013-08-30 17:38 ` Carlos O'Donell
0 siblings, 1 reply; 6+ messages in thread
From: Carlos O'Donell @ 2013-08-30 17:36 UTC (permalink / raw)
To: Will Newton; +Cc: Joseph S. Myers, libc-ports, Patch Tracking
On 08/30/2013 05:06 AM, Will Newton wrote:
> On 30 August 2013 01:13, Joseph S. Myers <joseph@codesourcery.com> wrote:
>> On Mon, 12 Aug 2013, Will Newton wrote:
>>
>>> This implementation of strlen is faster than the armv6 version for
>>> all string lengths greater than 1 on a Cortex-A15.
>>>
>>> ports/ChangeLog.arm:
>>>
>>> 2013-08-09 Will Newton <will.newton@linaro.org>
>>>
>>> * sysdeps/arm/armv6t2/strlen.S: New file.
>>
>> OK, presuming you've run the full glibc testsuite with this version used.
>
> Yes, testsuite is clean. Committed.
I'm not happy seeing these kinds of patches go in without some
kind of numbers around "faster" and a reproducible way to get
those numbers.
Cheers,
Carlos.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2.
2013-08-30 17:36 ` Carlos O'Donell
@ 2013-08-30 17:38 ` Carlos O'Donell
0 siblings, 0 replies; 6+ messages in thread
From: Carlos O'Donell @ 2013-08-30 17:38 UTC (permalink / raw)
To: Will Newton; +Cc: Joseph S. Myers, libc-ports, Patch Tracking
On 08/30/2013 01:36 PM, Carlos O'Donell wrote:
> On 08/30/2013 05:06 AM, Will Newton wrote:
>> On 30 August 2013 01:13, Joseph S. Myers <joseph@codesourcery.com> wrote:
>>> On Mon, 12 Aug 2013, Will Newton wrote:
>>>
>>>> This implementation of strlen is faster than the armv6 version for
>>>> all string lengths greater than 1 on a Cortex-A15.
>>>>
>>>> ports/ChangeLog.arm:
>>>>
>>>> 2013-08-09 Will Newton <will.newton@linaro.org>
>>>>
>>>> * sysdeps/arm/armv6t2/strlen.S: New file.
>>>
>>> OK, presuming you've run the full glibc testsuite with this version used.
>>
>> Yes, testsuite is clean. Committed.
>
> I'm not happy seeing these kinds of patches go in without some
> kind of numbers around "faster" and a reproducible way to get
> those numbers.
I don't say this because I'm just cranky, I want all of us to be
more accountable when it comes to the engineering rigour required
for performance patches. That way I can look back at these numbers
when users report issues with the speed of strlen on A15.
Cheers,
Carlos.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2013-08-30 17:38 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-12 7:56 [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2 Will Newton
2013-08-27 7:47 ` Will Newton
2013-08-30 0:13 ` Joseph S. Myers
2013-08-30 9:06 ` Will Newton
2013-08-30 17:36 ` Carlos O'Donell
2013-08-30 17:38 ` Carlos O'Donell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).