From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 28241 invoked by alias); 12 Aug 2013 07:56:12 -0000 Mailing-List: contact libc-ports-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: libc-ports-owner@sourceware.org Received: (qmail 28227 invoked by uid 89); 12 Aug 2013 07:56:12 -0000 X-Spam-SWARE-Status: No, score=-1.7 required=5.0 tests=AWL,BAYES_00,KHOP_RCVD_UNTRUST,RCVD_IN_DNSWL_LOW,RCVD_IN_HOSTKARMA_YE autolearn=ham version=3.3.2 Received: from mail-wg0-f54.google.com (HELO mail-wg0-f54.google.com) (74.125.82.54) by sourceware.org (qpsmtpd/0.84/v0.84-167-ge50287c) with ESMTP; Mon, 12 Aug 2013 07:56:11 +0000 Received: by mail-wg0-f54.google.com with SMTP id e12so5112077wgh.33 for ; Mon, 12 Aug 2013 00:56:09 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20120113; h=x-gm-message-state:message-id:date:from:user-agent:mime-version:to :cc:subject:content-type:content-transfer-encoding; bh=B5rAeVrTGY37oaJQOMnANLUTA5AykuMgDuUmHP1IqCo=; b=f+2l2dFenoGt2RmIIkOG1ekSMXDgClt999QEJBHqhWxFcnGQfEyTS+UQ05O7F9JO5r C/ydrYPVW1osLJSzt3wW3zJV/KV3JbMORGsjSwaNL5WUw2bvJnU+1ff9DwRfS/D6Efwm nLTUa7ODzUd8/1+YdFfi+zbYTiLx9cvWvhfClTqln7Osa0UMXigeY/vXkX9U1zT9gAL2 AKkEG8gNQOO24VjLS8Q8f3q0J9zbT9mcmqjlFA5+O+I2VkIwwe80UMlBC2/OBuoKNi6L ch06uy+e5XJVqeujn4LSkNwEEp0/28iCWW4J4JGUWhEleyq5KRsKjP0eolGxrRRlMuHU iYQg== X-Gm-Message-State: ALoCoQkvVRUg3oKe7HR7wV87eNmRG9cTuPid/V8j/mQKRQyDQrSusXv7MaILu6LPUPZaVxykIIMn X-Received: by 10.180.183.43 with SMTP id ej11mr5848145wic.9.1376294169378; Mon, 12 Aug 2013 00:56:09 -0700 (PDT) Received: from localhost.localdomain (cpc11-seac20-2-0-cust84.7-2.cable.virginmedia.com. 
[81.108.156.85]) by mx.google.com with ESMTPSA id fu13sm5792006wic.7.2013.08.12.00.56.07 for (version=TLSv1 cipher=RC4-SHA bits=128/128); Mon, 12 Aug 2013 00:56:08 -0700 (PDT) Message-ID: <52089516.3080304@linaro.org> Date: Mon, 12 Aug 2013 07:56:00 -0000 From: Will Newton User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130625 Thunderbird/17.0.7 MIME-Version: 1.0 To: libc-ports@sourceware.org CC: patches@linaro.org Subject: [PATCH] sysdeps/arm/armv6t2/strlen.S: strlen implementation for armv6t2. Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit X-SW-Source: 2013-08/txt/msg00001.txt.bz2 This implementation of strlen is faster than the armv6 version for all string lengths greater than 1 on a Cortex-A15. ports/ChangeLog.arm: 2013-08-09 Will Newton * sysdeps/arm/armv6t2/strlen.S: New file. --- ports/sysdeps/arm/armv6t2/strlen.S | 141 +++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 ports/sysdeps/arm/armv6t2/strlen.S diff --git a/ports/sysdeps/arm/armv6t2/strlen.S b/ports/sysdeps/arm/armv6t2/strlen.S new file mode 100644 index 0000000..a52e2e7 --- /dev/null +++ b/ports/sysdeps/arm/armv6t2/strlen.S @@ -0,0 +1,141 @@ +/* Copyright (C) 2010-2011,2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/
+
+/* size_t strlen (const char *s), scanning 8 bytes per step with UADD8/SEL.
+   Assumes:
+   ARMv6T2, AArch32
+   In: r0 = s.  Out: r0 = length.  r4 and r5 are saved on the stack and restored.
+ */
+
+#include <sysdep.h>
+
+#ifdef __ARMEB__	/* S2HI/S2LO shift data towards higher/lower addresses.  */
+#define S2LO		lsl
+#define S2HI		lsr
+#else
+#define S2LO		lsr
+#define S2HI		lsl
+#endif
+
+	/* This code requires Thumb-2 (CBNZ and ORN have no ARM encoding).  */
+	.thumb
+	.syntax unified
+
+/* Parameters and result.  */
+#define srcin		r0
+#define result		r0
+
+/* Internal variables.  */
+#define src		r1	/* 8-byte-aligned read pointer.  */
+#define data1a		r2	/* Word loaded from [src], then its NUL mask.  */
+#define data1b		r3	/* Word loaded from [src + 4], then NUL indicator.  */
+#define const_m1	r12	/* Holds 0xffffffff.  */
+#define const_0		r4	/* Holds 0 once tmp1 is dead.  */
+#define tmp1		r4	/* Overlaps const_0 */
+#define tmp2		r5
+
+	.text
+	.p2align 6		/* Align the entry point to 64 bytes.  */
+ENTRY(strlen)
+	pld	[srcin, #0]
+	strd	r4, r5, [sp, #-8]!	/* Save r4/r5 (const_0/tmp1 and tmp2).  */
+	cfi_adjust_cfa_offset (8)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_remember_state
+	bic	src, srcin, #7		/* src = srcin rounded down to 8 bytes.  */
+	mvn	const_m1, #0		/* const_m1 = 0xffffffff.  */
+	ands	tmp1, srcin, #7		/* Offset in the 8-byte block; NE if unaligned.  */
+	pld	[src, #32]
+	bne.w	.Lmisaligned8
+	mov	const_0, #0
+	mov	result, #-8		/* Biased by -8: the first add below makes it 0.  */
+.Lloop_aligned:
+	/* Bytes 0-7.  */
+	ldrd	data1a, data1b, [src]
+	pld	[src, #64]
+	add	result, result, #8
+.Lstart_realigned:
+	uadd8	data1a, data1a, const_m1	/* GE<0:3> set per non-zero byte (not saturating).  */
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>: 0xff marks a NUL byte.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Non-zero if either word has a NUL; mask only used if d1a == 0.  */
+	cbnz	data1b, .Lnull_found
+
+	/* Bytes 8-15.  */
+	ldrd	data1a, data1b, [src, #8]
+	uadd8	data1a, data1a, const_m1	/* GE<0:3> set per non-zero byte.  */
+	add	result, result, #8
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
+	cbnz	data1b, .Lnull_found
+
+	/* Bytes 16-23.  */
+	ldrd	data1a, data1b, [src, #16]
+	uadd8	data1a, data1a, const_m1	/* GE<0:3> set per non-zero byte.  */
+	add	result, result, #8
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
+	cbnz	data1b, .Lnull_found
+
+	/* Bytes 24-31.  */
+	ldrd	data1a, data1b, [src, #24]
+	add	src, src, #32		/* Step to the next 32-byte group.  */
+	uadd8	data1a, data1a, const_m1	/* GE<0:3> set per non-zero byte.  */
+	add	result, result, #8
+	sel	data1a, const_0, const_m1	/* Select based on GE<0:3>.  */
+	uadd8	data1b, data1b, const_m1
+	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
+	cmp	data1b, #0
+	beq	.Lloop_aligned		/* No NUL in these 32 bytes: keep scanning.  */
+
+.Lnull_found:
+	cmp	data1a, #0		/* EQ: no NUL in the first word of the pair.  */
+	itt	eq
+	addeq	result, result, #4	/* Then skip that word ...  */
+	moveq	data1a, data1b		/* ... and use the second word's mask.  */
+#ifndef __ARMEB__
+	rev	data1a, data1a		/* Put the lowest-addressed byte in the top bits.  */
+#endif
+	clz	data1a, data1a		/* Bits preceding the first NUL marker.  */
+	ldrd	r4, r5, [sp], #8	/* Restore r4/r5 and pop the frame.  */
+	cfi_adjust_cfa_offset (-8)
+	cfi_restore (r4)
+	cfi_restore (r5)
+	add	result, result, data1a, lsr #3	/* Bits -> Bytes.  */
+	DO_RET(lr)
+
+.Lmisaligned8:
+	cfi_restore_state
+	ldrd	data1a, data1b, [src]	/* Aligned block that contains srcin.  */
+	and	tmp2, tmp1, #3		/* Offset of the start within its word.  */
+	rsb	result, tmp1, #0	/* result = -offset: leading bytes do not count.  */
+	lsl	tmp2, tmp2, #3		/* Bytes -> bits.  */
+	tst	tmp1, #4		/* NE if the string starts in the second word.  */
+	pld	[src, #64]
+	S2HI	tmp2, const_m1, tmp2	/* Zero bytes mark positions before the start.  */
+	orn	data1a, data1a, tmp2	/* Force those bytes to 0xff so they are not NUL.  */
+	itt	ne
+	ornne	data1b, data1b, tmp2	/* Start in word b: mask its leading bytes ...  */
+	movne	data1a, const_m1	/* ... and make all of word a non-NUL.  */
+	mov	const_0, #0		/* tmp1 is dead; r4 now serves as const_0.  */
+	b	.Lstart_realigned
+
+END(strlen)
+libc_hidden_builtin_def (strlen)
-- 
1.8.1.4