From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27462 invoked by alias); 1 Mar 2013 17:36:29 -0000 Received: (qmail 26515 invoked by uid 22791); 1 Mar 2013 17:36:10 -0000 X-SWARE-Spam-Status: No, hits=-5.3 required=5.0 tests=AWL,BAYES_00,DKIM_SIGNED,DKIM_VALID,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,KHOP_RCVD_TRUST,KHOP_THREADED,RCVD_IN_DNSWL_LOW,RCVD_IN_HOSTKARMA_YE X-Spam-Check-By: sourceware.org Received: from mail-da0-f46.google.com (HELO mail-da0-f46.google.com) (209.85.210.46) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 01 Mar 2013 17:35:59 +0000 Received: by mail-da0-f46.google.com with SMTP id z8so1503214dad.19 for ; Fri, 01 Mar 2013 09:35:58 -0800 (PST) X-Received: by 10.68.134.100 with SMTP id pj4mr15515508pbb.12.1362159358923; Fri, 01 Mar 2013 09:35:58 -0800 (PST) Received: from fremont.twiddle.net (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPS id kl4sm12679430pbc.31.2013.03.01.09.35.56 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Fri, 01 Mar 2013 09:35:57 -0800 (PST) From: Richard Henderson To: libc-ports@sourceware.org Cc: joseph@codesourcery.com Subject: [PATCH v2 13/14] arm: Add optimized submul_1 Date: Fri, 01 Mar 2013 17:36:00 -0000 Message-Id: <1362159320-5934-14-git-send-email-rth@twiddle.net> In-Reply-To: <1362159320-5934-1-git-send-email-rth@twiddle.net> References: <1362159320-5934-1-git-send-email-rth@twiddle.net> X-IsSubscribed: yes Mailing-List: contact libc-ports-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: libc-ports-owner@sourceware.org X-SW-Source: 2013-03/txt/msg00005.txt.bz2 Written from scratch rather than copied from GMP, due to GPL 2.1 vs GPL 3, but tested with the GMP testsuite. This is 50% faster than the generic code as measured on Cortex-A15, and the same speed as GMP on the same core. It's probably slower than GMP on the A8 and A9 cores though. --- * sysdeps/arm/submul_1.S: New file. 
--- ports/sysdeps/arm/submul_1.S | 67 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 ports/sysdeps/arm/submul_1.S diff --git a/ports/sysdeps/arm/submul_1.S b/ports/sysdeps/arm/submul_1.S new file mode 100644 index 0000000..35e1348 --- /dev/null +++ b/ports/sysdeps/arm/submul_1.S @@ -0,0 +1,67 @@ +/* Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .syntax unified + .text + +@ cycles/limb +@ StrongArm ? +@ Cortex-A8 ? +@ Cortex-A9 ? 
+@ Cortex-A15	4
+
+/* mp_limb_t mpn_submul_1(res_ptr, src1_ptr, size, s2_limb)
+
+   Multiply the SIZE limbs at SRC1_PTR by S2_LIMB, subtract the product
+   from the SIZE limbs at RES_PTR in place, and return the limb carried
+   out of the last position.
+
+   In:   r0 = res_ptr, r1 = src1_ptr, r2 = size, r3 = s2_limb (vl)
+   Out:  r0 = carry-out limb
+   Uses: r4 = cl (carry limb), r5 = lpl (low product), ip = hpl (high
+         product), r6 = ul (source limb), r7 = rl (result limb), flags.
+         r4-r7 are saved on entry and restored before returning.
+
+   SIZE must be at least 1: the first limbs are loaded and multiplied,
+   and the count decremented, before the count is ever tested.
+
+   The loop is software-pipelined: each pass at 0: finishes the limb
+   whose product was formed by the UMULL at 1: on the previous pass,
+   while already fetching the next ul and rl.  */
+
+ENTRY(__mpn_submul_1)
+	push	{ r4, r5, r6, r7 }
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_rel_offset (r6, 8)
+	cfi_rel_offset (r7, 12)
+
+	ldr	r6, [r1], #4		/* load first ul, advance src */
+	ldr	r7, [r0]		/* load first rl */
+	mov	r4, #0			/* init carry in */
+	b	1f			/* enter at the multiply */
+0:
+	ldr	r6, [r1], #4		/* load next ul */
+	adds	r5, r5, r4		/* (lpl, c) = lpl + cl */
+	adc	r4, ip, #0		/* cl = hpl + c */
+	subs	r5, r7, r5		/* (lpl, !c) = rl - lpl */
+	ldr	r7, [r0, #4]		/* load next rl */
+	/* C clear means the subtraction borrowed.  The increment cannot
+	   wrap: ul * vl + cl is at most 2^64 - 2^32, so the high word
+	   is all-ones only when the low word is zero, and then
+	   rl - 0 never borrows.  */
+	it	cc
+	addcc	r4, r4, #1		/* cl += !c */
+	str	r5, [r0], #4		/* store result limb, advance res */
+1:
+	umull	r5, ip, r6, r3		/* (hpl, lpl) = ul * vl */
+	subs	r2, r2, #1		/* one fewer limb to start */
+	bne	0b
+
+	/* Finish the last limb; its product is already in ip:r5.  */
+	adds	r5, r5, r4		/* (lpl, c) = lpl + cl */
+	adc	r4, ip, #0		/* cl = hpl + c */
+	subs	r5, r7, r5		/* (lpl, !c) = rl - lpl */
+	str	r5, [r0], #4
+	ite	cc
+	addcc	r0, r4, #1		/* cl += !c */
+	movcs	r0, r4			/* return carry */
+
+	pop	{ r4, r5, r6, r7 }
+	/* Keep the unwind info in step with the stack: after the pop the
+	   CFA is back at sp and r4-r7 hold the caller's values again.  */
+	cfi_adjust_cfa_offset (-16)
+	cfi_restore (r4)
+	cfi_restore (r5)
+	cfi_restore (r6)
+	cfi_restore (r7)
+	DO_RET(lr)
+END(__mpn_submul_1)
-- 
1.8.1.2