From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27975 invoked by alias); 27 Feb 2013 03:17:25 -0000 Received: (qmail 27944 invoked by uid 22791); 27 Feb 2013 03:17:23 -0000 X-SWARE-Spam-Status: No, hits=-5.3 required=5.0 tests=AWL,BAYES_00,DKIM_SIGNED,DKIM_VALID,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,KHOP_RCVD_TRUST,KHOP_SPAMHAUS_DROP,KHOP_THREADED,RCVD_IN_DNSWL_LOW,RCVD_IN_HOSTKARMA_YE X-Spam-Check-By: sourceware.org Received: from mail-da0-f42.google.com (HELO mail-da0-f42.google.com) (209.85.210.42) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Wed, 27 Feb 2013 03:17:14 +0000 Received: by mail-da0-f42.google.com with SMTP id n15so60113dad.15 for ; Tue, 26 Feb 2013 19:17:14 -0800 (PST) X-Received: by 10.66.79.231 with SMTP id m7mr5166060pax.76.1361935034075; Tue, 26 Feb 2013 19:17:14 -0800 (PST) Received: from pebble.twiddle.net (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPS id pp1sm265271pac.7.2013.02.26.19.17.12 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Tue, 26 Feb 2013 19:17:13 -0800 (PST) From: Richard Henderson To: libc-ports@sourceware.org Cc: Joseph Myers Subject: [PATCH 24/26] arm: Add optimized addmul_1 Date: Wed, 27 Feb 2013 03:17:00 -0000 Message-Id: <1361934986-17018-25-git-send-email-rth@twiddle.net> In-Reply-To: <1361934986-17018-1-git-send-email-rth@twiddle.net> References: <1361934986-17018-1-git-send-email-rth@twiddle.net> X-IsSubscribed: yes Mailing-List: contact libc-ports-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: libc-ports-owner@sourceware.org X-SW-Source: 2013-02/txt/msg00077.txt.bz2 Written from scratch rather than copied from GMP, because glibc is licensed under LGPL 2.1 while GMP is LGPL 3, but tested with the GMP test suite. This is 25% faster than the generic code as measured on Cortex-A15, and the same speed as GMP on the same core. It's probably slower than GMP on the A8 and A9 cores, though. --- * sysdeps/arm/addmul_1.S: New file. 
---
 ports/sysdeps/arm/addmul_1.S | 74 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 ports/sysdeps/arm/addmul_1.S

diff --git a/ports/sysdeps/arm/addmul_1.S b/ports/sysdeps/arm/addmul_1.S
new file mode 100644
index 0000000..ecb8983
--- /dev/null
+++ b/ports/sysdeps/arm/addmul_1.S
@@ -0,0 +1,74 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.syntax unified
+	.text
+
+@		cycles/limb
+@ StrongArm	   ?
+@ Cortex-A8	   ?
+@ Cortex-A9	   ?
+@ Cortex-A15	   4
+
+/* mp_limb_t mpn_addmul_1(res_ptr, src1_ptr, size, s2_limb)
+
+   Multiply the SIZE limbs at SRC1_PTR by S2_LIMB, add the products
+   into the SIZE limbs at RES_PTR, and return the carry-out limb.
+
+   In:   r0 = res_ptr, r1 = src1_ptr, r2 = size, r3 = s2_limb
+   Out:  r0 = carry-out limb
+   Uses: r4 = cl (carry limb), r5 = rl/lpl, r6 = ul, ip = hpl.
+
+   SIZE must be at least 1: the first limbs are loaded and the count
+   is decremented before it is first tested.  */
+
+ENTRY(__mpn_addmul_1)
+	push	{ r4, r5, r6 }
+	cfi_adjust_cfa_offset (12)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_rel_offset (r6, 8)
+
+	ldr	r6, [r1], #4		/* load first ul, advance src1_ptr */
+	ldr	r5, [r0]		/* load first rl */
+	mov	r4, #0			/* init carry in */
+	b	1f			/* first limb has nothing to store yet */
+0:
+	ldr	r6, [r1], #4		/* load next ul */
+	adds	r4, r4, r5		/* (out, c) = cl + lpl */
+	ldr	r5, [r0, #4]		/* load next rl */
+	str	r4, [r0], #4		/* store out, advance res_ptr */
+	adc	r4, ip, #0		/* cl = hpl + c */
+1:
+	mov	ip, #0			/* zero-extend rl */
+	umlal	r5, ip, r6, r3		/* (hpl, lpl) = ul * vl + rl */
+	subs	r2, r2, #1
+	bne	0b
+
+	adds	r4, r4, r5		/* (out, c) = cl + lpl */
+	str	r4, [r0]
+	adc	r0, ip, #0		/* return hpl + c */
+
+	pop	{ r4, r5, r6 }
+	cfi_adjust_cfa_offset (-12)	/* keep unwind info valid at the return */
+	cfi_restore (r4)
+	cfi_restore (r5)
+	cfi_restore (r6)
+	DO_RET(lr)
+END(__mpn_addmul_1)
-- 
1.8.1.2