From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27130 invoked by alias); 1 Mar 2013 17:36:24 -0000 Received: (qmail 26614 invoked by uid 22791); 1 Mar 2013 17:36:12 -0000 X-SWARE-Spam-Status: No, hits=-5.4 required=5.0 tests=AWL,BAYES_00,DKIM_SIGNED,DKIM_VALID,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,KHOP_RCVD_TRUST,KHOP_THREADED,RCVD_IN_DNSWL_LOW,RCVD_IN_HOSTKARMA_YE X-Spam-Check-By: sourceware.org Received: from mail-pa0-f49.google.com (HELO mail-pa0-f49.google.com) (209.85.220.49) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 01 Mar 2013 17:35:58 +0000 Received: by mail-pa0-f49.google.com with SMTP id kp6so1923966pab.22 for ; Fri, 01 Mar 2013 09:35:56 -0800 (PST) X-Received: by 10.66.27.199 with SMTP id v7mr19327697pag.215.1362159356700; Fri, 01 Mar 2013 09:35:56 -0800 (PST) Received: from fremont.twiddle.net (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPS id kl4sm12679430pbc.31.2013.03.01.09.35.54 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Fri, 01 Mar 2013 09:35:55 -0800 (PST) From: Richard Henderson To: libc-ports@sourceware.org Cc: joseph@codesourcery.com Subject: [PATCH v2 12/14] arm: Add optimized addmul_1 Date: Fri, 01 Mar 2013 17:36:00 -0000 Message-Id: <1362159320-5934-13-git-send-email-rth@twiddle.net> In-Reply-To: <1362159320-5934-1-git-send-email-rth@twiddle.net> References: <1362159320-5934-1-git-send-email-rth@twiddle.net> X-IsSubscribed: yes Mailing-List: contact libc-ports-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: libc-ports-owner@sourceware.org X-SW-Source: 2013-03/txt/msg00012.txt.bz2 Written from scratch rather than copied from GMP, due to LGPL 2.1 vs LGPL 3, but tested with the GMP testsuite. This is 25% faster than the generic code as measured on Cortex-A15, and the same speed as GMP on the same core. It's probably slower than GMP on the A8 and A9 cores though. --- * sysdeps/arm/addmul_1.S: New file. 
--- ports/sysdeps/arm/addmul_1.S | 66 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 ports/sysdeps/arm/addmul_1.S diff --git a/ports/sysdeps/arm/addmul_1.S b/ports/sysdeps/arm/addmul_1.S new file mode 100644 index 0000000..4e2f6da --- /dev/null +++ b/ports/sysdeps/arm/addmul_1.S @@ -0,0 +1,66 @@ +/* Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .syntax unified + .text + +@ cycles/limb +@ StrongArm ? +@ Cortex-A8 ? +@ Cortex-A9 ? 
+@ Cortex-A15	   4
+
+/* mp_limb_t mpn_addmul_1(res_ptr, src1_ptr, size, s2_limb): multiply the size limbs at src1_ptr by s2_limb, add the products into the limbs at res_ptr, and return the final carry limb.  size must be nonzero: the first limb is processed before size is tested.  */
+
+ENTRY(__mpn_addmul_1)
+	push	{ r4, r5, r6, r7 }	/* save r4-r7, used as scratch below */
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_rel_offset (r6, 8)
+	cfi_rel_offset (r7, 12)
+
+	ldr	r6, [r1], #4		/* ul = first source limb; r1 post-incremented */
+	ldr	r5, [r0]		/* rl = first result limb */
+	mov	r4, #0			/* init carry in (cl = 0) */
+	b	1f			/* enter the loop at the multiply */
+0:
+	ldr	r6, [r1], #4		/* load next ul */
+	adds	r7, r4, r5		/* (out, c) = cl + lpl */
+	ldr	r5, [r0, #4]		/* load next rl; ldr leaves the carry from adds intact */
+	adc	r4, ip, #0		/* cl = hpl + c */
+	str	r7, [r0], #4		/* store out; r0 post-incremented to the next rl */
+1:
+	mov	ip, #0			/* zero-extend rl into the (ip, r5) accumulator */
+	umlal	r5, ip, r6, r3		/* (hpl, lpl) = ul * vl + rl */
+	subs	r2, r2, #1		/* one limb fewer to go; Z set after the last one */
+	bne	0b			/* loop while limbs remain */
+
+	adds	r4, r4, r5		/* (out, c) = cl + lpl */
+	str	r4, [r0]		/* store the last result limb */
+	adc	r0, ip, #0		/* return hpl + c */
+
+	pop	{ r4, r5, r6, r7 }	/* restore the registers saved on entry */
+	cfi_adjust_cfa_offset (-16)
+	cfi_restore (r4)
+	cfi_restore (r5)
+	cfi_restore (r6)
+	cfi_restore (r7)
+	DO_RET(lr)
+END(__mpn_addmul_1)
-- 
1.8.1.2