From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from wout3-smtp.messagingengine.com (wout3-smtp.messagingengine.com [64.147.123.19]) by sourceware.org (Postfix) with ESMTPS id C6658384BC02 for ; Mon, 31 Oct 2022 15:47:06 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org C6658384BC02 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=danielengel.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=danielengel.com Received: from compute5.internal (compute5.nyi.internal [10.202.2.45]) by mailout.west.internal (Postfix) with ESMTP id B40C53200974; Mon, 31 Oct 2022 11:47:05 -0400 (EDT) Received: from mailfrontend2 ([10.202.2.163]) by compute5.internal (MEProxy); Mon, 31 Oct 2022 11:47:06 -0400 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=danielengel.com; h=cc:cc:content-transfer-encoding:date:date:from:from :in-reply-to:in-reply-to:message-id:mime-version:references :reply-to:sender:subject:subject:to:to; s=fm1; t=1667231225; x= 1667317625; bh=+mqXK0GkDMOt2mCL0pDwD+3ZSBXYCopi06zR1g2vQUs=; b=t F6bmtkKdX6mk0kfPp0k68buO6WPD6Ccl/EU9KxbgacjLhb6+KTnutO/Rh80H3SFE 6JjPbbfDsjlS77aI595UZAlm9+FWzuKgOImfxoC5oj8/pgKIawX9ipAh+VQSXde9 0g7gkcb6mKMTvR2y9DvXdxz/+Ds2bpMcWtoVLS8p9o31S1cHEvoDP31841GWgX7v NGUUqomu39C+nx3JkfjT2su0+Tdqo6tdvEWtuIIlLPB/ZjOXJ7UPnxftqlyuSPrF DtxzYKJbhinkCzhd9kRClakkVP1MV/auRDxiEom3fHOj4FaUUtXdVfp64Wv+xrYG IeUSTB332PPuL4mAppuBg== DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d= messagingengine.com; h=cc:cc:content-transfer-encoding:date:date :feedback-id:feedback-id:from:from:in-reply-to:in-reply-to :message-id:mime-version:references:reply-to:sender:subject :subject:to:to:x-me-proxy:x-me-proxy:x-me-sender:x-me-sender :x-sasl-enc; s=fm3; t=1667231225; x=1667317625; bh=+mqXK0GkDMOt2 mCL0pDwD+3ZSBXYCopi06zR1g2vQUs=; b=F+mb0jERWvciOdo8RD4ThYP0xQFpD +PYtFWgR8BFdIE6tYU8hF87rwieV0teIsyPNCyg3U63aecdsCJ7137qLX+S9rQk4 d98cMg3oPbJQhDC0h5uevX2GSg1wGhBcQTkYmiT9FYi++93DJJT7lk11mZiDGQQj 
eePwoy50CidH5r9ithFWSE0OEBe81ZkK3zubWmSewqhIv0sFDC2dPX0TCIk0bP+S 8GUJ0jy0uX5WBLX18FtMLbT2BI/qkkaObOm+2SOSWRnvgpChPw9Yr8yhsMD5yYfW 8hRyJCtkPQtYoDpUZKeK1oa3eJWm0dFJ+1WEl9OplGrGJ67Tz8FNS++jw== X-ME-Sender: X-ME-Received: X-ME-Proxy-Cause: gggruggvucftvghtrhhoucdtuddrgedvgedrudefgdejlecutefuodetggdotefrodftvf curfhrohhfihhlvgemucfhrghsthforghilhdpqfgfvfdpuffrtefokffrpgfnqfghnecu uegrihhlohhuthemuceftddtnecusecvtfgvtghiphhivghnthhsucdlqddutddtmdenuc fjughrpefhvfevufffkffojghfggfgsedtkeertdertddtnecuhfhrohhmpeffrghnihgv lhcugfhnghgvlhcuoehgnhhusegurghnihgvlhgvnhhgvghlrdgtohhmqeenucggtffrrg htthgvrhhnpefgvedtvdffvdejgfejffehfeejgfelfeelhfdtgfehheeftdetieffhfdt heduheenucffohhmrghinheplhhshhhifhhtrdhssgdpghhnuhdrohhrghenucevlhhush htvghrufhiiigvpedtnecurfgrrhgrmhepmhgrihhlfhhrohhmpehgnhhusegurghnihgv lhgvnhhgvghlrdgtohhm X-ME-Proxy: Feedback-ID: i791144d6:Fastmail Received: by mail.messagingengine.com (Postfix) with ESMTPA; Mon, 31 Oct 2022 11:47:04 -0400 (EDT) Received: from ubuntu.lorien.danielengel.com (ubuntu.lorien.danielengel.com [10.0.0.96]) by sendmail.lorien.danielengel.com (8.15.2/8.15.2) with ESMTP id 29VFkuQv087262; Mon, 31 Oct 2022 08:46:56 -0700 (PDT) (envelope-from gnu@danielengel.com) From: Daniel Engel To: Richard Earnshaw , gcc-patches@gcc.gnu.org Cc: Daniel Engel , Christophe Lyon Subject: [PATCH v7 11/34] Import 64-bit shift functions from the CM0 library Date: Mon, 31 Oct 2022 08:45:06 -0700 Message-Id: <20221031154529.3627576-12-gnu@danielengel.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20221031154529.3627576-1-gnu@danielengel.com> References: <20221031154529.3627576-1-gnu@danielengel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-12.3 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,JMQ_SPF_NEUTRAL,KAM_SHORT,RCVD_IN_DNSWL_LOW,SCC_10_SHORT_WORD_LINES,SCC_5_SHORT_WORD_LINES,SPF_HELO_PASS,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: The Thumb versions of these functions are each 1-2 instructions smaller and faster, and branchless when the IT instruction is available. The ARM versions were converted to the "xxl/xxh" big-endian register naming convention, but are otherwise unchanged. gcc/libgcc/ChangeLog: 2022-10-09 Daniel Engel * config/arm/eabi/lshift.S (__ashldi3, __ashrdi3, __lshrdi3): Reduced code size on Thumb architectures; updated big-endian register naming convention to "xxl/xxh". --- libgcc/config/arm/eabi/lshift.S | 338 +++++++++++++++++++++----------- 1 file changed, 228 insertions(+), 110 deletions(-) diff --git a/libgcc/config/arm/eabi/lshift.S b/libgcc/config/arm/eabi/lshift.S index 6e79d96c118..365350dfb2d 100644 --- a/libgcc/config/arm/eabi/lshift.S +++ b/libgcc/config/arm/eabi/lshift.S @@ -1,123 +1,241 @@ -/* Copyright (C) 1995-2022 Free Software Foundation, Inc. +/* lshift.S: ARM optimized 64-bit integer shift -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Daniel Engel, Senva Inc (gnu@danielengel.com) -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. 
+ This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ #ifdef L_lshrdi3 - FUNC_START lshrdi3 - FUNC_ALIAS aeabi_llsr lshrdi3 - -#ifdef __thumb__ - lsrs al, r2 - movs r3, ah - lsrs ah, r2 - mov ip, r3 - subs r2, #32 - lsrs r3, r2 - orrs al, r3 - negs r2, r2 - mov r3, ip - lsls r3, r2 - orrs al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, lsr r2 - RET -#endif - FUNC_END aeabi_llsr - FUNC_END lshrdi3 - -#endif - +// long long __aeabi_llsr(long long, int) +// Logical shift right the 64 bit value in $r1:$r0 by the count in $r2. +// The result is only guaranteed for shifts in the range of '0' to '63'. +// Uses $r3 as scratch space; the ARM (non-Thumb) variant also uses $ip. +FUNC_START_SECTION aeabi_llsr .text.sorted.libgcc.lshrdi3 +FUNC_ALIAS lshrdi3 aeabi_llsr + CFI_START_FUNCTION + + #if defined(__thumb__) && __thumb__ + + // Save a copy of the hi word for the remainder. + movs r3, xxh + + // Assume a simple shift. + lsrs xxl, r2 + lsrs xxh, r2 + + // Test if the shift distance is at least 1 word (32 bits). + subs r2, #32 + + #ifdef __HAVE_FEATURE_IT + do_it lo,te + + // The remainder is opposite the main shift, (32 - x) bits. 
+ rsblo r2, #0 + lsllo r3, r2 + + // The remainder shift extends into the hi word. + lsrhs r3, r2 + + #else /* !__HAVE_FEATURE_IT */ + bhs LLSYM(__llsr_large) + + // The remainder is opposite the main shift, (32 - x) bits. + rsbs r2, #0 + lsls r3, r2 + + // Cancel any remaining shift: zero $r2 so the shift below is a no-op. + eors r2, r2 + + LLSYM(__llsr_large): + // Apply any remaining shift to the saved hi word. + lsrs r3, r2 + + #endif /* !__HAVE_FEATURE_IT */ + + // Merge remainder and result; for counts 0-63 their bits never overlap, so ADD acts as OR. + adds xxl, r3 + RET + + #else /* !__thumb__ */ + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi xxl, xxl, lsr r2 + movpl xxl, xxh, lsr r3 + orrmi xxl, xxl, xxh, lsl ip + mov xxh, xxh, lsr r2 + RET + + #endif /* !__thumb__ */ + + + CFI_END_FUNCTION +FUNC_END lshrdi3 +FUNC_END aeabi_llsr + +#endif /* L_lshrdi3 */ + + #ifdef L_ashrdi3 - - FUNC_START ashrdi3 - FUNC_ALIAS aeabi_lasr ashrdi3 - -#ifdef __thumb__ - lsrs al, r2 - movs r3, ah - asrs ah, r2 - subs r2, #32 - @ If r2 is negative at this point the following step would OR - @ the sign bit into all of AL. That's not what we want... - bmi 1f - mov ip, r3 - asrs r3, r2 - orrs al, r3 - mov r3, ip -1: - negs r2, r2 - lsls r3, r2 - orrs al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, asr r2 - RET -#endif - - FUNC_END aeabi_lasr - FUNC_END ashrdi3 - -#endif + +// long long __aeabi_lasr(long long, int) +// Arithmetic shift right the 64 bit value in $r1:$r0 by the count in $r2. +// The result is only guaranteed for shifts in the range of '0' to '63'. +// Uses $r3 as scratch space; the ARM (non-Thumb) variant also uses $ip. +FUNC_START_SECTION aeabi_lasr .text.sorted.libgcc.ashrdi3 +FUNC_ALIAS ashrdi3 aeabi_lasr + CFI_START_FUNCTION + + #if defined(__thumb__) && __thumb__ + + // Save a copy of the hi word for the remainder. + movs r3, xxh + + // Assume a simple shift. 
+ lsrs xxl, r2 + asrs xxh, r2 + + // Test if the shift distance is at least 1 word (32 bits). 
+ subs r2, #32 + + #ifdef __HAVE_FEATURE_IT + do_it lo,te + + // The remainder is opposite the main shift, (32 - x) bits. + rsblo r2, #0 + lsllo r3, r2 + + // The remainder shift extends into the hi word. + asrhs r3, r2 + + #else /* !__HAVE_FEATURE_IT */ + bhs LLSYM(__lasr_large) + + // The remainder is opposite the main shift, (32 - x) bits. + rsbs r2, #0 + lsls r3, r2 + + // Cancel any remaining shift: zero $r2 so the shift below is a no-op. + eors r2, r2 + + LLSYM(__lasr_large): + // Apply any remaining shift to the saved hi word. + asrs r3, r2 + + #endif /* !__HAVE_FEATURE_IT */ + + // Merge remainder and result; for counts 0-63 their bits never overlap, so ADD acts as OR. + adds xxl, r3 + RET + + #else /* !__thumb__ */ + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi xxl, xxl, lsr r2 + movpl xxl, xxh, asr r3 + orrmi xxl, xxl, xxh, lsl ip + mov xxh, xxh, asr r2 + RET + + #endif /* !__thumb__ */ + + CFI_END_FUNCTION +FUNC_END ashrdi3 +FUNC_END aeabi_lasr + +#endif /* L_ashrdi3 */ + #ifdef L_ashldi3 - FUNC_START ashldi3 - FUNC_ALIAS aeabi_llsl ashldi3 - -#ifdef __thumb__ - lsls ah, r2 - movs r3, al - lsls al, r2 - mov ip, r3 - subs r2, #32 - lsls r3, r2 - orrs ah, r3 - negs r2, r2 - mov r3, ip - lsrs r3, r2 - orrs ah, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi ah, ah, lsl r2 - movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip - mov al, al, lsl r2 - RET -#endif - FUNC_END aeabi_llsl - FUNC_END ashldi3 - -#endif +// long long __aeabi_llsl(long long, int) +// Logical shift left the 64 bit value in $r1:$r0 by the count in $r2. +// The result is only guaranteed for shifts in the range of '0' to '63'. +// Uses $r3 as scratch space; the ARM (non-Thumb) variant also uses $ip. NOTE(review): the explicit .section directive below repeats the section name passed to FUNC_START_SECTION and has no counterpart in the two sibling functions -- confirm it is needed. 
+.section .text.sorted.libgcc.ashldi3,"x" +FUNC_START_SECTION aeabi_llsl .text.sorted.libgcc.ashldi3 +FUNC_ALIAS ashldi3 aeabi_llsl + CFI_START_FUNCTION + + #if defined(__thumb__) && __thumb__ + + // Save a copy of the lo word for the remainder. + movs r3, xxl + + // Assume a simple shift. + lsls xxl, r2 + lsls xxh, r2 + + // Test if the shift distance is at least 1 word (32 bits). 
+ subs r2, #32 + + #ifdef __HAVE_FEATURE_IT + do_it lo,te + + // The remainder is opposite the main shift, (32 - x) bits. + rsblo r2, #0 + lsrlo r3, r2 + + // The remainder shift extends into the hi word. + lslhs r3, r2 + + #else /* !__HAVE_FEATURE_IT */ + bhs LLSYM(__llsl_large) + + // The remainder is opposite the main shift, (32 - x) bits. + rsbs r2, #0 + lsrs r3, r2 + + // Cancel any remaining shift: zero $r2 so the shift below is a no-op. + eors r2, r2 + + LLSYM(__llsl_large): + // Apply any remaining shift to the saved lo word. + lsls r3, r2 + + #endif /* !__HAVE_FEATURE_IT */ + + // Merge remainder and result; for counts 0-63 their bits never overlap, so ADD acts as OR. + adds xxh, r3 + RET + + #else /* !__thumb__ */ + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi xxh, xxh, lsl r2 + movpl xxh, xxl, lsl r3 + orrmi xxh, xxh, xxl, lsr ip + mov xxl, xxl, lsl r2 + RET + + #endif /* !__thumb__ */ + + CFI_END_FUNCTION +FUNC_END ashldi3 +FUNC_END aeabi_llsl + +#endif /* L_ashldi3 */ + + -- 2.34.1