[PATCH v5 11/33] Import 64-bit shift functions from the CM0 library

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Daniel Engel <gnu@danielengel.com>
To: gcc-patches@gcc.gnu.org
Cc: Richard.Earnshaw@foss.arm.com, christophe.lyon@linaro.org
Subject: [PATCH v5 11/33] Import 64-bit shift functions from the CM0 library
Date: Fri, 15 Jan 2021 03:30:39 -0800	[thread overview]
Message-ID: <8d025ab5ba947e552a204e7df511cd2dab73c880.1610709584.git.gnu@danielengel.com> (raw)
In-Reply-To: <cover.1610709584.git.gnu@danielengel.com>

The Thumb versions of these functions are each 1-2 instructions smaller
and faster, and branchless when the IT instruction is available.

The ARM versions were converted to the "xxl/xxh" big-endian register
naming convention, but are otherwise unchanged.

gcc/libgcc/ChangeLog:
2021-01-13 Daniel Engel <gnu@danielengel.com>

	* config/arm/eabi/lshift.S (__ashldi3, __ashrdi3, __lshrdi3):
	Reduced code size on Thumb architectures;
	updated big-endian register naming convention to "xxl/xxh".
---
 libgcc/config/arm/eabi/lshift.S | 338 +++++++++++++++++++++-----------
 1 file changed, 228 insertions(+), 110 deletions(-)

diff --git a/libgcc/config/arm/eabi/lshift.S b/libgcc/config/arm/eabi/lshift.S
index 0974a72c377..16cf2dcef04 100644
--- a/libgcc/config/arm/eabi/lshift.S
+++ b/libgcc/config/arm/eabi/lshift.S
@@ -1,123 +1,241 @@
-/* Copyright (C) 1995-2021 Free Software Foundation, Inc.
+/* lshift.S: ARM optimized 64-bit integer shift
 
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
+   Copyright (C) 2018-2021 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (gnu@danielengel.com)
 
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
 
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
 
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-<http://www.gnu.org/licenses/>.  */
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
 
 
 #ifdef L_lshrdi3
 
-	FUNC_START lshrdi3
-	FUNC_ALIAS aeabi_llsr lshrdi3
-	
-#ifdef __thumb__
-	lsrs	al, r2
-	movs	r3, ah
-	lsrs	ah, r2
-	mov	ip, r3
-	subs	r2, #32
-	lsrs	r3, r2
-	orrs	al, r3
-	negs	r2, r2
-	mov	r3, ip
-	lsls	r3, r2
-	orrs	al, r3
-	RET
-#else
-	subs	r3, r2, #32
-	rsb	ip, r2, #32
-	movmi	al, al, lsr r2
-	movpl	al, ah, lsr r3
-	orrmi	al, al, ah, lsl ip
-	mov	ah, ah, lsr r2
-	RET
-#endif
-	FUNC_END aeabi_llsr
-	FUNC_END lshrdi3
-
-#endif
-	
+// long long __aeabi_llsr(long long, int)
+// Logical shift right the 64 bit value in $r1:$r0 by the count in $r2.
+// The result is only guaranteed for shifts in the range of '0' to '63'.
+// Scratch: Thumb code uses $r3 and overwrites $r2; ARM code uses $r3 and $ip.
+FUNC_START_SECTION aeabi_llsr .text.sorted.libgcc.lshrdi3
+FUNC_ALIAS lshrdi3 aeabi_llsr
+    CFI_START_FUNCTION
+
+  #if defined(__thumb__) && __thumb__
+
+        // Keep the original high word; the bits that cross into the low word come from it.
+        movs    r3,     xxh
+
+        // Shift both words by the full count; cross-word bits are merged at the end.
+        lsrs    xxl,    r2
+        lsrs    xxh,    r2
+
+        // Set flags on (count - 32): HS means count >= 32, LO means count < 32.
+        subs    r2,     #32
+
+    #ifdef __HAVE_FEATURE_IT
+        do_it   lo,te
+
+        // count < 32: r2 = 32 - count, so r3 = high word bits that fall into the low word.
+        rsblo   r2,     #0
+        lsllo   r3,     r2
+
+        // count >= 32: r2 = count - 32, so r3 = high word shifted down into the low word.
+        lsrhs   r3,     r2
+
+    #else /* !__HAVE_FEATURE_IT */
+        bhs     LLSYM(__llsr_large)
+
+        // count < 32: r2 = 32 - count, so r3 = high word bits that fall into the low word.
+        rsbs    r2,     #0
+        lsls    r3,     r2
+
+        // Zero r2 so the shared shift below leaves r3 unchanged.
+        eors    r2,     r2
+
+      LLSYM(__llsr_large):
+        // Taken branch: r2 = count - 32.  Fall-through: r2 = 0 (no further shift).
+        lsrs    r3,     r2
+
+    #endif /* !__HAVE_FEATURE_IT */
+
+        // Merge r3 into the low word (for counts 0..63 no set bits overlap, so ADD acts as OR).
+        adds    xxl,    r3
+        RET
+
+  #else /* !__thumb__ */
+
+        subs    r3,     r2,     #32             // r3 = count - 32; MI when count < 32
+        rsb     ip,     r2,     #32             // ip = 32 - count
+        movmi   xxl,    xxl,    lsr r2          // count < 32: low word >>= count
+        movpl   xxl,    xxh,    lsr r3          // count >= 32: low = high >> (count - 32)
+        orrmi   xxl,    xxl,    xxh,    lsl ip  // count < 32: add bits shifted out of high
+        mov     xxh,    xxh,    lsr r2          // high word >>= count (logical)
+        RET
+
+  #endif /* !__thumb__ */
+
+
+    CFI_END_FUNCTION
+FUNC_END lshrdi3
+FUNC_END aeabi_llsr
+
+#endif /* L_lshrdi3 */
+
+
 #ifdef L_ashrdi3
-	
-	FUNC_START ashrdi3
-	FUNC_ALIAS aeabi_lasr ashrdi3
-	
-#ifdef __thumb__
-	lsrs	al, r2
-	movs	r3, ah
-	asrs	ah, r2
-	subs	r2, #32
-	@ If r2 is negative at this point the following step would OR
-	@ the sign bit into all of AL.  That's not what we want...
-	bmi	1f
-	mov	ip, r3
-	asrs	r3, r2
-	orrs	al, r3
-	mov	r3, ip
-1:
-	negs	r2, r2
-	lsls	r3, r2
-	orrs	al, r3
-	RET
-#else
-	subs	r3, r2, #32
-	rsb	ip, r2, #32
-	movmi	al, al, lsr r2
-	movpl	al, ah, asr r3
-	orrmi	al, al, ah, lsl ip
-	mov	ah, ah, asr r2
-	RET
-#endif
-
-	FUNC_END aeabi_lasr
-	FUNC_END ashrdi3
-
-#endif
+
+// long long __aeabi_lasr(long long, int)
+// Arithmetic shift right the 64 bit value in $r1:$r0 by the count in $r2.
+// The result is only guaranteed for shifts in the range of '0' to '63'.
+// Scratch: Thumb code uses $r3 and overwrites $r2; ARM code uses $r3 and $ip.
+FUNC_START_SECTION aeabi_lasr .text.sorted.libgcc.ashrdi3
+FUNC_ALIAS ashrdi3 aeabi_lasr
+    CFI_START_FUNCTION
+
+  #if defined(__thumb__) && __thumb__
+
+        // Keep the original high word; the bits that cross into the low word come from it.
+        movs    r3,     xxh
+
+        // Low word shifts logically, high word arithmetically (sign fill); merged at the end.
+        lsrs    xxl,    r2
+        asrs    xxh,    r2
+
+        // Set flags on (count - 32): HS means count >= 32, LO means count < 32.
+        subs    r2,     #32
+
+    #ifdef __HAVE_FEATURE_IT
+        do_it   lo,te
+
+        // count < 32: r2 = 32 - count, so r3 = high word bits that fall into the low word.
+        rsblo   r2,     #0
+        lsllo   r3,     r2
+
+        // count >= 32: r2 = count - 32, so r3 = high word sign-shifted into the low word.
+        asrhs   r3,     r2
+
+    #else /* !__HAVE_FEATURE_IT */
+        bhs     LLSYM(__lasr_large)
+
+        // count < 32: r2 = 32 - count, so r3 = high word bits that fall into the low word.
+        rsbs    r2,     #0
+        lsls    r3,     r2
+
+        // Zero r2 so the shared shift below leaves r3 unchanged.
+        eors    r2,     r2
+
+      LLSYM(__lasr_large):
+        // Taken branch: r2 = count - 32.  Fall-through: r2 = 0 (no further shift).
+        asrs    r3,     r2
+
+    #endif /* !__HAVE_FEATURE_IT */
+
+        // Merge r3 into the low word (for counts 0..63 no set bits overlap, so ADD acts as OR).
+        adds    xxl,    r3
+        RET
+
+  #else /* !__thumb__ */
+
+        subs    r3,     r2,     #32             // r3 = count - 32; MI when count < 32
+        rsb     ip,     r2,     #32             // ip = 32 - count
+        movmi   xxl,    xxl,    lsr r2          // count < 32: low word >>= count (logical)
+        movpl   xxl,    xxh,    asr r3          // count >= 32: low = high asr (count - 32)
+        orrmi   xxl,    xxl,    xxh,    lsl ip  // count < 32: add bits shifted out of high
+        mov     xxh,    xxh,    asr r2          // high word >>= count (sign fill)
+        RET
+
+  #endif /* !__thumb__ */
+
+    CFI_END_FUNCTION
+FUNC_END ashrdi3
+FUNC_END aeabi_lasr
+
+#endif /* L_ashrdi3 */
+
 
 #ifdef L_ashldi3
 
-	FUNC_START ashldi3
-	FUNC_ALIAS aeabi_llsl ashldi3
-	
-#ifdef __thumb__
-	lsls	ah, r2
-	movs	r3, al
-	lsls	al, r2
-	mov	ip, r3
-	subs	r2, #32
-	lsls	r3, r2
-	orrs	ah, r3
-	negs	r2, r2
-	mov	r3, ip
-	lsrs	r3, r2
-	orrs	ah, r3
-	RET
-#else
-	subs	r3, r2, #32
-	rsb	ip, r2, #32
-	movmi	ah, ah, lsl r2
-	movpl	ah, al, lsl r3
-	orrmi	ah, ah, al, lsr ip
-	mov	al, al, lsl r2
-	RET
-#endif
-	FUNC_END aeabi_llsl
-	FUNC_END ashldi3
-
-#endif
+// long long __aeabi_llsl(long long, int)
+// Logical shift left the 64 bit value in $r1:$r0 by the count in $r2.
+// The result is only guaranteed for shifts in the range of '0' to '63'.
+// Scratch: Thumb code uses $r3 and overwrites $r2; ARM code uses $r3 and $ip.
+// Both variants alter the condition flags.
+FUNC_START_SECTION aeabi_llsl .text.sorted.libgcc.ashldi3
+FUNC_ALIAS ashldi3 aeabi_llsl
+    CFI_START_FUNCTION
+
+  #if defined(__thumb__) && __thumb__
+
+        // Keep the original low word; the bits that cross into the high word come from it.
+        movs    r3,     xxl
+
+        // Shift both words by the full count; cross-word bits are merged at the end.
+        lsls    xxl,    r2
+        lsls    xxh,    r2
+
+        // Set flags on (count - 32): HS means count >= 32, LO means count < 32.
+        subs    r2,     #32
+
+    #ifdef __HAVE_FEATURE_IT
+        do_it   lo,te
+
+        // count < 32: r2 = 32 - count, so r3 = low word bits that carry into the high word.
+        rsblo   r2,     #0
+        lsrlo   r3,     r2
+
+        // count >= 32: r2 = count - 32, so r3 = low word shifted up into the high word.
+        lslhs   r3,     r2
+
+    #else /* !__HAVE_FEATURE_IT */
+        bhs     LLSYM(__llsl_large)
+
+        // count < 32: r2 = 32 - count, so r3 = low word bits that carry into the high word.
+        rsbs    r2,     #0
+        lsrs    r3,     r2
+
+        // Zero r2 so the shared shift below leaves r3 unchanged.
+        eors    r2,     r2
+
+      LLSYM(__llsl_large):
+        // Taken branch: r2 = count - 32.  Fall-through: r2 = 0 (no further shift).
+        lsls    r3,     r2
+
+    #endif /* !__HAVE_FEATURE_IT */
+
+        // Merge r3 into the high word (for counts 0..63 no set bits overlap, so ADD acts as OR).
+        adds    xxh,    r3
+        RET
+
+  #else /* !__thumb__ */
+
+        subs    r3,     r2,     #32             // r3 = count - 32; MI when count < 32
+        rsb     ip,     r2,     #32             // ip = 32 - count
+        movmi   xxh,    xxh,    lsl r2          // count < 32: high word <<= count
+        movpl   xxh,    xxl,    lsl r3          // count >= 32: high = low << (count - 32)
+        orrmi   xxh,    xxh,    xxl,    lsr ip  // count < 32: add bits shifted out of low
+        mov     xxl,    xxl,    lsl r2          // low word <<= count
+        RET
+
+  #endif /* !__thumb__ */
+
+    CFI_END_FUNCTION
+FUNC_END ashldi3
+FUNC_END aeabi_llsl
+
+#endif /* L_ashldi3 */
+
+
 
-- 
2.25.1

next prev parent reply	other threads:[~2021-01-15 11:31 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-15 11:30 [PATCH v5 00/33] libgcc: Thumb-1 Floating-Point Library for Cortex M0 Daniel Engel
2021-01-15 11:30 ` [PATCH v5 01/33] Add and restructure function declaration macros Daniel Engel
2021-01-15 11:30 ` [PATCH v5 02/33] Rename THUMB_FUNC_START to THUMB_FUNC_ENTRY Daniel Engel
2021-01-15 11:30 ` [PATCH v5 03/33] Fix syntax warnings on conditional instructions Daniel Engel
2021-01-15 11:30 ` [PATCH v5 04/33] Reorganize LIB1ASMFUNCS object wrapper macros Daniel Engel
2021-01-15 11:30 ` [PATCH v5 05/33] Add the __HAVE_FEATURE_IT and IT() macros Daniel Engel
2021-01-15 11:30 ` [PATCH v5 06/33] Refactor 'clz' functions into a new file Daniel Engel
2021-01-15 11:30 ` [PATCH v5 07/33] Refactor 'ctz' " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 08/33] Refactor 64-bit shift " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 09/33] Import 'clz' functions from the CM0 library Daniel Engel
2021-01-15 11:30 ` [PATCH v5 10/33] Import 'ctz' " Daniel Engel
2021-01-15 11:30 ` Daniel Engel [this message]
2021-01-15 11:30 ` [PATCH v5 12/33] Import 'clrsb' " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 13/33] Import 'ffs' " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 14/33] Import 'parity' " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 15/33] Import 'popcnt' " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 16/33] Refactor Thumb-1 64-bit comparison into a new file Daniel Engel
2021-01-15 11:30 ` [PATCH v5 17/33] Import 64-bit comparison from CM0 library Daniel Engel
2021-01-15 11:30 ` [PATCH v5 18/33] Merge Thumb-2 optimizations for 64-bit comparison Daniel Engel
2021-01-15 11:30 ` [PATCH v5 19/33] Import 32-bit division from the CM0 library Daniel Engel
2021-01-15 11:30 ` [PATCH v5 20/33] Refactor Thumb-1 64-bit division into a new file Daniel Engel
2021-01-15 11:30 ` [PATCH v5 21/33] Import 64-bit division from the CM0 library Daniel Engel
2021-01-15 11:30 ` [PATCH v5 22/33] Import integer multiplication " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 23/33] Refactor Thumb-1 float comparison into a new file Daniel Engel
2021-01-15 11:30 ` [PATCH v5 24/33] Import float comparison from the CM0 library Daniel Engel
2021-01-15 11:30 ` [PATCH v5 25/33] Refactor Thumb-1 float subtraction into a new file Daniel Engel
2021-01-15 11:30 ` [PATCH v5 26/33] Import float addition and subtraction from the CM0 library Daniel Engel
2021-01-15 11:30 ` [PATCH v5 27/33] Import float multiplication " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 28/33] Import float division " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 29/33] Import integer-to-float conversion " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 30/33] Import float-to-integer " Daniel Engel
2021-01-15 11:30 ` [PATCH v5 31/33] Import float<->double " Daniel Engel
2021-01-15 11:31 ` [PATCH v5 32/33] Import float<->__fp16 " Daniel Engel
2021-01-15 11:31 ` [PATCH v5 33/33] Drop single-precision Thumb-1 soft-float functions Daniel Engel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8d025ab5ba947e552a204e7df511cd2dab73c880.1610709584.git.gnu@danielengel.com \
    --to=gnu@danielengel.com \
    --cc=Richard.Earnshaw@foss.arm.com \
    --cc=christophe.lyon@linaro.org \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).