From: Roland McGrath
To: libc-ports@sourceware.org
Subject: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
Message-Id: <20130304190741.F178D2C09B@topped-with-meat.com>
Date: Mon, 04 Mar 2013 19:08:00 -0000

On arm-linux-gnueabihf, I verified that this doesn't change the
generated object code at all.  I also tested the changes by hacking my
local copy of arm-features.h to define ARM_ALWAYS_BX and verifying that
there are no regressions (no failures at all) when running 'make check
subdirs=string'.

I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case.  The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about.  But maybe ARM
experts have better ideas.
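For reference, the shape of the change at each computed jump is roughly
the sketch below.  It is illustrative only, not the literal hunks: the
label and surrounding code are simplified, ip holds the byte offset
(0, 4, ..., 28) into the unrolled sequence, and in ARM state a read of
pc yields the address of the current instruction plus 8.

	.text
	.arm
	/* Dispatch: jump ip bytes into the unrolled sequence at 6f.  */
#ifndef ARM_ALWAYS_BX
	add	pc, pc, ip	@ pc reads as the address of 6f
	nop			@ filler slot so 6f sits at the pc base
#else
	push	{r10}		@ r10 is call-saved, so preserve it
	add	r10, pc, ip	@ pc again reads as the address of 6f
	bx	r10		@ branch without writing pc directly
#endif
6:	nop			@ ip = 0 lands here
	nop			@ ip = 4 lands here, and so on
#ifdef ARM_ALWAYS_BX
	pop	{r10}		@ restore the scratch register
#endif
	bx	lr

Both forms branch to the same place: in each, the pc read in the add is
the address two instructions later, so replacing the nop slot with the
bx leaves the offset arithmetic unchanged.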
Thanks,
Roland


ports/ChangeLog.arm
2013-03-04  Roland McGrath

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not be
    checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
 #else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
 #endif
 
 .text
@@ -88,7 +89,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -107,8 +113,16 @@ ENTRY(memcpy)
 5:	ands	ip, r2, #28
 	rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 	addne	pc, pc, ip		@ C is always clear here
 	b	7f
+#else
+	beq	7f
+	push	{r10}
+	cfi_adjust_cfa_offset (4)
+	add	r10, pc, ip
+	bx	r10
+#endif
 
 6:	nop
 	ldr	r3, [r1], #4
 	ldr	r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
 	ldr	r8, [r1], #4
 	ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 	add	pc, pc, ip
 	nop
+#else
+	add	r10, pc, ip
+	bx	r10
+#endif
 	nop
 	str	r3, [r0], #4
 	str	r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
 	str	r8, [r0], #4
 	str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+	pop	{r10}
+	cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:	ldmfd	sp!, {r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
 	strcsb	r4, [r0], #1
 	strcsb	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 	ldmfd	sp!, {r0, r4, lr}
 	cfi_adjust_cfa_offset (-12)
 	cfi_restore (r4)
@@ -178,7 +203,7 @@ ENTRY(memcpy)
 	bgt	18f
 
-	.macro	forward_copy_shift pull push
+	.macro	forward_copy_shift PULL PUSH
 
 	subs	r2, r2, #28
 	blt	14f
 
@@ -206,24 +231,24 @@ ENTRY(memcpy)
 
 12:	PLD(	pld	[r1, #124]		)
 13:	ldmia	r1!, {r4, r5, r6, r7}
-	mov	r3, lr, pull #\pull
+	mov	r3, lr, PULL #\PULL
 	subs	r2, r2, #32
 	ldmia	r1!, {r8, r9, ip, lr}
-	orr	r3, r3, r4, push #\push
-	mov	r4, r4, pull #\pull
-	orr	r4, r4, r5, push #\push
-	mov	r5, r5, pull #\pull
-	orr	r5, r5, r6, push #\push
-	mov	r6, r6, pull #\pull
-	orr	r6, r6, r7, push #\push
-	mov	r7, r7, pull #\pull
-	orr	r7, r7, r8, push #\push
-	mov	r8, r8, pull #\pull
-	orr	r8, r8, r9, push #\push
-	mov	r9, r9, pull #\pull
-	orr	r9, r9, ip, push #\push
-	mov	ip, ip, pull #\pull
-	orr	ip, ip, lr, push #\push
+	orr	r3, r3, r4, PUSH #\PUSH
+	mov	r4, r4, PULL #\PULL
+	orr	r4, r4, r5, PUSH #\PUSH
+	mov	r5, r5, PULL #\PULL
+	orr	r5, r5, r6, PUSH #\PUSH
+	mov	r6, r6, PULL #\PULL
+	orr	r6, r6, r7, PUSH #\PUSH
+	mov	r7, r7, PULL #\PULL
+	orr	r7, r7, r8, PUSH #\PUSH
+	mov	r8, r8, PULL #\PULL
+	orr	r8, r8, r9, PUSH #\PUSH
+	mov	r9, r9, PULL #\PULL
+	orr	r9, r9, ip, PUSH #\PUSH
+	mov	ip, ip, PULL #\PULL
+	orr	ip, ip, lr, PUSH #\PUSH
 	stmia	r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
 	bge	12b
 	PLD(	cmn	r2, #96			)
@@ -240,26 +265,26 @@ ENTRY(memcpy)
 14:	ands	ip, r2, #28
 	beq	16f
 
-15:	mov	r3, lr, pull #\pull
+15:	mov	r3, lr, PULL #\PULL
 	ldr	lr, [r1], #4
 	subs	ip, ip, #4
-	orr	r3, r3, lr, push #\push
+	orr	r3, r3, lr, PUSH #\PUSH
 	str	r3, [r0], #4
 	bgt	15b
 
 	CALGN(	cmp	r2, #0			)
 	CALGN(	bge	11b			)
 
-16:	sub	r1, r1, #(\push / 8)
+16:	sub	r1, r1, #(\PUSH / 8)
 	b	8b
 
 	.endm
 
-	forward_copy_shift	pull=8	push=24
+	forward_copy_shift	PULL=8	PUSH=24
 
-17:	forward_copy_shift	pull=16	push=16
+17:	forward_copy_shift	PULL=16	PUSH=16
 
-18:	forward_copy_shift	pull=24	push=8
+18:	forward_copy_shift	PULL=24	PUSH=8
 
 END(memcpy)
 libc_hidden_builtin_def (memcpy)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
 #else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
 #endif
 
 .text
@@ -104,7 +105,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -123,8 +129,16 @@ ENTRY(memmove)
 5:	ands	ip, r2, #28
 	rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 	addne	pc, pc, ip		@ C is always clear here
 	b	7f
+#else
+	beq	7f
+	push	{r10}
+	cfi_adjust_cfa_offset (4)
+	add	r10, pc, ip
+	bx	r10
+#endif
 
 6:	nop
 	ldr	r3, [r1, #-4]!
 	ldr	r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
 	ldr	r8, [r1, #-4]!
 	ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 	add	pc, pc, ip
 	nop
+#else
+	add	r10, pc, ip
+	bx	r10
+#endif
 	nop
 	str	r3, [r0, #-4]!
 	str	r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
 	str	r8, [r0, #-4]!
 	str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+	pop	{r10}
+	cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:	ldmfd	sp!, {r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
 	strcsb	r4, [r0, #-1]!
 	strcsb	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 	ldmfd	sp!, {r0, r4, lr}
 	cfi_adjust_cfa_offset (-12)
 	cfi_restore (r4)
@@ -193,7 +218,7 @@ ENTRY(memmove)
 	blt	18f
 
-	.macro	backward_copy_shift push pull
+	.macro	backward_copy_shift PUSH PULL
 
 	subs	r2, r2, #28
 	blt	14f
 
@@ -221,24 +246,24 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:	ldmdb	r1!, {r7, r8, r9, ip}
-	mov	lr, r3, push #\push
+	mov	lr, r3, PUSH #\PUSH
 	subs	r2, r2, #32
 	ldmdb	r1!, {r3, r4, r5, r6}
-	orr	lr, lr, ip, pull #\pull
-	mov	ip, ip, push #\push
-	orr	ip, ip, r9, pull #\pull
-	mov	r9, r9, push #\push
-	orr	r9, r9, r8, pull #\pull
-	mov	r8, r8, push #\push
-	orr	r8, r8, r7, pull #\pull
-	mov	r7, r7, push #\push
-	orr	r7, r7, r6, pull #\pull
-	mov	r6, r6, push #\push
-	orr	r6, r6, r5, pull #\pull
-	mov	r5, r5, push #\push
-	orr	r5, r5, r4, pull #\pull
-	mov	r4, r4, push #\push
-	orr	r4, r4, r3, pull #\pull
+	orr	lr, lr, ip, PULL #\PULL
+	mov	ip, ip, PUSH #\PUSH
+	orr	ip, ip, r9, PULL #\PULL
+	mov	r9, r9, PUSH #\PUSH
+	orr	r9, r9, r8, PULL #\PULL
+	mov	r8, r8, PUSH #\PUSH
+	orr	r8, r8, r7, PULL #\PULL
+	mov	r7, r7, PUSH #\PUSH
+	orr	r7, r7, r6, PULL #\PULL
+	mov	r6, r6, PUSH #\PUSH
+	orr	r6, r6, r5, PULL #\PULL
+	mov	r5, r5, PUSH #\PUSH
+	orr	r5, r5, r4, PULL #\PULL
+	mov	r4, r4, PUSH #\PUSH
+	orr	r4, r4, r3, PULL #\PULL
 	stmdb	r0!, {r4 - r9, ip, lr}
 	bge	12b
 	PLD(	cmn	r2, #96			)
@@ -255,26 +280,26 @@ ENTRY(memmove)
 14:	ands	ip, r2, #28
 	beq	16f
 
-15:	mov	lr, r3, push #\push
+15:	mov	lr, r3, PUSH #\PUSH
 	ldr	r3, [r1, #-4]!
 	subs	ip, ip, #4
-	orr	lr, lr, r3, pull #\pull
+	orr	lr, lr, r3, PULL #\PULL
 	str	lr, [r0, #-4]!
 	bgt	15b
 
 	CALGN(	cmp	r2, #0			)
 	CALGN(	bge	11b			)
 
-16:	add	r1, r1, #(\pull / 8)
+16:	add	r1, r1, #(\PULL / 8)
 	b	8b
 
 	.endm
 
-	backward_copy_shift	push=8	pull=24
+	backward_copy_shift	PUSH=8	PULL=24
 
-17:	backward_copy_shift	push=16	pull=16
+17:	backward_copy_shift	PUSH=16	PULL=16
 
-18:	backward_copy_shift	push=24	pull=8
+18:	backward_copy_shift	PUSH=24	PULL=8
 
 END(memmove)
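Note that nothing in this patch defines ARM_ALWAYS_BX itself; a
machine-specific configuration would opt in through its own
arm-features.h.  A hypothetical override might look like the following
sketch (the directory name and include guard are illustrative only, not
part of the patch):

/* Hypothetical sysdeps/arm/SOMETARGET/arm-features.h -- illustration only.  */
#ifndef _SOMETARGET_ARM_FEATURES_H
#define _SOMETARGET_ARM_FEATURES_H	1

/* This configuration forbids instructions that use pc as a destination
   register, so memcpy/memmove must take the bx-based paths.  */
#define ARM_ALWAYS_BX	1

/* Pick up the generic ARM feature defaults.  */
#include_next <arm-features.h>

#endif	/* arm-features.h */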