public inbox for libc-ports@sourceware.org
 help / color / mirror / Atom feed
* [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
@ 2013-03-04 23:33 Roland McGrath
  0 siblings, 0 replies; 4+ messages in thread
From: Roland McGrath @ 2013-03-04 23:33 UTC (permalink / raw)
  To: libc-ports

This patch applies on top of the ARM_ALWAYS_BX patch, which is on the same
branch (roland/arm-avoid-pc).

Tested on armv7l-linux-gnueabihf: with the default ARM_BX_ALIGN_LOG2 of 2
there are no changes to the object code.  Tested the new code by locally
hacking arm-features.h to define ARM_ALWAYS_BX and ARM_BX_ALIGN_LOG2=4, and
verifying no failures in 'make check subdirs=string'.
I didn't actually test ARM_BX_ALIGN_LOG2=4 without ARM_ALWAYS_BX, which is
a configuration that will probably never be used (though I wrote this code
to support it); it's pretty easy to tell by inspection that it's equivalent
to what I did test.


Thanks,
Roland


ports/ChangeLog.arm
	* sysdeps/arm/arm-features.h (ARM_BX_ALIGN_LOG2): New macro.
	* sysdeps/arm/memcpy.S: Respect ARM_BX_ALIGN_LOG2.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -40,4 +40,12 @@
    that instructions using pc as a destination register must never be used,
    so a "bx" (or "blx") instruction is always required.  */
 
+/* The log2 of the minimum alignment required for an address that
+   is the target of a computed branch (i.e. a "bx" instruction).
+   A more-specific arm-features.h file may define this to set a more
+   stringent requirement.  */
+#ifndef ARM_BX_ALIGN_LOG2
+# define ARM_BX_ALIGN_LOG2	2
+#endif
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -90,9 +90,9 @@ ENTRY(memcpy)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -114,38 +114,55 @@ ENTRY(memcpy)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1], #4
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0], #4
 
 #ifdef ARM_ALWAYS_BX
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -106,9 +106,9 @@ ENTRY(memmove)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -130,38 +130,55 @@ ENTRY(memmove)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1, #-4]!
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0, #-4]!
 
 #ifdef ARM_ALWAYS_BX

^ permalink raw reply	[flat|nested] 4+ messages in thread
* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-11 20:54 Roland McGrath
  2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
  0 siblings, 1 reply; 4+ messages in thread
From: Roland McGrath @ 2013-03-11 20:54 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-ports

Updated for current trunk.  Tested as before:

On arm-linux-gnueabihf, I tested that this doesn't change the object
code at all.  I also tested the changes by hacking the local copy of
arm-features.h to define ARM_ALWAYS_BX and verifying that there are no
regressions (no failures at all) when running 'make check subdirs=string'.

I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case.  The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about.  But maybe ARM
experts have better ideas.


OK?


Thanks,
Roland


ports/ChangeLog.arm
2013-03-11  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not
    be checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -88,7 +89,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -107,8 +113,16 @@ ENTRY(memcpy)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1], #4
 		ldr	r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
 		ldr	r8, [r1], #4
 		ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0], #4
 		str	r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
 		str	r8, [r0], #4
 		str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
 		strcsb	r4, [r0], #1
 		strcsb	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -104,7 +105,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -123,8 +129,16 @@ ENTRY(memmove)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1, #-4]!
 		ldr	r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
 		ldr	r8, [r1, #-4]!
 		ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0, #-4]!
 		str	r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
 		str	r8, [r0, #-4]!
 		str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
 		strcsb	r4, [r0, #-1]!
 		strcsb	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-03-13 19:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-04 23:33 [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
2013-03-12 23:40   ` Joseph S. Myers
2013-03-13 19:42     ` Roland McGrath

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).