public inbox for libc-ports@sourceware.org
 help / color / mirror / Atom feed
* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-11 20:54 Roland McGrath
  2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
  2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
  0 siblings, 2 replies; 7+ messages in thread
From: Roland McGrath @ 2013-03-11 20:54 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-ports

Updated for current trunk.  Tested as before:

On arm-linux-gnueabihf, I tested that this doesn't change the object
code at all.  I also tested the changes by hacking the local copy of
arm-features.h to define ARM_ALWAYS_BX and verifying that there are no
regressions (no failures at all) when running 'make check subdirs=string'.

I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case.  The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about.  But maybe ARM
experts have better ideas.


OK?


Thanks,
Roland


ports/ChangeLog.arm
2013-03-11  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not
    be checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -88,7 +89,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -107,8 +113,16 @@ ENTRY(memcpy)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1], #4
 		ldr	r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
 		ldr	r8, [r1], #4
 		ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0], #4
 		str	r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
 		str	r8, [r0], #4
 		str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
 		strcsb	r4, [r0], #1
 		strcsb	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -104,7 +105,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -123,8 +129,16 @@ ENTRY(memmove)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1, #-4]!
 		ldr	r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
 		ldr	r8, [r1, #-4]!
 		ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0, #-4]!
 		str	r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
 		str	r8, [r0, #-4]!
 		str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
 		strcsb	r4, [r0, #-1]!
 		strcsb	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
  2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
@ 2013-03-11 20:55 ` Roland McGrath
  2013-03-12 23:40   ` Joseph S. Myers
  2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
  1 sibling, 1 reply; 7+ messages in thread
From: Roland McGrath @ 2013-03-11 20:55 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-ports

Updated for current trunk.  Tested as before:

This is on top of the ARM_ALWAYS_BX patch, which is on the same branch.

Tested on armv7l-linux-gnueabihf: no changes to the object code.  Tested
the new code by locally hacking arm-features.h to define ARM_ALWAYS_BX and
ARM_BX_ALIGN_LOG2=4, and verifying no failures in 'make check subdirs=string'.
I didn't actually test ARM_BX_ALIGN_LOG2=4 without ARM_ALWAYS_BX, which is
a configuration that will probably never be used (but I wrote this code
to support it)--it's pretty easy to tell by inspection that it's equivalent
to what I did test.


OK?


Thanks,
Roland


ports/ChangeLog.arm
	* sysdeps/arm/arm-features.h (ARM_BX_ALIGN_LOG2): New macro.
	* sysdeps/arm/memcpy.S: Respect ARM_BX_ALIGN_LOG2.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -40,4 +40,12 @@
    that instructions using pc as a destination register must never be used,
    so a "bx" (or "blx") instruction is always required.  */
 
+/* The log2 of the minimum alignment required for an address that
+   is the target of a computed branch (i.e. a "bx" instruction).
+   A more-specific arm-features.h file may define this to set a more
+   stringent requirement.  */
+#ifndef ARM_BX_ALIGN_LOG2
+# define ARM_BX_ALIGN_LOG2	2
+#endif
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -90,9 +90,9 @@ ENTRY(memcpy)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -114,38 +114,55 @@ ENTRY(memcpy)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1], #4
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0], #4
 
 #ifdef ARM_ALWAYS_BX
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -106,9 +106,9 @@ ENTRY(memmove)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -130,38 +130,55 @@ ENTRY(memmove)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1, #-4]!
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0, #-4]!
 
 #ifdef ARM_ALWAYS_BX

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
  2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
  2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
@ 2013-03-12 23:35 ` Joseph S. Myers
  2013-03-13 16:50   ` Roland McGrath
  1 sibling, 1 reply; 7+ messages in thread
From: Joseph S. Myers @ 2013-03-12 23:35 UTC (permalink / raw)
  To: Roland McGrath; +Cc: libc-ports

On Mon, 11 Mar 2013, Roland McGrath wrote:

> 	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.

That's OK.

> +#else
> +		beq	7f
> +		push	{r10}
> +		cfi_adjust_cfa_offset (4)

This should also have cfi_rel_offset (r10, 0) to record the save itself in 
CFI debug information.

> +#ifdef ARM_ALWAYS_BX
> +		pop	{r10}
> +		cfi_adjust_cfa_offset (-4)
> +#endif

Likewise, record the restore of r10.

> +#else
> +		beq	7f
> +		push	{r10}
> +		cfi_adjust_cfa_offset (4)

Likewise, in memmove.

> +#ifdef ARM_ALWAYS_BX
> +		pop	{r10}
> +		cfi_adjust_cfa_offset (-4)

Likewise.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
  2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
@ 2013-03-12 23:40   ` Joseph S. Myers
  2013-03-13 19:42     ` Roland McGrath
  0 siblings, 1 reply; 7+ messages in thread
From: Joseph S. Myers @ 2013-03-12 23:40 UTC (permalink / raw)
  To: Roland McGrath; +Cc: libc-ports

On Mon, 11 Mar 2013, Roland McGrath wrote:

> +/* The log2 of the minimum alignment required for an address that
> +   is the target of a computed branch (i.e. a "bx" instruction).
> +   A more-specific arm-features.h file may define this to set a more
> +   stringent requirement.  */

The comment should say that this should only be used in ARM-mode code 
(certainly this definition doesn't make sense by default for code built as 
Thumb).  OK with that change.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
  2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
@ 2013-03-13 16:50   ` Roland McGrath
  0 siblings, 0 replies; 7+ messages in thread
From: Roland McGrath @ 2013-03-13 16:50 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: libc-ports

> This should also have cfi_rel_offset (r10, 0) to record the save itself in 
> CFI debug information.

Good point!  Fixed.  Committed as follows.

Thanks,
Roland


ports/ChangeLog.arm
2013-03-13  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not
    be checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -89,7 +90,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -108,8 +114,17 @@ ENTRY(memcpy)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		cfi_rel_offset (r10, 0)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1], #4
 		ldr	r4, [r1], #4
@@ -119,8 +134,13 @@ ENTRY(memcpy)
 		ldr	r8, [r1], #4
 		ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0], #4
 		str	r4, [r0], #4
@@ -130,6 +150,12 @@ ENTRY(memcpy)
 		str	r8, [r0], #4
 		str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+		cfi_restore (r10)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -147,7 +173,8 @@ ENTRY(memcpy)
 		strbcs	r4, [r0], #1
 		strbcs	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -105,7 +106,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -124,8 +130,17 @@ ENTRY(memmove)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		cfi_rel_offset (r10, 0)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1, #-4]!
 		ldr	r4, [r1, #-4]!
@@ -135,8 +150,13 @@ ENTRY(memmove)
 		ldr	r8, [r1, #-4]!
 		ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0, #-4]!
 		str	r4, [r0, #-4]!
@@ -146,6 +166,12 @@ ENTRY(memmove)
 		str	r8, [r0, #-4]!
 		str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+		cfi_restore (r10)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -163,7 +189,8 @@ ENTRY(memmove)
 		strbcs	r4, [r0, #-1]!
 		strbcs	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
  2013-03-12 23:40   ` Joseph S. Myers
@ 2013-03-13 19:42     ` Roland McGrath
  0 siblings, 0 replies; 7+ messages in thread
From: Roland McGrath @ 2013-03-13 19:42 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: libc-ports

> The comment should say that this should only be used in ARM-mode code 
> (certainly this definition doesn't make sense by default for code built as 
> Thumb).  OK with that change.

Well, it makes some sense for any code doing something like what the
memcpy code is doing (i.e. arithmetically computing branch targets, which
implicitly assumes a fixed distance between them).  In Thumb mode, unless
one were completely sure that every instruction would always have a short
(two-byte) encoding, one would need to explicitly align each instruction
to four bytes.

I've committed it with the following comment.
Feel free to adjust it further as you see fit.


Thanks,
Roland


/* The log2 of the minimum alignment required for an address that
   is the target of a computed branch (i.e. a "bx" instruction).
   A more-specific arm-features.h file may define this to set a more
   stringent requirement.

   Using this only makes sense for code in ARM mode (where instructions
   always have a fixed size of four bytes), or for Thumb-mode code that is
   specifically aligning all the related branch targets to match (since
   Thumb instructions might be either two or four bytes).  */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
@ 2013-03-04 23:33 Roland McGrath
  0 siblings, 0 replies; 7+ messages in thread
From: Roland McGrath @ 2013-03-04 23:33 UTC (permalink / raw)
  To: libc-ports

This is on top of the ARM_ALWAYS_BX patch, which is on the same branch.

Tested on armv7l-linux-gnueabihf: no changes to the object code.  Tested
the new code by locally hacking arm-features.h to define ARM_ALWAYS_BX and
ARM_BX_ALIGN_LOG2=4, and verifying no failures in 'make check subdirs=string'.
I didn't actually test ARM_BX_ALIGN_LOG2=4 without ARM_ALWAYS_BX, which is
a configuration that will probably never be used (but I wrote this code
to support it)--it's pretty easy to tell by inspection that it's equivalent
to what I did test.


Thanks,
Roland


ports/ChangeLog.arm
	* sysdeps/arm/arm-features.h (ARM_BX_ALIGN_LOG2): New macro.
	* sysdeps/arm/memcpy.S: Respect ARM_BX_ALIGN_LOG2.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -40,4 +40,12 @@
    that instructions using pc as a destination register must never be used,
    so a "bx" (or "blx") instruction is always required.  */
 
+/* The log2 of the minimum alignment required for an address that
+   is the target of a computed branch (i.e. a "bx" instruction).
+   A more-specific arm-features.h file may define this to set a more
+   stringent requirement.  */
+#ifndef ARM_BX_ALIGN_LOG2
+# define ARM_BX_ALIGN_LOG2	2
+#endif
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -90,9 +90,9 @@ ENTRY(memcpy)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -114,38 +114,55 @@ ENTRY(memcpy)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1], #4
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0], #4
 
 #ifdef ARM_ALWAYS_BX
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -106,9 +106,9 @@ ENTRY(memmove)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -130,38 +130,55 @@ ENTRY(memmove)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1, #-4]!
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0, #-4]!
 
 #ifdef ARM_ALWAYS_BX

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-03-13 19:42 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
2013-03-12 23:40   ` Joseph S. Myers
2013-03-13 19:42     ` Roland McGrath
2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
2013-03-13 16:50   ` Roland McGrath
  -- strict thread matches above, loose matches on Subject: below --
2013-03-04 23:33 [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).