public inbox for libc-ports@sourceware.org
 help / color / mirror / Atom feed
* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-11 20:54 Roland McGrath
  2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
  2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
  0 siblings, 2 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-11 20:54 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-ports

Updated for current trunk.  Tested as before:

On arm-linux-gnueabihf, I tested that this doesn't change the object
code at all.  I also tested the changes by hacking the local copy of
arm-features.h to define ARM_ALWAYS_BX and verifying that there are no
regressions (no failures at all) when running 'make check subdirs=string'.

I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case.  The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about.  But maybe ARM
experts have better ideas.


OK?


Thanks,
Roland


ports/ChangeLog.arm
2013-03-11  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not
    be checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -88,7 +89,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -107,8 +113,16 @@ ENTRY(memcpy)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1], #4
 		ldr	r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
 		ldr	r8, [r1], #4
 		ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0], #4
 		str	r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
 		str	r8, [r0], #4
 		str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
 		strcsb	r4, [r0], #1
 		strcsb	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -104,7 +105,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -123,8 +129,16 @@ ENTRY(memmove)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1, #-4]!
 		ldr	r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
 		ldr	r8, [r1, #-4]!
 		ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0, #-4]!
 		str	r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
 		str	r8, [r0, #-4]!
 		str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
 		strcsb	r4, [r0, #-1]!
 		strcsb	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
  2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
@ 2013-03-11 20:55 ` Roland McGrath
  2013-03-12 23:40   ` Joseph S. Myers
  2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
  1 sibling, 1 reply; 9+ messages in thread
From: Roland McGrath @ 2013-03-11 20:55 UTC (permalink / raw)
  To: Joseph Myers; +Cc: libc-ports

Updated for current trunk.  Tested as before:

This is on top of the ARM_ALWAYS_BX patch, which is on the same branch.

Tested on armv7l-linux-gnueabihf: no changes to the object code.  Tested
the new code by locally hacking arm-features.h to define ARM_ALWAYS_BX and
ARM_BX_ALIGN_LOG2=4, and verifying no failures in 'make check subdirs=string'.
I didn't actually test ARM_BX_ALIGN_LOG2=4 without ARM_ALWAYS_BX, which is
a configuration that will probably never be used (but I wrote this code
to support it)--it's pretty easy to tell by inspection that it's equivalent
to what I did test.


OK?


Thanks,
Roland


ports/ChangeLog.arm
	* sysdeps/arm/arm-features.h (ARM_BX_ALIGN_LOG2): New macro.
	* sysdeps/arm/memcpy.S: Respect ARM_BX_ALIGN_LOG2.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -40,4 +40,12 @@
    that instructions using pc as a destination register must never be used,
    so a "bx" (or "blx") instruction is always required.  */
 
+/* The log2 of the minimum alignment required for an address that
+   is the target of a computed branch (i.e. a "bx" instruction).
+   A more-specific arm-features.h file may define this to set a more
+   stringent requirement.  */
+#ifndef ARM_BX_ALIGN_LOG2
+# define ARM_BX_ALIGN_LOG2	2
+#endif
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -90,9 +90,9 @@ ENTRY(memcpy)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -114,38 +114,55 @@ ENTRY(memcpy)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1], #4
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0], #4
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0], #4
 
 #ifdef ARM_ALWAYS_BX
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -106,9 +106,9 @@ ENTRY(memmove)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
 #ifndef ARM_ALWAYS_BX
-	CALGN(	add	pc, r4, ip		)
+	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 #else
-	CALGN(	add	r4, r4, ip		)
+	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
 	CALGN(	bx	r4			)
 #endif
 
@@ -130,38 +130,55 @@ ENTRY(memmove)
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
 #ifndef ARM_ALWAYS_BX
-		addne	pc, pc, ip		@ C is always clear here
+		/* C is always clear here.  */
+		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		b	7f
 #else
 		beq	7f
 		push	{r10}
 		cfi_adjust_cfa_offset (4)
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 6:		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r3, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r4, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r5, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r6, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r7, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	r8, [r1, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		ldr	lr, [r1, #-4]!
 
 #ifndef ARM_ALWAYS_BX
-		add	pc, pc, ip
+		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		nop
 #else
-		add	r10, pc, ip
+		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
 		bx	r10
 #endif
+		.p2align ARM_BX_ALIGN_LOG2
 		nop
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r3, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r4, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r5, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r6, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r7, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	r8, [r0, #-4]!
+		.p2align ARM_BX_ALIGN_LOG2
 		str	lr, [r0, #-4]!
 
 #ifdef ARM_ALWAYS_BX

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
  2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
  2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
@ 2013-03-12 23:35 ` Joseph S. Myers
  2013-03-13 16:50   ` Roland McGrath
  1 sibling, 1 reply; 9+ messages in thread
From: Joseph S. Myers @ 2013-03-12 23:35 UTC (permalink / raw)
  To: Roland McGrath; +Cc: libc-ports

On Mon, 11 Mar 2013, Roland McGrath wrote:

> 	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.

That's OK.

> +#else
> +		beq	7f
> +		push	{r10}
> +		cfi_adjust_cfa_offset (4)

This should also have cfi_rel_offset (r10, 0) to record the save itself in 
CFI debug information.

> +#ifdef ARM_ALWAYS_BX
> +		pop	{r10}
> +		cfi_adjust_cfa_offset (-4)
> +#endif

Likewise, record the restore of r10.

> +#else
> +		beq	7f
> +		push	{r10}
> +		cfi_adjust_cfa_offset (4)

Likewise, in memmove.

> +#ifdef ARM_ALWAYS_BX
> +		pop	{r10}
> +		cfi_adjust_cfa_offset (-4)

Likewise.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
  2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
@ 2013-03-12 23:40   ` Joseph S. Myers
  2013-03-13 19:42     ` Roland McGrath
  0 siblings, 1 reply; 9+ messages in thread
From: Joseph S. Myers @ 2013-03-12 23:40 UTC (permalink / raw)
  To: Roland McGrath; +Cc: libc-ports

On Mon, 11 Mar 2013, Roland McGrath wrote:

> +/* The log2 of the minimum alignment required for an address that
> +   is the target of a computed branch (i.e. a "bx" instruction).
> +   A more-specific arm-features.h file may define this to set a more
> +   stringent requirement.  */

The comment should say that this should only be used in ARM-mode code 
(certainly this definition doesn't make sense by default for code built as 
Thumb).  OK with that change.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
  2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
@ 2013-03-13 16:50   ` Roland McGrath
  0 siblings, 0 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-13 16:50 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: libc-ports

> This should also have cfi_rel_offset (r10, 0) to record the save itself in 
> CFI debug information.

Good point!  Fixed.  Committed as follows.

Thanks,
Roland


ports/ChangeLog.arm
2013-03-13  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not
    be checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -89,7 +90,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -108,8 +114,17 @@ ENTRY(memcpy)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		cfi_rel_offset (r10, 0)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1], #4
 		ldr	r4, [r1], #4
@@ -119,8 +134,13 @@ ENTRY(memcpy)
 		ldr	r8, [r1], #4
 		ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0], #4
 		str	r4, [r0], #4
@@ -130,6 +150,12 @@ ENTRY(memcpy)
 		str	r8, [r0], #4
 		str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+		cfi_restore (r10)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -147,7 +173,8 @@ ENTRY(memcpy)
 		strbcs	r4, [r0], #1
 		strbcs	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -105,7 +106,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -124,8 +130,17 @@ ENTRY(memmove)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		cfi_rel_offset (r10, 0)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1, #-4]!
 		ldr	r4, [r1, #-4]!
@@ -135,8 +150,13 @@ ENTRY(memmove)
 		ldr	r8, [r1, #-4]!
 		ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0, #-4]!
 		str	r4, [r0, #-4]!
@@ -146,6 +166,12 @@ ENTRY(memmove)
 		str	r8, [r0, #-4]!
 		str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+		cfi_restore (r10)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		pop	{r5 - r8}
@@ -163,7 +189,8 @@ ENTRY(memmove)
 		strbcs	r4, [r0, #-1]!
 		strbcs	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
  2013-03-12 23:40   ` Joseph S. Myers
@ 2013-03-13 19:42     ` Roland McGrath
  0 siblings, 0 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-13 19:42 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: libc-ports

> The comment should say that this should only be used in ARM-mode code 
> (certainly this definition doesn't make sense by default for code built as 
> Thumb).  OK with that change.

Well, it makes some sense for any situation doing something like what the
memcpy code is doing (i.e. arithmetically computing branch targets, which
implicitly assumes fixed distance between them).  Unless one were
completely sure that each Thumb instruction would always have a short
encoding, then you'd need to explicitly align each one to four.

I've committed it with the following comment.
Feel free to adjust it further as you see fit.


Thanks,
Roland


/* The log2 of the minimum alignment required for an address that
   is the target of a computed branch (i.e. a "bx" instruction).
   A more-specific arm-features.h file may define this to set a more
   stringent requirement.

   Using this only makes sense for code in ARM mode (where instructions
   always have a fixed size of four bytes), or for Thumb-mode code that is
   specifically aligning all the related branch targets to match (since
   Thumb instructions might be either two or four bytes).  */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
  2013-03-09  2:21 ` Joseph S. Myers
@ 2013-03-11 16:42   ` Roland McGrath
  0 siblings, 0 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-11 16:42 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: libc-ports

> I expect these various patches will need updating after Richard's changes 

Yes.  I've updated all my branches after those went in.  I'm completing the
re-testing today and will post new versions of each when they're verified.

> ... also, is there an ABI document explaining the exact restrictions 
> involved, especially as regards pc-avoidance?

I don't think there is anything that is either up to date or concise.
https://sites.google.com/a/chromium.org/dev/nativeclient/reference/arm-overview
is something you might find informative.

For the particular issue at hand, it's enough to know that the platform
prohibits all operations using pc as a destination register.  Only the
various explicit branch operations are allowed.  The platform also requires
ARMv7-A as its baseline, so "bx" is always available.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
  2013-03-04 19:08 Roland McGrath
@ 2013-03-09  2:21 ` Joseph S. Myers
  2013-03-11 16:42   ` Roland McGrath
  0 siblings, 1 reply; 9+ messages in thread
From: Joseph S. Myers @ 2013-03-09  2:21 UTC (permalink / raw)
  To: Roland McGrath; +Cc: libc-ports

I expect these various patches will need updating after Richard's changes 
... also, is there an ABI document explaining the exact restrictions 
involved, especially as regards pc-avoidance?

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-04 19:08 Roland McGrath
  2013-03-09  2:21 ` Joseph S. Myers
  0 siblings, 1 reply; 9+ messages in thread
From: Roland McGrath @ 2013-03-04 19:08 UTC (permalink / raw)
  To: libc-ports

On arm-linux-gnueabihf, I tested that this doesn't change the object
code at all.  I also tested the changes by hacking the local copy of
arm-features.h to define ARM_ALWAYS_BX and verifying that there are no
regressions (no failures at all) when running 'make check subdirs=string'.

I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case.  The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about.  But maybe ARM
experts have better ideas.


Thanks,
Roland


ports/ChangeLog.arm
2013-03-04  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not
    be checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif  /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define PULL            lsr
+#define PUSH            lsl
 #else
-#define pull            lsl
-#define push            lsr
+#define PULL            lsl
+#define PUSH            lsr
 #endif
 
 		.text
@@ -88,7 +89,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -107,8 +113,16 @@ ENTRY(memcpy)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1], #4
 		ldr	r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
 		ldr	r8, [r1], #4
 		ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0], #4
 		str	r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
 		str	r8, [r0], #4
 		str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		ldmfd	sp!, {r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
 		strcsb	r4, [r0], #1
 		strcsb	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		ldmfd	sp!, {r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
@@ -178,7 +203,7 @@ ENTRY(memcpy)
 		bgt	18f
 
 
-		.macro	forward_copy_shift pull push
+		.macro	forward_copy_shift PULL PUSH
 
 		subs	r2, r2, #28
 		blt	14f
@@ -206,24 +231,24 @@ ENTRY(memcpy)
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldmia	r1!, {r4, r5, r6, r7}
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, PULL #\PULL
 		subs	r2, r2, #32
 		ldmia	r1!, {r8, r9, ip, lr}
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, PUSH #\PUSH
+		mov	r4, r4, PULL #\PULL
+		orr	r4, r4, r5, PUSH #\PUSH
+		mov	r5, r5, PULL #\PULL
+		orr	r5, r5, r6, PUSH #\PUSH
+		mov	r6, r6, PULL #\PULL
+		orr	r6, r6, r7, PUSH #\PUSH
+		mov	r7, r7, PULL #\PULL
+		orr	r7, r7, r8, PUSH #\PUSH
+		mov	r8, r8, PULL #\PULL
+		orr	r8, r8, r9, PUSH #\PUSH
+		mov	r9, r9, PULL #\PULL
+		orr	r9, r9, ip, PUSH #\PUSH
+		mov	ip, ip, PULL #\PULL
+		orr	ip, ip, lr, PUSH #\PUSH
 		stmia	r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -240,26 +265,26 @@ ENTRY(memcpy)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, PULL #\PULL
 		ldr	lr, [r1], #4
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, PUSH #\PUSH
 		str	r3, [r0], #4
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
 	CALGN(	bge	11b			)
 
-16:		sub	r1, r1, #(\push / 8)
+16:		sub	r1, r1, #(\PUSH / 8)
 		b	8b
 
 		.endm
 
 
-		forward_copy_shift	pull=8	push=24
+		forward_copy_shift	PULL=8	PUSH=24
 
-17:		forward_copy_shift	pull=16	push=16
+17:		forward_copy_shift	PULL=16	PUSH=16
 
-18:		forward_copy_shift	pull=24	push=8
+18:		forward_copy_shift	PULL=24	PUSH=8
 
 END(memcpy)
 libc_hidden_builtin_def (memcpy)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define PULL            lsr
+#define PUSH            lsl
 #else
-#define pull            lsl
-#define push            lsr
+#define PULL            lsl
+#define PUSH            lsr
 #endif
 
 		.text
@@ -104,7 +105,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -123,8 +129,16 @@ ENTRY(memmove)
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
+#else
+		beq	7f
+		push	{r10}
+		cfi_adjust_cfa_offset (4)
+		add	r10, pc, ip
+		bx	r10
+#endif
 6:		nop
 		ldr	r3, [r1, #-4]!
 		ldr	r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
 		ldr	r8, [r1, #-4]!
 		ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 		add	pc, pc, ip
 		nop
+#else
+		add	r10, pc, ip
+		bx	r10
+#endif
 		nop
 		str	r3, [r0, #-4]!
 		str	r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
 		str	r8, [r0, #-4]!
 		str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+		pop	{r10}
+		cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:		ldmfd	sp!, {r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
 		strcsb	r4, [r0, #-1]!
 		strcsb	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 		ldmfd	sp!, {r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
@@ -193,7 +218,7 @@ ENTRY(memmove)
 		blt	18f
 
 
-		.macro	backward_copy_shift push pull
+		.macro	backward_copy_shift PUSH PULL
 
 		subs	r2, r2, #28
 		blt	14f
@@ -221,24 +246,24 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+		mov     lr, r3, PUSH #\PUSH
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
+		orr     lr, lr, ip, PULL #\PULL
+		mov     ip, ip, PUSH #\PUSH
+		orr     ip, ip, r9, PULL #\PULL
+		mov     r9, r9, PUSH #\PUSH
+		orr     r9, r9, r8, PULL #\PULL
+		mov     r8, r8, PUSH #\PUSH
+		orr     r8, r8, r7, PULL #\PULL
+		mov     r7, r7, PUSH #\PUSH
+		orr     r7, r7, r6, PULL #\PULL
+		mov     r6, r6, PUSH #\PUSH
+		orr     r6, r6, r5, PULL #\PULL
+		mov     r5, r5, PUSH #\PUSH
+		orr     r5, r5, r4, PULL #\PULL
+		mov     r4, r4, PUSH #\PUSH
+		orr     r4, r4, r3, PULL #\PULL
 		stmdb   r0!, {r4 - r9, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -255,26 +280,26 @@ ENTRY(memmove)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, PUSH #\PUSH
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, PULL #\PULL
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
 	CALGN(	bge	11b			)
 
-16:		add	r1, r1, #(\pull / 8)
+16:		add	r1, r1, #(\PULL / 8)
 		b	8b
 
 		.endm
 
 
-		backward_copy_shift	push=8	pull=24
+		backward_copy_shift	PUSH=8	PULL=24
 
-17:		backward_copy_shift	push=16	pull=16
+17:		backward_copy_shift	PUSH=16	PULL=16
 
-18:		backward_copy_shift	push=24	pull=8
+18:		backward_copy_shift	PUSH=24	PULL=8
 
 
 END(memmove)

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-03-13 19:42 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
2013-03-12 23:40   ` Joseph S. Myers
2013-03-13 19:42     ` Roland McGrath
2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
2013-03-13 16:50   ` Roland McGrath
  -- strict thread matches above, loose matches on Subject: below --
2013-03-04 19:08 Roland McGrath
2013-03-09  2:21 ` Joseph S. Myers
2013-03-11 16:42   ` Roland McGrath

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).