* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-11 20:54 Roland McGrath
2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
0 siblings, 2 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-11 20:54 UTC (permalink / raw)
To: Joseph Myers; +Cc: libc-ports
Updated for current trunk. Tested as before:
On arm-linux-gnueabihf, I tested that this doesn't change the object
code at all. I also tested the changes by hacking the local copy of
arm-features.h to define ARM_ALWAYS_BX and verifying that there are no
regressions (no failures at all) when running 'make check subdirs=string'.
I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case. The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about. But maybe ARM
experts have better ideas.
OK?
Thanks,
Roland
ports/ChangeLog.arm
2013-03-11 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
* sysdeps/arm/memcpy.S: Include <arm-features.h>.
[ARM_ALWAYS_BX]: Avoid pc as destination.
* sysdeps/arm/memmove.S: Likewise.
--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
at runtime (or that we never care about its state) and so need not
be checked for. */
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+ that instructions using pc as a destination register must never be used,
+ so a "bx" (or "blx") instruction is always required. */
+
#endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -88,7 +89,12 @@ ENTRY(memcpy)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 )
@@ -107,8 +113,16 @@ ENTRY(memcpy)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1], #4
ldr r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
ldr r8, [r1], #4
ldr lr, [r1], #4
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0], #4
str r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
str r8, [r0], #4
str lr, [r0], #4
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
strcsb r4, [r0], #1
strcsb ip, [r0]
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -104,7 +105,12 @@ ENTRY(memmove)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
@@ -123,8 +129,16 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
str r8, [r0, #-4]!
str lr, [r0, #-4]!
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
@ 2013-03-11 20:55 ` Roland McGrath
2013-03-12 23:40 ` Joseph S. Myers
2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
1 sibling, 1 reply; 9+ messages in thread
From: Roland McGrath @ 2013-03-11 20:55 UTC (permalink / raw)
To: Joseph Myers; +Cc: libc-ports
Updated for current trunk. Tested as before:
This is on top of the ARM_ALWAYS_BX patch, which is on the same branch.
Tested on armv7l-linux-gnueabihf: no changes to the object code. Tested
the new code by locally hacking arm-features.h to define ARM_ALWAYS_BX and
ARM_BX_ALIGN_LOG2=4, and verifying no failures in 'make check subdirs=string'.
I didn't actually test ARM_BX_ALIGN_LOG2=4 without ARM_ALWAYS_BX, which is
a configuration that will probably never be used (but I wrote this code
to support it)--it's pretty easy to tell by inspection that it's equivalent
to what I did test.
OK?
Thanks,
Roland
ports/ChangeLog.arm
* sysdeps/arm/arm-features.h (ARM_BX_ALIGN_LOG2): New macro.
* sysdeps/arm/memcpy.S: Respect ARM_BX_ALIGN_LOG2.
* sysdeps/arm/memmove.S: Likewise.
--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -40,4 +40,12 @@
that instructions using pc as a destination register must never be used,
so a "bx" (or "blx") instruction is always required. */
+/* The log2 of the minimum alignment required for an address that
+ is the target of a computed branch (i.e. a "bx" instruction).
+ A more-specific arm-features.h file may define this to set a more
+ stringent requirement. */
+#ifndef ARM_BX_ALIGN_LOG2
+# define ARM_BX_ALIGN_LOG2 2
+#endif
+
#endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -90,9 +90,9 @@ ENTRY(memcpy)
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
#ifndef ARM_ALWAYS_BX
- CALGN( add pc, r4, ip )
+ CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
- CALGN( add r4, r4, ip )
+ CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
CALGN( bx r4 )
#endif
@@ -114,38 +114,55 @@ ENTRY(memcpy)
5: ands ip, r2, #28
rsb ip, ip, #32
#ifndef ARM_ALWAYS_BX
- addne pc, pc, ip @ C is always clear here
+ /* C is always clear here. */
+ addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
b 7f
#else
beq 7f
push {r10}
cfi_adjust_cfa_offset (4)
- add r10, pc, ip
+ add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
bx r10
#endif
+ .p2align ARM_BX_ALIGN_LOG2
6: nop
+ .p2align ARM_BX_ALIGN_LOG2
ldr r3, [r1], #4
+ .p2align ARM_BX_ALIGN_LOG2
ldr r4, [r1], #4
+ .p2align ARM_BX_ALIGN_LOG2
ldr r5, [r1], #4
+ .p2align ARM_BX_ALIGN_LOG2
ldr r6, [r1], #4
+ .p2align ARM_BX_ALIGN_LOG2
ldr r7, [r1], #4
+ .p2align ARM_BX_ALIGN_LOG2
ldr r8, [r1], #4
+ .p2align ARM_BX_ALIGN_LOG2
ldr lr, [r1], #4
#ifndef ARM_ALWAYS_BX
- add pc, pc, ip
+ add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
nop
#else
- add r10, pc, ip
+ add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
bx r10
#endif
+ .p2align ARM_BX_ALIGN_LOG2
nop
+ .p2align ARM_BX_ALIGN_LOG2
str r3, [r0], #4
+ .p2align ARM_BX_ALIGN_LOG2
str r4, [r0], #4
+ .p2align ARM_BX_ALIGN_LOG2
str r5, [r0], #4
+ .p2align ARM_BX_ALIGN_LOG2
str r6, [r0], #4
+ .p2align ARM_BX_ALIGN_LOG2
str r7, [r0], #4
+ .p2align ARM_BX_ALIGN_LOG2
str r8, [r0], #4
+ .p2align ARM_BX_ALIGN_LOG2
str lr, [r0], #4
#ifdef ARM_ALWAYS_BX
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -106,9 +106,9 @@ ENTRY(memmove)
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
#ifndef ARM_ALWAYS_BX
- CALGN( add pc, r4, ip )
+ CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
- CALGN( add r4, r4, ip )
+ CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
CALGN( bx r4 )
#endif
@@ -130,38 +130,55 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
#ifndef ARM_ALWAYS_BX
- addne pc, pc, ip @ C is always clear here
+ /* C is always clear here. */
+ addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
b 7f
#else
beq 7f
push {r10}
cfi_adjust_cfa_offset (4)
- add r10, pc, ip
+ add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
bx r10
#endif
+ .p2align ARM_BX_ALIGN_LOG2
6: nop
+ .p2align ARM_BX_ALIGN_LOG2
ldr r3, [r1, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
ldr r4, [r1, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
ldr r5, [r1, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
ldr r6, [r1, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
ldr r7, [r1, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
ldr r8, [r1, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
ldr lr, [r1, #-4]!
#ifndef ARM_ALWAYS_BX
- add pc, pc, ip
+ add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
nop
#else
- add r10, pc, ip
+ add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
bx r10
#endif
+ .p2align ARM_BX_ALIGN_LOG2
nop
+ .p2align ARM_BX_ALIGN_LOG2
str r3, [r0, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
str r4, [r0, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
str r5, [r0, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
str r6, [r0, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
str r7, [r0, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
str r8, [r0, #-4]!
+ .p2align ARM_BX_ALIGN_LOG2
str lr, [r0, #-4]!
#ifdef ARM_ALWAYS_BX
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
@ 2013-03-12 23:35 ` Joseph S. Myers
2013-03-13 16:50 ` Roland McGrath
1 sibling, 1 reply; 9+ messages in thread
From: Joseph S. Myers @ 2013-03-12 23:35 UTC (permalink / raw)
To: Roland McGrath; +Cc: libc-ports
On Mon, 11 Mar 2013, Roland McGrath wrote:
> * sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
That's OK.
> +#else
> + beq 7f
> + push {r10}
> + cfi_adjust_cfa_offset (4)
This should also have cfi_rel_offset (r10, 0) to record the save itself in
CFI debug information.
> +#ifdef ARM_ALWAYS_BX
> + pop {r10}
> + cfi_adjust_cfa_offset (-4)
> +#endif
Likewise, record the restore of r10.
> +#else
> + beq 7f
> + push {r10}
> + cfi_adjust_cfa_offset (4)
Likewise, in memmove.
> +#ifdef ARM_ALWAYS_BX
> + pop {r10}
> + cfi_adjust_cfa_offset (-4)
Likewise.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
@ 2013-03-12 23:40 ` Joseph S. Myers
2013-03-13 19:42 ` Roland McGrath
0 siblings, 1 reply; 9+ messages in thread
From: Joseph S. Myers @ 2013-03-12 23:40 UTC (permalink / raw)
To: Roland McGrath; +Cc: libc-ports
On Mon, 11 Mar 2013, Roland McGrath wrote:
> +/* The log2 of the minimum alignment required for an address that
> + is the target of a computed branch (i.e. a "bx" instruction).
> + A more-specific arm-features.h file may define this to set a more
> + stringent requirement. */
The comment should say that this should only be used in ARM-mode code
(certainly this definition doesn't make sense by default for code built as
Thumb). OK with that change.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
@ 2013-03-13 16:50 ` Roland McGrath
0 siblings, 0 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-13 16:50 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-ports
> This should also have cfi_rel_offset (r10, 0) to record the save itself in
> CFI debug information.
Good point! Fixed. Committed as follows.
Thanks,
Roland
ports/ChangeLog.arm
2013-03-13 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
* sysdeps/arm/memcpy.S: Include <arm-features.h>.
[ARM_ALWAYS_BX]: Avoid pc as destination.
* sysdeps/arm/memmove.S: Likewise.
--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
at runtime (or that we never care about its state) and so need not
be checked for. */
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+ that instructions using pc as a destination register must never be used,
+ so a "bx" (or "blx") instruction is always required. */
+
#endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -89,7 +90,12 @@ ENTRY(memcpy)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 )
@@ -108,8 +114,17 @@ ENTRY(memcpy)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (r10, 0)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1], #4
ldr r4, [r1], #4
@@ -119,8 +134,13 @@ ENTRY(memcpy)
ldr r8, [r1], #4
ldr lr, [r1], #4
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0], #4
str r4, [r0], #4
@@ -130,6 +150,12 @@ ENTRY(memcpy)
str r8, [r0], #4
str lr, [r0], #4
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (r10)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -147,7 +173,8 @@ ENTRY(memcpy)
strbcs r4, [r0], #1
strbcs ip, [r0]
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -105,7 +106,12 @@ ENTRY(memmove)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
@@ -124,8 +130,17 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (r10, 0)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
@@ -135,8 +150,13 @@ ENTRY(memmove)
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
@@ -146,6 +166,12 @@ ENTRY(memmove)
str r8, [r0, #-4]!
str lr, [r0, #-4]!
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (r10)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -163,7 +189,8 @@ ENTRY(memmove)
strbcs r4, [r0, #-1]!
strbcs ip, [r0, #-1]
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2
2013-03-12 23:40 ` Joseph S. Myers
@ 2013-03-13 19:42 ` Roland McGrath
0 siblings, 0 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-13 19:42 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-ports
> The comment should say that this should only be used in ARM-mode code
> (certainly this definition doesn't make sense by default for code built as
> Thumb). OK with that change.
Well, it makes some sense for any situation doing something like what the
memcpy code is doing (i.e. arithmetically computing branch targets, which
implicitly assumes fixed distance between them). Unless one were
completely sure that each Thumb instruction would always have a short
encoding, then you'd need to explicitly align each one to four.
I've committed it with the following comment.
Feel free to adjust it further as you see fit.
Thanks,
Roland
/* The log2 of the minimum alignment required for an address that
is the target of a computed branch (i.e. a "bx" instruction).
A more-specific arm-features.h file may define this to set a more
stringent requirement.
Using this only makes sense for code in ARM mode (where instructions
always have a fixed size of four bytes), or for Thumb-mode code that is
specifically aligning all the related branch targets to match (since
Thumb instructions might be either two or four bytes). */
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-09 2:21 ` Joseph S. Myers
@ 2013-03-11 16:42 ` Roland McGrath
0 siblings, 0 replies; 9+ messages in thread
From: Roland McGrath @ 2013-03-11 16:42 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-ports
> I expect these various patches will need updating after Richard's changes
Yes. I've updated all my branches after those went in. I'm completing the
re-testing today and will post new versions of each when they're verified.
> ... also, is there an ABI document explaining the exact restrictions
> involved, especially as regards pc-avoidance?
I don't think there is anything that is either up to date or concise.
https://sites.google.com/a/chromium.org/dev/nativeclient/reference/arm-overview
is something you might find informative.
For the particular issue at hand, it's enough to know that the platform
prohibits all operations using pc as a destination register. Only the
various explicit branch operations are allowed. The platform also requires
ARMv7-A as its baseline, so "bx" is always available.
Thanks,
Roland
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-04 19:08 Roland McGrath
@ 2013-03-09 2:21 ` Joseph S. Myers
2013-03-11 16:42 ` Roland McGrath
0 siblings, 1 reply; 9+ messages in thread
From: Joseph S. Myers @ 2013-03-09 2:21 UTC (permalink / raw)
To: Roland McGrath; +Cc: libc-ports
I expect these various patches will need updating after Richard's changes
... also, is there an ABI document explaining the exact restrictions
involved, especially as regards pc-avoidance?
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-04 19:08 Roland McGrath
2013-03-09 2:21 ` Joseph S. Myers
0 siblings, 1 reply; 9+ messages in thread
From: Roland McGrath @ 2013-03-04 19:08 UTC (permalink / raw)
To: libc-ports
On arm-linux-gnueabihf, I tested that this doesn't change the object
code at all. I also tested the changes by hacking the local copy of
arm-features.h to define ARM_ALWAYS_BX and verifying that there are no
regressions (no failures at all) when running 'make check subdirs=string'.
I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case. The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about. But maybe ARM
experts have better ideas.
Thanks,
Roland
ports/ChangeLog.arm
2013-03-04 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
* sysdeps/arm/memcpy.S: Include <arm-features.h>.
[ARM_ALWAYS_BX]: Avoid pc as destination.
* sysdeps/arm/memmove.S: Likewise.
--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
at runtime (or that we never care about its state) and so need not
be checked for. */
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+ that instructions using pc as a destination register must never be used,
+ so a "bx" (or "blx") instruction is always required. */
+
#endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
#else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
#endif
.text
@@ -88,7 +89,12 @@ ENTRY(memcpy)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 )
@@ -107,8 +113,16 @@ ENTRY(memcpy)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1], #4
ldr r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
ldr r8, [r1], #4
ldr lr, [r1], #4
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0], #4
str r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
str r8, [r0], #4
str lr, [r0], #4
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
strcsb r4, [r0], #1
strcsb ip, [r0]
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
ldmfd sp!, {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
@@ -178,7 +203,7 @@ ENTRY(memcpy)
bgt 18f
- .macro forward_copy_shift pull push
+ .macro forward_copy_shift PULL PUSH
subs r2, r2, #28
blt 14f
@@ -206,24 +231,24 @@ ENTRY(memcpy)
12: PLD( pld [r1, #124] )
13: ldmia r1!, {r4, r5, r6, r7}
- mov r3, lr, pull #\pull
+ mov r3, lr, PULL #\PULL
subs r2, r2, #32
ldmia r1!, {r8, r9, ip, lr}
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
+ orr r3, r3, r4, PUSH #\PUSH
+ mov r4, r4, PULL #\PULL
+ orr r4, r4, r5, PUSH #\PUSH
+ mov r5, r5, PULL #\PULL
+ orr r5, r5, r6, PUSH #\PUSH
+ mov r6, r6, PULL #\PULL
+ orr r6, r6, r7, PUSH #\PUSH
+ mov r7, r7, PULL #\PULL
+ orr r7, r7, r8, PUSH #\PUSH
+ mov r8, r8, PULL #\PULL
+ orr r8, r8, r9, PUSH #\PUSH
+ mov r9, r9, PULL #\PULL
+ orr r9, r9, ip, PUSH #\PUSH
+ mov ip, ip, PULL #\PULL
+ orr ip, ip, lr, PUSH #\PUSH
stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
bge 12b
PLD( cmn r2, #96 )
@@ -240,26 +265,26 @@ ENTRY(memcpy)
14: ands ip, r2, #28
beq 16f
-15: mov r3, lr, pull #\pull
+15: mov r3, lr, PULL #\PULL
ldr lr, [r1], #4
subs ip, ip, #4
- orr r3, r3, lr, push #\push
+ orr r3, r3, lr, PUSH #\PUSH
str r3, [r0], #4
bgt 15b
CALGN( cmp r2, #0 )
CALGN( bge 11b )
-16: sub r1, r1, #(\push / 8)
+16: sub r1, r1, #(\PUSH / 8)
b 8b
.endm
- forward_copy_shift pull=8 push=24
+ forward_copy_shift PULL=8 PUSH=24
-17: forward_copy_shift pull=16 push=16
+17: forward_copy_shift PULL=16 PUSH=16
-18: forward_copy_shift pull=24 push=8
+18: forward_copy_shift PULL=24 PUSH=8
END(memcpy)
libc_hidden_builtin_def (memcpy)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
#else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
#endif
.text
@@ -104,7 +105,12 @@ ENTRY(memmove)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
@@ -123,8 +129,16 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
str r8, [r0, #-4]!
str lr, [r0, #-4]!
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
ldmfd sp!, {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
@@ -193,7 +218,7 @@ ENTRY(memmove)
blt 18f
- .macro backward_copy_shift push pull
+ .macro backward_copy_shift PUSH PULL
subs r2, r2, #28
blt 14f
@@ -221,24 +246,24 @@ ENTRY(memmove)
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
+ mov lr, r3, PUSH #\PUSH
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
+ orr lr, lr, ip, PULL #\PULL
+ mov ip, ip, PUSH #\PUSH
+ orr ip, ip, r9, PULL #\PULL
+ mov r9, r9, PUSH #\PUSH
+ orr r9, r9, r8, PULL #\PULL
+ mov r8, r8, PUSH #\PUSH
+ orr r8, r8, r7, PULL #\PULL
+ mov r7, r7, PUSH #\PUSH
+ orr r7, r7, r6, PULL #\PULL
+ mov r6, r6, PUSH #\PUSH
+ orr r6, r6, r5, PULL #\PULL
+ mov r5, r5, PUSH #\PUSH
+ orr r5, r5, r4, PULL #\PULL
+ mov r4, r4, PUSH #\PUSH
+ orr r4, r4, r3, PULL #\PULL
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
@@ -255,26 +280,26 @@ ENTRY(memmove)
14: ands ip, r2, #28
beq 16f
-15: mov lr, r3, push #\push
+15: mov lr, r3, PUSH #\PUSH
ldr r3, [r1, #-4]!
subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
+ orr lr, lr, r3, PULL #\PULL
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
CALGN( bge 11b )
-16: add r1, r1, #(\pull / 8)
+16: add r1, r1, #(\PULL / 8)
b 8b
.endm
- backward_copy_shift push=8 pull=24
+ backward_copy_shift PUSH=8 PULL=24
-17: backward_copy_shift push=16 pull=16
+17: backward_copy_shift PUSH=16 PULL=16
-18: backward_copy_shift push=24 pull=8
+18: backward_copy_shift PUSH=24 PULL=8
END(memmove)
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2013-03-13 19:42 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-11 20:54 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
2013-03-11 20:55 ` [PATCH roland/arm-avoid-pc] ARM_BX_ALIGN_LOG2 Roland McGrath
2013-03-12 23:40 ` Joseph S. Myers
2013-03-13 19:42 ` Roland McGrath
2013-03-12 23:35 ` [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Joseph S. Myers
2013-03-13 16:50 ` Roland McGrath
-- strict thread matches above, loose matches on Subject: below --
2013-03-04 19:08 Roland McGrath
2013-03-09 2:21 ` Joseph S. Myers
2013-03-11 16:42 ` Roland McGrath
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).