* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-04 19:08 Roland McGrath
2013-03-09 2:21 ` Joseph S. Myers
0 siblings, 1 reply; 6+ messages in thread
From: Roland McGrath @ 2013-03-04 19:08 UTC (permalink / raw)
To: libc-ports
On arm-linux-gnueabihf, where ARM_ALWAYS_BX is not defined, I tested that
this doesn't change the object code at all. I also tested the changes by
hacking the local copy of arm-features.h to define ARM_ALWAYS_BX and
verifying that there are no regressions (no failures at all) when running
'make check subdirs=string'.
I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case. The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about. But maybe ARM
experts have better ideas.
Thanks,
Roland
ports/ChangeLog.arm
2013-03-04 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
* sysdeps/arm/memcpy.S: Include <arm-features.h>.
[ARM_ALWAYS_BX]: Avoid pc as destination.
* sysdeps/arm/memmove.S: Likewise.
--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
at runtime (or that we never care about its state) and so need not
be checked for. */
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+ that instructions using pc as a destination register must never be used,
+ so a "bx" (or "blx") instruction is always required. */
+
#endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
#else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
#endif
.text
@@ -88,7 +89,12 @@ ENTRY(memcpy)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 )
@@ -107,8 +113,16 @@ ENTRY(memcpy)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1], #4
ldr r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
ldr r8, [r1], #4
ldr lr, [r1], #4
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0], #4
str r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
str r8, [r0], #4
str lr, [r0], #4
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
strcsb r4, [r0], #1
strcsb ip, [r0]
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
ldmfd sp!, {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
@@ -178,7 +203,7 @@ ENTRY(memcpy)
bgt 18f
- .macro forward_copy_shift pull push
+ .macro forward_copy_shift PULL PUSH
subs r2, r2, #28
blt 14f
@@ -206,24 +231,24 @@ ENTRY(memcpy)
12: PLD( pld [r1, #124] )
13: ldmia r1!, {r4, r5, r6, r7}
- mov r3, lr, pull #\pull
+ mov r3, lr, PULL #\PULL
subs r2, r2, #32
ldmia r1!, {r8, r9, ip, lr}
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
+ orr r3, r3, r4, PUSH #\PUSH
+ mov r4, r4, PULL #\PULL
+ orr r4, r4, r5, PUSH #\PUSH
+ mov r5, r5, PULL #\PULL
+ orr r5, r5, r6, PUSH #\PUSH
+ mov r6, r6, PULL #\PULL
+ orr r6, r6, r7, PUSH #\PUSH
+ mov r7, r7, PULL #\PULL
+ orr r7, r7, r8, PUSH #\PUSH
+ mov r8, r8, PULL #\PULL
+ orr r8, r8, r9, PUSH #\PUSH
+ mov r9, r9, PULL #\PULL
+ orr r9, r9, ip, PUSH #\PUSH
+ mov ip, ip, PULL #\PULL
+ orr ip, ip, lr, PUSH #\PUSH
stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
bge 12b
PLD( cmn r2, #96 )
@@ -240,26 +265,26 @@ ENTRY(memcpy)
14: ands ip, r2, #28
beq 16f
-15: mov r3, lr, pull #\pull
+15: mov r3, lr, PULL #\PULL
ldr lr, [r1], #4
subs ip, ip, #4
- orr r3, r3, lr, push #\push
+ orr r3, r3, lr, PUSH #\PUSH
str r3, [r0], #4
bgt 15b
CALGN( cmp r2, #0 )
CALGN( bge 11b )
-16: sub r1, r1, #(\push / 8)
+16: sub r1, r1, #(\PUSH / 8)
b 8b
.endm
- forward_copy_shift pull=8 push=24
+ forward_copy_shift PULL=8 PUSH=24
-17: forward_copy_shift pull=16 push=16
+17: forward_copy_shift PULL=16 PUSH=16
-18: forward_copy_shift pull=24 push=8
+18: forward_copy_shift PULL=24 PUSH=8
END(memcpy)
libc_hidden_builtin_def (memcpy)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
#else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
#endif
.text
@@ -104,7 +105,12 @@ ENTRY(memmove)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
@@ -123,8 +129,16 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
str r8, [r0, #-4]!
str lr, [r0, #-4]!
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: ldmfd sp!, {r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
ldmfd sp!, {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
@@ -193,7 +218,7 @@ ENTRY(memmove)
blt 18f
- .macro backward_copy_shift push pull
+ .macro backward_copy_shift PUSH PULL
subs r2, r2, #28
blt 14f
@@ -221,24 +246,24 @@ ENTRY(memmove)
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
+ mov lr, r3, PUSH #\PUSH
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
+ orr lr, lr, ip, PULL #\PULL
+ mov ip, ip, PUSH #\PUSH
+ orr ip, ip, r9, PULL #\PULL
+ mov r9, r9, PUSH #\PUSH
+ orr r9, r9, r8, PULL #\PULL
+ mov r8, r8, PUSH #\PUSH
+ orr r8, r8, r7, PULL #\PULL
+ mov r7, r7, PUSH #\PUSH
+ orr r7, r7, r6, PULL #\PULL
+ mov r6, r6, PUSH #\PUSH
+ orr r6, r6, r5, PULL #\PULL
+ mov r5, r5, PUSH #\PUSH
+ orr r5, r5, r4, PULL #\PULL
+ mov r4, r4, PUSH #\PUSH
+ orr r4, r4, r3, PULL #\PULL
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
@@ -255,26 +280,26 @@ ENTRY(memmove)
14: ands ip, r2, #28
beq 16f
-15: mov lr, r3, push #\push
+15: mov lr, r3, PUSH #\PUSH
ldr r3, [r1, #-4]!
subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
+ orr lr, lr, r3, PULL #\PULL
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
CALGN( bge 11b )
-16: add r1, r1, #(\pull / 8)
+16: add r1, r1, #(\PULL / 8)
b 8b
.endm
- backward_copy_shift push=8 pull=24
+ backward_copy_shift PUSH=8 PULL=24
-17: backward_copy_shift push=16 pull=16
+17: backward_copy_shift PUSH=16 PULL=16
-18: backward_copy_shift push=24 pull=8
+18: backward_copy_shift PUSH=24 PULL=8
END(memmove)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-04 19:08 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
@ 2013-03-09 2:21 ` Joseph S. Myers
2013-03-11 16:42 ` Roland McGrath
0 siblings, 1 reply; 6+ messages in thread
From: Joseph S. Myers @ 2013-03-09 2:21 UTC (permalink / raw)
To: Roland McGrath; +Cc: libc-ports
I expect these various patches will need updating after Richard's changes
... also, is there an ABI document explaining the exact restrictions
involved, especially as regards pc-avoidance?
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-09 2:21 ` Joseph S. Myers
@ 2013-03-11 16:42 ` Roland McGrath
0 siblings, 0 replies; 6+ messages in thread
From: Roland McGrath @ 2013-03-11 16:42 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-ports
> I expect these various patches will need updating after Richard's changes
Yes. I've updated all my branches after those went in. I'm completing the
re-testing today and will post new versions of each when they're verified.
> ... also, is there an ABI document explaining the exact restrictions
> involved, especially as regards pc-avoidance?
I don't think there is anything that is either up to date or concise.
https://sites.google.com/a/chromium.org/dev/nativeclient/reference/arm-overview
is something you might find informative.
For the particular issue at hand, it's enough to know that the platform
prohibits all operations using pc as a destination register. Only the
various explicit branch operations are allowed to change pc. The platform
also requires ARMv7-A as its baseline, so "bx" is always available.
Thanks,
Roland
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-12 23:35 ` Joseph S. Myers
@ 2013-03-13 16:50 ` Roland McGrath
0 siblings, 0 replies; 6+ messages in thread
From: Roland McGrath @ 2013-03-13 16:50 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-ports
> This should also have cfi_rel_offset (r10, 0) to record the save itself in
> CFI debug information.
Good point! Fixed. Committed as follows.
Thanks,
Roland
ports/ChangeLog.arm
2013-03-13 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
* sysdeps/arm/memcpy.S: Include <arm-features.h>.
[ARM_ALWAYS_BX]: Avoid pc as destination.
* sysdeps/arm/memmove.S: Likewise.
--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
at runtime (or that we never care about its state) and so need not
be checked for. */
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+ that instructions using pc as a destination register must never be used,
+ so a "bx" (or "blx") instruction is always required. */
+
#endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -89,7 +90,12 @@ ENTRY(memcpy)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 )
@@ -108,8 +114,17 @@ ENTRY(memcpy)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (r10, 0)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1], #4
ldr r4, [r1], #4
@@ -119,8 +134,13 @@ ENTRY(memcpy)
ldr r8, [r1], #4
ldr lr, [r1], #4
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0], #4
str r4, [r0], #4
@@ -130,6 +150,12 @@ ENTRY(memcpy)
str r8, [r0], #4
str lr, [r0], #4
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (r10)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -147,7 +173,8 @@ ENTRY(memcpy)
strbcs r4, [r0], #1
strbcs ip, [r0]
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -105,7 +106,12 @@ ENTRY(memmove)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
@@ -124,8 +130,17 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (r10, 0)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
@@ -135,8 +150,13 @@ ENTRY(memmove)
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
@@ -146,6 +166,12 @@ ENTRY(memmove)
str r8, [r0, #-4]!
str lr, [r0, #-4]!
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (r10)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -163,7 +189,8 @@ ENTRY(memmove)
strbcs r4, [r0, #-1]!
strbcs ip, [r0, #-1]
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
2013-03-11 20:54 Roland McGrath
@ 2013-03-12 23:35 ` Joseph S. Myers
2013-03-13 16:50 ` Roland McGrath
0 siblings, 1 reply; 6+ messages in thread
From: Joseph S. Myers @ 2013-03-12 23:35 UTC (permalink / raw)
To: Roland McGrath; +Cc: libc-ports
On Mon, 11 Mar 2013, Roland McGrath wrote:
> * sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
That's OK.
> +#else
> + beq 7f
> + push {r10}
> + cfi_adjust_cfa_offset (4)
This should also have cfi_rel_offset (r10, 0) to record the save itself in
CFI debug information.
> +#ifdef ARM_ALWAYS_BX
> + pop {r10}
> + cfi_adjust_cfa_offset (-4)
> +#endif
Likewise, record the restore of r10.
> +#else
> + beq 7f
> + push {r10}
> + cfi_adjust_cfa_offset (4)
Likewise, in memmove.
> +#ifdef ARM_ALWAYS_BX
> + pop {r10}
> + cfi_adjust_cfa_offset (-4)
Likewise.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
@ 2013-03-11 20:54 Roland McGrath
2013-03-12 23:35 ` Joseph S. Myers
0 siblings, 1 reply; 6+ messages in thread
From: Roland McGrath @ 2013-03-11 20:54 UTC (permalink / raw)
To: Joseph Myers; +Cc: libc-ports
Updated for current trunk. Tested as before:
On arm-linux-gnueabihf, where ARM_ALWAYS_BX is not defined, I tested that
this doesn't change the object code at all. I also tested the changes by
hacking the local copy of arm-features.h to define ARM_ALWAYS_BX and
verifying that there are no regressions (no failures at all) when running
'make check subdirs=string'.
I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case. The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about. But maybe ARM
experts have better ideas.
OK?
Thanks,
Roland
ports/ChangeLog.arm
2013-03-11 Roland McGrath <roland@hack.frob.com>
* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
* sysdeps/arm/memcpy.S: Include <arm-features.h>.
[ARM_ALWAYS_BX]: Avoid pc as destination.
* sysdeps/arm/memmove.S: Likewise.
--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
at runtime (or that we never care about its state) and so need not
be checked for. */
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+ that instructions using pc as a destination register must never be used,
+ so a "bx" (or "blx") instruction is always required. */
+
#endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -88,7 +89,12 @@ ENTRY(memcpy)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, r3 ) @ C gets set
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #0] )
2: PLD( subs r2, r2, #96 )
@@ -107,8 +113,16 @@ ENTRY(memcpy)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1], #4
ldr r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
ldr r8, [r1], #4
ldr lr, [r1], #4
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0], #4
str r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
str r8, [r0], #4
str lr, [r0], #4
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
strcsb r4, [r0], #1
strcsb ip, [r0]
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
/* Thumb requires excessive IT insns here. */
#define NO_THUMB
#include <sysdep.h>
+#include <arm-features.h>
/*
* Data preload for architectures that support it (ARM V5TE and above)
@@ -104,7 +105,12 @@ ENTRY(memmove)
CALGN( bcs 2f )
CALGN( adr r4, 6f )
CALGN( subs r2, r2, ip ) @ C is set here
+#ifndef ARM_ALWAYS_BX
CALGN( add pc, r4, ip )
+#else
+ CALGN( add r4, r4, ip )
+ CALGN( bx r4 )
+#endif
PLD( pld [r1, #-4] )
2: PLD( subs r2, r2, #96 )
@@ -123,8 +129,16 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
+#ifndef ARM_ALWAYS_BX
addne pc, pc, ip @ C is always clear here
b 7f
+#else
+ beq 7f
+ push {r10}
+ cfi_adjust_cfa_offset (4)
+ add r10, pc, ip
+ bx r10
+#endif
6: nop
ldr r3, [r1, #-4]!
ldr r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
ldr r8, [r1, #-4]!
ldr lr, [r1, #-4]!
+#ifndef ARM_ALWAYS_BX
add pc, pc, ip
nop
+#else
+ add r10, pc, ip
+ bx r10
+#endif
nop
str r3, [r0, #-4]!
str r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
str r8, [r0, #-4]!
str lr, [r0, #-4]!
+#ifdef ARM_ALWAYS_BX
+ pop {r10}
+ cfi_adjust_cfa_offset (-4)
+#endif
+
CALGN( bcs 2b )
7: pop {r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+ || defined (ARM_ALWAYS_BX))
pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2013-03-13 16:50 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-04 19:08 [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register Roland McGrath
2013-03-09 2:21 ` Joseph S. Myers
2013-03-11 16:42 ` Roland McGrath
2013-03-11 20:54 Roland McGrath
2013-03-12 23:35 ` Joseph S. Myers
2013-03-13 16:50 ` Roland McGrath
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).