From: Roland McGrath
To: libc-ports@sourceware.org
Subject: [PATCH roland/arm-avoid-pc] ARM: Support avoiding pc as destination register.
Message-Id: <20130304190741.F178D2C09B@topped-with-meat.com>
Date: Mon, 04 Mar 2013 19:08:00 -0000

On arm-linux-gnueabihf, I verified that this doesn't change the
generated object code at all.  I also tested the changes by hacking my
local copy of arm-features.h to define ARM_ALWAYS_BX and verifying that
there are no regressions (no failures at all) when running 'make check
subdirs=string'.

I'd be grateful for any suggestions to improve the efficiency of the
code in the ARM_ALWAYS_BX case.  The extra push/pop for the scratch
register seems unavoidable without reworking the whole function in some
way more complicated than I wanted to think about.  But maybe ARM
experts have better ideas.
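For reference, the shape of the change at each computed jump is roughly
the sketch below.  It is illustrative only, not the literal hunks: the
label and surrounding code are simplified, ip holds the byte offset
(0, 4, ..., 28) into the unrolled sequence, and in ARM state a read of
pc yields the address of the current instruction plus 8.

	.text
	.arm
	/* Dispatch: jump ip bytes into the unrolled sequence at 6f.  */
#ifndef ARM_ALWAYS_BX
	add	pc, pc, ip	@ pc reads as the address of 6f
	nop			@ filler slot so 6f sits at the pc base
#else
	push	{r10}		@ r10 is call-saved, so preserve it
	add	r10, pc, ip	@ pc again reads as the address of 6f
	bx	r10		@ branch without writing pc directly
#endif
6:	nop			@ ip = 0 lands here
	nop			@ ip = 4 lands here, and so on
#ifdef ARM_ALWAYS_BX
	pop	{r10}		@ restore the scratch register
#endif
	bx	lr

Both forms branch to the same place: in each, the pc read in the add is
the address two instructions later, so replacing the nop slot with the
bx leaves the offset arithmetic unchanged.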
Thanks,
Roland


ports/ChangeLog.arm
2013-03-04  Roland McGrath

	* sysdeps/arm/arm-features.h: Add comment about ARM_ALWAYS_BX.
	* sysdeps/arm/memcpy.S: Include <arm-features.h>.
	[ARM_ALWAYS_BX]: Avoid pc as destination.
	* sysdeps/arm/memmove.S: Likewise.

--- a/ports/sysdeps/arm/arm-features.h
+++ b/ports/sysdeps/arm/arm-features.h
@@ -36,4 +36,8 @@
    at runtime (or that we never care about its state) and so need not be
    checked for.  */
 
+/* A more-specific arm-features.h file may define ARM_ALWAYS_BX to indicate
+   that instructions using pc as a destination register must never be used,
+   so a "bx" (or "blx") instruction is always required.  */
+
 #endif /* arm-features.h */
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
 #else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
 #endif
 
 .text
@@ -88,7 +89,12 @@ ENTRY(memcpy)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #0]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -107,8 +113,16 @@ ENTRY(memcpy)
 5:	ands	ip, r2, #28
 	rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 	addne	pc, pc, ip		@ C is always clear here
 	b	7f
+#else
+	beq	7f
+	push	{r10}
+	cfi_adjust_cfa_offset (4)
+	add	r10, pc, ip
+	bx	r10
+#endif
 
 6:	nop
 	ldr	r3, [r1], #4
 	ldr	r4, [r1], #4
@@ -118,8 +132,13 @@ ENTRY(memcpy)
 	ldr	r8, [r1], #4
 	ldr	lr, [r1], #4
 
+#ifndef ARM_ALWAYS_BX
 	add	pc, pc, ip
 	nop
+#else
+	add	r10, pc, ip
+	bx	r10
+#endif
 	nop
 	str	r3, [r0], #4
 	str	r4, [r0], #4
@@ -129,6 +148,11 @@ ENTRY(memcpy)
 	str	r8, [r0], #4
 	str	lr, [r0], #4
 
+#ifdef ARM_ALWAYS_BX
+	pop	{r10}
+	cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:	ldmfd	sp!, {r5 - r8}
@@ -146,7 +170,8 @@ ENTRY(memcpy)
 	strcsb	r4, [r0], #1
 	strcsb	ip, [r0]
 
-#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 	ldmfd	sp!, {r0, r4, lr}
 	cfi_adjust_cfa_offset (-12)
 	cfi_restore (r4)
@@ -178,7 +203,7 @@ ENTRY(memcpy)
 	bgt	18f
 
-	.macro	forward_copy_shift pull push
+	.macro	forward_copy_shift PULL PUSH
 
 	subs	r2, r2, #28
 	blt	14f
 
@@ -206,24 +231,24 @@ ENTRY(memcpy)
 
 12:	PLD(	pld	[r1, #124]		)
 13:	ldmia	r1!, {r4, r5, r6, r7}
-	mov	r3, lr, pull #\pull
+	mov	r3, lr, PULL #\PULL
 	subs	r2, r2, #32
 	ldmia	r1!, {r8, r9, ip, lr}
-	orr	r3, r3, r4, push #\push
-	mov	r4, r4, pull #\pull
-	orr	r4, r4, r5, push #\push
-	mov	r5, r5, pull #\pull
-	orr	r5, r5, r6, push #\push
-	mov	r6, r6, pull #\pull
-	orr	r6, r6, r7, push #\push
-	mov	r7, r7, pull #\pull
-	orr	r7, r7, r8, push #\push
-	mov	r8, r8, pull #\pull
-	orr	r8, r8, r9, push #\push
-	mov	r9, r9, pull #\pull
-	orr	r9, r9, ip, push #\push
-	mov	ip, ip, pull #\pull
-	orr	ip, ip, lr, push #\push
+	orr	r3, r3, r4, PUSH #\PUSH
+	mov	r4, r4, PULL #\PULL
+	orr	r4, r4, r5, PUSH #\PUSH
+	mov	r5, r5, PULL #\PULL
+	orr	r5, r5, r6, PUSH #\PUSH
+	mov	r6, r6, PULL #\PULL
+	orr	r6, r6, r7, PUSH #\PUSH
+	mov	r7, r7, PULL #\PULL
+	orr	r7, r7, r8, PUSH #\PUSH
+	mov	r8, r8, PULL #\PULL
+	orr	r8, r8, r9, PUSH #\PUSH
+	mov	r9, r9, PULL #\PULL
+	orr	r9, r9, ip, PUSH #\PUSH
+	mov	ip, ip, PULL #\PULL
+	orr	ip, ip, lr, PUSH #\PUSH
 	stmia	r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
 	bge	12b
 	PLD(	cmn	r2, #96			)
@@ -240,26 +265,26 @@ ENTRY(memcpy)
 14:	ands	ip, r2, #28
 	beq	16f
 
-15:	mov	r3, lr, pull #\pull
+15:	mov	r3, lr, PULL #\PULL
 	ldr	lr, [r1], #4
 	subs	ip, ip, #4
-	orr	r3, r3, lr, push #\push
+	orr	r3, r3, lr, PUSH #\PUSH
 	str	r3, [r0], #4
 	bgt	15b
 
 	CALGN(	cmp	r2, #0			)
 	CALGN(	bge	11b			)
 
-16:	sub	r1, r1, #(\push / 8)
+16:	sub	r1, r1, #(\PUSH / 8)
 	b	8b
 
 	.endm
 
-	forward_copy_shift	pull=8	push=24
+	forward_copy_shift	PULL=8	PUSH=24
 
-17:	forward_copy_shift	pull=16	push=16
+17:	forward_copy_shift	PULL=16	PUSH=16
 
-18:	forward_copy_shift	pull=24	push=8
+18:	forward_copy_shift	PULL=24	PUSH=8
 
 END(memcpy)
 libc_hidden_builtin_def (memcpy)
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -20,6 +20,7 @@
 /* Thumb requires excessive IT insns here.  */
 #define NO_THUMB
 #include <sysdep.h>
+#include <arm-features.h>
 
 /*
  * Data preload for architectures that support it (ARM V5TE and above)
@@ -45,11 +46,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
 #else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
 #endif
 
 .text
@@ -104,7 +105,12 @@ ENTRY(memmove)
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+#ifndef ARM_ALWAYS_BX
 	CALGN(	add	pc, r4, ip		)
+#else
+	CALGN(	add	r4, r4, ip		)
+	CALGN(	bx	r4			)
+#endif
 
 	PLD(	pld	[r1, #-4]		)
 2:	PLD(	subs	r2, r2, #96		)
@@ -123,8 +129,16 @@ ENTRY(memmove)
 5:	ands	ip, r2, #28
 	rsb	ip, ip, #32
+#ifndef ARM_ALWAYS_BX
 	addne	pc, pc, ip		@ C is always clear here
 	b	7f
+#else
+	beq	7f
+	push	{r10}
+	cfi_adjust_cfa_offset (4)
+	add	r10, pc, ip
+	bx	r10
+#endif
 
 6:	nop
 	ldr	r3, [r1, #-4]!
 	ldr	r4, [r1, #-4]!
@@ -134,8 +148,13 @@ ENTRY(memmove)
 	ldr	r8, [r1, #-4]!
 	ldr	lr, [r1, #-4]!
 
+#ifndef ARM_ALWAYS_BX
 	add	pc, pc, ip
 	nop
+#else
+	add	r10, pc, ip
+	bx	r10
+#endif
 	nop
 	str	r3, [r0, #-4]!
 	str	r4, [r0, #-4]!
@@ -145,6 +164,11 @@ ENTRY(memmove)
 	str	r8, [r0, #-4]!
 	str	lr, [r0, #-4]!
 
+#ifdef ARM_ALWAYS_BX
+	pop	{r10}
+	cfi_adjust_cfa_offset (-4)
+#endif
+
 	CALGN(	bcs	2b			)
 
 7:	ldmfd	sp!, {r5 - r8}
@@ -162,7 +186,8 @@ ENTRY(memmove)
 	strcsb	r4, [r0, #-1]!
 	strcsb	ip, [r0, #-1]
 
-#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
+     || defined (ARM_ALWAYS_BX))
 	ldmfd	sp!, {r0, r4, lr}
 	cfi_adjust_cfa_offset (-12)
 	cfi_restore (r4)
@@ -193,7 +218,7 @@ ENTRY(memmove)
 	blt	18f
 
-	.macro	backward_copy_shift push pull
+	.macro	backward_copy_shift PUSH PULL
 
 	subs	r2, r2, #28
 	blt	14f
 
@@ -221,24 +246,24 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:	ldmdb	r1!, {r7, r8, r9, ip}
-	mov	lr, r3, push #\push
+	mov	lr, r3, PUSH #\PUSH
 	subs	r2, r2, #32
 	ldmdb	r1!, {r3, r4, r5, r6}
-	orr	lr, lr, ip, pull #\pull
-	mov	ip, ip, push #\push
-	orr	ip, ip, r9, pull #\pull
-	mov	r9, r9, push #\push
-	orr	r9, r9, r8, pull #\pull
-	mov	r8, r8, push #\push
-	orr	r8, r8, r7, pull #\pull
-	mov	r7, r7, push #\push
-	orr	r7, r7, r6, pull #\pull
-	mov	r6, r6, push #\push
-	orr	r6, r6, r5, pull #\pull
-	mov	r5, r5, push #\push
-	orr	r5, r5, r4, pull #\pull
-	mov	r4, r4, push #\push
-	orr	r4, r4, r3, pull #\pull
+	orr	lr, lr, ip, PULL #\PULL
+	mov	ip, ip, PUSH #\PUSH
+	orr	ip, ip, r9, PULL #\PULL
+	mov	r9, r9, PUSH #\PUSH
+	orr	r9, r9, r8, PULL #\PULL
+	mov	r8, r8, PUSH #\PUSH
+	orr	r8, r8, r7, PULL #\PULL
+	mov	r7, r7, PUSH #\PUSH
+	orr	r7, r7, r6, PULL #\PULL
+	mov	r6, r6, PUSH #\PUSH
+	orr	r6, r6, r5, PULL #\PULL
+	mov	r5, r5, PUSH #\PUSH
+	orr	r5, r5, r4, PULL #\PULL
+	mov	r4, r4, PUSH #\PUSH
+	orr	r4, r4, r3, PULL #\PULL
 	stmdb	r0!, {r4 - r9, ip, lr}
 	bge	12b
 	PLD(	cmn	r2, #96			)
@@ -255,26 +280,26 @@ ENTRY(memmove)
 14:	ands	ip, r2, #28
 	beq	16f
 
-15:	mov	lr, r3, push #\push
+15:	mov	lr, r3, PUSH #\PUSH
 	ldr	r3, [r1, #-4]!
 	subs	ip, ip, #4
-	orr	lr, lr, r3, pull #\pull
+	orr	lr, lr, r3, PULL #\PULL
 	str	lr, [r0, #-4]!
 	bgt	15b
 
 	CALGN(	cmp	r2, #0			)
 	CALGN(	bge	11b			)
 
-16:	add	r1, r1, #(\pull / 8)
+16:	add	r1, r1, #(\PULL / 8)
 	b	8b
 
 	.endm
 
-	backward_copy_shift	push=8	pull=24
+	backward_copy_shift	PUSH=8	PULL=24
 
-17:	backward_copy_shift	push=16	pull=16
+17:	backward_copy_shift	PUSH=16	PULL=16
 
-18:	backward_copy_shift	push=24	pull=8
+18:	backward_copy_shift	PUSH=24	PULL=8
 
 END(memmove)
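Note that nothing in this patch defines ARM_ALWAYS_BX itself; a
machine-specific configuration would opt in through its own
arm-features.h.  A hypothetical override might look like the following
sketch (the directory name and include guard are illustrative only, not
part of the patch):

/* Hypothetical sysdeps/arm/SOMETARGET/arm-features.h -- illustration only.  */
#ifndef _SOMETARGET_ARM_FEATURES_H
#define _SOMETARGET_ARM_FEATURES_H	1

/* This configuration forbids instructions that use pc as a destination
   register, so memcpy/memmove must take the bx-based paths.  */
#define ARM_ALWAYS_BX	1

/* Pick up the generic ARM feature defaults.  */
#include_next <arm-features.h>

#endif	/* arm-features.h */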