* [PATCH v2 03/14] arm: Introduce and use GET_TLS
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (6 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 06/14] arm: Delete LOADREGS macro Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:57 ` Roland McGrath
2013-03-05 1:45 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 05/14] arm: Use push/pop mnemonics Richard Henderson
` (5 subsequent siblings)
13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
Factor out the sequence needed to call kuser_get_tls, as we can't
play subtract into pc games in thumb mode. Prepare for hard-tp,
pulling the save of LR into the macro.
---
* sysdeps/arm/sysdep.h (GET_TLS): New macro.
* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_undefweak): Use it.
(_dl_tlsdesc_dynamic): Likewise.
* sysdeps/unix/arm/sysdep.S (__syscall_error): Likewise.
* sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): New macro.
* sysdeps/unix/sysv/linux/arm/clone.S (__clone): Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S (SAVE_PID): Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/vfork.S (SAVE_PID): Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
Likewise.
* sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S (__aeabi_read_tp):
Add thumb2 alternative.
---
ports/sysdeps/arm/dl-tlsdesc.S | 13 ++--------
ports/sysdeps/arm/sysdep.h | 19 ++++++++++++++
ports/sysdeps/unix/arm/sysdep.S | 12 +++------
ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S | 6 +++++
ports/sysdeps/unix/sysv/linux/arm/clone.S | 4 +--
ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S | 10 +-------
.../unix/sysv/linux/arm/nptl/sysdep-cancel.h | 2 +-
ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S | 10 +-------
ports/sysdeps/unix/sysv/linux/arm/sysdep.h | 30 ++++++++++++++++++++++
9 files changed, 64 insertions(+), 42 deletions(-)
diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index 7b4c8df..1c3bccf 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -50,18 +50,9 @@ _dl_tlsdesc_return:
.fnstart
.align 2
_dl_tlsdesc_undefweak:
- @ Are we allowed a misaligned stack pointer calling read_tp?
- .save {lr}
- stmdb sp!, {lr}
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (lr,0)
- bl __aeabi_read_tp
+ GET_TLS(r1)
rsb r0, r0, #0
- ldmia sp!, {lr}
- cfi_adjust_cfa_offset (-4)
- cfi_restore (lr)
BX (lr)
-
cfi_endproc
.fnend
.size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
@@ -106,7 +97,7 @@ _dl_tlsdesc_dynamic:
cfi_rel_offset (r4,8)
cfi_rel_offset (lr,12)
ldr r1, [r0] /* td */
- bl __aeabi_read_tp
+ GET_TLS(lr)
mov r4, r0 /* r4 = tp */
ldr r0, [r0]
ldr r2, [r1, #8] /* gen_count */
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 9230131..c525d5b 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -150,6 +150,25 @@
# define NEGOFF_OFF1(R, OFF) [R, $OFF]
# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $OFFA]
# endif
+
+/* Helper to get the TLS base pointer. The interface is that TMP is a
+ register that may be used to hold the LR, if necessary. TMP may be
+ LR itself to indicate that LR need not be saved. The base pointer
+ is returned in R0. Only R0 and TMP are modified.
+
+ At this generic level we have no tricks to pull. Call the ABI routine. */
+# define GET_TLS(TMP) \
+ push { r1, r2, r3, lr }; \
+ cfi_remember_state; \
+ cfi_adjust_cfa_offset (16); \
+ cfi_rel_offset (r1, 0); \
+ cfi_rel_offset (r2, 4); \
+ cfi_rel_offset (r3, 8); \
+ cfi_rel_offset (lr, 12); \
+ bl __aeabi_read_tp; \
+ pop { r1, r2, r3, lr }; \
+ cfi_restore_state
+
#endif /* __ASSEMBLER__ */
/* This number is the offset from the pc at the current location. */
diff --git a/ports/sysdeps/unix/arm/sysdep.S b/ports/sysdeps/unix/arm/sysdep.S
index d44ee48..b07cba9 100644
--- a/ports/sysdeps/unix/arm/sysdep.S
+++ b/ports/sysdeps/unix/arm/sysdep.S
@@ -37,14 +37,8 @@ __syscall_error:
#endif
#ifndef IS_IN_rtld
- mov ip, lr
- cfi_register (lr, ip)
- mov r1, r0
-
- mov r0, #0xffff0fff
- mov lr, pc
- sub pc, r0, #31
-
+ mov r1, r0
+ GET_TLS(r2)
ldr r2, 1f
#ifdef __thumb__
2: add r2, r2, pc
@@ -54,7 +48,7 @@ __syscall_error:
#endif
str r1, [r0, r2]
mvn r0, #0
- DO_RET(ip)
+ DO_RET(lr)
1: .word errno(gottpoff) + (. - 2b - PC_OFS)
#elif RTLD_PRIVATE_ERRNO
diff --git a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
index c4ddbc6..ecdc322 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
@@ -41,6 +41,12 @@
.hidden __aeabi_read_tp
ENTRY (__aeabi_read_tp)
+#ifdef __thumb2__
+ movw r0, #0x0fe0
+ movt r0, #0xffff
+ bx r0
+#else
mov r0, #0xffff0fff
sub pc, r0, #31
+#endif
END (__aeabi_read_tp)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/clone.S b/ports/sysdeps/unix/sysv/linux/arm/clone.S
index a5f9b4d..1bc5eab 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/clone.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/clone.S
@@ -74,9 +74,7 @@ PSEUDO_END (__clone)
#ifdef RESET_PID
tst ip, #CLONE_THREAD
bne 3f
- mov r0, #0xffff0fff
- mov lr, pc
- sub pc, r0, #31
+ GET_TLS(lr)
mov r1, r0
tst ip, #CLONE_VM
ldr r7, =SYS_ify(getpid)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
index ff88510..c731cd7 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
@@ -19,15 +19,7 @@
/* Save the PID value. */
#define SAVE_PID \
- str lr, [sp, #-4]!; /* Save LR. */ \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (lr, 0); \
- mov r0, #0xffff0fff; /* Point to the high page. */ \
- mov lr, pc; /* Save our return address. */ \
- sub pc, r0, #31; /* Jump to the TLS entry. */ \
- ldr lr, [sp], #4; /* Restore LR. */ \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (lr); \
+ GET_TLS(r2); \
NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2. */ \
ldr r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID. */ \
rsb r0, r3, #0; /* Negate it. */ \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index 47d4c70..d5e666b 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -216,7 +216,7 @@ extern int __local_multiple_threads attribute_hidden;
stmfd sp!, {r0, lr}; \
cfi_adjust_cfa_offset (8); \
cfi_rel_offset (lr, 4); \
- bl __aeabi_read_tp; \
+ GET_TLS(lr); \
NEGOFF_ADJ_BASE(r0, MULTIPLE_THREADS_OFFSET); \
ldr ip, NEGOFF_OFF1(r0, MULTIPLE_THREADS_OFFSET); \
ldmfd sp!, {r0, lr}; \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
index c4be1e2..accecf2 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
@@ -19,15 +19,7 @@
/* Save the PID value. */
#define SAVE_PID \
- str lr, [sp, #-4]!; /* Save LR. */ \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (lr, 0); \
- mov r0, #0xffff0fff; /* Point to the high page. */ \
- mov lr, pc; /* Save our return address. */ \
- sub pc, r0, #31; /* Jump to the TLS entry. */ \
- ldr lr, [sp], #4; /* Restore LR. */ \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (lr); \
+ GET_TLS(r2); \
NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2. */ \
ldr r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID. */ \
rsbs r0, r3, #0; /* Negate it. */ \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index 89208a9..01d8123 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -45,6 +45,36 @@
#ifdef __ASSEMBLER__
+/* Internal macro calling the linux kernel kuser_get_tls helper.
+ Note that in thumb mode, a constant pool break is often out of range, so
+ we always expand the constant inline. */
+#ifdef __thumb2__
+# define GET_TLS_BODY \
+ movw r0, #0x0fe0; \
+ movt r0, #0xffff; \
+ blx r0
+#else
+# define GET_TLS_BODY \
+ mov r0, #0xffff0fff; /* Point to the high page. */ \
+ mov lr, pc; /* Save our return address. */ \
+ sub pc, r0, #31 /* Jump to the TLS entry. */
+#endif
+
+/* Helper to get the TLS base pointer. Save LR in TMP, return in R0,
+ and no other registers clobbered. TMP may be LR itself to indicate
+ that no save is necessary. */
+#undef GET_TLS
+#define GET_TLS(TMP) \
+ .ifnc TMP, lr; \
+ mov TMP, lr; \
+ cfi_register (lr, TMP); \
+ GET_TLS_BODY; \
+ mov lr, TMP; \
+ cfi_restore (lr); \
+ .else; \
+ GET_TLS_BODY; \
+ .endif
+
/* Linux uses a negative return value to indicate syscall errors,
unlike most Unices, which use the condition codes' carry flag.
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 12/14] arm: Add optimized addmul_1
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (3 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:58 ` Roland McGrath
` (2 more replies)
2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
` (8 subsequent siblings)
13 siblings, 3 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
LGPL 3, but tested with the GMP testsuite.
This is 25% faster than the generic code as measured on Cortex-A15,
and the same speed as GMP on the same core. It's probably slower
than GMP on the A8 and A9 cores though.
---
* sysdeps/arm/addmul_1.S: New file.
---
ports/sysdeps/arm/addmul_1.S | 66 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
create mode 100644 ports/sysdeps/arm/addmul_1.S
diff --git a/ports/sysdeps/arm/addmul_1.S b/ports/sysdeps/arm/addmul_1.S
new file mode 100644
index 0000000..4e2f6da
--- /dev/null
+++ b/ports/sysdeps/arm/addmul_1.S
@@ -0,0 +1,66 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .syntax unified
+ .text
+
+@ cycles/limb
+@ StrongArm ?
+@ Cortex-A8 ?
+@ Cortex-A9 ?
+@ Cortex-A15 4
+
+/* mp_limb_t mpn_addmul_1(res_ptr, src1_ptr, size, s2_limb) */
+
+ENTRY(__mpn_addmul_1)
+ push { r4, r5, r6, r7 } /* r4 = cl, r5 = rl/lpl, r6 = ul, r7 = out */
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (r4, 0)
+ cfi_rel_offset (r5, 4)
+ cfi_rel_offset (r6, 8)
+ cfi_rel_offset (r7, 12)
+
+ ldr r6, [r1], #4 /* load first ul, advance src1_ptr */
+ ldr r5, [r0] /* load first rl */
+ mov r4, #0 /* init carry in */
+ b 1f /* enter the loop at the multiply */
+0:
+ ldr r6, [r1], #4 /* load next ul */
+ adds r7, r4, r5 /* (out, c) = cl + lpl */
+ ldr r5, [r0, #4] /* load next rl */
+ adc r4, ip, #0 /* cl = hpl + c */
+ str r7, [r0], #4 /* store out, advance res_ptr */
+1:
+ mov ip, #0 /* zero-extend rl */
+ umlal r5, ip, r6, r3 /* (hpl, lpl) = ul * vl + rl */
+ subs r2, r2, #1 /* count down size */
+ bne 0b /* more limbs: finish this one in the loop */
+
+ adds r4, r4, r5 /* (out, c) = cl + lpl */
+ str r4, [r0] /* store the last result limb */
+ adc r0, ip, #0 /* return hpl + c */
+
+ pop { r4, r5, r6, r7 }
+ cfi_adjust_cfa_offset (-16)
+ cfi_restore (r4)
+ cfi_restore (r5)
+ cfi_restore (r6)
+ cfi_restore (r7)
+ DO_RET(lr)
+END(__mpn_addmul_1)
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (11 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:55 ` Roland McGrath
2013-03-05 2:01 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files Richard Henderson
13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
---
* sysdeps/arm/sysdep.h (ARCH_HAS_HARD_TP): New macro.
(GET_TLS): Use hard-tp if ARCH_HAS_HARD_TP.
* sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S: Likewise.
* sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): Don't override
the default definition if ARCH_HAS_HARD_TP.
---
ports/sysdeps/arm/sysdep.h | 14 +++++++++++---
ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S | 5 ++++-
ports/sysdeps/unix/sysv/linux/arm/sysdep.h | 16 +++++++++-------
3 files changed, 24 insertions(+), 11 deletions(-)
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index c09e680..03739a4 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -47,6 +47,9 @@
#if __ARM_ARCH > 4
# define ARCH_HAS_BLX
#endif
+#if __ARM_ARCH > 6 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__)
+# define ARCH_HAS_HARD_TP
+#endif
#if __ARM_ARCH > 6 || defined(__ARM_ARCH_6T2__)
# define ARCH_HAS_T2
#endif
@@ -184,10 +187,14 @@
/* Helper to get the TLS base pointer. The interface is that TMP is a
register that may be used to hold the LR, if necessary. TMP may be
LR itself to indicate that LR need not be saved. The base pointer
- is returned in R0. Only R0 and TMP are modified.
+ is returned in R0. Only R0 and TMP are modified. */
- At this generic level we have no tricks to pull. Call the ABI routine. */
-# define GET_TLS(TMP) \
+# ifdef ARCH_HAS_HARD_TP
+/* If the cpu has cp15 available, use it. */
+# define GET_TLS(TMP) mrc p15, 0, r0, c13, c0, 3
+# else
+/* At this generic level we have no tricks to pull. Call the ABI routine. */
+# define GET_TLS(TMP) \
push { r1, r2, r3, lr }; \
cfi_remember_state; \
cfi_adjust_cfa_offset (16); \
@@ -198,6 +205,7 @@
bl __aeabi_read_tp; \
pop { r1, r2, r3, lr }; \
cfi_restore_state
+# endif /* ARCH_HAS_HARD_TP */
#endif /* __ASSEMBLER__ */
diff --git a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
index ecdc322..21e3229 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
@@ -41,7 +41,10 @@
.hidden __aeabi_read_tp
ENTRY (__aeabi_read_tp)
-#ifdef __thumb2__
+#ifdef ARCH_HAS_HARD_TP
+ mrc p15, 0, r0, c13, c0, 3
+ bx lr
+#elif defined(__thumb2__)
movw r0, #0x0fe0
movt r0, #0xffff
bx r0
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index 39872b8..89fea7a 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -45,26 +45,27 @@
#ifdef __ASSEMBLER__
+#ifndef ARCH_HAS_HARD_TP
/* Internal macro calling the linux kernel kuser_get_tls helper.
Note that in thumb mode, a constant pool break is often out of range, so
we always expand the constant inline. */
-#ifdef __thumb2__
-# define GET_TLS_BODY \
+# ifdef __thumb2__
+# define GET_TLS_BODY \
movw r0, #0x0fe0; \
movt r0, #0xffff; \
blx r0
-#else
-# define GET_TLS_BODY \
+# else
+# define GET_TLS_BODY \
mov r0, #0xffff0fff; /* Point to the high page. */ \
mov lr, pc; /* Save our return address. */ \
sub pc, r0, #31 /* Jump to the TLS entry. */
-#endif
+# endif
/* Helper to get the TLS base pointer. Save LR in TMP, return in R0,
and no other registers clobbered. TMP may be LR itself to indicate
that no save is necessary. */
-#undef GET_TLS
-#define GET_TLS(TMP) \
+# undef GET_TLS
+# define GET_TLS(TMP) \
.ifnc TMP, lr; \
mov TMP, lr; \
cfi_register (lr, TMP); \
@@ -74,6 +75,7 @@
.else; \
GET_TLS_BODY; \
.endif
+#endif /* ARCH_HAS_HARD_TP */
/* Linux uses a negative return value to indicate syscall errors,
unlike most Unices, which use the condition codes' carry flag.
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 01/14] arm: Introduce and use LDST_PCREL
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (10 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-04 17:47 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
2013-03-01 17:36 ` [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files Richard Henderson
13 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
Macro-ising the few instances where we need to distinguish between
arm and thumb pc-relative memory operations.
---
* sysdeps/arm/sysdep.h (LDST_PCREL): New macro.
* sysdeps/unix/arm/sysdep.S (__syscall_error): Use LDST_PCREL.
Fix up gottpoff load of errno for thumb2.
* sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
(SINGLE_THREAD_P): Use LDST_PCREL.
(PSEUDO_PROLOGUE): Remove.
(PSEUDO): Don't use it.
* sysdeps/unix/sysv/linux/arm/sysdep.h (SYSCALL_ERROR_HANDLER):
Use LDST_PCREL.
---
ports/sysdeps/arm/sysdep.h | 17 +++++++++++++++++
ports/sysdeps/unix/arm/sysdep.S | 22 ++++++++++++----------
.../unix/sysv/linux/arm/nptl/sysdep-cancel.h | 10 ++--------
ports/sysdeps/unix/sysv/linux/arm/sysdep.h | 10 ++++------
4 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 4af7429..29a78f0 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -117,6 +117,23 @@
the caller. */
.eabi_attribute 24, 1
+/* Load or store to/from a pc-relative EXPR into/from R, using T. */
+# ifdef __thumb2__
+# define LDST_PCREL(OP, R, T, EXPR) \
+ ldr T, 98f; \
+ .subsection 2; \
+98: .word EXPR - 99f - PC_OFS; \
+ .previous; \
+99: add T, T, pc; \
+ OP R, [T]
+# else
+# define LDST_PCREL(OP, R, T, EXPR) \
+ ldr T, 98f; \
+ .subsection 2; \
+98: .word EXPR - 99f - PC_OFS; \
+ .previous; \
+99: OP R, [pc, T]
+# endif
#endif /* __ASSEMBLER__ */
/* This number is the offset from the pc at the current location. */
diff --git a/ports/sysdeps/unix/arm/sysdep.S b/ports/sysdeps/unix/arm/sysdep.S
index 40e4d80..d44ee48 100644
--- a/ports/sysdeps/unix/arm/sysdep.S
+++ b/ports/sysdeps/unix/arm/sysdep.S
@@ -45,20 +45,22 @@ __syscall_error:
mov lr, pc
sub pc, r0, #31
- ldr r2, 1f
-2: ldr r2, [pc, r2]
- str r1, [r0, r2]
- mvn r0, #0
- RETINSTR (, ip)
+ ldr r2, 1f
+#ifdef __thumb__
+2: add r2, r2, pc
+ ldr r2, [r2]
+#else
+2: ldr r2, [pc, r2]
+#endif
+ str r1, [r0, r2]
+ mvn r0, #0
+ DO_RET(ip)
1: .word errno(gottpoff) + (. - 2b - PC_OFS)
#elif RTLD_PRIVATE_ERRNO
- ldr r1, 1f
-0: str r0, [pc, r1]
- mvn r0, $0
+ LDST_PCREL(str, r0, r1, C_SYMBOL_NAME(rtld_errno))
+ mvn r0, #0
DO_RET(r14)
-
-1: .word C_SYMBOL_NAME(rtld_errno) - 0b - PC_OFS
#else
#error "Unsupported non-TLS case"
#endif
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index df85d51..8889369 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -31,7 +31,6 @@
# undef PSEUDO
# define PSEUDO(name, syscall_name, args) \
.text; \
- PSEUDO_PROLOGUE; \
ENTRY (__##syscall_name##_nocancel); \
CFI_SECTIONS; \
DO_CALL (syscall_name, args); \
@@ -203,12 +202,8 @@ extern int __local_multiple_threads attribute_hidden;
# define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1)
# else
# define SINGLE_THREAD_P \
- ldr ip, 1b; \
- 2: \
- ldr ip, [pc, ip]; \
- teq ip, #0;
-# define PSEUDO_PROLOGUE \
- 1: .word __local_multiple_threads - 2f - PC_OFS;
+ LDST_PCREL(ldr, ip, ip, __local_multiple_threads); \
+ teq ip, #0
# endif
# else
/* There is no __local_multiple_threads for librt, so use the TCB. */
@@ -217,7 +212,6 @@ extern int __local_multiple_threads attribute_hidden;
__builtin_expect (THREAD_GETMEM (THREAD_SELF, \
header.multiple_threads) == 0, 1)
# else
-# define PSEUDO_PROLOGUE
# define SINGLE_THREAD_P \
stmfd sp!, {r0, lr}; \
cfi_adjust_cfa_offset (8); \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index f40cb95..89208a9 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -110,12 +110,10 @@
# if RTLD_PRIVATE_ERRNO
# define SYSCALL_ERROR_HANDLER \
__local_syscall_error: \
- ldr r1, 1f; \
- rsb r0, r0, #0; \
-0: str r0, [pc, r1]; \
- mvn r0, #0; \
- DO_RET(lr); \
-1: .word C_SYMBOL_NAME(rtld_errno) - 0b - PC_OFS;
+ rsb r0, r0, #0; \
+ LDST_PCREL(str, r0, r1, C_SYMBOL_NAME(rtld_errno)); \
+ mvn r0, #0; \
+ DO_RET(lr)
# else
# if defined(__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
# define POP_PC \
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (12 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
The preceding patches have allowed for the few incompatibilities
between arm and thumb2 mode, or have marked the file as not wanting
to use thumb2 mode.
---
* sysdeps/arm/sysdep.h [__ASSEMBLER__]: Enable thumb2 if __thumb2__.
(PC_OFS): Respect __thumb__ if __ASSEMBLER__.
---
ports/sysdeps/arm/sysdep.h | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index c525d5b..d855ceb 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -117,6 +117,16 @@
the caller. */
.eabi_attribute 24, 1
+/* The thumb2 encoding is reasonably complete. Unless suppressed, use it. */
+ .syntax unified
+# if defined(__thumb2__) && !defined(NO_THUMB)
+ .thumb
+# else
+# undef __thumb__
+# undef __thumb2__
+ .arm
+# endif
+
/* Load or store to/from a pc-relative EXPR into/from R, using T. */
# ifdef __thumb2__
# define LDST_PCREL(OP, R, T, EXPR) \
@@ -172,8 +182,7 @@
#endif /* __ASSEMBLER__ */
/* This number is the offset from the pc at the current location. */
-/* ??? At the moment we're not turning on thumb mode in assembly. */
-#if defined(__thumb__) && !defined(__ASSEMBLER__)
+#ifdef __thumb__
# define PC_OFS 4
#else
# define PC_OFS 8
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 07/14] arm: Commonize BX conditionals
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (8 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 05/14] arm: Use push/pop mnemonics Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
` (3 subsequent siblings)
13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
Add BLX macro in addition and use it where appropriate.
---
* sysdeps/arm/sysdep.h (BX, BXC, BLX): New macros.
(DO_RET): Use BX.
(RETINSTR): Use BXC.
* sysdeps/arm/dl-tlsdesc.S (BX): Remove.
* sysdeps/arm/dl-trampoline.S (BX): Remove.
(_dl_runtime_profile): Use BLX.
---
ports/sysdeps/arm/dl-tlsdesc.S | 6 ------
ports/sysdeps/arm/dl-trampoline.S | 9 +--------
ports/sysdeps/arm/sysdep.h | 29 +++++++++++++----------------
3 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index 3a956de..aa3db80 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -20,12 +20,6 @@
#include <tls.h>
#include "tlsdesc.h"
-#ifdef __USE_BX__
- #define BX(x) bx x
-#else
- #define BX(x) mov pc, x
-#endif
-
.text
@ emit debug information with cfi
@ use arm-specific pseudos for unwinding itself
diff --git a/ports/sysdeps/arm/dl-trampoline.S b/ports/sysdeps/arm/dl-trampoline.S
index f2d1679..9366976 100644
--- a/ports/sysdeps/arm/dl-trampoline.S
+++ b/ports/sysdeps/arm/dl-trampoline.S
@@ -21,12 +21,6 @@
#include <sysdep.h>
#include <libc-symbols.h>
-#if defined(__USE_BX__)
-#define BX(x) bx x
-#else
-#define BX(x) mov pc, x
-#endif
-
.text
.globl _dl_runtime_resolve
.type _dl_runtime_resolve, #function
@@ -192,8 +186,7 @@ _dl_runtime_profile:
add ip, r7, #72
ldmia ip, {r0-r3}
ldr ip, [r7, #264]
- mov lr, pc
- BX(ip)
+ BLX(ip)
stmia r7, {r0-r3}
@ Call pltexit.
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 5f6c3f2..84313fe 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -33,26 +33,23 @@
#define PLTJMP(_x) _x##(PLT)
-/* APCS-32 doesn't preserve the condition codes across function call. */
-#ifdef __APCS_32__
#ifdef __USE_BX__
-#define RETINSTR(cond, reg) \
- bx##cond reg
-#define DO_RET(_reg) \
- bx _reg
+# define BX(R) bx R
+# define BXC(C, R) bx##C R
+# ifdef __ARM_ARCH_4T__
+# define BLX(R) mov lr, pc; bx R
+# else
+# define BLX(R) blx R
+# endif
#else
-#define RETINSTR(cond, reg) \
- mov##cond pc, reg
-#define DO_RET(_reg) \
- mov pc, _reg
-#endif
-#else /* APCS-26 */
-#define RETINSTR(cond, reg) \
- mov##cond##s pc, reg
-#define DO_RET(_reg) \
- movs pc, _reg
+# define BX(R) mov pc, R
+# define BXC(C, R) mov##C pc, R
+# define BLX(R) mov lr, pc; mov pc, R
#endif
+#define DO_RET(R) BX(R)
+#define RETINSTR(C, R) BXC(C, R)
+
/* Define an entry point visible from C. */
#define ENTRY(name) \
.globl C_SYMBOL_NAME(name); \
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 13/14] arm: Add optimized submul_1
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
2013-03-01 17:36 ` [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:58 ` Roland McGrath
2013-03-06 1:14 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
` (10 subsequent siblings)
13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
LGPL 3, but tested with the GMP testsuite.
This is 50% faster than the generic code as measured on Cortex-A15,
and the same speed as GMP on the same core. It's probably slower
than GMP on the A8 and A9 cores though.
---
* sysdeps/arm/submul_1.S: New file.
---
ports/sysdeps/arm/submul_1.S | 72 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 72 insertions(+)
create mode 100644 ports/sysdeps/arm/submul_1.S
diff --git a/ports/sysdeps/arm/submul_1.S b/ports/sysdeps/arm/submul_1.S
new file mode 100644
index 0000000..35e1348
--- /dev/null
+++ b/ports/sysdeps/arm/submul_1.S
@@ -0,0 +1,72 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .syntax unified
+ .text
+
+@ cycles/limb
+@ StrongArm ?
+@ Cortex-A8 ?
+@ Cortex-A9 ?
+@ Cortex-A15 4
+
+/* mp_limb_t mpn_submul_1(res_ptr, src1_ptr, size, s2_limb) */
+
+ENTRY(__mpn_submul_1)
+ push { r4, r5, r6, r7 } /* r4 = cl, r5 = lpl, r6 = ul, r7 = rl */
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (r4, 0)
+ cfi_rel_offset (r5, 4)
+ cfi_rel_offset (r6, 8)
+ cfi_rel_offset (r7, 12)
+
+ ldr r6, [r1], #4 /* load first ul, advance src1_ptr */
+ ldr r7, [r0] /* load first rl */
+ mov r4, #0 /* init carry in */
+ b 1f /* enter the loop at the multiply */
+0:
+ ldr r6, [r1], #4 /* load next ul */
+ adds r5, r5, r4 /* (lpl, c) = lpl + cl */
+ adc r4, ip, #0 /* cl = hpl + c */
+ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */
+ ldr r7, [r0, #4] /* load next rl */
+ it cc
+ addcc r4, r4, #1 /* cl += !c */
+ str r5, [r0], #4 /* store result limb, advance res_ptr */
+1:
+ umull r5, ip, r6, r3 /* (hpl, lpl) = ul * vl */
+ subs r2, r2, #1 /* count down size */
+ bne 0b /* more limbs: finish this one in the loop */
+
+ adds r5, r5, r4 /* (lpl, c) = lpl + cl */
+ adc r4, ip, #0 /* cl = hpl + c */
+ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */
+ str r5, [r0], #4 /* store the last result limb */
+ ite cc
+ addcc r0, r4, #1 /* cl += !c */
+ movcs r0, r4 /* return carry */
+
+ pop { r4, r5, r6, r7 }
+ cfi_adjust_cfa_offset (-16) /* keep unwind info valid up to the return */
+ cfi_restore (r4)
+ cfi_restore (r5)
+ cfi_restore (r6)
+ cfi_restore (r7)
+ DO_RET(lr)
+END(__mpn_submul_1)
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 00/14] ARM improvements
@ 2013-03-01 17:36 Richard Henderson
2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
` (13 more replies)
0 siblings, 14 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
I believe I've now addressed all of the comments to date.
Patches 4-8 have been approved, but they touch the same code
as patches 1-3, so I'm not rearranging them.
Patch 1 -- Spurious whitespace changes removed.
Patch 2 -- I never saw a review for this one?
Patch 3 -- Totally rewritten, in preparation for new patch for hard-tp.
Patch 6 -- Review mentioned deleting __APCS_32__, but this was already
done in patch 7, so I didn't rearrange that.
Patch 9-10 -- New.
Patch 11-12 -- Micro-optimization suggestions from Mans Rullgard applied.
Patch 14 -- Adjusted to avoid r9.
I've yet to test the string routines on big-endian. I'll delay
re-posting those until I have done so.
r~
Richard Henderson (14):
arm: Introduce and use LDST_PCREL
arm: Introduce and use NEGOFF series of macros
arm: Introduce and use GET_TLS
arm: Enable thumb2 mode in assembly files
arm: Use push/pop mnemonics
arm: Delete LOADREGS macro
arm: Commonize BX conditionals
arm: Unless arm4t, pop return address directly into pc
arm: Tidy architecture selection
arm: Implement hard-tp for GET_TLS
arm: Add optimized ffs for armv6t2
arm: Add optimized addmul_1
arm: Add optimized submul_1
arm: Add optimized add_n and sub_n
ports/sysdeps/arm/__longjmp.S | 2 +-
ports/sysdeps/arm/add_n.S | 83 ++++++++++++
ports/sysdeps/arm/addmul_1.S | 66 +++++++++
ports/sysdeps/arm/arm-mcount.S | 10 +-
ports/sysdeps/arm/armv6t2/ffs.S | 35 +++++
ports/sysdeps/arm/armv6t2/ffsll.S | 50 +++++++
ports/sysdeps/arm/crti.S | 4 +-
ports/sysdeps/arm/crtn.S | 8 +-
ports/sysdeps/arm/dl-machine.h | 2 +-
ports/sysdeps/arm/dl-tlsdesc.S | 42 +++---
ports/sysdeps/arm/dl-trampoline.S | 13 +-
ports/sysdeps/arm/memcpy.S | 58 ++++----
ports/sysdeps/arm/memmove.S | 58 ++++----
ports/sysdeps/arm/start.S | 10 +-
ports/sysdeps/arm/sub_n.S | 2 +
ports/sysdeps/arm/submul_1.S | 67 ++++++++++
ports/sysdeps/arm/sysdep.h | 147 +++++++++++++++++----
ports/sysdeps/unix/arm/sysdep.S | 32 ++---
.../sysdeps/unix/sysv/linux/arm/____longjmp_chk.S | 4 +-
ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S | 9 ++
ports/sysdeps/unix/sysv/linux/arm/clone.S | 13 +-
ports/sysdeps/unix/sysv/linux/arm/mmap.S | 8 +-
ports/sysdeps/unix/sysv/linux/arm/mmap64.S | 8 +-
ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S | 21 +--
.../unix/sysv/linux/arm/nptl/sysdep-cancel.h | 47 +++----
.../unix/sysv/linux/arm/nptl/unwind-forcedunwind.c | 4 +-
.../unix/sysv/linux/arm/nptl/unwind-resume.c | 4 +-
ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S | 24 ++--
ports/sysdeps/unix/sysv/linux/arm/syscall.S | 4 +-
ports/sysdeps/unix/sysv/linux/arm/sysdep.h | 69 +++++++---
ports/sysdeps/unix/sysv/linux/arm/vfork.S | 2 +-
31 files changed, 648 insertions(+), 258 deletions(-)
create mode 100644 ports/sysdeps/arm/add_n.S
create mode 100644 ports/sysdeps/arm/addmul_1.S
create mode 100644 ports/sysdeps/arm/armv6t2/ffs.S
create mode 100644 ports/sysdeps/arm/armv6t2/ffsll.S
create mode 100644 ports/sysdeps/arm/sub_n.S
create mode 100644 ports/sysdeps/arm/submul_1.S
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 06/14] arm: Delete LOADREGS macro
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (5 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
` (6 subsequent siblings)
13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
There was only one user. Its "condition" argument was used
for "ia" rather than an actual condition. The apcs26 syntax
is almost certainly not needed, given current binutils requirements.
---
* sysdeps/arm/__longjmp.S (__longjmp): Use ldmia insn directly.
* sysdeps/arm/sysdep.h (LOADREGS): Remove.
---
ports/sysdeps/arm/__longjmp.S | 2 +-
ports/sysdeps/arm/sysdep.h | 4 ----
2 files changed, 1 insertion(+), 5 deletions(-)
diff --git a/ports/sysdeps/arm/__longjmp.S b/ports/sysdeps/arm/__longjmp.S
index a3a2a8a..1d5d56b 100644
--- a/ports/sysdeps/arm/__longjmp.S
+++ b/ports/sysdeps/arm/__longjmp.S
@@ -37,7 +37,7 @@ ENTRY (__longjmp)
cfi_undefined (r4)
CHECK_SP (r4)
#endif
- LOADREGS(ia, ip!, {v1-v6, sl, fp, sp, lr})
+ ldmia ip!, {v1-v6, sl, fp, sp, lr}
cfi_restore (v1)
cfi_restore (v2)
cfi_restore (v3)
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index d74a328..5f6c3f2 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -35,8 +35,6 @@
/* APCS-32 doesn't preserve the condition codes across function call. */
#ifdef __APCS_32__
-#define LOADREGS(cond, base, reglist...)\
- ldm##cond base,reglist
#ifdef __USE_BX__
#define RETINSTR(cond, reg) \
bx##cond reg
@@ -49,8 +47,6 @@
mov pc, _reg
#endif
#else /* APCS-26 */
-#define LOADREGS(cond, base, reglist...)\
- ldm##cond base,reglist^
#define RETINSTR(cond, reg) \
mov##cond##s pc, reg
#define DO_RET(_reg) \
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 11/14] arm: Add optimized ffs for armv6t2
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (2 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-05 2:08 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
` (9 subsequent siblings)
13 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
---
* sysdeps/arm/armv6t2/ffs.S: New file.
* sysdeps/arm/armv6t2/ffsll.S: New file.
---
ports/sysdeps/arm/armv6t2/ffs.S | 35 +++++++++++++++++++++++++++
ports/sysdeps/arm/armv6t2/ffsll.S | 50 +++++++++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+)
create mode 100644 ports/sysdeps/arm/armv6t2/ffs.S
create mode 100644 ports/sysdeps/arm/armv6t2/ffsll.S
diff --git a/ports/sysdeps/arm/armv6t2/ffs.S b/ports/sysdeps/arm/armv6t2/ffs.S
new file mode 100644
index 0000000..b2c88b9
--- /dev/null
+++ b/ports/sysdeps/arm/armv6t2/ffs.S
@@ -0,0 +1,35 @@
+/* ffs -- find first set bit in an int, from least significant end.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .syntax unified
+ .text
+
+ENTRY (__ffs)
+ cmp r0, #0 @ set Z when the argument is zero; tested by the IT block below
+ rbit r0, r0 @ bit-reverse: the lowest set bit becomes the highest (zero stays zero)
+ itt ne @ the next two instructions run only for a non-zero argument
+ clzne r0, r0 @ leading zeros of the reversed word = 0-based index of lowest set bit
+ addne r0, r0, #1 @ convert to the 1-based ffs result
+ bx lr @ for a zero argument r0 is still 0 here
+END (__ffs)
+
+weak_alias (__ffs, ffs)
+weak_alias (__ffs, ffsl)
+libc_hidden_builtin_def (ffs)
diff --git a/ports/sysdeps/arm/armv6t2/ffsll.S b/ports/sysdeps/arm/armv6t2/ffsll.S
new file mode 100644
index 0000000..e49c70f
--- /dev/null
+++ b/ports/sysdeps/arm/armv6t2/ffsll.S
@@ -0,0 +1,50 @@
+/* ffsll -- find first set bit in a long long, from least significant end.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .syntax unified
+ .text
+
+ENTRY (ffsll)
+ @ If low part is 0, operate on the high part. Ensure that the
+ @ word on which we operate is in r0. Set r2 to the bit offset
+ @ of the word being considered. Set the flags for the word
+ @ being operated on.
+#ifdef __ARMEL__
+ cmp r0, #0 @ this branch treats r0 as the low word
+ itee ne
+ movne r2, #0 @ low word non-zero: bit offset 0, flags already say NE
+ moveq r2, #32 @ low word zero: result bits come from the other word
+ movseq r0, r1 @ bring r1 into r0 and set flags from its value
+#else
+ cmp r1, #0 @ this branch treats r1 as the low word
+ ittee ne
+ movne r2, #0 @ low word non-zero: bit offset 0
+ movne r0, r1 @ operate on it in r0; mov leaves the NE flags intact
+ moveq r2, #32 @ low word zero: r0 is the word to examine
+ cmpeq r0, #0 @ set flags from r0 for the IT block below
+#endif
+ @ Perform the ffs on r0.
+ rbit r0, r0 @ lowest set bit becomes highest; the flags set above are still used below
+ ittt ne @ skipped entirely when the selected word is zero
+ clzne r0, r0 @ 0-based index of the lowest set bit within the word
+ addne r2, r2, #1 @ word offset plus one for the 1-based result
+ addne r0, r0, r2 @ final result = bit index + offset + 1
+ bx lr @ when both words are zero, r0 is 0 here
+END (ffsll)
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 14/14] arm: Add optimized add_n and sub_n
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:59 ` Roland McGrath
2013-03-06 0:53 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc Richard Henderson
` (12 subsequent siblings)
13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
GPL 3, but tested with the GMP testsuite.
This is 250% faster than the generic code as measured on Cortex-A15,
and the same speed as GMP on the same core, and probably everywhere.
---
* sysdeps/arm/add_n.S: New file.
* sysdeps/arm/sub_n.S: New file.
---
ports/sysdeps/arm/add_n.S | 83 +++++++++++++++++++++++++++++++++++++++++++++++
ports/sysdeps/arm/sub_n.S | 2 ++
2 files changed, 85 insertions(+)
create mode 100644 ports/sysdeps/arm/add_n.S
create mode 100644 ports/sysdeps/arm/sub_n.S
diff --git a/ports/sysdeps/arm/add_n.S b/ports/sysdeps/arm/add_n.S
new file mode 100644
index 0000000..af69733
--- /dev/null
+++ b/ports/sysdeps/arm/add_n.S
@@ -0,0 +1,83 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ .syntax unified
+ .text
+
+#ifdef USE_AS_SUB_N
+# define INITC cmp r0, r0
+# define OPC sbcs
+# define RETC sbc r0, r0, r0; neg r0, r0
+# define FUNC __mpn_sub_n
+#else
+# define INITC cmn r0, #0
+# define OPC adcs
+# define RETC mov r0, #0; adc r0, r0, r0
+# define FUNC __mpn_add_n
+#endif
+
+/* mp_limb_t mpn_add_n(res_ptr, src1_ptr, src2_ptr, size) */
+
+ENTRY (FUNC)
+ push { r4, r5, r6, r7, r8, r10, lr } /* seven registers, 28 bytes; lr is reused as the end pointer */
+ cfi_adjust_cfa_offset (28)
+ cfi_rel_offset (r4, 0)
+ cfi_rel_offset (r5, 4)
+ cfi_rel_offset (r6, 8)
+ cfi_rel_offset (r7, 12)
+ cfi_rel_offset (r8, 16)
+ cfi_rel_offset (r10, 20)
+ cfi_rel_offset (lr, 24)
+
+ INITC /* initialize carry flag */
+ tst r3, #1 /* count & 1 == 1? */
+ add lr, r1, r3, lsl #2 /* compute end src1 */
+ beq 1f /* even count: skip the single-limb step */
+
+ ldr r4, [r1], #4 /* do one to make count even */
+ ldr r5, [r2], #4
+ OPC r4, r4, r5
+ teq r1, lr /* end of count? (preserve carry) */
+ str r4, [r0], #4
+ beq 9f /* that was the only limb */
+1:
+ tst r3, #2 /* count & 2 == 2? */
+ beq 2f /* NOTE(review): a zero count also lands in the loop at 2 with no end check first; presumably callers pass size >= 1 */
+ ldm r1!, { r4, r5 } /* do two to make count 0 mod 4 */
+ ldm r2!, { r6, r7 }
+ OPC r4, r4, r6
+ OPC r5, r5, r7
+ teq r1, lr /* end of count? */
+ stm r0!, { r4, r5 }
+ beq 9f /* nothing left for the four-limb loop */
+2:
+ ldm r1!, { r3, r5, r7, r10 } /* do four each loop */
+ ldm r2!, { r4, r6, r8, ip } /* the count in r3 was overwritten above; lr alone marks the end */
+ OPC r3, r3, r4
+ OPC r5, r5, r6
+ OPC r7, r7, r8
+ OPC r10, r10, ip
+ teq r1, lr /* src1 pointer reached the end? */
+ stm r0!, { r3, r5, r7, r10 }
+ bne 2b /* more limbs remain; carry is passed to the next iteration */
+
+9:
+ RETC /* copy carry out */
+ pop { r4, r5, r6, r7, r8, r10, pc } /* restore saved registers and return via the saved lr */
+END (FUNC)
diff --git a/ports/sysdeps/arm/sub_n.S b/ports/sysdeps/arm/sub_n.S
new file mode 100644
index 0000000..8eafa41
--- /dev/null
+++ b/ports/sysdeps/arm/sub_n.S
@@ -0,0 +1,2 @@
+#define USE_AS_SUB_N
+#include "add_n.S"
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
` (11 subsequent siblings)
13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
Unless we're trying old interworking, there's no point restoring to
LR first. Everything from armv5 on handles pop as an interworking jump.
---
* sysdeps/arm/arm-mcount.S (_mcount): Use pop into pc unless
__ARM_ARCH_4T__ and __THUMB_INTERWORK__.
* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_dynamic): Likewise.
---
ports/sysdeps/arm/arm-mcount.S | 6 +++---
ports/sysdeps/arm/dl-tlsdesc.S | 9 +++++++--
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/ports/sysdeps/arm/arm-mcount.S b/ports/sysdeps/arm/arm-mcount.S
index b6e5ec7..8ad0779 100644
--- a/ports/sysdeps/arm/arm-mcount.S
+++ b/ports/sysdeps/arm/arm-mcount.S
@@ -82,9 +82,7 @@ ENTRY(_mcount)
ldrne r0, [r0, #-4]
movsne r1, lr
blne __mcount_internal
-#ifdef __thumb2__
- pop {r0, r1, r2, r3, fp, pc}
-#else
+#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
pop {r0, r1, r2, r3, fp, lr}
cfi_adjust_cfa_offset (-24)
cfi_restore (r0)
@@ -94,6 +92,8 @@ ENTRY(_mcount)
cfi_restore (fp)
cfi_restore (lr)
bx lr
+#else
+ pop {r0, r1, r2, r3, fp, pc}
#endif
END(_mcount)
diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index aa3db80..4635415 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -109,13 +109,18 @@ _dl_tlsdesc_dynamic:
1: mov r0, r1
bl __tls_get_addr
rsb r0, r4, r0
-2: pop {r2,r3,r4, lr}
+2:
+#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+ pop {r2,r3,r4, lr}
cfi_adjust_cfa_offset (-16)
cfi_restore (lr)
cfi_restore (r4)
cfi_restore (r3)
cfi_restore (r2)
- BX (lr)
+ bx lr
+#else
+ pop {r2,r3,r4, pc}
+#endif
.fnend
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 09/14] arm: Tidy architecture selection
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (9 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 07/14] arm: Commonize BX conditionals Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:55 ` Roland McGrath
2013-03-05 2:01 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
` (2 subsequent siblings)
13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
---
* sysdeps/arm/sysdep.h (__ARM_ARCH): New macro.
(ARCH_HAS_BX, ARCH_HAS_BLX, ARCH_HAS_T2): New macros.
(BX): Select on ARCH_HAS_BX instead of __USE_BX__.
(BLX): Select on ARCH_HAS_BLX instead of __ARM_ARCH_4T__.
* ports/sysdeps/arm/dl-machine.h (BX): Select on ARCH_HAS_BX
instead of __USE_BX__.
---
ports/sysdeps/arm/dl-machine.h | 2 +-
ports/sysdeps/arm/sysdep.h | 41 ++++++++++++++++++++++++++++++++++-------
2 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/ports/sysdeps/arm/dl-machine.h b/ports/sysdeps/arm/dl-machine.h
index 30ad46c..5a424f8 100644
--- a/ports/sysdeps/arm/dl-machine.h
+++ b/ports/sysdeps/arm/dl-machine.h
@@ -136,7 +136,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
return lazy;
}
-#if defined(__USE_BX__)
+#if defined(ARCH_HAS_BX)
#define BX(x) "bx\t" #x
#else
#define BX(x) "mov\tpc, " #x
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 84313fe..c09e680 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -19,9 +19,36 @@
#include <sysdeps/generic/sysdep.h>
#include <features.h>
-#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
- && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__))
-# define __USE_BX__
+/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise from the per-variant __ARM_ARCH_*__ macros. */
+#ifndef __ARM_ARCH
+# ifdef __ARM_ARCH_2__
+# define __ARM_ARCH 2
+# elif defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__)
+# define __ARM_ARCH 3
+# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
+# define __ARM_ARCH 4
+# elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH 5
+# elif defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6ZK__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
+# define __ARM_ARCH 6
+# elif defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \
+ || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH 7
+# else
+# error unknown arm architecture
+# endif
+#endif /* !__ARM_ARCH */
+
+#if __ARM_ARCH > 4 || defined(__ARM_ARCH_4T__)
+# define ARCH_HAS_BX
+#endif
+#if __ARM_ARCH > 4
+# define ARCH_HAS_BLX
+#endif
+#if __ARM_ARCH > 6 || defined(__ARM_ARCH_6T2__)
+# define ARCH_HAS_T2
#endif
#ifdef __ASSEMBLER__
@@ -33,13 +60,13 @@
#define PLTJMP(_x) _x##(PLT)
-#ifdef __USE_BX__
+#ifdef ARCH_HAS_BX
# define BX(R) bx R
# define BXC(C, R) bx##C R
-# ifdef __ARM_ARCH_4T__
-# define BLX(R) mov lr, pc; bx R
-# else
+# ifdef ARCH_HAS_BLX
# define BLX(R) blx R
+# else
+# define BLX(R) mov lr, pc; bx R
# endif
#else
# define BX(R) mov pc, R
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 05/14] arm: Use push/pop mnemonics
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (7 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:36 ` [PATCH v2 07/14] arm: Commonize BX conditionals Richard Henderson
` (4 subsequent siblings)
13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
For arm this makes no difference--the result is bit-for-bit identical;
for thumb this results in smaller encodings. Perhaps it ought not and
this is in fact an assembler bug, but I also think it's clearer.
---
* sysdeps/arm/arm-mcount.S (_mcount): Use push/pop mnemonics.
* sysdeps/arm/crti.S, sysdeps/arm/crtn.S: Likewise.
* sysdeps/arm/dl-tlsdesc.S: Likewise.
* sysdeps/arm/dl-trampoline.S: Likewise.
* sysdeps/arm/start.S: Likewise.
* sysdeps/arm/memcpy.S (PULL): Rename macro from pull.
(PUSH): Rename macro from push.
(memcpy): Use push/pop mnemonics.
* sysdeps/arm/memmove.S: Similarly.
* sysdeps/arm/sysdep.h (CALL_MCOUNT): Use push/pop mnemonics.
* sysdeps/unix/sysv/linux/arm/____longjmp_chk.S: Likewise.
* sysdeps/unix/sysv/linux/arm/clone.S: Likewise.
* sysdeps/unix/sysv/linux/arm/mmap.S: Likewise.
* sysdeps/unix/sysv/linux/arm/mmap64.S: Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h: Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c: Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c: Likewise.
* sysdeps/unix/sysv/linux/arm/syscall.S: Likewise.
* sysdeps/unix/sysv/linux/arm/sysdep.h: Likewise.
* sysdeps/unix/sysv/linux/arm/vfork.S: Likewise.
---
ports/sysdeps/arm/arm-mcount.S | 6 +--
ports/sysdeps/arm/crti.S | 4 +-
ports/sysdeps/arm/crtn.S | 8 +--
ports/sysdeps/arm/dl-tlsdesc.S | 16 +++---
ports/sysdeps/arm/dl-trampoline.S | 4 +-
ports/sysdeps/arm/memcpy.S | 58 +++++++++++-----------
ports/sysdeps/arm/memmove.S | 58 +++++++++++-----------
ports/sysdeps/arm/start.S | 10 ++--
ports/sysdeps/arm/sysdep.h | 6 +--
.../sysdeps/unix/sysv/linux/arm/____longjmp_chk.S | 4 +-
ports/sysdeps/unix/sysv/linux/arm/clone.S | 4 +-
ports/sysdeps/unix/sysv/linux/arm/mmap.S | 8 +--
ports/sysdeps/unix/sysv/linux/arm/mmap64.S | 8 +--
.../unix/sysv/linux/arm/nptl/sysdep-cancel.h | 32 ++++++------
.../unix/sysv/linux/arm/nptl/unwind-forcedunwind.c | 4 +-
.../unix/sysv/linux/arm/nptl/unwind-resume.c | 4 +-
ports/sysdeps/unix/sysv/linux/arm/syscall.S | 4 +-
ports/sysdeps/unix/sysv/linux/arm/sysdep.h | 27 +++++-----
ports/sysdeps/unix/sysv/linux/arm/vfork.S | 2 +-
19 files changed, 133 insertions(+), 134 deletions(-)
diff --git a/ports/sysdeps/arm/arm-mcount.S b/ports/sysdeps/arm/arm-mcount.S
index 679d042..b6e5ec7 100644
--- a/ports/sysdeps/arm/arm-mcount.S
+++ b/ports/sysdeps/arm/arm-mcount.S
@@ -69,7 +69,7 @@ END(__gnu_mcount_nc)
code be compiled with APCS frame pointers. */
ENTRY(_mcount)
- stmdb sp!, {r0, r1, r2, r3, fp, lr}
+ push {r0, r1, r2, r3, fp, lr}
cfi_adjust_cfa_offset (24)
cfi_rel_offset (r0, 0)
cfi_rel_offset (r1, 4)
@@ -83,9 +83,9 @@ ENTRY(_mcount)
movsne r1, lr
blne __mcount_internal
#ifdef __thumb2__
- ldmia sp!, {r0, r1, r2, r3, fp, pc}
+ pop {r0, r1, r2, r3, fp, pc}
#else
- ldmia sp!, {r0, r1, r2, r3, fp, lr}
+ pop {r0, r1, r2, r3, fp, lr}
cfi_adjust_cfa_offset (-24)
cfi_restore (r0)
cfi_restore (r1)
diff --git a/ports/sysdeps/arm/crti.S b/ports/sysdeps/arm/crti.S
index 1d55ae2..be20a11 100644
--- a/ports/sysdeps/arm/crti.S
+++ b/ports/sysdeps/arm/crti.S
@@ -80,7 +80,7 @@ call_weak_fn:
.globl _init
.type _init, %function
_init:
- stmfd sp!, {r3, lr}
+ push {r3, lr}
#if PREINIT_FUNCTION_WEAK
bl call_weak_fn
#else
@@ -92,4 +92,4 @@ _init:
.globl _fini
.type _fini, %function
_fini:
- stmfd sp!, {r3, lr}
+ push {r3, lr}
diff --git a/ports/sysdeps/arm/crtn.S b/ports/sysdeps/arm/crtn.S
index a01eb01..ae7546c 100644
--- a/ports/sysdeps/arm/crtn.S
+++ b/ports/sysdeps/arm/crtn.S
@@ -42,16 +42,16 @@
.section .init,"ax",%progbits
#ifdef __ARM_ARCH_4T__
- ldmfd sp!, {r3, lr}
+ pop {r3, lr}
bx lr
#else
- ldmfd sp!, {r3, pc}
+ pop {r3, pc}
#endif
.section .fini,"ax",%progbits
#ifdef __ARM_ARCH_4T__
- ldmfd sp!, {r3, lr}
+ pop {r3, lr}
bx lr
#else
- ldmfd sp!, {r3, pc}
+ pop {r3, pc}
#endif
diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index 1c3bccf..3a956de 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -90,7 +90,7 @@ _dl_tlsdesc_dynamic:
/* Our calling convention is to clobber r0, r1 and the processor
flags. All others that are modified must be saved */
.save {r2,r3,r4,lr}
- stmdb sp!, {r2,r3,r4,lr}
+ push {r2,r3,r4,lr}
cfi_adjust_cfa_offset (16)
cfi_rel_offset (r2,0)
cfi_rel_offset (r3,4)
@@ -115,7 +115,7 @@ _dl_tlsdesc_dynamic:
1: mov r0, r1
bl __tls_get_addr
rsb r0, r4, r0
-2: ldmia sp!, {r2,r3,r4, lr}
+2: pop {r2,r3,r4, lr}
cfi_adjust_cfa_offset (-16)
cfi_restore (lr)
cfi_restore (r4)
@@ -146,7 +146,7 @@ _dl_tlsdesc_lazy_resolver:
cfi_adjust_cfa_offset (4)
cfi_rel_offset (r2, 0)
.save {r0,r1,r3,ip,lr}
- stmdb sp!, {r0, r1, r3, ip, lr}
+ push {r0, r1, r3, ip, lr}
cfi_adjust_cfa_offset (20)
cfi_rel_offset (r0, 0)
cfi_rel_offset (r1, 4)
@@ -154,14 +154,14 @@ _dl_tlsdesc_lazy_resolver:
cfi_rel_offset (ip, 12)
cfi_rel_offset (lr, 16)
bl _dl_tlsdesc_lazy_resolver_fixup
- ldmia sp!, {r0, r1, r3, ip, lr}
+ pop {r0, r1, r3, ip, lr}
cfi_adjust_cfa_offset (-20)
cfi_restore (lr)
cfi_restore (ip)
cfi_restore (r3)
cfi_restore (r1)
cfi_restore (r0)
- ldmia sp!, {r2}
+ pop {r2}
cfi_adjust_cfa_offset (-4)
cfi_restore (r2)
ldr r1, [r0, #4]
@@ -184,7 +184,7 @@ _dl_tlsdesc_resolve_hold:
cfi_adjust_cfa_offset (4)
cfi_rel_offset (r2, 0)
.save {r0,r1,r3,ip,lr}
- stmdb sp!, {r0, r1, r3, ip, lr}
+ push {r0, r1, r3, ip, lr}
cfi_adjust_cfa_offset (20)
cfi_rel_offset (r0, 0)
cfi_rel_offset (r1, 4)
@@ -193,14 +193,14 @@ _dl_tlsdesc_resolve_hold:
cfi_rel_offset (lr, 16)
adr r2, _dl_tlsdesc_resolve_hold
bl _dl_tlsdesc_resolve_hold_fixup
- ldmia sp!, {r0, r1, r3, ip, lr}
+ pop {r0, r1, r3, ip, lr}
cfi_adjust_cfa_offset (-20)
cfi_restore (lr)
cfi_restore (ip)
cfi_restore (r3)
cfi_restore (r1)
cfi_restore (r0)
- ldmia sp!, {r2}
+ pop {r2}
cfi_adjust_cfa_offset (-4)
cfi_restore (r2)
ldr r1, [r0, #4]
diff --git a/ports/sysdeps/arm/dl-trampoline.S b/ports/sysdeps/arm/dl-trampoline.S
index 561d8ae..f2d1679 100644
--- a/ports/sysdeps/arm/dl-trampoline.S
+++ b/ports/sysdeps/arm/dl-trampoline.S
@@ -43,7 +43,7 @@ _dl_runtime_resolve:
@ lr points to &GOT[2]
@ Save arguments. We save r4 to realign the stack.
- stmdb sp!,{r0-r4}
+ push {r0-r4}
cfi_adjust_cfa_offset (20)
cfi_rel_offset (r0, 0)
cfi_rel_offset (r1, 4)
@@ -67,7 +67,7 @@ _dl_runtime_resolve:
@ get arguments and return address back. We restore r4
@ only to realign the stack.
- ldmia sp!, {r0-r4,lr}
+ pop {r0-r4,lr}
cfi_adjust_cfa_offset (-24)
@ jump to the newly found address
diff --git a/ports/sysdeps/arm/memcpy.S b/ports/sysdeps/arm/memcpy.S
index 98b9b47..98981ef 100644
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -45,11 +45,11 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
#else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
#endif
.text
@@ -58,7 +58,7 @@
ENTRY(memcpy)
- stmfd sp!, {r0, r4, lr}
+ push {r0, r4, lr}
cfi_adjust_cfa_offset (12)
cfi_rel_offset (r4, 4)
cfi_rel_offset (lr, 8)
@@ -74,7 +74,7 @@ ENTRY(memcpy)
bne 10f
1: subs r2, r2, #(28)
- stmfd sp!, {r5 - r8}
+ push {r5 - r8}
cfi_adjust_cfa_offset (16)
cfi_rel_offset (r5, 0)
cfi_rel_offset (r6, 4)
@@ -131,7 +131,7 @@ ENTRY(memcpy)
CALGN( bcs 2b )
-7: ldmfd sp!, {r5 - r8}
+7: pop {r5 - r8}
cfi_adjust_cfa_offset (-16)
cfi_restore (r5)
cfi_restore (r6)
@@ -147,13 +147,13 @@ ENTRY(memcpy)
strcsb ip, [r0]
#if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
- ldmfd sp!, {r0, r4, lr}
+ pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
cfi_restore (lr)
bx lr
#else
- ldmfd sp!, {r0, r4, pc}
+ pop {r0, r4, pc}
#endif
cfi_restore_state
@@ -189,7 +189,7 @@ ENTRY(memcpy)
CALGN( subcc r2, r2, ip )
CALGN( bcc 15f )
-11: stmfd sp!, {r5 - r9}
+11: push {r5 - r9}
cfi_adjust_cfa_offset (20)
cfi_rel_offset (r5, 0)
cfi_rel_offset (r6, 4)
@@ -206,30 +206,30 @@ ENTRY(memcpy)
12: PLD( pld [r1, #124] )
13: ldmia r1!, {r4, r5, r6, r7}
- mov r3, lr, pull #\pull
+ mov r3, lr, PULL #\pull
subs r2, r2, #32
ldmia r1!, {r8, r9, ip, lr}
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
+ orr r3, r3, r4, PUSH #\push
+ mov r4, r4, PULL #\pull
+ orr r4, r4, r5, PUSH #\push
+ mov r5, r5, PULL #\pull
+ orr r5, r5, r6, PUSH #\push
+ mov r6, r6, PULL #\pull
+ orr r6, r6, r7, PUSH #\push
+ mov r7, r7, PULL #\pull
+ orr r7, r7, r8, PUSH #\push
+ mov r8, r8, PULL #\pull
+ orr r8, r8, r9, PUSH #\push
+ mov r9, r9, PULL #\pull
+ orr r9, r9, ip, PUSH #\push
+ mov ip, ip, PULL #\pull
+ orr ip, ip, lr, PUSH #\push
stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
bge 12b
PLD( cmn r2, #96 )
PLD( bge 13b )
- ldmfd sp!, {r5 - r9}
+ pop {r5 - r9}
cfi_adjust_cfa_offset (-20)
cfi_restore (r5)
cfi_restore (r6)
@@ -240,10 +240,10 @@ ENTRY(memcpy)
14: ands ip, r2, #28
beq 16f
-15: mov r3, lr, pull #\pull
+15: mov r3, lr, PULL #\pull
ldr lr, [r1], #4
subs ip, ip, #4
- orr r3, r3, lr, push #\push
+ orr r3, r3, lr, PUSH #\push
str r3, [r0], #4
bgt 15b
CALGN( cmp r2, #0 )
diff --git a/ports/sysdeps/arm/memmove.S b/ports/sysdeps/arm/memmove.S
index 059ca7a..d9fa0e3 100644
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -45,11 +45,11 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define PULL lsr
+#define PUSH lsl
#else
-#define pull lsl
-#define push lsr
+#define PULL lsl
+#define PUSH lsr
#endif
.text
@@ -73,7 +73,7 @@ ENTRY(memmove)
bls HIDDEN_JUMPTARGET(memcpy)
#endif
- stmfd sp!, {r0, r4, lr}
+ push {r0, r4, lr}
cfi_adjust_cfa_offset (12)
cfi_rel_offset (r4, 4)
cfi_rel_offset (lr, 8)
@@ -91,7 +91,7 @@ ENTRY(memmove)
bne 10f
1: subs r2, r2, #(28)
- stmfd sp!, {r5 - r8}
+ push {r5 - r8}
cfi_adjust_cfa_offset (16)
cfi_rel_offset (r5, 0)
cfi_rel_offset (r6, 4)
@@ -147,7 +147,7 @@ ENTRY(memmove)
CALGN( bcs 2b )
-7: ldmfd sp!, {r5 - r8}
+7: pop {r5 - r8}
cfi_adjust_cfa_offset (-16)
cfi_restore (r5)
cfi_restore (r6)
@@ -163,13 +163,13 @@ ENTRY(memmove)
strcsb ip, [r0, #-1]
#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
- ldmfd sp!, {r0, r4, lr}
+ pop {r0, r4, lr}
cfi_adjust_cfa_offset (-12)
cfi_restore (r4)
cfi_restore (lr)
bx lr
#else
- ldmfd sp!, {r0, r4, pc}
+ pop {r0, r4, pc}
#endif
cfi_restore_state
@@ -204,7 +204,7 @@ ENTRY(memmove)
CALGN( subcc r2, r2, ip )
CALGN( bcc 15f )
-11: stmfd sp!, {r5 - r9}
+11: push {r5 - r9}
cfi_adjust_cfa_offset (20)
cfi_rel_offset (r5, 0)
cfi_rel_offset (r6, 4)
@@ -221,30 +221,30 @@ ENTRY(memmove)
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
+ mov lr, r3, PUSH #\push
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
+ orr lr, lr, ip, PULL #\pull
+ mov ip, ip, PUSH #\push
+ orr ip, ip, r9, PULL #\pull
+ mov r9, r9, PUSH #\push
+ orr r9, r9, r8, PULL #\pull
+ mov r8, r8, PUSH #\push
+ orr r8, r8, r7, PULL #\pull
+ mov r7, r7, PUSH #\push
+ orr r7, r7, r6, PULL #\pull
+ mov r6, r6, PUSH #\push
+ orr r6, r6, r5, PULL #\pull
+ mov r5, r5, PUSH #\push
+ orr r5, r5, r4, PULL #\pull
+ mov r4, r4, PUSH #\push
+ orr r4, r4, r3, PULL #\pull
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
PLD( bge 13b )
- ldmfd sp!, {r5 - r9}
+ pop {r5 - r9}
cfi_adjust_cfa_offset (-20)
cfi_restore (r5)
cfi_restore (r6)
@@ -255,10 +255,10 @@ ENTRY(memmove)
14: ands ip, r2, #28
beq 16f
-15: mov lr, r3, push #\push
+15: mov lr, r3, PUSH #\push
ldr r3, [r1, #-4]!
subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
+ orr lr, lr, r3, PULL #\pull
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
diff --git a/ports/sysdeps/arm/start.S b/ports/sysdeps/arm/start.S
index a1d15b8..0a57b0b 100644
--- a/ports/sysdeps/arm/start.S
+++ b/ports/sysdeps/arm/start.S
@@ -80,14 +80,14 @@ _start:
mov lr, #0
/* Pop argc off the stack and save a pointer to argv */
- ldr a2, [sp], #4
+ pop { a2 }
mov a3, sp
/* Push stack limit */
- str a3, [sp, #-4]!
+ push { a3 }
/* Push rtld_fini */
- str a1, [sp, #-4]!
+ push { a1 }
#ifdef SHARED
ldr sl, .L_GOT
@@ -97,7 +97,7 @@ _start:
ldr ip, .L_GOT+4 /* __libc_csu_fini */
ldr ip, [sl, ip]
- str ip, [sp, #-4]! /* Push __libc_csu_fini */
+ push { ip } /* Push __libc_csu_fini */
ldr a4, .L_GOT+8 /* __libc_csu_init */
ldr a4, [sl, a4]
@@ -113,7 +113,7 @@ _start:
ldr ip, =__libc_csu_fini
/* Push __libc_csu_fini */
- str ip, [sp, #-4]!
+ push { ip }
/* Set up the other arguments in registers */
ldr a1, =main
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index d855ceb..d74a328 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -80,7 +80,7 @@
/* Call __gnu_mcount_nc if GCC >= 4.4. */
#if __GNUC_PREREQ(4,4)
#define CALL_MCOUNT \
- str lr,[sp, #-4]!; \
+ push {lr}; \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (lr, 0); \
bl PLTJMP(mcount); \
@@ -88,11 +88,11 @@
cfi_restore (lr)
#else /* else call _mcount */
#define CALL_MCOUNT \
- str lr,[sp, #-4]!; \
+ push {lr}; /* save the return address around the call */ \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (lr, 0); \
bl PLTJMP(mcount); \
- ldr lr, [sp], #4; \
+ pop {lr}; /* reload lr, matching the removed post-indexed ldr */ \
cfi_adjust_cfa_offset (-4); \
cfi_restore (lr)
#endif
diff --git a/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S b/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S
index 29edec6..6ee7a1a 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S
@@ -53,7 +53,7 @@ longjmp_msg:
cfi_remember_state; \
cmp sp, reg; \
bls .Lok; \
- str r7, [sp, #-4]!; \
+ push { r7 }; \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (r7, 0); \
mov r5, r0; \
@@ -79,7 +79,7 @@ longjmp_msg:
.Lfail: \
add sp, sp, #12; \
cfi_adjust_cfa_offset (-12); \
- ldr r7, [sp], #4; \
+ pop { r7 }; \
cfi_adjust_cfa_offset (-4); \
cfi_restore (r7); \
CALL_FAIL \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/clone.S b/ports/sysdeps/unix/sysv/linux/arm/clone.S
index 1bc5eab..3edebd2 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/clone.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/clone.S
@@ -49,7 +49,7 @@ ENTRY(__clone)
mov ip, r2
#endif
@ new sp is already in r1
- stmfd sp!, {r4, r7}
+ push {r4, r7}
cfi_adjust_cfa_offset (8)
cfi_rel_offset (r4, 0)
cfi_rel_offset (r7, 4)
@@ -61,7 +61,7 @@ ENTRY(__clone)
cfi_endproc
cmp r0, #0
beq 1f
- ldmfd sp!, {r4, r7}
+ pop {r4, r7}
blt PLTJMP(C_SYMBOL_NAME(__syscall_error))
RETINSTR(, lr)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/mmap.S b/ports/sysdeps/unix/sysv/linux/arm/mmap.S
index 68560b0..06b737e 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/mmap.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/mmap.S
@@ -23,11 +23,11 @@
ENTRY (__mmap)
/* shuffle args */
- str r5, [sp, #-4]!
+ push { r5 }
cfi_adjust_cfa_offset (4)
cfi_rel_offset (r5, 0)
ldr r5, [sp, #8]
- str r4, [sp, #-4]!
+ push { r4 }
cfi_adjust_cfa_offset (4)
cfi_rel_offset (r4, 0)
cfi_remember_state
@@ -43,10 +43,10 @@ ENTRY (__mmap)
/* restore registers */
2:
- ldr r4, [sp], #4
+ pop { r4 }
cfi_adjust_cfa_offset (-4)
cfi_restore (r4)
- ldr r5, [sp], #4
+ pop { r5 }
cfi_adjust_cfa_offset (-4)
cfi_restore (r5)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/mmap64.S b/ports/sysdeps/unix/sysv/linux/arm/mmap64.S
index dcbab3a..d039129 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/mmap64.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/mmap64.S
@@ -34,11 +34,11 @@
.text
ENTRY (__mmap64)
ldr ip, [sp, $LOW_OFFSET]
- str r5, [sp, #-4]!
+ push { r5 }
cfi_adjust_cfa_offset (4)
cfi_rel_offset (r5, 0)
ldr r5, [sp, $HIGH_OFFSET]
- str r4, [sp, #-4]!
+ push { r4 }
cfi_adjust_cfa_offset (4)
cfi_rel_offset (r4, 0)
cfi_remember_state
@@ -51,7 +51,7 @@ ENTRY (__mmap64)
orr r5, ip, r5, lsl $20 @ compose page offset
DO_CALL (mmap2, 0)
cmn r0, $4096
- ldmfd sp!, {r4, r5}
+ pop {r4, r5}
cfi_adjust_cfa_offset (-8)
cfi_restore (r4)
cfi_restore (r5)
@@ -62,7 +62,7 @@ ENTRY (__mmap64)
cfi_restore_state
.Linval:
mov r0, $-EINVAL
- ldmfd sp!, {r4, r5}
+ pop {r4, r5}
cfi_adjust_cfa_offset (-8)
cfi_restore (r4)
cfi_restore (r5)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index d5e666b..ac094df 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -76,19 +76,19 @@
# define DOCARGS_0 \
.save {r7}; \
- str lr, [sp, #-4]!; \
+ push {lr}; \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (lr, 0); \
.save {lr}
# define UNDOCARGS_0
# define RESTORE_LR_0 \
- ldr lr, [sp], #4; \
+ pop {lr}; \
cfi_adjust_cfa_offset (-4); \
cfi_restore (lr)
# define DOCARGS_1 \
.save {r7}; \
- stmfd sp!, {r0, r1, lr}; \
+ push {r0, r1, lr}; \
cfi_adjust_cfa_offset (12); \
cfi_rel_offset (lr, 8); \
.save {lr}; \
@@ -102,13 +102,13 @@
# define DOCARGS_2 \
.save {r7}; \
- stmfd sp!, {r0, r1, lr}; \
+ push {r0, r1, lr}; \
cfi_adjust_cfa_offset (12); \
cfi_rel_offset (lr, 8); \
.save {lr}; \
.pad #8
# define UNDOCARGS_2 \
- ldmfd sp!, {r0, r1}; \
+ pop {r0, r1}; \
cfi_adjust_cfa_offset (-8); \
RESTART_UNWIND
# define RESTORE_LR_2 \
@@ -116,13 +116,13 @@
# define DOCARGS_3 \
.save {r7}; \
- stmfd sp!, {r0, r1, r2, r3, lr}; \
+ push {r0, r1, r2, r3, lr}; \
cfi_adjust_cfa_offset (20); \
cfi_rel_offset (lr, 16); \
.save {lr}; \
.pad #16
# define UNDOCARGS_3 \
- ldmfd sp!, {r0, r1, r2, r3}; \
+ pop {r0, r1, r2, r3}; \
cfi_adjust_cfa_offset (-16); \
RESTART_UNWIND
# define RESTORE_LR_3 \
@@ -130,13 +130,13 @@
# define DOCARGS_4 \
.save {r7}; \
- stmfd sp!, {r0, r1, r2, r3, lr}; \
+ push {r0, r1, r2, r3, lr}; \
cfi_adjust_cfa_offset (20); \
cfi_rel_offset (lr, 16); \
.save {lr}; \
.pad #16
# define UNDOCARGS_4 \
- ldmfd sp!, {r0, r1, r2, r3}; \
+ pop {r0, r1, r2, r3}; \
cfi_adjust_cfa_offset (-16); \
RESTART_UNWIND
# define RESTORE_LR_4 \
@@ -145,13 +145,13 @@
/* r4 is only stmfd'ed for correct stack alignment. */
# define DOCARGS_5 \
.save {r4, r7}; \
- stmfd sp!, {r0, r1, r2, r3, r4, lr}; \
+ push {r0, r1, r2, r3, r4, lr}; \
cfi_adjust_cfa_offset (24); \
cfi_rel_offset (lr, 20); \
.save {lr}; \
.pad #20
# define UNDOCARGS_5 \
- ldmfd sp!, {r0, r1, r2, r3}; \
+ pop {r0, r1, r2, r3}; \
cfi_adjust_cfa_offset (-16); \
.fnend; \
.fnstart; \
@@ -159,20 +159,20 @@
.save {lr}; \
.pad #4
# define RESTORE_LR_5 \
- ldmfd sp!, {r4, lr}; \
+ pop {r4, lr}; \
cfi_adjust_cfa_offset (-8); \
/* r4 will be marked as restored later. */ \
cfi_restore (lr)
# define DOCARGS_6 \
.save {r4, r5, r7}; \
- stmfd sp!, {r0, r1, r2, r3, lr}; \
+ push {r0, r1, r2, r3, lr}; \
cfi_adjust_cfa_offset (20); \
cfi_rel_offset (lr, 16); \
.save {lr}; \
.pad #16
# define UNDOCARGS_6 \
- ldmfd sp!, {r0, r1, r2, r3}; \
+ pop {r0, r1, r2, r3}; \
cfi_adjust_cfa_offset (-16); \
.fnend; \
.fnstart; \
@@ -213,13 +213,13 @@ extern int __local_multiple_threads attribute_hidden;
header.multiple_threads) == 0, 1)
# else
# define SINGLE_THREAD_P \
- stmfd sp!, {r0, lr}; \
+ push {r0, lr}; \
cfi_adjust_cfa_offset (8); \
cfi_rel_offset (lr, 4); \
GET_TLS(lr); \
NEGOFF_ADJ_BASE(r0, MULTIPLE_THREADS_OFFSET); \
ldr ip, NEGOFF_OFF1(r0, MULTIPLE_THREADS_OFFSET); \
- ldmfd sp!, {r0, lr}; \
+ pop {r0, lr}; \
cfi_adjust_cfa_offset (-8); \
cfi_restore (lr); \
teq ip, #0
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c
index caa6a26..108924d 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c
@@ -93,7 +93,7 @@ asm (
"_Unwind_Resume:\n"
" .cfi_sections .debug_frame\n"
" " CFI_STARTPROC "\n"
-" stmfd sp!, {r4, r5, r6, lr}\n"
+" push {r4, r5, r6, lr}\n"
" " CFI_ADJUST_CFA_OFFSET (16)" \n"
" " CFI_REL_OFFSET (r4, 0) "\n"
" " CFI_REL_OFFSET (r5, 4) "\n"
@@ -108,7 +108,7 @@ asm (
" cmp r3, #0\n"
" beq 4f\n"
"5: mov r0, r6\n"
-" ldmfd sp!, {r4, r5, r6, lr}\n"
+" pop {r4, r5, r6, lr}\n"
" " CFI_ADJUST_CFA_OFFSET (-16) "\n"
" " CFI_RESTORE (r4) "\n"
" " CFI_RESTORE (r5) "\n"
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c
index 1211599..d155ea7 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c
@@ -56,7 +56,7 @@ asm (
"_Unwind_Resume:\n"
" .cfi_sections .debug_frame\n"
" " CFI_STARTPROC "\n"
-" stmfd sp!, {r4, r5, r6, lr}\n"
+" push {r4, r5, r6, lr}\n"
" " CFI_ADJUST_CFA_OFFSET (16)" \n"
" " CFI_REL_OFFSET (r4, 0) "\n"
" " CFI_REL_OFFSET (r5, 4) "\n"
@@ -71,7 +71,7 @@ asm (
" cmp r3, #0\n"
" beq 4f\n"
"5: mov r0, r6\n"
-" ldmfd sp!, {r4, r5, r6, lr}\n"
+" pop {r4, r5, r6, lr}\n"
" " CFI_ADJUST_CFA_OFFSET (-16) "\n"
" " CFI_RESTORE (r4) "\n"
" " CFI_RESTORE (r5) "\n"
diff --git a/ports/sysdeps/unix/sysv/linux/arm/syscall.S b/ports/sysdeps/unix/sysv/linux/arm/syscall.S
index 665ecb4..bdd5a52 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/syscall.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/syscall.S
@@ -23,7 +23,7 @@
ENTRY (syscall)
mov ip, sp
- stmfd sp!, {r4, r5, r6, r7}
+ push {r4, r5, r6, r7}
cfi_adjust_cfa_offset (16)
cfi_rel_offset (r4, 0)
cfi_rel_offset (r5, 4)
@@ -35,7 +35,7 @@ ENTRY (syscall)
mov r2, r3
ldmfd ip, {r3, r4, r5, r6}
swi 0x0
- ldmfd sp!, {r4, r5, r6, r7}
+ pop {r4, r5, r6, r7}
cfi_adjust_cfa_offset (-16)
cfi_restore (r4)
cfi_restore (r5)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index 01d8123..39872b8 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -147,23 +147,22 @@ __local_syscall_error: \
# else
# if defined(__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
# define POP_PC \
- ldr lr, [sp], #4; \
+ pop { lr }; \
cfi_adjust_cfa_offset (-4); \
cfi_restore (lr); \
bx lr
# else
-# define POP_PC \
- ldr pc, [sp], #4
+# define POP_PC pop { pc }
# endif
# define SYSCALL_ERROR_HANDLER \
__local_syscall_error: \
- str lr, [sp, #-4]!; \
+ push { lr }; \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (lr, 0); \
- str r0, [sp, #-4]!; \
+ push { r0 }; \
cfi_adjust_cfa_offset (4); \
bl PLTJMP(C_SYMBOL_NAME(__errno_location)); \
- ldr r1, [sp], #4; \
+ pop { r1 }; \
cfi_adjust_cfa_offset (-4); \
rsb r1, r1, #0; \
str r1, [r0]; \
@@ -230,7 +229,7 @@ __local_syscall_error: \
#undef DOARGS_0
#define DOARGS_0 \
.fnstart; \
- str r7, [sp, #-4]!; \
+ push { r7 }; \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (r7, 0); \
.save { r7 }
@@ -245,7 +244,7 @@ __local_syscall_error: \
#undef DOARGS_5
#define DOARGS_5 \
.fnstart; \
- stmfd sp!, {r4, r7}; \
+ push {r4, r7}; \
cfi_adjust_cfa_offset (8); \
cfi_rel_offset (r4, 0); \
cfi_rel_offset (r7, 4); \
@@ -255,7 +254,7 @@ __local_syscall_error: \
#define DOARGS_6 \
.fnstart; \
mov ip, sp; \
- stmfd sp!, {r4, r5, r7}; \
+ push {r4, r5, r7}; \
cfi_adjust_cfa_offset (12); \
cfi_rel_offset (r4, 0); \
cfi_rel_offset (r5, 4); \
@@ -266,7 +265,7 @@ __local_syscall_error: \
#define DOARGS_7 \
.fnstart; \
mov ip, sp; \
- stmfd sp!, {r4, r5, r6, r7}; \
+ push {r4, r5, r6, r7}; \
cfi_adjust_cfa_offset (16); \
cfi_rel_offset (r4, 0); \
cfi_rel_offset (r5, 4); \
@@ -277,7 +276,7 @@ __local_syscall_error: \
#undef UNDOARGS_0
#define UNDOARGS_0 \
- ldr r7, [sp], #4; \
+ pop {r7}; \
cfi_adjust_cfa_offset (-4); \
cfi_restore (r7); \
.fnend
@@ -291,14 +290,14 @@ __local_syscall_error: \
#define UNDOARGS_4 UNDOARGS_0
#undef UNDOARGS_5
#define UNDOARGS_5 \
- ldmfd sp!, {r4, r7}; \
+ pop {r4, r7}; \
cfi_adjust_cfa_offset (-8); \
cfi_restore (r4); \
cfi_restore (r7); \
.fnend
#undef UNDOARGS_6
#define UNDOARGS_6 \
- ldmfd sp!, {r4, r5, r7}; \
+ pop {r4, r5, r7}; \
cfi_adjust_cfa_offset (-12); \
cfi_restore (r4); \
cfi_restore (r5); \
@@ -306,7 +305,7 @@ __local_syscall_error: \
.fnend
#undef UNDOARGS_7
#define UNDOARGS_7 \
- ldmfd sp!, {r4, r5, r6, r7}; \
+ pop {r4, r5, r6, r7}; \
cfi_adjust_cfa_offset (-16); \
cfi_restore (r4); \
cfi_restore (r5); \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/vfork.S b/ports/sysdeps/unix/sysv/linux/arm/vfork.S
index ae931f7..128a640 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/vfork.S
@@ -37,7 +37,7 @@ ENTRY (__vfork)
mov ip, r7
cfi_register (r7, ip)
.fnstart
- str r7, [sp, #-4]!
+ push { r7 }
cfi_adjust_cfa_offset (4)
.save { r7 }
ldr r7, =SYS_ify (vfork)
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
` (4 preceding siblings ...)
2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
2013-03-01 17:57 ` Roland McGrath
2013-03-05 1:42 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 06/14] arm: Delete LOADREGS macro Richard Henderson
` (7 subsequent siblings)
13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
To: libc-ports; +Cc: joseph
There are several places in which we access negative offsets from
the thread-pointer, but thumb2 only supports positive offsets in
memory references.
Avoid duplicating the rather large macros in which these references
are embedded by abstracting out the operation.
---
* sysdeps/arm/sysdep.h (NEGOFF_ADJ_BASE): New macro.
(NEGOFF_ADJ_BASE2, NEGOFF_OFF1, NEGOFF_OFF2): New macros.
* sysdeps/unix/sysv/linux/arm/clone.S (__clone): Use them.
* sysdeps/unix/sysv/linux/arm/nptl/vfork.S: Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S: Likewise.
* sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
Likewise.
---
ports/sysdeps/arm/sysdep.h | 16 ++++++++++++++++
ports/sysdeps/unix/sysv/linux/arm/clone.S | 5 +++--
ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S | 11 ++++++-----
ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h | 3 ++-
ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S | 14 ++++++++------
5 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 29a78f0..9230131 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -134,6 +134,22 @@
.previous; \
99: OP R, [pc, T]
# endif
+
+/* Cope with negative memory offsets, which thumb can't encode.
+ Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
+ and then NEGOFF_OFF1 to use 0 for thumb and the offset for arm,
+ or NEGOFF_OFF2 to use A-B for thumb and A for arm. */
+# ifdef __thumb2__
+# define NEGOFF_ADJ_BASE(R, OFF) add R, R, $OFF
+# define NEGOFF_ADJ_BASE2(D, S, OFF) add D, S, $OFF
+# define NEGOFF_OFF1(R, OFF) [R]
+# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $((OFFA) - (OFFB))]
+# else
+# define NEGOFF_ADJ_BASE(R, OFF)
+# define NEGOFF_ADJ_BASE2(D, S, OFF) mov D, S
+# define NEGOFF_OFF1(R, OFF) [R, $OFF]
+# define NEGOFF_OFF2(R, OFFA, OFFB) [R, $OFFA]
+# endif
#endif /* __ASSEMBLER__ */
/* This number is the offset from the pc at the current location. */
diff --git a/ports/sysdeps/unix/sysv/linux/arm/clone.S b/ports/sysdeps/unix/sysv/linux/arm/clone.S
index 732a3ff..a5f9b4d 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/clone.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/clone.S
@@ -83,8 +83,9 @@ PSEUDO_END (__clone)
ite ne
movne r0, #-1
swieq 0x0
- str r0, [r1, #PID_OFFSET]
- str r0, [r1, #TID_OFFSET]
+ NEGOFF_ADJ_BASE(r1, TID_OFFSET)
+ str r0, NEGOFF_OFF1(r1, TID_OFFSET)
+ str r0, NEGOFF_OFF2(r1, PID_OFFSET, TID_OFFSET)
3:
#endif
@ pick the function arg and call address off the stack and execute
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
index a38d564..ff88510 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
@@ -28,14 +28,15 @@
ldr lr, [sp], #4; /* Restore LR. */ \
cfi_adjust_cfa_offset (-4); \
cfi_restore (lr); \
- mov r2, r0; /* Save the TLS addr in r2. */ \
- ldr r3, [r2, #PID_OFFSET]; /* Load the saved PID. */ \
- rsb r0, r3, #0; /* Negate it. */ \
- str r0, [r2, #PID_OFFSET] /* Store the temporary PID. */
+ NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2. */ \
+ ldr r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID. */ \
+ rsb r0, r3, #0; /* Negate it. */ \
+ str r0, NEGOFF_OFF1(r2, PID_OFFSET); /* Store the temp PID. */
/* Restore the old PID value in the parent. */
#define RESTORE_PID \
cmp r0, #0; /* If we are the parent... */ \
- strne r3, [r2, #PID_OFFSET] /* ... restore the saved PID. */
+ it ne; \
+ strne r3, NEGOFF_OFF1(r2, PID_OFFSET); /* restore the saved PID. */
#include "../vfork.S"
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index 8889369..47d4c70 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -217,7 +217,8 @@ extern int __local_multiple_threads attribute_hidden;
cfi_adjust_cfa_offset (8); \
cfi_rel_offset (lr, 4); \
bl __aeabi_read_tp; \
- ldr ip, [r0, #MULTIPLE_THREADS_OFFSET]; \
+ NEGOFF_ADJ_BASE(r0, MULTIPLE_THREADS_OFFSET); \
+ ldr ip, NEGOFF_OFF1(r0, MULTIPLE_THREADS_OFFSET); \
ldmfd sp!, {r0, lr}; \
cfi_adjust_cfa_offset (-8); \
cfi_restore (lr); \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
index 3fce2d1..c4be1e2 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
@@ -28,15 +28,17 @@
ldr lr, [sp], #4; /* Restore LR. */ \
cfi_adjust_cfa_offset (-4); \
cfi_restore (lr); \
- mov r2, r0; /* Save the TLS addr in r2. */ \
- ldr r3, [r2, #PID_OFFSET]; /* Load the saved PID. */ \
- rsbs r0, r3, #0; /* Negate it. */ \
- moveq r0, #0x80000000; /* Use 0x80000000 if it was 0. */ \
- str r0, [r2, #PID_OFFSET] /* Store the temporary PID. */
+ NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2. */ \
+ ldr r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID. */ \
+ rsbs r0, r3, #0; /* Negate it. */ \
+ it eq; \
+ moveq r0, #0x80000000; /* Use 0x80000000 if it was 0. */ \
+ str r0, NEGOFF_OFF1(r2, PID_OFFSET); /* Store the temp PID. */
/* Restore the old PID value in the parent. */
#define RESTORE_PID \
cmp r0, #0; /* If we are the parent... */ \
- strne r3, [r2, #PID_OFFSET] /* ... restore the saved PID. */
+ it ne; \
+ strne r3, NEGOFF_OFF1(r2, PID_OFFSET); /* restore the saved PID. */
#include "../vfork.S"
--
1.8.1.2
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS
2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
@ 2013-03-01 17:55 ` Roland McGrath
2013-03-05 2:01 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:55 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
Space before paren in defined (FOO).
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 09/14] arm: Tidy architecture selection
2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
@ 2013-03-01 17:55 ` Roland McGrath
2013-03-05 2:01 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:55 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
Space before paren in defined (FOO).
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 03/14] arm: Introduce and use GET_TLS
2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
@ 2013-03-01 17:57 ` Roland McGrath
2013-03-05 1:45 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:57 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
Space before paren.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros
2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
@ 2013-03-01 17:57 ` Roland McGrath
2013-03-05 1:42 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:57 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
Space before paren.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 13/14] arm: Add optimized submul_1
2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
@ 2013-03-01 17:58 ` Roland McGrath
2013-03-06 1:14 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:58 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
Top line descriptive comment, please.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
@ 2013-03-01 17:58 ` Roland McGrath
2013-03-01 18:00 ` Roland McGrath
2013-03-06 1:11 ` Joseph S. Myers
2 siblings, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:58 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
Top line descriptive comment, please.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 14/14] arm: Add optimized add_n and sub_n
2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
@ 2013-03-01 17:59 ` Roland McGrath
2013-03-06 0:53 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:59 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
Top line descriptive comment, please.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
2013-03-01 17:58 ` Roland McGrath
@ 2013-03-01 18:00 ` Roland McGrath
2013-03-06 1:18 ` Joseph S. Myers
2013-03-06 1:11 ` Joseph S. Myers
2 siblings, 1 reply; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 18:00 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports, joseph
I think the license is a non-problem since FSF is copyright owner.
But if your from-scratch code is good then I don't know there's a
strong reason to use GMP's instead, since we haven't been tracking
GMP changes in our copies for years anyway AFAIK.
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 01/14] arm: Introduce and use LDST_PCREL
2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
@ 2013-03-04 17:47 ` Joseph S. Myers
0 siblings, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-04 17:47 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> Macro-ising the few instances where we need to distinguish between
> arm and thumb pc-relative memory operations.
> ---
> * sysdeps/arm/sysdep.h (LDST_PCREL): New macro.
> * sysdeps/unix/arm/sysdep.S (__syscall_error): Use LDST_PCREL.
> Fix up gottpoff load of errno for thumb2.
> * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
> (SINGLE_THREAD_P): Use LDST_PCREL.
> (PSEUDO_PROLOGUE): Remove.
> (PSEUDO): Don't use it.
> * sysdeps/unix/sysv/linux/arm/sysdep.h (SYSCALL_ERROR_HANDLER):
> Use LDST_PCREL.
OK.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros
2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
2013-03-01 17:57 ` Roland McGrath
@ 2013-03-05 1:42 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05 1:42 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> There are several places in which we access negative offsets from
> the thread-pointer, but thumb2 only supports positive offsets in
> memory references.
>
> Avoid duplicating the rather large macros in which these references
> are embedded by abstracting out the operation.
> ---
> * sysdeps/arm/sysdep.h (NEGOFF_ADJ_BASE): New macro.
> (NEGOFF_ADJ_BASE2, NEGOFF_OFF1, NEGOFF_OFF2): New macros.
> * sysdeps/unix/sysv/linux/arm/clone.S (__clone): Use them.
> * sysdeps/unix/sysv/linux/arm/nptl/vfork.S: Likewise.
> * sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S: Likewise.
> * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
> Likewise.
OK.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 03/14] arm: Introduce and use GET_TLS
2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
2013-03-01 17:57 ` Roland McGrath
@ 2013-03-05 1:45 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05 1:45 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> Factor out the sequence needed to call kuser_get_tls, as we can't
> play subtract into pc games in thumb mode. Prepare for hard-tp,
> pulling the save of LR into the macro.
> ---
> * sysdeps/arm/sysdep.h (GET_TLS): New macro.
> * sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_undefweak): Use it.
> (_dl_tlsdesc_dynamic): Likewise.
> * sysdeps/unix/arm/sysdep.S (__syscall_error): Likewise.
> * sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): New macro.
> * sysdeps/unix/sysv/linux/arm/clone.S (__clone): Likewise.
> * sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S (SAVE_PID): Likewise.
> * sysdeps/unix/sysv/linux/arm/nptl/vfork.S (SAVE_PID): Likewise.
> * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
> Likewise.
> * sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S (__aeabi_read_tp):
> Add thumb2 alternative.
OK.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 09/14] arm: Tidy architecture selection
2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
2013-03-01 17:55 ` Roland McGrath
@ 2013-03-05 2:01 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05 2:01 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> +# elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
> + || defined(__ARM_ARCH_5TEJ__)
> +# define __ARM_ARCH 5
Or plain 5, or 5E (allowed by the architecture and -march=armv5 /
-march=armv5e, even if GCC doesn't know of any relevant -mcpu= processors
and such may not have existed).
> +# elif defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6ZK__) \
> + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
> +# define __ARM_ARCH 6
Or plain 6, or 6Z.
My reference for possible values is the union of the architecture columns
in GCC 4.7's arm-arches.def and arm-cores.def (given that for 4.8 and
later you have __ARM_ARCH predefined, so only the values known to 4.7 are
relevant here).
> +# elif defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \
> + || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
Or plain 7 (given that you're already covering values that aren't actually
compatible with building glibc).
OK fixed to handle all the other __ARM_ARCH_* values GCC might define, as
described above.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS
2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
2013-03-01 17:55 ` Roland McGrath
@ 2013-03-05 2:01 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05 2:01 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> * sysdeps/arm/sysdep.h (ARCH_HAS_HARD_TP): New macro.
> (GET_TLS): Use hard-tp if ARCH_HAS_HARD_TP.
> * sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S: Likewise.
> * sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): Don't override
> the default definition if ARCH_HAS_HARD_TP.
OK.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 11/14] arm: Add optimized ffs for armv6t2
2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
@ 2013-03-05 2:08 ` Joseph S. Myers
2013-03-06 15:52 ` Richard Henderson
0 siblings, 1 reply; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05 2:08 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> * sysdeps/arm/armv6t2/ffs.S: New file.
> * sysdeps/arm/armv6t2/ffsll.S: New file.
OK, if ffsll has been tested for both big and little endian (or OK for
just ffs pending big-endian ffsll testing).
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 14/14] arm: Add optimized add_n and sub_n
2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
2013-03-01 17:59 ` Roland McGrath
@ 2013-03-06 0:53 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06 0:53 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
> GPL 3, but tested with the GMP testsuite.
>
> This is 250% faster than the generic code as measured on Cortex-A15,
> and the same speed as GMP on the same core, and probably everywhere.
> ---
> * sysdeps/arm/add_n.S: New file.
> * sysdeps/arm/sub_n.S: New file.
OK.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
2013-03-01 17:58 ` Roland McGrath
2013-03-01 18:00 ` Roland McGrath
@ 2013-03-06 1:11 ` Joseph S. Myers
2 siblings, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06 1:11 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
> GPL 3, but tested with the GMP testsuite.
>
> This is 25% faster than the generic code as measured on Cortex-A15,
> and the same speed as GMP on the same core. It's probably slower
> than GMP on the A8 and A9 cores though.
> ---
> * sysdeps/arm/addmul_1.S: New file.
OK.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 13/14] arm: Add optimized submul_1
2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
2013-03-01 17:58 ` Roland McGrath
@ 2013-03-06 1:14 ` Joseph S. Myers
1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06 1:14 UTC (permalink / raw)
To: Richard Henderson; +Cc: libc-ports
On Fri, 1 Mar 2013, Richard Henderson wrote:
> Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
> GPL 3, but tested with the GMP testsuite.
>
> This is 50% faster than the generic code as measured on Cortex-A15,
> and the same speed as GMP on the same core. It's probably slower
> than GMP on the A8 and A9 cores though.
> ---
> * sysdeps/arm/submul_1.S: New file.
OK.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
2013-03-01 18:00 ` Roland McGrath
@ 2013-03-06 1:18 ` Joseph S. Myers
2013-10-25 22:13 ` Roland McGrath
0 siblings, 1 reply; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06 1:18 UTC (permalink / raw)
To: Roland McGrath; +Cc: Richard Henderson, libc-ports
On Fri, 1 Mar 2013, Roland McGrath wrote:
> I think the license is a non-problem since FSF is copyright owner.
My understanding was that FSF approval was needed for relicensing code
from other FSF-owned packages (as opposed to correcting simple mistakes,
e.g. making the license notice on a file reflect established licensing
practice for files used in a particular way). (E.g., when license
exception notices were added to soft-fp for use in libgcc, that involved
FSF approval for adding those notices.)
> But if your from-scratch code is good then I don't know there's a
> strong reason to use GMP's instead, since we haven't been tracking
> GMP changes in our copies for years anyway AFAIK.
I suspect other architectures might benefit from changes made in GMP to
improve performance - but certainly this is code that has diverged
significantly from the GMP versions over time.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 11/14] arm: Add optimized ffs for armv6t2
2013-03-05 2:08 ` Joseph S. Myers
@ 2013-03-06 15:52 ` Richard Henderson
0 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-06 15:52 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-ports
On 03/04/2013 06:08 PM, Joseph S. Myers wrote:
> On Fri, 1 Mar 2013, Richard Henderson wrote:
>
>> * sysdeps/arm/armv6t2/ffs.S: New file.
>> * sysdeps/arm/armv6t2/ffsll.S: New file.
>
> OK, if ffsll has been tested for both big and little endian (or OK for
> just ffs pending big-endian ffsll testing).
>
I did finally get big-endian testing done.
There appear to be no big-endian distributions extant anymore, so this required
doing the entire bootstrap gcc/glibc dance from scratch. It had been long
enough since I'd done so that it took me a while to remember the steps.
So I've committed this entire patch set, including ffsll, and will shortly be
re-submitting the string routines previously posted.
r~
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
2013-03-06 1:18 ` Joseph S. Myers
@ 2013-10-25 22:13 ` Roland McGrath
0 siblings, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-10-25 22:13 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: Richard Henderson, libc-ports
[A very old thread, but I still had it sitting around.]
> On Fri, 1 Mar 2013, Roland McGrath wrote:
>
> > I think the license is a non-problem since FSF is copyright owner.
>
> My understanding was that FSF approval was needed for relicensing code
> from other FSF-owned packages (as opposed to correcting simple mistakes,
> e.g. making the license notice on a file reflect established licensing
> practice for files used in a particular way). (E.g., when license
> exception notices were added to soft-fp for use in libgcc, that involved
> FSF approval for adding those notices.)
Given that we imported GMP code before and had permission, I don't think we
really need new permission for more GMP code being used for the same
purpose. That was 20 years ago and lots of things have changed, but I
still think so. Nonetheless, the most conservative thing would be to ask
the current FSF authorities and make it clear that it is a continuation of
a past exception rather than an entirely fresh one.
> > But if your from-scratch code is good then I don't know there's a
> > strong reason to use GMP's instead, since we haven't been tracking
> > GMP changes in our copies for years anyway AFAIK.
>
> I suspect other architectures might benefit from changes made in GMP to
> improve performance - but certainly this is code that has diverged
> significantly from the GMP versions over time.
Agreed. I think the long-term right thing is to be sharing the code with
GMP. But that requires both verifying that the reasons for the past
libc-local changes are satisfied by new GMP code, and establishing the
relationship with the current GMP maintainers so they understand what code
we are using and what extra constraints its use in libc puts on that
code (probably just namespace issues and maybe PLT issues).
Thanks,
Roland
^ permalink raw reply [flat|nested] 35+ messages in thread
end of thread, other threads:[~2013-10-25 22:13 UTC | newest]
Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
2013-03-01 17:59 ` Roland McGrath
2013-03-06 0:53 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc Richard Henderson
2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
2013-03-01 17:58 ` Roland McGrath
2013-03-06 1:14 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
2013-03-05 2:08 ` Joseph S. Myers
2013-03-06 15:52 ` Richard Henderson
2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
2013-03-01 17:58 ` Roland McGrath
2013-03-01 18:00 ` Roland McGrath
2013-03-06 1:18 ` Joseph S. Myers
2013-10-25 22:13 ` Roland McGrath
2013-03-06 1:11 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
2013-03-01 17:57 ` Roland McGrath
2013-03-05 1:42 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 06/14] arm: Delete LOADREGS macro Richard Henderson
2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
2013-03-01 17:57 ` Roland McGrath
2013-03-05 1:45 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 05/14] arm: Use push/pop mnemonics Richard Henderson
2013-03-01 17:36 ` [PATCH v2 07/14] arm: Commonize BX conditionals Richard Henderson
2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
2013-03-01 17:55 ` Roland McGrath
2013-03-05 2:01 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
2013-03-04 17:47 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
2013-03-01 17:55 ` Roland McGrath
2013-03-05 2:01 ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files Richard Henderson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).