public inbox for libc-ports@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2 03/14] arm: Introduce and use GET_TLS
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (2 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:57   ` Roland McGrath
  2013-03-05  1:45   ` Joseph S. Myers
  2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
                   ` (9 subsequent siblings)
  13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

Factor out the sequence needed to call kuser_get_tls, as we can't
play subtract-into-pc games in thumb mode.  Prepare for hard-tp by
pulling the save of LR into the macro.
---
	* sysdeps/arm/sysdep.h (GET_TLS): New macro.
	* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_undefweak): Use it.
	(_dl_tlsdesc_dynamic): Likewise.
	* sysdeps/unix/arm/sysdep.S (__syscall_error): Likewise.
	* sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): New macro.
        * sysdeps/unix/sysv/linux/arm/clone.S (__clone): Use it.
        * sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S (SAVE_PID): Likewise.
        * sysdeps/unix/sysv/linux/arm/nptl/vfork.S (SAVE_PID): Likewise.
	* sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
	Likewise.
	* sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S (__aeabi_read_tp):
	Add thumb2 alternative.
---
 ports/sysdeps/arm/dl-tlsdesc.S                     | 13 ++--------
 ports/sysdeps/arm/sysdep.h                         | 19 ++++++++++++++
 ports/sysdeps/unix/arm/sysdep.S                    | 12 +++------
 ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S  |  6 +++++
 ports/sysdeps/unix/sysv/linux/arm/clone.S          |  4 +--
 ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S  | 10 +-------
 .../unix/sysv/linux/arm/nptl/sysdep-cancel.h       |  2 +-
 ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S     | 10 +-------
 ports/sysdeps/unix/sysv/linux/arm/sysdep.h         | 30 ++++++++++++++++++++++
 9 files changed, 64 insertions(+), 42 deletions(-)

diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index 7b4c8df..1c3bccf 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -50,18 +50,9 @@ _dl_tlsdesc_return:
 	.fnstart
 	.align 2
 _dl_tlsdesc_undefweak:
-	@ Are we allowed a misaligned stack pointer calling read_tp?
-	.save	{lr}
-	stmdb 	sp!, {lr}
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (lr,0)
-	bl 	__aeabi_read_tp
+	GET_TLS(r1)
 	rsb 	r0, r0, #0
-	ldmia 	sp!, {lr}
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (lr)
 	BX	(lr)
-
 	cfi_endproc
 	.fnend
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
@@ -106,7 +97,7 @@ _dl_tlsdesc_dynamic:
 	cfi_rel_offset (r4,8)
 	cfi_rel_offset (lr,12)
 	ldr	r1, [r0] /* td */
-	bl	__aeabi_read_tp
+	GET_TLS(lr)
 	mov	r4, r0 /* r4 = tp */
 	ldr	r0, [r0]
 	ldr	r2, [r1, #8] /* gen_count */
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 9230131..c525d5b 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -150,6 +150,25 @@
 #  define NEGOFF_OFF1(R, OFF)		[R, $OFF]
 #  define NEGOFF_OFF2(R, OFFA, OFFB)	[R, $OFFA]
 # endif
+
+/* Helper to get the TLS base pointer.  The interface is that TMP is a
+   register that may be used to hold the LR, if necessary.  TMP may be
+   LR itself to indicate that LR need not be saved.  The base pointer
+   is returned in R0.  Only R0 and TMP are modified.
+
+   At this generic level we have no tricks to pull.  Call the ABI routine.  */
+# define GET_TLS(TMP)					\
+	push	{ r1, r2, r3, lr };			\
+	cfi_remember_state;				\
+	cfi_adjust_cfa_offset (16);			\
+	cfi_rel_offset (r1, 0);				\
+	cfi_rel_offset (r2, 4);				\
+	cfi_rel_offset (r3, 8);				\
+	cfi_rel_offset (lr, 12);			\
+	bl	__aeabi_read_tp;			\
+	pop	{ r1, r2, r3, lr };			\
+	cfi_restore_state
+
 #endif	/* __ASSEMBLER__ */
 
 /* This number is the offset from the pc at the current location.  */
diff --git a/ports/sysdeps/unix/arm/sysdep.S b/ports/sysdeps/unix/arm/sysdep.S
index d44ee48..b07cba9 100644
--- a/ports/sysdeps/unix/arm/sysdep.S
+++ b/ports/sysdeps/unix/arm/sysdep.S
@@ -37,14 +37,8 @@ __syscall_error:
 #endif
 
 #ifndef IS_IN_rtld
-	mov ip, lr
-	cfi_register (lr, ip)
-	mov r1, r0
-
-	mov r0, #0xffff0fff
-	mov lr, pc
-	sub pc, r0, #31
-
+	mov	r1, r0
+	GET_TLS(r2)
 	ldr	r2, 1f
 #ifdef __thumb__
 2:	add	r2, r2, pc
@@ -54,7 +48,7 @@ __syscall_error:
 #endif
 	str	r1, [r0, r2]
 	mvn	r0, #0
-	DO_RET(ip)
+	DO_RET(lr)
 
 1:	.word errno(gottpoff) + (. - 2b - PC_OFS)
 #elif RTLD_PRIVATE_ERRNO
diff --git a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
index c4ddbc6..ecdc322 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
@@ -41,6 +41,12 @@
 
 	.hidden __aeabi_read_tp
 ENTRY (__aeabi_read_tp)
+#ifdef __thumb2__
+	movw	r0, #0x0fe0
+	movt	r0, #0xffff
+	bx	r0
+#else
 	mov	r0, #0xffff0fff
 	sub	pc, r0, #31
+#endif
 END (__aeabi_read_tp)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/clone.S b/ports/sysdeps/unix/sysv/linux/arm/clone.S
index a5f9b4d..1bc5eab 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/clone.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/clone.S
@@ -74,9 +74,7 @@ PSEUDO_END (__clone)
 #ifdef RESET_PID
 	tst	ip, #CLONE_THREAD
 	bne	3f
-	mov	r0, #0xffff0fff
-	mov	lr, pc
-	sub	pc, r0, #31
+	GET_TLS(lr)
 	mov	r1, r0
 	tst	ip, #CLONE_VM
 	ldr	r7, =SYS_ify(getpid)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
index ff88510..c731cd7 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
@@ -19,15 +19,7 @@
 
 /* Save the PID value.  */
 #define SAVE_PID \
-	str	lr, [sp, #-4]!;		/* Save LR.  */			\
-	cfi_adjust_cfa_offset (4);					\
-	cfi_rel_offset (lr, 0);						\
-	mov	r0, #0xffff0fff;	/* Point to the high page.  */	\
-	mov	lr, pc;			/* Save our return address.  */	\
-	sub	pc, r0, #31;		/* Jump to the TLS entry.  */	\
-	ldr	lr, [sp], #4;		/* Restore LR.  */		\
-	cfi_adjust_cfa_offset (-4);					\
-	cfi_restore (lr);						\
+	GET_TLS(r2);							\
 	NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2. */ \
 	ldr	r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID.  */  \
 	rsb	r0, r3, #0;		/* Negate it.  */		     \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index 47d4c70..d5e666b 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -216,7 +216,7 @@ extern int __local_multiple_threads attribute_hidden;
 	stmfd	sp!, {r0, lr};						\
 	cfi_adjust_cfa_offset (8);					\
 	cfi_rel_offset (lr, 4);						\
-	bl	__aeabi_read_tp;					\
+	GET_TLS(lr);							\
 	NEGOFF_ADJ_BASE(r0, MULTIPLE_THREADS_OFFSET);			\
 	ldr	ip, NEGOFF_OFF1(r0, MULTIPLE_THREADS_OFFSET);		\
 	ldmfd	sp!, {r0, lr};						\
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
index c4be1e2..accecf2 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
@@ -19,15 +19,7 @@
 
 /* Save the PID value.  */
 #define SAVE_PID \
-	str	lr, [sp, #-4]!;		/* Save LR.  */			\
-	cfi_adjust_cfa_offset (4);					\
-	cfi_rel_offset (lr, 0);						\
-	mov	r0, #0xffff0fff;	/* Point to the high page.  */	\
-	mov	lr, pc;			/* Save our return address.  */	\
-	sub	pc, r0, #31;		/* Jump to the TLS entry.  */	\
-	ldr	lr, [sp], #4;		/* Restore LR.  */		\
-	cfi_adjust_cfa_offset (-4);					\
-	cfi_restore (lr);						\
+	GET_TLS(r2);							\
 	NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2.  */ \
 	ldr	r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID.  */   \
 	rsbs	r0, r3, #0;		/* Negate it.  */		      \
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index 89208a9..01d8123 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -45,6 +45,36 @@
 
 #ifdef __ASSEMBLER__
 
+/* Internal macro calling the linux kernel kuser_get_tls helper. 
+   Note that in thumb mode, a constant pool break is often out of range, so
+   we always expand the constant inline.  */
+#ifdef __thumb2__
+# define GET_TLS_BODY			\
+	movw	r0, #0x0fe0;		\
+	movt	r0, #0xffff;		\
+	blx	r0
+#else
+# define GET_TLS_BODY \
+	mov	r0, #0xffff0fff;	/* Point to the high page.  */	\
+	mov	lr, pc;			/* Save our return address.  */	\
+	sub	pc, r0, #31		/* Jump to the TLS entry.  */
+#endif
+
+/* Helper to get the TLS base pointer.  Save LR in TMP, return in R0,
+   and no other registers clobbered.  TMP may be LR itself to indicate
+   that no save is necessary.  */
+#undef GET_TLS
+#define GET_TLS(TMP)			\
+  .ifnc TMP, lr;			\
+	mov	TMP, lr;		\
+	cfi_register (lr, TMP);		\
+	GET_TLS_BODY;			\
+	mov	lr, TMP;		\
+	cfi_restore (lr);		\
+  .else;				\
+	GET_TLS_BODY;			\
+  .endif
+
 /* Linux uses a negative return value to indicate syscall errors,
    unlike most Unices, which use the condition codes' carry flag.
 
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 12/14] arm: Add optimized addmul_1
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (3 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:58   ` Roland McGrath
                     ` (2 more replies)
  2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
                   ` (8 subsequent siblings)
  13 siblings, 3 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

Written from scratch rather than copied from GMP, due to licensing
(glibc's LGPL 2.1 vs GMP's LGPL 3), but tested with the GMP testsuite.

This is 25% faster than the generic code as measured on Cortex-A15,
and the same speed as GMP on the same core.  It's probably slower
than GMP on the A8 and A9 cores though.
---
	* sysdeps/arm/addmul_1.S: New file.
---
 ports/sysdeps/arm/addmul_1.S | 66 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 ports/sysdeps/arm/addmul_1.S

diff --git a/ports/sysdeps/arm/addmul_1.S b/ports/sysdeps/arm/addmul_1.S
new file mode 100644
index 0000000..4e2f6da
--- /dev/null
+++ b/ports/sysdeps/arm/addmul_1.S
@@ -0,0 +1,66 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.syntax unified
+	.text
+
+@		cycles/limb
+@ StrongArm	   ?
+@ Cortex-A8	   ?
+@ Cortex-A9	   ?
+@ Cortex-A15	   4
+
+/* mp_limb_t mpn_addmul_1(res_ptr, src1_ptr, size, s2_limb) */
+
+ENTRY(__mpn_addmul_1)
+	push	{ r4, r5, r6, r7 }
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_rel_offset (r6, 8)
+	cfi_rel_offset (r7, 12)
+
+	ldr	r6, [r1], #4
+	ldr	r5, [r0]
+	mov	r4, #0			/* init carry in */
+	b	1f
+0:
+	ldr	r6, [r1], #4		/* load next ul */
+	adds	r7, r4, r5		/* (out, c) = cl + lpl */
+	ldr	r5, [r0, #4]		/* load next rl */
+	adc	r4, ip, #0		/* cl = hpl + c */
+	str	r7, [r0], #4
+1:
+	mov	ip, #0			/* zero-extend rl */
+	umlal	r5, ip, r6, r3		/* (hpl, lpl) = ul * vl + rl */
+	subs	r2, r2, #1
+	bne	0b
+
+	adds	r4, r4, r5		/* (out, c) = cl + llpl */
+	str	r4, [r0]
+	adc	r0, ip, #0		/* return hpl + c */
+
+	pop	{ r4, r5, r6, r7 }
+	cfi_adjust_cfa_offset (-16)
+	cfi_restore (r4)
+	cfi_restore (r5)
+	cfi_restore (r6)
+	cfi_restore (r7)
+	DO_RET(lr)
+END(__mpn_addmul_1)
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (10 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:55   ` Roland McGrath
  2013-03-05  2:01   ` Joseph S. Myers
  2013-03-01 17:36 ` [PATCH v2 06/14] arm: Delete LOADREGS macro Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
  13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

---
	* sysdeps/arm/sysdep.h (ARCH_HAS_HARD_TP): New macro.
	(GET_TLS): Use hard-tp if ARCH_HAS_HARD_TP.
	* sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S: Likewise.
	* sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): Don't override
	the default definition if ARCH_HAS_HARD_TP.
---
 ports/sysdeps/arm/sysdep.h                        | 14 +++++++++++---
 ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S |  5 ++++-
 ports/sysdeps/unix/sysv/linux/arm/sysdep.h        | 16 +++++++++-------
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index c09e680..03739a4 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -47,6 +47,9 @@
 #if __ARM_ARCH > 4
 # define ARCH_HAS_BLX
 #endif
+#if __ARM_ARCH > 6 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__)
+# define ARCH_HAS_HARD_TP
+#endif
 #if __ARM_ARCH > 6 || defined(__ARM_ARCH_6T2__)
 # define ARCH_HAS_T2
 #endif
@@ -184,10 +187,14 @@
 /* Helper to get the TLS base pointer.  The interface is that TMP is a
    register that may be used to hold the LR, if necessary.  TMP may be
    LR itself to indicate that LR need not be saved.  The base pointer
-   is returned in R0.  Only R0 and TMP are modified.
+   is returned in R0.  Only R0 and TMP are modified.  */
 
-   At this generic level we have no tricks to pull.  Call the ABI routine.  */
-# define GET_TLS(TMP)					\
+# ifdef ARCH_HAS_HARD_TP
+/* If the cpu has cp15 available, use it.  */
+#  define GET_TLS(TMP)		mrc p15, 0, r0, c13, c0, 3
+# else
+/* At this generic level we have no tricks to pull.  Call the ABI routine.  */
+#  define GET_TLS(TMP)					\
 	push	{ r1, r2, r3, lr };			\
 	cfi_remember_state;				\
 	cfi_adjust_cfa_offset (16);			\
@@ -198,6 +205,7 @@
 	bl	__aeabi_read_tp;			\
 	pop	{ r1, r2, r3, lr };			\
 	cfi_restore_state
+# endif /* ARCH_HAS_HARD_TP */
 
 #endif	/* __ASSEMBLER__ */
 
diff --git a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
index ecdc322..21e3229 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S
@@ -41,7 +41,10 @@
 
 	.hidden __aeabi_read_tp
 ENTRY (__aeabi_read_tp)
-#ifdef __thumb2__
+#ifdef ARCH_HAS_HARD_TP
+	mrc	p15, 0, r0, c13, c0, 3
+	bx	lr
+#elif defined(__thumb2__)
 	movw	r0, #0x0fe0
 	movt	r0, #0xffff
 	bx	r0
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index 39872b8..89fea7a 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -45,26 +45,27 @@
 
 #ifdef __ASSEMBLER__
 
+#ifndef ARCH_HAS_HARD_TP
 /* Internal macro calling the linux kernel kuser_get_tls helper. 
    Note that in thumb mode, a constant pool break is often out of range, so
    we always expand the constant inline.  */
-#ifdef __thumb2__
-# define GET_TLS_BODY			\
+# ifdef __thumb2__
+#  define GET_TLS_BODY			\
 	movw	r0, #0x0fe0;		\
 	movt	r0, #0xffff;		\
 	blx	r0
-#else
-# define GET_TLS_BODY \
+# else
+#  define GET_TLS_BODY \
 	mov	r0, #0xffff0fff;	/* Point to the high page.  */	\
 	mov	lr, pc;			/* Save our return address.  */	\
 	sub	pc, r0, #31		/* Jump to the TLS entry.  */
-#endif
+# endif
 
 /* Helper to get the TLS base pointer.  Save LR in TMP, return in R0,
    and no other registers clobbered.  TMP may be LR itself to indicate
    that no save is necessary.  */
-#undef GET_TLS
-#define GET_TLS(TMP)			\
+# undef GET_TLS
+# define GET_TLS(TMP)			\
   .ifnc TMP, lr;			\
 	mov	TMP, lr;		\
 	cfi_register (lr, TMP);		\
@@ -74,6 +75,7 @@
   .else;				\
 	GET_TLS_BODY;			\
   .endif
+#endif /* ARCH_HAS_HARD_TP */
 
 /* Linux uses a negative return value to indicate syscall errors,
    unlike most Unices, which use the condition codes' carry flag.
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 01/14] arm: Introduce and use LDST_PCREL
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (9 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-04 17:47   ` Joseph S. Myers
  2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

Macro-ise the few instances where we need to distinguish between
arm and thumb pc-relative memory operations.
---
        * sysdeps/arm/sysdep.h (LDST_PCREL): New macro.
        * sysdeps/unix/arm/sysdep.S (__syscall_error): Use LDST_PCREL.
        Fix up gottpoff load of errno for thumb2.
        * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
        (SINGLE_THREAD_P): Use LDST_PCREL.
        (PSEUDO_PROLOGUE): Remove.
        (PSEUDO): Don't use it.
        * sysdeps/unix/sysv/linux/arm/sysdep.h (SYSCALL_ERROR_HANDLER):
        Use LDST_PCREL.
---
 ports/sysdeps/arm/sysdep.h                         | 17 +++++++++++++++++
 ports/sysdeps/unix/arm/sysdep.S                    | 22 ++++++++++++----------
 .../unix/sysv/linux/arm/nptl/sysdep-cancel.h       | 10 ++--------
 ports/sysdeps/unix/sysv/linux/arm/sysdep.h         | 10 ++++------
 4 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 4af7429..29a78f0 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -117,6 +117,23 @@
    the caller.  */
 	.eabi_attribute 24, 1
 
+/* Load or store to/from a pc-relative EXPR into/from R, using T.  */
+# ifdef __thumb2__
+#  define LDST_PCREL(OP, R, T, EXPR) \
+	ldr	T, 98f;					\
+	.subsection 2;					\
+98:	.word	EXPR - 99f - PC_OFS;			\
+	.previous;					\
+99:	add	T, T, pc;				\
+	OP	R, [T]
+# else
+#  define LDST_PCREL(OP, R, T, EXPR) \
+	ldr	T, 98f;					\
+	.subsection 2;					\
+98:	.word	EXPR - 99f - PC_OFS;			\
+	.previous;					\
+99:	OP	R, [pc, T]
+# endif
 #endif	/* __ASSEMBLER__ */
 
 /* This number is the offset from the pc at the current location.  */
diff --git a/ports/sysdeps/unix/arm/sysdep.S b/ports/sysdeps/unix/arm/sysdep.S
index 40e4d80..d44ee48 100644
--- a/ports/sysdeps/unix/arm/sysdep.S
+++ b/ports/sysdeps/unix/arm/sysdep.S
@@ -45,20 +45,22 @@ __syscall_error:
 	mov lr, pc
 	sub pc, r0, #31
 
-	ldr r2, 1f
-2:	ldr r2, [pc, r2]
-	str r1, [r0, r2]
-	mvn r0, #0
-	RETINSTR (, ip)
+	ldr	r2, 1f
+#ifdef __thumb__
+2:	add	r2, r2, pc
+	ldr	r2, [r2]
+#else
+2:	ldr	r2, [pc, r2]
+#endif
+	str	r1, [r0, r2]
+	mvn	r0, #0
+	DO_RET(ip)
 
 1:	.word errno(gottpoff) + (. - 2b - PC_OFS)
 #elif RTLD_PRIVATE_ERRNO
-	ldr r1, 1f
-0:	str r0, [pc, r1]
-	mvn r0, $0
+	LDST_PCREL(str, r0, r1, C_SYMBOL_NAME(rtld_errno))
+	mvn	r0, #0
 	DO_RET(r14)
-
-1:	.word C_SYMBOL_NAME(rtld_errno) - 0b - PC_OFS
 #else
 #error "Unsupported non-TLS case"
 #endif
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index df85d51..8889369 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -31,7 +31,6 @@
 # undef PSEUDO
 # define PSEUDO(name, syscall_name, args)				\
 	.text;								\
-	PSEUDO_PROLOGUE;						\
   ENTRY (__##syscall_name##_nocancel);					\
 	CFI_SECTIONS;							\
 	DO_CALL (syscall_name, args);					\
@@ -203,12 +202,8 @@ extern int __local_multiple_threads attribute_hidden;
 #   define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1)
 #  else
 #   define SINGLE_THREAD_P						\
-	ldr	ip, 1b;							\
-  2:									\
-	ldr ip, [pc, ip];						\
-	teq ip, #0;
-#   define PSEUDO_PROLOGUE						\
-  1:	.word	__local_multiple_threads - 2f - PC_OFS;
+	LDST_PCREL(ldr, ip, ip, __local_multiple_threads);		\
+	teq ip, #0
 #  endif
 # else
 /*  There is no __local_multiple_threads for librt, so use the TCB.  */
@@ -217,7 +212,6 @@ extern int __local_multiple_threads attribute_hidden;
   __builtin_expect (THREAD_GETMEM (THREAD_SELF,				\
 				   header.multiple_threads) == 0, 1)
 #  else
-#   define PSEUDO_PROLOGUE
 #   define SINGLE_THREAD_P						\
 	stmfd	sp!, {r0, lr};						\
 	cfi_adjust_cfa_offset (8);					\
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index f40cb95..89208a9 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -110,12 +110,10 @@
 # if RTLD_PRIVATE_ERRNO
 #  define SYSCALL_ERROR_HANDLER					\
 __local_syscall_error:						\
-       ldr     r1, 1f;						\
-       rsb     r0, r0, #0;					\
-0:     str     r0, [pc, r1];					\
-       mvn     r0, #0;						\
-       DO_RET(lr);						\
-1:     .word C_SYMBOL_NAME(rtld_errno) - 0b - PC_OFS;
+	rsb	r0, r0, #0;					\
+	LDST_PCREL(str, r0, r1, C_SYMBOL_NAME(rtld_errno));	\
+	mvn	r0, #0;						\
+	DO_RET(lr)
 # else
 #  if defined(__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
 #   define POP_PC \
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (6 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 07/14] arm: Commonize BX conditionals Richard Henderson
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

The preceding patches have allowed for the few incompatibilities
between arm and thumb2 mode, or have marked the file as not wanting
to use thumb2 mode.
---
	* sysdeps/arm/sysdep.h [__ASSEMBLER__]: Enable thumb2 if __thumb2__.
	(PC_OFS): Respect __thumb__ if __ASSEMBLER__.
---
 ports/sysdeps/arm/sysdep.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index c525d5b..d855ceb 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -117,6 +117,16 @@
    the caller.  */
 	.eabi_attribute 24, 1
 
+/* The thumb2 encoding is reasonably complete.  Unless suppressed, use it.  */
+	.syntax unified
+# if defined(__thumb2__) && !defined(NO_THUMB)
+	.thumb
+#else
+#  undef __thumb__
+#  undef __thumb2__
+	.arm
+# endif
+
 /* Load or store to/from a pc-relative EXPR into/from R, using T.  */
 # ifdef __thumb2__
 #  define LDST_PCREL(OP, R, T, EXPR) \
@@ -172,8 +182,7 @@
 #endif	/* __ASSEMBLER__ */
 
 /* This number is the offset from the pc at the current location.  */
-/* ??? At the moment we're not turning on thumb mode in assembly.  */
-#if defined(__thumb__) && !defined(__ASSEMBLER__)
+#ifdef __thumb__
 # define PC_OFS  4
 #else
 # define PC_OFS  8
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 07/14] arm: Commonize BX conditionals
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (7 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

In addition, add a BLX macro and use it where appropriate.
---
        * sysdeps/arm/sysdep.h (BX, BXC, BLX): New macros.
        (DO_RET): Use BX.
        (RETINSTR): Use BXC.
        * sysdeps/arm/dl-tlsdesc.S (BX): Remove.
        * sysdeps/arm/dl-trampoline.S (BX): Remove.
        (_dl_runtime_profile): Use BLX.
---
 ports/sysdeps/arm/dl-tlsdesc.S    |  6 ------
 ports/sysdeps/arm/dl-trampoline.S |  9 +--------
 ports/sysdeps/arm/sysdep.h        | 29 +++++++++++++----------------
 3 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index 3a956de..aa3db80 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -20,12 +20,6 @@
 #include <tls.h>
 #include "tlsdesc.h"
 
-#ifdef __USE_BX__
-  #define BX(x)	bx x
-#else
-  #define BX(x)	mov pc, x
-#endif
-
 	.text
 	@ emit debug information with cfi
 	@ use arm-specific pseudos for unwinding itself
diff --git a/ports/sysdeps/arm/dl-trampoline.S b/ports/sysdeps/arm/dl-trampoline.S
index f2d1679..9366976 100644
--- a/ports/sysdeps/arm/dl-trampoline.S
+++ b/ports/sysdeps/arm/dl-trampoline.S
@@ -21,12 +21,6 @@
 #include <sysdep.h>
 #include <libc-symbols.h>
 
-#if defined(__USE_BX__)
-#define BX(x) bx	x
-#else
-#define BX(x) mov	pc, x
-#endif
-
 	.text
 	.globl _dl_runtime_resolve
 	.type _dl_runtime_resolve, #function
@@ -192,8 +186,7 @@ _dl_runtime_profile:
 	add	ip, r7, #72
 	ldmia	ip, {r0-r3}
 	ldr	ip, [r7, #264]
-	mov	lr, pc
-	BX(ip)
+	BLX(ip)
 	stmia	r7, {r0-r3}
 
 	@ Call pltexit.
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 5f6c3f2..84313fe 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -33,26 +33,23 @@
 
 #define PLTJMP(_x)	_x##(PLT)
 
-/* APCS-32 doesn't preserve the condition codes across function call. */
-#ifdef __APCS_32__
 #ifdef __USE_BX__
-#define RETINSTR(cond, reg)	\
-	bx##cond	reg
-#define DO_RET(_reg)		\
-	bx _reg
+# define BX(R)		bx	R
+# define BXC(C, R)	bx##C	R
+# ifdef __ARM_ARCH_4T__
+#  define BLX(R)	mov	lr, pc; bx R
+# else
+#  define BLX(R)	blx	R
+# endif
 #else
-#define RETINSTR(cond, reg)	\
-	mov##cond	pc, reg
-#define DO_RET(_reg)		\
-	mov pc, _reg
-#endif
-#else  /* APCS-26 */
-#define RETINSTR(cond, reg)	\
-	mov##cond##s	pc, reg
-#define DO_RET(_reg)		\
-	movs pc, _reg
+# define BX(R)		mov	pc, R
+# define BXC(C, R)	mov##C	pc, R
+# define BLX(R)		mov	lr, pc; mov pc, R
 #endif
 
+#define DO_RET(R)	BX(R)
+#define RETINSTR(C, R)	BXC(C, R)
+
 /* Define an entry point visible from C.  */
 #define	ENTRY(name)					\
 	.globl	C_SYMBOL_NAME(name);			\
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 13/14] arm: Add optimized submul_1
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (12 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 06/14] arm: Delete LOADREGS macro Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:58   ` Roland McGrath
  2013-03-06  1:14   ` Joseph S. Myers
  13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

Written from scratch rather than copied from GMP, due to licensing
(glibc's LGPL 2.1 vs GMP's LGPL 3), but tested with the GMP testsuite.

This is 50% faster than the generic code as measured on Cortex-A15,
and the same speed as GMP on the same core.  It's probably slower
than GMP on the A8 and A9 cores though.
---
	* sysdeps/arm/submul_1.S: New file.
---
 ports/sysdeps/arm/submul_1.S | 67 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 ports/sysdeps/arm/submul_1.S

diff --git a/ports/sysdeps/arm/submul_1.S b/ports/sysdeps/arm/submul_1.S
new file mode 100644
index 0000000..35e1348
--- /dev/null
+++ b/ports/sysdeps/arm/submul_1.S
@@ -0,0 +1,67 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.syntax unified
+	.text
+
+@		cycles/limb
+@ StrongArm	   ?
+@ Cortex-A8	   ?
+@ Cortex-A9	   ?
+@ Cortex-A15	   4
+
+/* mp_limb_t mpn_submul_1(res_ptr, src1_ptr, size, s2_limb) */
+
+ENTRY(__mpn_submul_1)
+	push	{ r4, r5, r6, r7 }
+	cfi_adjust_cfa_offset (16)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_rel_offset (r6, 8)
+	cfi_rel_offset (r7, 12)
+
+	ldr	r6, [r1], #4
+	ldr	r7, [r0]
+	mov	r4, #0			/* init carry in */
+	b	1f
+0:
+	ldr	r6, [r1], #4		/* load next ul */
+	adds	r5, r5, r4		/* (lpl, c) = lpl + cl */
+	adc	r4, ip, #0		/* cl = hpl + c */
+	subs	r5, r7, r5		/* (lpl, !c) = rl - lpl */
+	ldr	r7, [r0, #4]		/* load next rl */
+	it	cc
+	addcc	r4, r4, #1		/* cl += !c */
+	str	r5, [r0], #4
+1:
+	umull	r5, ip, r6, r3		/* (hpl, lpl) = ul * vl */
+	subs	r2, r2, #1
+	bne	0b
+
+	adds	r5, r5, r4		/* (lpl, c) = lpl + cl */
+	adc	r4, ip, #0		/* cl = hpl + c */
+	subs	r5, r7, r5		/* (lpl, !c) = rl - lpl */
+	str	r5, [r0], #4
+	ite	cc
+	addcc	r0, r4, #1		/* cl += !c */
+	movcs	r0, r4			/* return carry */
+
+	pop	{ r4, r5, r6, r7 }
+	DO_RET(lr)
+END(__mpn_submul_1)
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 00/14] ARM improvements
@ 2013-03-01 17:36 Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 05/14] arm: Use push/pop mnemonics Richard Henderson
                   ` (13 more replies)
  0 siblings, 14 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

I believe I've now addressed all of the comments to date.

Patches 4-8 have been approved, but they touch the same code
as patches 1-3, so I'm not rearranging them.

Patch 1 -- Spurious whitespace changes removed.
Patch 2 -- I never saw a review for this one?
Patch 3 -- Totally rewritten, in preparation for new patch for hard-tp.
Patch 6 -- Review mentioned deleting __APCS_32__, but this was already
	   done in patch 7, so I didn't rearrange that.
Patch 9-10 -- New.
Patch 11-12 -- Micro-optimization suggestions from Mans Rullgard applied.
Patch 14 -- Adjusted to avoid r9.

I've yet to test the string routines on big-endian.  I'll delay
re-posting those until I have done so.


r~


Richard Henderson (14):
  arm: Introduce and use LDST_PCREL
  arm: Introduce and use NEGOFF series of macros
  arm: Introduce and use GET_TLS
  arm: Enable thumb2 mode in assembly files
  arm: Use push/pop mnemonics
  arm: Delete LOADREGS macro
  arm: Commonize BX conditionals
  arm: Unless arm4t, pop return address directly into pc
  arm: Tidy architecture selection
  arm: Implement hard-tp for GET_TLS
  arm: Add optimized ffs for armv6t2
  arm: Add optimized addmul_1
  arm: Add optimized submul_1
  arm: Add optimized add_n and sub_n

 ports/sysdeps/arm/__longjmp.S                      |   2 +-
 ports/sysdeps/arm/add_n.S                          |  83 ++++++++++++
 ports/sysdeps/arm/addmul_1.S                       |  66 +++++++++
 ports/sysdeps/arm/arm-mcount.S                     |  10 +-
 ports/sysdeps/arm/armv6t2/ffs.S                    |  35 +++++
 ports/sysdeps/arm/armv6t2/ffsll.S                  |  50 +++++++
 ports/sysdeps/arm/crti.S                           |   4 +-
 ports/sysdeps/arm/crtn.S                           |   8 +-
 ports/sysdeps/arm/dl-machine.h                     |   2 +-
 ports/sysdeps/arm/dl-tlsdesc.S                     |  42 +++---
 ports/sysdeps/arm/dl-trampoline.S                  |  13 +-
 ports/sysdeps/arm/memcpy.S                         |  58 ++++----
 ports/sysdeps/arm/memmove.S                        |  58 ++++----
 ports/sysdeps/arm/start.S                          |  10 +-
 ports/sysdeps/arm/sub_n.S                          |   2 +
 ports/sysdeps/arm/submul_1.S                       |  67 ++++++++++
 ports/sysdeps/arm/sysdep.h                         | 147 +++++++++++++++++----
 ports/sysdeps/unix/arm/sysdep.S                    |  32 ++---
 .../sysdeps/unix/sysv/linux/arm/____longjmp_chk.S  |   4 +-
 ports/sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S  |   9 ++
 ports/sysdeps/unix/sysv/linux/arm/clone.S          |  13 +-
 ports/sysdeps/unix/sysv/linux/arm/mmap.S           |   8 +-
 ports/sysdeps/unix/sysv/linux/arm/mmap64.S         |   8 +-
 ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S  |  21 +--
 .../unix/sysv/linux/arm/nptl/sysdep-cancel.h       |  47 +++----
 .../unix/sysv/linux/arm/nptl/unwind-forcedunwind.c |   4 +-
 .../unix/sysv/linux/arm/nptl/unwind-resume.c       |   4 +-
 ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S     |  24 ++--
 ports/sysdeps/unix/sysv/linux/arm/syscall.S        |   4 +-
 ports/sysdeps/unix/sysv/linux/arm/sysdep.h         |  69 +++++++---
 ports/sysdeps/unix/sysv/linux/arm/vfork.S          |   2 +-
 31 files changed, 648 insertions(+), 258 deletions(-)
 create mode 100644 ports/sysdeps/arm/add_n.S
 create mode 100644 ports/sysdeps/arm/addmul_1.S
 create mode 100644 ports/sysdeps/arm/armv6t2/ffs.S
 create mode 100644 ports/sysdeps/arm/armv6t2/ffsll.S
 create mode 100644 ports/sysdeps/arm/sub_n.S
 create mode 100644 ports/sysdeps/arm/submul_1.S

-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 06/14] arm: Delete LOADREGS macro
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (11 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
  13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

There was only one user.  Its "condition" argument was used
for "ia" rather than an actual condition.  The apcs26 syntax
is almost certainly not needed, given current binutils requirements.
---
        * sysdeps/arm/__longjmp.S (__longjmp): Use ldmia insn directly.
        * sysdeps/arm/sysdep.h (LOADREGS): Remove.
---
 ports/sysdeps/arm/__longjmp.S | 2 +-
 ports/sysdeps/arm/sysdep.h    | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/ports/sysdeps/arm/__longjmp.S b/ports/sysdeps/arm/__longjmp.S
index a3a2a8a..1d5d56b 100644
--- a/ports/sysdeps/arm/__longjmp.S
+++ b/ports/sysdeps/arm/__longjmp.S
@@ -37,7 +37,7 @@ ENTRY (__longjmp)
 	cfi_undefined (r4)
 	CHECK_SP (r4)
 #endif
-	LOADREGS(ia, ip!, {v1-v6, sl, fp, sp, lr})
+	ldmia	ip!, {v1-v6, sl, fp, sp, lr}
 	cfi_restore (v1)
 	cfi_restore (v2)
 	cfi_restore (v3)
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index d74a328..5f6c3f2 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -35,8 +35,6 @@
 
 /* APCS-32 doesn't preserve the condition codes across function call. */
 #ifdef __APCS_32__
-#define LOADREGS(cond, base, reglist...)\
-	ldm##cond	base,reglist
 #ifdef __USE_BX__
 #define RETINSTR(cond, reg)	\
 	bx##cond	reg
@@ -49,8 +47,6 @@
 	mov pc, _reg
 #endif
 #else  /* APCS-26 */
-#define LOADREGS(cond, base, reglist...)\
-	ldm##cond	base,reglist^
 #define RETINSTR(cond, reg)	\
 	mov##cond##s	pc, reg
 #define DO_RET(_reg)		\
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 11/14] arm: Add optimized ffs for armv6t2
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (8 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 07/14] arm: Commonize BX conditionals Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-05  2:08   ` Joseph S. Myers
  2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

---
	* sysdeps/arm/armv6t2/ffs.S: New file.
	* sysdeps/arm/armv6t2/ffsll.S: New file.
---
 ports/sysdeps/arm/armv6t2/ffs.S   | 35 +++++++++++++++++++++++++++
 ports/sysdeps/arm/armv6t2/ffsll.S | 50 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 ports/sysdeps/arm/armv6t2/ffs.S
 create mode 100644 ports/sysdeps/arm/armv6t2/ffsll.S

diff --git a/ports/sysdeps/arm/armv6t2/ffs.S b/ports/sysdeps/arm/armv6t2/ffs.S
new file mode 100644
index 0000000..b2c88b9
--- /dev/null
+++ b/ports/sysdeps/arm/armv6t2/ffs.S
@@ -0,0 +1,35 @@
+/* ffs -- find first set bit in an int, from least significant end.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.syntax unified
+	.text
+
+ENTRY (__ffs)			@ r0 = value; returns 1-based index of lowest set bit, 0 if none
+	cmp	r0, #0			@ eq iff the argument is zero
+	rbit	r0, r0			@ reverse bit order; flags from the cmp are still live
+	itt	ne			@ next two insns run only for a nonzero argument
+	clzne	r0, r0			@ leading zeros of reversed word = index of lowest set bit
+	addne	r0, r0, #1		@ make the index 1-based
+	bx	lr			@ for a zero argument r0 is still 0 here
+END (__ffs)
+
+weak_alias (__ffs, ffs)
+weak_alias (__ffs, ffsl)
+libc_hidden_builtin_def (ffs)
diff --git a/ports/sysdeps/arm/armv6t2/ffsll.S b/ports/sysdeps/arm/armv6t2/ffsll.S
new file mode 100644
index 0000000..e49c70f
--- /dev/null
+++ b/ports/sysdeps/arm/armv6t2/ffsll.S
@@ -0,0 +1,50 @@
+/* ffsll -- find first set bit in a long long, from least significant end.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.syntax unified
+	.text
+
+ENTRY (ffsll)			@ 64-bit argument arrives in r0/r1; which is the low word depends on endianness
+	@ If low part is 0, operate on the high part.  Ensure that the
+	@ word on which we operate is in r0.  Set r2 to the bit offset
+	@ of the word being considered.  Set the flags for the word
+	@ being operated on.
+#ifdef __ARMEL__
+	cmp	r0, #0			@ little-endian: low word is r0
+	itee	ne
+	movne	r2, #0			@ low word nonzero: offset 0, keep r0
+	moveq	r2, #32			@ low word zero: offset 32 ...
+	movseq	r0, r1			@ ... switch to the high word and set flags from it
+#else
+	cmp	r1, #0			@ big-endian: low word is r1
+	ittee	ne
+	movne	r2, #0			@ low word nonzero: offset 0 ...
+	movne	r0, r1			@ ... and move it into r0
+	moveq	r2, #32			@ low word zero: offset 32 ...
+	cmpeq	r0, #0			@ ... and set flags from the high word already in r0
+#endif
+	@ Perform the ffs on r0.
+	rbit	r0, r0			@ reverse bit order; flags from above are still live
+	ittt	ne			@ skipped when the selected word is zero, leaving r0 = 0
+	clzne	r0, r0			@ index of lowest set bit within the word
+	addne	r2, r2, #1		@ fold the 1-based adjustment into the offset
+	addne	r0, r0, r2		@ result = bit index + word offset + 1
+	bx	lr
+END (ffsll)
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 14/14] arm: Add optimized add_n and sub_n
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (5 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:59   ` Roland McGrath
  2013-03-06  0:53   ` Joseph S. Myers
  2013-03-01 17:36 ` [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files Richard Henderson
                   ` (6 subsequent siblings)
  13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
GPL 3, but tested with the GMP testsuite.

This is 250% faster than the generic code as measured on Cortex-A15,
and the same speed as GMP on the same core, and probably everywhere.
---
	* sysdeps/arm/add_n.S: New file.
	* sysdeps/arm/sub_n.S: New file.
---
 ports/sysdeps/arm/add_n.S | 83 +++++++++++++++++++++++++++++++++++++++++++++++
 ports/sysdeps/arm/sub_n.S |  2 ++
 2 files changed, 85 insertions(+)
 create mode 100644 ports/sysdeps/arm/add_n.S
 create mode 100644 ports/sysdeps/arm/sub_n.S

diff --git a/ports/sysdeps/arm/add_n.S b/ports/sysdeps/arm/add_n.S
new file mode 100644
index 0000000..af69733
--- /dev/null
+++ b/ports/sysdeps/arm/add_n.S
@@ -0,0 +1,83 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.syntax unified
+	.text
+
+#ifdef USE_AS_SUB_N
+# define INITC	cmp r0, r0		/* r0 - r0 sets C: no borrow pending */
+# define OPC	sbcs			/* subtract with carry, updating flags */
+# define RETC	sbc r0, r0, r0; neg r0, r0	/* r0 = 1 if C is clear (borrow), else 0 */
+# define FUNC	__mpn_sub_n
+#else
+# define INITC	cmn r0, #0		/* r0 + 0 clears C: no carry pending */
+# define OPC	adcs			/* add with carry, updating flags */
+# define RETC	mov r0, #0; adc r0, r0, r0	/* r0 = C */
+# define FUNC	__mpn_add_n
+#endif
+
+/* mp_limb_t mpn_add_n(res_ptr, src1_ptr, src2_ptr, size) */
+
+ENTRY (FUNC)			/* r0 = res_ptr, r1 = src1_ptr, r2 = src2_ptr, r3 = size */
+	push	{ r4, r5, r6, r7, r8, r10, lr }	/* lr is saved because it is reused as the end pointer */
+	cfi_adjust_cfa_offset (28)
+	cfi_rel_offset (r4, 0)
+	cfi_rel_offset (r5, 4)
+	cfi_rel_offset (r6, 8)
+	cfi_rel_offset (r7, 12)
+	cfi_rel_offset (r8, 16)
+	cfi_rel_offset (r10, 20)
+	cfi_rel_offset (lr, 24)
+
+	INITC				/* initialize carry flag */
+	tst	r3, #1			/* count & 1 == 1? */
+	add	lr, r1, r3, lsl #2	/* compute end src1 */
+	beq	1f			/* even count: skip the single-limb step */
+
+	ldr	r4, [r1], #4		/* do one to make count even */
+	ldr	r5, [r2], #4		/* matching limb from src2 */
+	OPC	r4, r4, r5		/* adcs or sbcs, per the OPC macro */
+	teq	r1, lr			/* end of count? (preserve carry) */
+	str	r4, [r0], #4		/* *res_ptr++ = result limb */
+	beq	9f			/* src1 exhausted: that was the only limb */
+1:
+	tst	r3, #2			/* count & 2 == 2?  */
+	beq	2f			/* remaining count is already 0 mod 4 */
+	ldm	r1!, { r4, r5 }		/* do two to make count 0 mod 4 */
+	ldm	r2!, { r6, r7 }		/* matching two limbs from src2 */
+	OPC	r4, r4, r6		/* carry chains from the previous OPC */
+	OPC	r5, r5, r7
+	teq	r1, lr			/* end of count? */
+	stm	r0!, { r4, r5 }		/* store both result limbs */
+	beq	9f			/* nothing left for the four-limb loop */
+2:
+	ldm	r1!, { r3, r5, r7, r10 }	/* do four each loop; the count in r3 is no longer read, so r3 is reused */
+	ldm	r2!, { r4, r6, r8, ip }	/* matching four limbs from src2 */
+	OPC	r3, r3, r4		/* carry chains through all four OPCs */
+	OPC	r5, r5, r6
+	OPC	r7, r7, r8
+	OPC	r10, r10, ip
+	teq	r1, lr			/* end of count? (preserve carry) */
+	stm	r0!, { r3, r5, r7, r10 }	/* store four result limbs */
+	bne	2b			/* loop until src1 reaches the end pointer */
+
+9:
+	RETC				/* copy carry out */
+	pop	{ r4, r5, r6, r7, r8, r10, pc }	/* return via saved lr; NOTE(review): no ARMv4T-interworking variant here */
+END (FUNC)
diff --git a/ports/sysdeps/arm/sub_n.S b/ports/sysdeps/arm/sub_n.S
new file mode 100644
index 0000000..8eafa41
--- /dev/null
+++ b/ports/sysdeps/arm/sub_n.S
@@ -0,0 +1,2 @@
+#define USE_AS_SUB_N
+#include "add_n.S"
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 05/14] arm: Use push/pop mnemonics Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

Unless we're trying old interworking, there's no point restoring to
LR first.  Everything from armv5 on handles pop as an interworking jump.
---
        * sysdeps/arm/arm-mcount.S (_mcount): Use pop into pc unless
        __ARM_ARCH_4T__ and __THUMB_INTERWORK__.
        * sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_dynamic): Likewise.
---
 ports/sysdeps/arm/arm-mcount.S | 6 +++---
 ports/sysdeps/arm/dl-tlsdesc.S | 9 +++++++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/ports/sysdeps/arm/arm-mcount.S b/ports/sysdeps/arm/arm-mcount.S
index b6e5ec7..8ad0779 100644
--- a/ports/sysdeps/arm/arm-mcount.S
+++ b/ports/sysdeps/arm/arm-mcount.S
@@ -82,9 +82,7 @@ ENTRY(_mcount)
 	ldrne r0, [r0, #-4]
 	movsne r1, lr
 	blne __mcount_internal
-#ifdef __thumb2__
-	pop	{r0, r1, r2, r3, fp, pc}
-#else
+#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
 	pop	{r0, r1, r2, r3, fp, lr}
 	cfi_adjust_cfa_offset (-24)
 	cfi_restore (r0)
@@ -94,6 +92,8 @@ ENTRY(_mcount)
 	cfi_restore (fp)
 	cfi_restore (lr)
 	bx lr
+#else
+	pop	{r0, r1, r2, r3, fp, pc}
 #endif
 END(_mcount)
 
diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index aa3db80..4635415 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -109,13 +109,18 @@ _dl_tlsdesc_dynamic:
 1:	mov	r0, r1
 	bl	__tls_get_addr
 	rsb	r0, r4, r0
-2:	pop	{r2,r3,r4, lr}
+2:
+#if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
+	pop	{r2,r3,r4, lr}
 	cfi_adjust_cfa_offset (-16)
 	cfi_restore (lr)
 	cfi_restore (r4)
 	cfi_restore (r3)
 	cfi_restore (r2)
-	BX      (lr)
+	bx	lr
+#else
+	pop	{r2,r3,r4, pc}
+#endif
 	.fnend
 	cfi_endproc
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 09/14] arm: Tidy architecture selection
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
                   ` (4 preceding siblings ...)
  2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:55   ` Roland McGrath
  2013-03-05  2:01   ` Joseph S. Myers
  2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
                   ` (7 subsequent siblings)
  13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

---
	* sysdeps/arm/sysdep.h (__ARM_ARCH): New macro.
	(ARCH_HAS_BX, ARCH_HAS_BLX, ARCH_HAS_T2): New macros.
	(BX): Select on ARCH_HAS_BX instead of __USE_BX__.
	(BLX): Select on ARCH_HAS_BLX instead of __ARM_ARCH_4T__.
	* sysdeps/arm/dl-machine.h (BX): Select on ARCH_HAS_BX
	instead of __USE_BX__.
---
 ports/sysdeps/arm/dl-machine.h |  2 +-
 ports/sysdeps/arm/sysdep.h     | 41 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/ports/sysdeps/arm/dl-machine.h b/ports/sysdeps/arm/dl-machine.h
index 30ad46c..5a424f8 100644
--- a/ports/sysdeps/arm/dl-machine.h
+++ b/ports/sysdeps/arm/dl-machine.h
@@ -136,7 +136,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
-#if defined(__USE_BX__)
+#if defined(ARCH_HAS_BX)
 #define BX(x) "bx\t" #x
 #else
 #define BX(x) "mov\tpc, " #x
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 84313fe..c09e680 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -19,9 +19,36 @@
 #include <sysdeps/generic/sysdep.h>
 #include <features.h>
 
-#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
-     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__))
-# define __USE_BX__
+/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
+#ifndef __ARM_ARCH
+# ifdef __ARM_ARCH_2__
+#  define __ARM_ARCH 2
+# elif defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__)
+#  define __ARM_ARCH 3
+# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
+#  define __ARM_ARCH 4
+# elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
+       || defined(__ARM_ARCH_5TEJ__)
+#  define __ARM_ARCH 5
+# elif defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6ZK__) \
+       || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
+#  define __ARM_ARCH 6
+# elif defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \
+       || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#  define __ARM_ARCH 7
+# else	/* none of the per-variant macros tested above is defined */
+#  error unknown arm architecture
+# endif
+#endif	/* !__ARM_ARCH */
+
+#if __ARM_ARCH > 4 || defined(__ARM_ARCH_4T__)	/* v4T, or anything above v4 */
+# define ARCH_HAS_BX
+#endif
+#if __ARM_ARCH > 4				/* anything above v4 */
+# define ARCH_HAS_BLX
+#endif
+#if __ARM_ARCH > 6 || defined(__ARM_ARCH_6T2__)	/* v6T2, or anything above v6 */
+# define ARCH_HAS_T2
 #endif
 
 #ifdef	__ASSEMBLER__
@@ -33,13 +60,13 @@
 
 #define PLTJMP(_x)	_x##(PLT)
 
-#ifdef __USE_BX__
+#ifdef ARCH_HAS_BX
 # define BX(R)		bx	R
 # define BXC(C, R)	bx##C	R
-# ifdef __ARM_ARCH_4T__
-#  define BLX(R)	mov	lr, pc; bx R
-# else
+# ifdef ARCH_HAS_BLX
 #  define BLX(R)	blx	R
+# else
+#  define BLX(R)	mov	lr, pc; bx R
 # endif
 #else
 # define BX(R)		mov	pc, R
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 05/14] arm: Use push/pop mnemonics
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

For arm this makes no difference--the result is bit-for-bit identical;
for thumb this results in smaller encodings.  Perhaps it ought not and
this is in fact an assembler bug, but I also think it's clearer.
---
        * sysdeps/arm/arm-mcount.S (_mcount): Use push/pop mnemonics.
        * sysdeps/arm/crti.S, sysdeps/arm/crtn.S: Likewise.
        * sysdeps/arm/dl-tlsdesc.S: Likewise.
        * sysdeps/arm/dl-trampoline.S: Likewise.
        * sysdeps/arm/start.S: Likewise.
        * sysdeps/arm/memcpy.S (PULL): Rename macro from pull.
        (PUSH): Rename macro from push.
        (memcpy): Use push/pop mnemonics.
        * sysdeps/arm/memmove.S: Similarly.
        * sysdeps/arm/sysdep.h (CALL_MCOUNT): Use push/pop mnemonics.
        * sysdeps/unix/sysv/linux/arm/____longjmp_chk.S: Likewise.
        * sysdeps/unix/sysv/linux/arm/clone.S: Likewise.
        * sysdeps/unix/sysv/linux/arm/mmap.S: Likewise.
        * sysdeps/unix/sysv/linux/arm/mmap64.S: Likewise.
        * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h: Likewise.
        * sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c: Likewise.
        * sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c: Likewise.
        * sysdeps/unix/sysv/linux/arm/syscall.S: Likewise.
        * sysdeps/unix/sysv/linux/arm/sysdep.h: Likewise.
        * sysdeps/unix/sysv/linux/arm/vfork.S: Likewise.
---
 ports/sysdeps/arm/arm-mcount.S                     |  6 +--
 ports/sysdeps/arm/crti.S                           |  4 +-
 ports/sysdeps/arm/crtn.S                           |  8 +--
 ports/sysdeps/arm/dl-tlsdesc.S                     | 16 +++---
 ports/sysdeps/arm/dl-trampoline.S                  |  4 +-
 ports/sysdeps/arm/memcpy.S                         | 58 +++++++++++-----------
 ports/sysdeps/arm/memmove.S                        | 58 +++++++++++-----------
 ports/sysdeps/arm/start.S                          | 10 ++--
 ports/sysdeps/arm/sysdep.h                         |  6 +--
 .../sysdeps/unix/sysv/linux/arm/____longjmp_chk.S  |  4 +-
 ports/sysdeps/unix/sysv/linux/arm/clone.S          |  4 +-
 ports/sysdeps/unix/sysv/linux/arm/mmap.S           |  8 +--
 ports/sysdeps/unix/sysv/linux/arm/mmap64.S         |  8 +--
 .../unix/sysv/linux/arm/nptl/sysdep-cancel.h       | 32 ++++++------
 .../unix/sysv/linux/arm/nptl/unwind-forcedunwind.c |  4 +-
 .../unix/sysv/linux/arm/nptl/unwind-resume.c       |  4 +-
 ports/sysdeps/unix/sysv/linux/arm/syscall.S        |  4 +-
 ports/sysdeps/unix/sysv/linux/arm/sysdep.h         | 27 +++++-----
 ports/sysdeps/unix/sysv/linux/arm/vfork.S          |  2 +-
 19 files changed, 133 insertions(+), 134 deletions(-)

diff --git a/ports/sysdeps/arm/arm-mcount.S b/ports/sysdeps/arm/arm-mcount.S
index 679d042..b6e5ec7 100644
--- a/ports/sysdeps/arm/arm-mcount.S
+++ b/ports/sysdeps/arm/arm-mcount.S
@@ -69,7 +69,7 @@ END(__gnu_mcount_nc)
    code be compiled with APCS frame pointers.  */
 
 ENTRY(_mcount)
-	stmdb sp!, {r0, r1, r2, r3, fp, lr}
+	push	{r0, r1, r2, r3, fp, lr}
 	cfi_adjust_cfa_offset (24)
 	cfi_rel_offset (r0, 0)
 	cfi_rel_offset (r1, 4)
@@ -83,9 +83,9 @@ ENTRY(_mcount)
 	movsne r1, lr
 	blne __mcount_internal
 #ifdef __thumb2__
-	ldmia sp!, {r0, r1, r2, r3, fp, pc}
+	pop	{r0, r1, r2, r3, fp, pc}
 #else
-	ldmia sp!, {r0, r1, r2, r3, fp, lr}
+	pop	{r0, r1, r2, r3, fp, lr}
 	cfi_adjust_cfa_offset (-24)
 	cfi_restore (r0)
 	cfi_restore (r1)
diff --git a/ports/sysdeps/arm/crti.S b/ports/sysdeps/arm/crti.S
index 1d55ae2..be20a11 100644
--- a/ports/sysdeps/arm/crti.S
+++ b/ports/sysdeps/arm/crti.S
@@ -80,7 +80,7 @@ call_weak_fn:
 	.globl _init
 	.type _init, %function
 _init:
-	stmfd sp!, {r3, lr}
+	push	{r3, lr}
 #if PREINIT_FUNCTION_WEAK
 	bl call_weak_fn
 #else
@@ -92,4 +92,4 @@ _init:
 	.globl _fini
 	.type _fini, %function
 _fini:
-	stmfd sp!, {r3, lr}
+	push	{r3, lr}
diff --git a/ports/sysdeps/arm/crtn.S b/ports/sysdeps/arm/crtn.S
index a01eb01..ae7546c 100644
--- a/ports/sysdeps/arm/crtn.S
+++ b/ports/sysdeps/arm/crtn.S
@@ -42,16 +42,16 @@
 
 	.section .init,"ax",%progbits
 #ifdef __ARM_ARCH_4T__
-	ldmfd sp!, {r3, lr}
+	pop {r3, lr}
 	bx lr
 #else
-	ldmfd sp!, {r3, pc}
+	pop {r3, pc}
 #endif
 
 	.section .fini,"ax",%progbits
 #ifdef __ARM_ARCH_4T__
-	ldmfd sp!, {r3, lr}
+	pop {r3, lr}
 	bx lr
 #else
-	ldmfd sp!, {r3, pc}
+	pop {r3, pc}
 #endif
diff --git a/ports/sysdeps/arm/dl-tlsdesc.S b/ports/sysdeps/arm/dl-tlsdesc.S
index 1c3bccf..3a956de 100644
--- a/ports/sysdeps/arm/dl-tlsdesc.S
+++ b/ports/sysdeps/arm/dl-tlsdesc.S
@@ -90,7 +90,7 @@ _dl_tlsdesc_dynamic:
 	/* Our calling convention is to clobber r0, r1 and the processor
 	   flags.  All others that are modified must be saved */
 	.save	{r2,r3,r4,lr}
-	stmdb   sp!, {r2,r3,r4,lr}
+	push	{r2,r3,r4,lr}
 	cfi_adjust_cfa_offset (16)
 	cfi_rel_offset (r2,0)
 	cfi_rel_offset (r3,4)
@@ -115,7 +115,7 @@ _dl_tlsdesc_dynamic:
 1:	mov	r0, r1
 	bl	__tls_get_addr
 	rsb	r0, r4, r0
-2:	ldmia	sp!, {r2,r3,r4, lr}
+2:	pop	{r2,r3,r4, lr}
 	cfi_adjust_cfa_offset (-16)
 	cfi_restore (lr)
 	cfi_restore (r4)
@@ -146,7 +146,7 @@ _dl_tlsdesc_lazy_resolver:
 	cfi_adjust_cfa_offset (4)
 	cfi_rel_offset (r2, 0)
 	.save	{r0,r1,r3,ip,lr}
-	stmdb	sp!, {r0, r1, r3, ip, lr}
+	push	{r0, r1, r3, ip, lr}
 	cfi_adjust_cfa_offset (20)
 	cfi_rel_offset (r0, 0)
 	cfi_rel_offset (r1, 4)
@@ -154,14 +154,14 @@ _dl_tlsdesc_lazy_resolver:
 	cfi_rel_offset (ip, 12)
 	cfi_rel_offset (lr, 16)
 	bl	_dl_tlsdesc_lazy_resolver_fixup
-	ldmia	sp!, {r0, r1, r3, ip, lr}
+	pop	{r0, r1, r3, ip, lr}
 	cfi_adjust_cfa_offset (-20)
 	cfi_restore (lr)
 	cfi_restore (ip)
 	cfi_restore (r3)
 	cfi_restore (r1)
 	cfi_restore (r0)
-	ldmia	sp!, {r2}
+	pop	{r2}
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (r2)
 	ldr	r1, [r0, #4]
@@ -184,7 +184,7 @@ _dl_tlsdesc_resolve_hold:
 	cfi_adjust_cfa_offset (4)
 	cfi_rel_offset (r2, 0)
 	.save	{r0,r1,r3,ip,lr}
-	stmdb   sp!, {r0, r1, r3, ip, lr}
+	push	{r0, r1, r3, ip, lr}
 	cfi_adjust_cfa_offset (20)
 	cfi_rel_offset (r0, 0)
 	cfi_rel_offset (r1, 4)
@@ -193,14 +193,14 @@ _dl_tlsdesc_resolve_hold:
 	cfi_rel_offset (lr, 16)
 	adr	r2, _dl_tlsdesc_resolve_hold
 	bl	_dl_tlsdesc_resolve_hold_fixup
-	ldmia   sp!, {r0, r1, r3, ip, lr}
+	pop	{r0, r1, r3, ip, lr}
 	cfi_adjust_cfa_offset (-20)
 	cfi_restore (lr)
 	cfi_restore (ip)
 	cfi_restore (r3)
 	cfi_restore (r1)
 	cfi_restore (r0)
-	ldmia   sp!, {r2}
+	pop	{r2}
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (r2)
 	ldr     r1, [r0, #4]
diff --git a/ports/sysdeps/arm/dl-trampoline.S b/ports/sysdeps/arm/dl-trampoline.S
index 561d8ae..f2d1679 100644
--- a/ports/sysdeps/arm/dl-trampoline.S
+++ b/ports/sysdeps/arm/dl-trampoline.S
@@ -43,7 +43,7 @@ _dl_runtime_resolve:
 	@	lr points to &GOT[2]
 
 	@ Save arguments.  We save r4 to realign the stack.
-	stmdb	sp!,{r0-r4}
+	push	{r0-r4}
 	cfi_adjust_cfa_offset (20)
 	cfi_rel_offset (r0, 0)
 	cfi_rel_offset (r1, 4)
@@ -67,7 +67,7 @@ _dl_runtime_resolve:
 
 	@ get arguments and return address back.  We restore r4
 	@ only to realign the stack.
-	ldmia	sp!, {r0-r4,lr}
+	pop	{r0-r4,lr}
 	cfi_adjust_cfa_offset (-24)
 
 	@ jump to the newly found address
diff --git a/ports/sysdeps/arm/memcpy.S b/ports/sysdeps/arm/memcpy.S
index 98b9b47..98981ef 100644
--- a/ports/sysdeps/arm/memcpy.S
+++ b/ports/sysdeps/arm/memcpy.S
@@ -45,11 +45,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define PULL            lsr
+#define PUSH            lsl
 #else
-#define pull            lsl
-#define push            lsr
+#define PULL            lsl
+#define PUSH            lsr
 #endif
 
 		.text
@@ -58,7 +58,7 @@
 
 ENTRY(memcpy)
 
-		stmfd	sp!, {r0, r4, lr}
+		push	{r0, r4, lr}
 		cfi_adjust_cfa_offset (12)
 		cfi_rel_offset (r4, 4)
 		cfi_rel_offset (lr, 8)
@@ -74,7 +74,7 @@ ENTRY(memcpy)
 		bne	10f
 
 1:		subs	r2, r2, #(28)
-		stmfd	sp!, {r5 - r8}
+		push	{r5 - r8}
 		cfi_adjust_cfa_offset (16)
 		cfi_rel_offset (r5, 0)
 		cfi_rel_offset (r6, 4)
@@ -131,7 +131,7 @@ ENTRY(memcpy)
 
 	CALGN(	bcs	2b			)
 
-7:		ldmfd	sp!, {r5 - r8}
+7:		pop	{r5 - r8}
 		cfi_adjust_cfa_offset (-16)
 		cfi_restore (r5)
 		cfi_restore (r6)
@@ -147,13 +147,13 @@ ENTRY(memcpy)
 		strcsb	ip, [r0]
 
 #if defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
-		ldmfd	sp!, {r0, r4, lr}
+		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
 		cfi_restore (lr)
 		bx      lr
 #else
-		ldmfd	sp!, {r0, r4, pc}
+		pop	{r0, r4, pc}
 #endif
 
 		cfi_restore_state
@@ -189,7 +189,7 @@ ENTRY(memcpy)
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
 
-11:		stmfd	sp!, {r5 - r9}
+11:		push	{r5 - r9}
 		cfi_adjust_cfa_offset (20)
 		cfi_rel_offset (r5, 0)
 		cfi_rel_offset (r6, 4)
@@ -206,30 +206,30 @@ ENTRY(memcpy)
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldmia	r1!, {r4, r5, r6, r7}
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, PULL #\pull
 		subs	r2, r2, #32
 		ldmia	r1!, {r8, r9, ip, lr}
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, PUSH #\push
+		mov	r4, r4, PULL #\pull
+		orr	r4, r4, r5, PUSH #\push
+		mov	r5, r5, PULL #\pull
+		orr	r5, r5, r6, PUSH #\push
+		mov	r6, r6, PULL #\pull
+		orr	r6, r6, r7, PUSH #\push
+		mov	r7, r7, PULL #\pull
+		orr	r7, r7, r8, PUSH #\push
+		mov	r8, r8, PULL #\pull
+		orr	r8, r8, r9, PUSH #\push
+		mov	r9, r9, PULL #\pull
+		orr	r9, r9, ip, PUSH #\push
+		mov	ip, ip, PULL #\pull
+		orr	ip, ip, lr, PUSH #\push
 		stmia	r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
 		bge	12b
 	PLD(	cmn	r2, #96			)
 	PLD(	bge	13b			)
 
-		ldmfd	sp!, {r5 - r9}
+		pop	{r5 - r9}
 		cfi_adjust_cfa_offset (-20)
 		cfi_restore (r5)
 		cfi_restore (r6)
@@ -240,10 +240,10 @@ ENTRY(memcpy)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, PULL #\pull
 		ldr	lr, [r1], #4
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, PUSH #\push
 		str	r3, [r0], #4
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/ports/sysdeps/arm/memmove.S b/ports/sysdeps/arm/memmove.S
index 059ca7a..d9fa0e3 100644
--- a/ports/sysdeps/arm/memmove.S
+++ b/ports/sysdeps/arm/memmove.S
@@ -45,11 +45,11 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define PULL            lsr
+#define PUSH            lsl
 #else
-#define pull            lsl
-#define push            lsr
+#define PULL            lsl
+#define PUSH            lsr
 #endif
 
 		.text
@@ -73,7 +73,7 @@ ENTRY(memmove)
 		bls	HIDDEN_JUMPTARGET(memcpy)
 #endif
 
-		stmfd	sp!, {r0, r4, lr}
+		push	{r0, r4, lr}
 		cfi_adjust_cfa_offset (12)
 		cfi_rel_offset (r4, 4)
 		cfi_rel_offset (lr, 8)
@@ -91,7 +91,7 @@ ENTRY(memmove)
 		bne	10f
 
 1:		subs	r2, r2, #(28)
-		stmfd	sp!, {r5 - r8}
+		push	{r5 - r8}
 		cfi_adjust_cfa_offset (16)
 		cfi_rel_offset (r5, 0)
 		cfi_rel_offset (r6, 4)
@@ -147,7 +147,7 @@ ENTRY(memmove)
 
 	CALGN(	bcs	2b			)
 
-7:		ldmfd	sp!, {r5 - r8}
+7:		pop	{r5 - r8}
 		cfi_adjust_cfa_offset (-16)
 		cfi_restore (r5)
 		cfi_restore (r6)
@@ -163,13 +163,13 @@ ENTRY(memmove)
 		strcsb	ip, [r0, #-1]
 
 #if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)
-		ldmfd	sp!, {r0, r4, lr}
+		pop	{r0, r4, lr}
 		cfi_adjust_cfa_offset (-12)
 		cfi_restore (r4)
 		cfi_restore (lr)
 		bx      lr
 #else
-		ldmfd	sp!, {r0, r4, pc}
+		pop	{r0, r4, pc}
 #endif
 
 		cfi_restore_state
@@ -204,7 +204,7 @@ ENTRY(memmove)
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
 
-11:		stmfd	sp!, {r5 - r9}
+11:		push	{r5 - r9}
 		cfi_adjust_cfa_offset (20)
 		cfi_rel_offset (r5, 0)
 		cfi_rel_offset (r6, 4)
@@ -221,30 +221,30 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+		mov     lr, r3, PUSH #\push
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
+		orr     lr, lr, ip, PULL #\pull
+		mov     ip, ip, PUSH #\push
+		orr     ip, ip, r9, PULL #\pull
+		mov     r9, r9, PUSH #\push
+		orr     r9, r9, r8, PULL #\pull
+		mov     r8, r8, PUSH #\push
+		orr     r8, r8, r7, PULL #\pull
+		mov     r7, r7, PUSH #\push
+		orr     r7, r7, r6, PULL #\pull
+		mov     r6, r6, PUSH #\push
+		orr     r6, r6, r5, PULL #\pull
+		mov     r5, r5, PUSH #\push
+		orr     r5, r5, r4, PULL #\pull
+		mov     r4, r4, PUSH #\push
+		orr     r4, r4, r3, PULL #\pull
 		stmdb   r0!, {r4 - r9, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
 	PLD(	bge	13b			)
 
-		ldmfd	sp!, {r5 - r9}
+		pop	{r5 - r9}
 		cfi_adjust_cfa_offset (-20)
 		cfi_restore (r5)
 		cfi_restore (r6)
@@ -255,10 +255,10 @@ ENTRY(memmove)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, PUSH #\push
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, PULL #\pull
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/ports/sysdeps/arm/start.S b/ports/sysdeps/arm/start.S
index a1d15b8..0a57b0b 100644
--- a/ports/sysdeps/arm/start.S
+++ b/ports/sysdeps/arm/start.S
@@ -80,14 +80,14 @@ _start:
 	mov lr, #0
 
 	/* Pop argc off the stack and save a pointer to argv */
-	ldr a2, [sp], #4
+	pop { a2 }
 	mov a3, sp
 
 	/* Push stack limit */
-	str a3, [sp, #-4]!
+	push { a3 }
 
 	/* Push rtld_fini */
-	str a1, [sp, #-4]!
+	push { a1 }
 
 #ifdef SHARED
 	ldr sl, .L_GOT
@@ -97,7 +97,7 @@ _start:
 	ldr ip, .L_GOT+4	/* __libc_csu_fini */
 	ldr ip, [sl, ip]
 
-	str ip, [sp, #-4]!	/* Push __libc_csu_fini */
+	push { ip }		/* Push __libc_csu_fini */
 
 	ldr a4, .L_GOT+8	/* __libc_csu_init */
 	ldr a4, [sl, a4]
@@ -113,7 +113,7 @@ _start:
 	ldr ip, =__libc_csu_fini
 
 	/* Push __libc_csu_fini */
-	str ip, [sp, #-4]!
+	push { ip }
 
 	/* Set up the other arguments in registers */
 	ldr a1, =main
diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index d855ceb..d74a328 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -80,7 +80,7 @@
 /* Call __gnu_mcount_nc if GCC >= 4.4.  */
 #if __GNUC_PREREQ(4,4)
 #define CALL_MCOUNT					\
-	str	lr,[sp, #-4]!;				\
+	push	{lr};					\
 	cfi_adjust_cfa_offset (4);			\
 	cfi_rel_offset (lr, 0);				\
 	bl	PLTJMP(mcount);				\
@@ -88,11 +88,11 @@
 	cfi_restore (lr)
 #else /* else call _mcount */
 #define CALL_MCOUNT					\
-	str	lr,[sp, #-4]!;				\
+	push	{lr};					\
 	cfi_adjust_cfa_offset (4);			\
 	cfi_rel_offset (lr, 0);				\
 	bl	PLTJMP(mcount);				\
-	ldr	lr, [sp], #4;				\
+	pop	{lr};					\
 	cfi_adjust_cfa_offset (-4);			\
 	cfi_restore (lr)
 #endif
diff --git a/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S b/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S
index 29edec6..6ee7a1a 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/____longjmp_chk.S
@@ -53,7 +53,7 @@ longjmp_msg:
 	cfi_remember_state;			\
 	cmp	sp, reg;			\
 	bls	.Lok;				\
-	str	r7, [sp, #-4]!;			\
+	push	{ r7 };				\
 	cfi_adjust_cfa_offset (4);		\
 	cfi_rel_offset (r7, 0);			\
 	mov	r5, r0;				\
@@ -79,7 +79,7 @@ longjmp_msg:
 .Lfail:						\
 	add	sp, sp, #12;			\
 	cfi_adjust_cfa_offset (-12);		\
-	ldr	r7, [sp], #4;			\
+	pop	{ r7 };				\
 	cfi_adjust_cfa_offset (-4);		\
 	cfi_restore (r7);			\
 	CALL_FAIL				\
diff --git a/ports/sysdeps/unix/sysv/linux/arm/clone.S b/ports/sysdeps/unix/sysv/linux/arm/clone.S
index 1bc5eab..3edebd2 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/clone.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/clone.S
@@ -49,7 +49,7 @@ ENTRY(__clone)
 	mov	ip, r2
 #endif
 	@ new sp is already in r1
-	stmfd	sp!, {r4, r7}
+	push	{r4, r7}
 	cfi_adjust_cfa_offset (8)
 	cfi_rel_offset (r4, 0)
 	cfi_rel_offset (r7, 4)
@@ -61,7 +61,7 @@ ENTRY(__clone)
 	cfi_endproc
 	cmp	r0, #0
 	beq	1f
-	ldmfd	sp!, {r4, r7}
+	pop	{r4, r7}
 	blt	PLTJMP(C_SYMBOL_NAME(__syscall_error))
 	RETINSTR(, lr)
 
diff --git a/ports/sysdeps/unix/sysv/linux/arm/mmap.S b/ports/sysdeps/unix/sysv/linux/arm/mmap.S
index 68560b0..06b737e 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/mmap.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/mmap.S
@@ -23,11 +23,11 @@
 
 ENTRY (__mmap)
 	/* shuffle args */
-	str	r5, [sp, #-4]!
+	push	{ r5 }
 	cfi_adjust_cfa_offset (4)
 	cfi_rel_offset (r5, 0)
 	ldr	r5, [sp, #8]
-	str	r4, [sp, #-4]!
+	push	{ r4 }
 	cfi_adjust_cfa_offset (4)
 	cfi_rel_offset (r4, 0)
 	cfi_remember_state
@@ -43,10 +43,10 @@ ENTRY (__mmap)
 
 	/* restore registers */
 2:
-	ldr	r4, [sp], #4
+	pop	{ r4 }
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (r4)
-	ldr	r5, [sp], #4
+	pop	{ r5 }
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (r5)
 
diff --git a/ports/sysdeps/unix/sysv/linux/arm/mmap64.S b/ports/sysdeps/unix/sysv/linux/arm/mmap64.S
index dcbab3a..d039129 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/mmap64.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/mmap64.S
@@ -34,11 +34,11 @@
 	.text
 ENTRY (__mmap64)
 	ldr	ip, [sp, $LOW_OFFSET]
-	str	r5, [sp, #-4]!
+	push	{ r5 }
 	cfi_adjust_cfa_offset (4)
 	cfi_rel_offset (r5, 0)
 	ldr	r5, [sp, $HIGH_OFFSET]
-	str	r4, [sp, #-4]!
+	push	{ r4 }
 	cfi_adjust_cfa_offset (4)
 	cfi_rel_offset (r4, 0)
 	cfi_remember_state
@@ -51,7 +51,7 @@ ENTRY (__mmap64)
 	orr	r5, ip, r5, lsl $20	@ compose page offset
 	DO_CALL (mmap2, 0)
 	cmn	r0, $4096
-	ldmfd	sp!, {r4, r5}
+	pop	{r4, r5}
 	cfi_adjust_cfa_offset (-8)
 	cfi_restore (r4)
 	cfi_restore (r5)
@@ -62,7 +62,7 @@ ENTRY (__mmap64)
 	cfi_restore_state
 .Linval:
 	mov	r0, $-EINVAL
-	ldmfd	sp!, {r4, r5}
+	pop	{r4, r5}
 	cfi_adjust_cfa_offset (-8)
 	cfi_restore (r4)
 	cfi_restore (r5)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index d5e666b..ac094df 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -76,19 +76,19 @@
 
 # define DOCARGS_0				\
 	.save {r7};				\
-	str	lr, [sp, #-4]!;			\
+	push	{lr};				\
 	cfi_adjust_cfa_offset (4);		\
 	cfi_rel_offset (lr, 0);			\
 	.save	{lr}
 # define UNDOCARGS_0
 # define RESTORE_LR_0				\
-	ldr	lr, [sp], #4;			\
+	pop	{lr};				\
 	cfi_adjust_cfa_offset (-4);		\
 	cfi_restore (lr)
 
 # define DOCARGS_1				\
 	.save	{r7};				\
-	stmfd	sp!, {r0, r1, lr};		\
+	push	{r0, r1, lr};			\
 	cfi_adjust_cfa_offset (12);		\
 	cfi_rel_offset (lr, 8);			\
 	.save	{lr};				\
@@ -102,13 +102,13 @@
 
 # define DOCARGS_2				\
 	.save	{r7};				\
-	stmfd	sp!, {r0, r1, lr};		\
+	push	{r0, r1, lr};			\
 	cfi_adjust_cfa_offset (12);		\
 	cfi_rel_offset (lr, 8);			\
 	.save	{lr};				\
 	.pad	#8
 # define UNDOCARGS_2				\
-	ldmfd	sp!, {r0, r1};			\
+	pop	{r0, r1};			\
 	cfi_adjust_cfa_offset (-8);		\
 	RESTART_UNWIND
 # define RESTORE_LR_2				\
@@ -116,13 +116,13 @@
 
 # define DOCARGS_3				\
 	.save	{r7};				\
-	stmfd	sp!, {r0, r1, r2, r3, lr};	\
+	push	{r0, r1, r2, r3, lr};		\
 	cfi_adjust_cfa_offset (20);		\
 	cfi_rel_offset (lr, 16);		\
 	.save	{lr};				\
 	.pad	#16
 # define UNDOCARGS_3				\
-	ldmfd	sp!, {r0, r1, r2, r3};		\
+	pop	{r0, r1, r2, r3};		\
 	cfi_adjust_cfa_offset (-16);		\
 	RESTART_UNWIND
 # define RESTORE_LR_3				\
@@ -130,13 +130,13 @@
 
 # define DOCARGS_4				\
 	.save	{r7};				\
-	stmfd	sp!, {r0, r1, r2, r3, lr};	\
+	push	{r0, r1, r2, r3, lr};		\
 	cfi_adjust_cfa_offset (20);		\
 	cfi_rel_offset (lr, 16);		\
 	.save	{lr};				\
 	.pad	#16
 # define UNDOCARGS_4				\
-	ldmfd	sp!, {r0, r1, r2, r3};		\
+	pop	{r0, r1, r2, r3};		\
 	cfi_adjust_cfa_offset (-16);		\
 	RESTART_UNWIND
 # define RESTORE_LR_4				\
@@ -145,13 +145,13 @@
 /* r4 is only stmfd'ed for correct stack alignment.  */
 # define DOCARGS_5				\
 	.save	{r4, r7};			\
-	stmfd	sp!, {r0, r1, r2, r3, r4, lr};	\
+	push	{r0, r1, r2, r3, r4, lr};	\
 	cfi_adjust_cfa_offset (24);		\
 	cfi_rel_offset (lr, 20);		\
 	.save	{lr};				\
 	.pad	#20
 # define UNDOCARGS_5				\
-	ldmfd	sp!, {r0, r1, r2, r3};		\
+	pop	{r0, r1, r2, r3};		\
 	cfi_adjust_cfa_offset (-16);		\
 	.fnend;					\
 	.fnstart;				\
@@ -159,20 +159,20 @@
 	.save	{lr};				\
 	.pad	#4
 # define RESTORE_LR_5				\
-	ldmfd sp!, {r4, lr};			\
+	pop	{r4, lr};			\
 	cfi_adjust_cfa_offset (-8);		\
 	/* r4 will be marked as restored later.  */ \
 	cfi_restore (lr)
 
 # define DOCARGS_6				\
 	.save	{r4, r5, r7};			\
-	stmfd	sp!, {r0, r1, r2, r3, lr};	\
+	push	{r0, r1, r2, r3, lr};		\
 	cfi_adjust_cfa_offset (20);		\
 	cfi_rel_offset (lr, 16);		\
 	.save	{lr};				\
 	.pad	#16
 # define UNDOCARGS_6				\
-	ldmfd	sp!, {r0, r1, r2, r3};		\
+	pop	{r0, r1, r2, r3};		\
 	cfi_adjust_cfa_offset (-16);		\
 	.fnend;					\
 	.fnstart;				\
@@ -213,13 +213,13 @@ extern int __local_multiple_threads attribute_hidden;
 				   header.multiple_threads) == 0, 1)
 #  else
 #   define SINGLE_THREAD_P						\
-	stmfd	sp!, {r0, lr};						\
+	push	{r0, lr};						\
 	cfi_adjust_cfa_offset (8);					\
 	cfi_rel_offset (lr, 4);						\
 	GET_TLS(lr);							\
 	NEGOFF_ADJ_BASE(r0, MULTIPLE_THREADS_OFFSET);			\
 	ldr	ip, NEGOFF_OFF1(r0, MULTIPLE_THREADS_OFFSET);		\
-	ldmfd	sp!, {r0, lr};						\
+	pop	{r0, lr};						\
 	cfi_adjust_cfa_offset (-8);					\
 	cfi_restore (lr);						\
 	teq	ip, #0
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c
index caa6a26..108924d 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-forcedunwind.c
@@ -93,7 +93,7 @@ asm (
 "_Unwind_Resume:\n"
 "	.cfi_sections .debug_frame\n"
 "	" CFI_STARTPROC "\n"
-"	stmfd	sp!, {r4, r5, r6, lr}\n"
+"	push	{r4, r5, r6, lr}\n"
 "	" CFI_ADJUST_CFA_OFFSET (16)" \n"
 "	" CFI_REL_OFFSET (r4, 0) "\n"
 "	" CFI_REL_OFFSET (r5, 4) "\n"
@@ -108,7 +108,7 @@ asm (
 "	cmp	r3, #0\n"
 "	beq	4f\n"
 "5:	mov	r0, r6\n"
-"	ldmfd	sp!, {r4, r5, r6, lr}\n"
+"	pop	{r4, r5, r6, lr}\n"
 "	" CFI_ADJUST_CFA_OFFSET (-16) "\n"
 "	" CFI_RESTORE (r4) "\n"
 "	" CFI_RESTORE (r5) "\n"
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c
index 1211599..d155ea7 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/unwind-resume.c
@@ -56,7 +56,7 @@ asm (
 "_Unwind_Resume:\n"
 "	.cfi_sections .debug_frame\n"
 "	" CFI_STARTPROC "\n"
-"	stmfd	sp!, {r4, r5, r6, lr}\n"
+"	push	{r4, r5, r6, lr}\n"
 "	" CFI_ADJUST_CFA_OFFSET (16)" \n"
 "	" CFI_REL_OFFSET (r4, 0) "\n"
 "	" CFI_REL_OFFSET (r5, 4) "\n"
@@ -71,7 +71,7 @@ asm (
 "	cmp	r3, #0\n"
 "	beq	4f\n"
 "5:	mov	r0, r6\n"
-"	ldmfd	sp!, {r4, r5, r6, lr}\n"
+"	pop	{r4, r5, r6, lr}\n"
 "	" CFI_ADJUST_CFA_OFFSET (-16) "\n"
 "	" CFI_RESTORE (r4) "\n"
 "	" CFI_RESTORE (r5) "\n"
diff --git a/ports/sysdeps/unix/sysv/linux/arm/syscall.S b/ports/sysdeps/unix/sysv/linux/arm/syscall.S
index 665ecb4..bdd5a52 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/syscall.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/syscall.S
@@ -23,7 +23,7 @@
 
 ENTRY (syscall)
 	mov	ip, sp
-	stmfd	sp!, {r4, r5, r6, r7}
+	push	{r4, r5, r6, r7}
 	cfi_adjust_cfa_offset (16)
 	cfi_rel_offset (r4, 0)
 	cfi_rel_offset (r5, 4)
@@ -35,7 +35,7 @@ ENTRY (syscall)
 	mov	r2, r3
 	ldmfd	ip, {r3, r4, r5, r6}
 	swi	0x0
-	ldmfd	sp!, {r4, r5, r6, r7}
+	pop	{r4, r5, r6, r7}
 	cfi_adjust_cfa_offset (-16)
 	cfi_restore (r4)
 	cfi_restore (r5)
diff --git a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
index 01d8123..39872b8 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
@@ -147,23 +147,22 @@ __local_syscall_error:						\
 # else
 #  if defined(__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)
 #   define POP_PC \
-  ldr lr, [sp], #4; \
+  pop { lr }; \
   cfi_adjust_cfa_offset (-4); \
   cfi_restore (lr); \
   bx lr
 #  else
-#   define POP_PC  \
-  ldr pc, [sp], #4
+#   define POP_PC  pop { pc }
 #  endif
 #  define SYSCALL_ERROR_HANDLER					\
 __local_syscall_error:						\
-	str	lr, [sp, #-4]!;					\
+	push	{ lr };						\
 	cfi_adjust_cfa_offset (4);				\
 	cfi_rel_offset (lr, 0);					\
-	str	r0, [sp, #-4]!;					\
+	push	{ r0 };	    					\
 	cfi_adjust_cfa_offset (4);				\
 	bl	PLTJMP(C_SYMBOL_NAME(__errno_location)); 	\
-	ldr	r1, [sp], #4;					\
+	pop	{ r1 };						\
 	cfi_adjust_cfa_offset (-4);				\
 	rsb	r1, r1, #0;					\
 	str	r1, [r0];					\
@@ -230,7 +229,7 @@ __local_syscall_error:						\
 #undef  DOARGS_0
 #define DOARGS_0					\
 	.fnstart;					\
-	str r7, [sp, #-4]!;				\
+	push	{ r7 };					\
 	cfi_adjust_cfa_offset (4);			\
 	cfi_rel_offset (r7, 0);				\
 	.save	{ r7 }
@@ -245,7 +244,7 @@ __local_syscall_error:						\
 #undef  DOARGS_5
 #define DOARGS_5					\
 	.fnstart;					\
-	stmfd	sp!, {r4, r7};				\
+	push	{r4, r7};				\
 	cfi_adjust_cfa_offset (8);			\
 	cfi_rel_offset (r4, 0);				\
 	cfi_rel_offset (r7, 4);				\
@@ -255,7 +254,7 @@ __local_syscall_error:						\
 #define DOARGS_6					\
 	.fnstart;					\
 	mov	ip, sp;					\
-	stmfd	sp!, {r4, r5, r7};			\
+	push	{r4, r5, r7};				\
 	cfi_adjust_cfa_offset (12);			\
 	cfi_rel_offset (r4, 0);				\
 	cfi_rel_offset (r5, 4);				\
@@ -266,7 +265,7 @@ __local_syscall_error:						\
 #define DOARGS_7					\
 	.fnstart;					\
 	mov	ip, sp;					\
-	stmfd	sp!, {r4, r5, r6, r7};			\
+	push	{r4, r5, r6, r7};			\
 	cfi_adjust_cfa_offset (16);			\
 	cfi_rel_offset (r4, 0);				\
 	cfi_rel_offset (r5, 4);				\
@@ -277,7 +276,7 @@ __local_syscall_error:						\
 
 #undef  UNDOARGS_0
 #define UNDOARGS_0					\
-	ldr	r7, [sp], #4;				\
+	pop	{r7};					\
 	cfi_adjust_cfa_offset (-4);			\
 	cfi_restore (r7);				\
 	.fnend
@@ -291,14 +290,14 @@ __local_syscall_error:						\
 #define UNDOARGS_4 UNDOARGS_0
 #undef  UNDOARGS_5
 #define UNDOARGS_5					\
-	ldmfd	sp!, {r4, r7};				\
+	pop	{r4, r7};				\
 	cfi_adjust_cfa_offset (-8);			\
 	cfi_restore (r4);				\
 	cfi_restore (r7);				\
 	.fnend
 #undef  UNDOARGS_6
 #define UNDOARGS_6					\
-	ldmfd	sp!, {r4, r5, r7};			\
+	pop	{r4, r5, r7};				\
 	cfi_adjust_cfa_offset (-12);			\
 	cfi_restore (r4);				\
 	cfi_restore (r5);				\
@@ -306,7 +305,7 @@ __local_syscall_error:						\
 	.fnend
 #undef  UNDOARGS_7
 #define UNDOARGS_7					\
-	ldmfd	sp!, {r4, r5, r6, r7};			\
+	pop	{r4, r5, r6, r7};			\
 	cfi_adjust_cfa_offset (-16);			\
 	cfi_restore (r4);				\
 	cfi_restore (r5);				\
diff --git a/ports/sysdeps/unix/sysv/linux/arm/vfork.S b/ports/sysdeps/unix/sysv/linux/arm/vfork.S
index ae931f7..128a640 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/vfork.S
@@ -37,7 +37,7 @@ ENTRY (__vfork)
 	mov	ip, r7
 	cfi_register (r7, ip)
 	.fnstart
-	str r7, [sp, #-4]!
+	push	{ r7 }
 	cfi_adjust_cfa_offset (4)
 	.save { r7 }
 	ldr	r7, =SYS_ify (vfork)
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros
  2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
  2013-03-01 17:36 ` [PATCH v2 05/14] arm: Use push/pop mnemonics Richard Henderson
@ 2013-03-01 17:36 ` Richard Henderson
  2013-03-01 17:57   ` Roland McGrath
  2013-03-05  1:42   ` Joseph S. Myers
  2013-03-01 17:36 ` [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc Richard Henderson
                   ` (11 subsequent siblings)
  13 siblings, 2 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-01 17:36 UTC (permalink / raw)
  To: libc-ports; +Cc: joseph

There are several places in which we access negative offsets from
the thread-pointer, but thumb2 only supports positive offsets in
memory references.

Avoid duplicating the rather large macros in which these references
are embedded by abstracting out the operation.
---
        * sysdeps/arm/sysdep.h (NEGOFF_ADJ_BASE): New macro.
        (NEGOFF_ADJ_BASE2, NEGOFF_OFF1, NEGOFF_OFF2): New macros.
        * sysdeps/unix/sysv/linux/arm/clone.S (__clone): Use them.
        * sysdeps/unix/sysv/linux/arm/nptl/vfork.S: Likewise.
        * sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S: Likewise.
        * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
        Likewise.
---
 ports/sysdeps/arm/sysdep.h                             | 16 ++++++++++++++++
 ports/sysdeps/unix/sysv/linux/arm/clone.S              |  5 +++--
 ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S      | 11 ++++++-----
 ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h |  3 ++-
 ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S         | 14 ++++++++------
 5 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/ports/sysdeps/arm/sysdep.h b/ports/sysdeps/arm/sysdep.h
index 29a78f0..9230131 100644
--- a/ports/sysdeps/arm/sysdep.h
+++ b/ports/sysdeps/arm/sysdep.h
@@ -134,6 +134,22 @@
 	.previous;					\
 99:	OP	R, [pc, T]
 # endif
+
+/* Cope with negative memory offsets, which thumb can't encode.
+   Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
+   and then NEGOFF_OFF1 to use 0 for thumb and the offset for arm,
+   or NEGOFF_OFF2 to use A-B for thumb and A for arm.  */
+# ifdef __thumb2__
+#  define NEGOFF_ADJ_BASE(R, OFF)	add R, R, $OFF
+#  define NEGOFF_ADJ_BASE2(D, S, OFF)	add D, S, $OFF
+#  define NEGOFF_OFF1(R, OFF)		[R]
+#  define NEGOFF_OFF2(R, OFFA, OFFB)	[R, $((OFFA) - (OFFB))]
+# else
+#  define NEGOFF_ADJ_BASE(R, OFF)
+#  define NEGOFF_ADJ_BASE2(D, S, OFF)	mov D, S
+#  define NEGOFF_OFF1(R, OFF)		[R, $OFF]
+#  define NEGOFF_OFF2(R, OFFA, OFFB)	[R, $OFFA]
+# endif
 #endif	/* __ASSEMBLER__ */
 
 /* This number is the offset from the pc at the current location.  */
diff --git a/ports/sysdeps/unix/sysv/linux/arm/clone.S b/ports/sysdeps/unix/sysv/linux/arm/clone.S
index 732a3ff..a5f9b4d 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/clone.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/clone.S
@@ -83,8 +83,9 @@ PSEUDO_END (__clone)
 	ite	ne
 	movne	r0, #-1
 	swieq	0x0
-	str	r0, [r1, #PID_OFFSET]
-	str	r0, [r1, #TID_OFFSET]
+	NEGOFF_ADJ_BASE(r1, TID_OFFSET)
+	str	r0, NEGOFF_OFF1(r1, TID_OFFSET)
+	str	r0, NEGOFF_OFF2(r1, PID_OFFSET, TID_OFFSET)
 3:
 #endif
 	@ pick the function arg and call address off the stack and execute
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
index a38d564..ff88510 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S
@@ -28,14 +28,15 @@
 	ldr	lr, [sp], #4;		/* Restore LR.  */		\
 	cfi_adjust_cfa_offset (-4);					\
 	cfi_restore (lr);						\
-	mov	r2, r0;			/* Save the TLS addr in r2.  */	\
-	ldr	r3, [r2, #PID_OFFSET];	/* Load the saved PID.  */	\
-	rsb	r0, r3, #0;		/* Negate it.  */		\
-	str	r0, [r2, #PID_OFFSET]	/* Store the temporary PID.  */
+	NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2. */ \
+	ldr	r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID.  */  \
+	rsb	r0, r3, #0;		/* Negate it.  */		     \
+	str	r0, NEGOFF_OFF1(r2, PID_OFFSET); /* Store the temp PID.  */
 
 /* Restore the old PID value in the parent.  */
 #define RESTORE_PID \
 	cmp	r0, #0;			/* If we are the parent... */	\
-	strne	r3, [r2, #PID_OFFSET]	/* ... restore the saved PID.  */
+	it	ne;							\
+	strne	r3, NEGOFF_OFF1(r2, PID_OFFSET); /* restore the saved PID.  */
 
 #include "../vfork.S"
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
index 8889369..47d4c70 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
@@ -217,7 +217,8 @@ extern int __local_multiple_threads attribute_hidden;
 	cfi_adjust_cfa_offset (8);					\
 	cfi_rel_offset (lr, 4);						\
 	bl	__aeabi_read_tp;					\
-	ldr	ip, [r0, #MULTIPLE_THREADS_OFFSET];			\
+	NEGOFF_ADJ_BASE(r0, MULTIPLE_THREADS_OFFSET);			\
+	ldr	ip, NEGOFF_OFF1(r0, MULTIPLE_THREADS_OFFSET);		\
 	ldmfd	sp!, {r0, lr};						\
 	cfi_adjust_cfa_offset (-8);					\
 	cfi_restore (lr);						\
diff --git a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
index 3fce2d1..c4be1e2 100644
--- a/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/arm/nptl/vfork.S
@@ -28,15 +28,17 @@
 	ldr	lr, [sp], #4;		/* Restore LR.  */		\
 	cfi_adjust_cfa_offset (-4);					\
 	cfi_restore (lr);						\
-	mov	r2, r0;			/* Save the TLS addr in r2.  */	\
-	ldr	r3, [r2, #PID_OFFSET];	/* Load the saved PID.  */	\
-	rsbs	r0, r3, #0;		/* Negate it.  */		\
-	moveq	r0, #0x80000000;	/* Use 0x80000000 if it was 0.  */ \
-	str	r0, [r2, #PID_OFFSET]	/* Store the temporary PID.  */
+	NEGOFF_ADJ_BASE2(r2, r0, PID_OFFSET); /* Save the TLS addr in r2.  */ \
+	ldr	r3, NEGOFF_OFF1(r2, PID_OFFSET); /* Load the saved PID.  */   \
+	rsbs	r0, r3, #0;		/* Negate it.  */		      \
+	it	eq;							      \
+	moveq	r0, #0x80000000;	/* Use 0x80000000 if it was 0.  */    \
+	str	r0, NEGOFF_OFF1(r2, PID_OFFSET); /* Store the temp PID.  */
 
 /* Restore the old PID value in the parent.  */
 #define RESTORE_PID \
 	cmp	r0, #0;		/* If we are the parent... */		\
-	strne	r3, [r2, #PID_OFFSET]	/* ... restore the saved PID.  */
+	it	ne;							\
+	strne	r3, NEGOFF_OFF1(r2, PID_OFFSET); /* restore the saved PID.  */
 
 #include "../vfork.S"
-- 
1.8.1.2

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS
  2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
@ 2013-03-01 17:55   ` Roland McGrath
  2013-03-05  2:01   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:55 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

Space before paren in defined (FOO).

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 09/14] arm: Tidy architecture selection
  2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
@ 2013-03-01 17:55   ` Roland McGrath
  2013-03-05  2:01   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:55 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

Space before paren in defined (FOO).

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 03/14] arm: Introduce and use GET_TLS
  2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
@ 2013-03-01 17:57   ` Roland McGrath
  2013-03-05  1:45   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:57 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

Space before paren.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros
  2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
@ 2013-03-01 17:57   ` Roland McGrath
  2013-03-05  1:42   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:57 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

Space before paren.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 13/14] arm: Add optimized submul_1
  2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
@ 2013-03-01 17:58   ` Roland McGrath
  2013-03-06  1:14   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:58 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

Top line descriptive comment, please.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
  2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
@ 2013-03-01 17:58   ` Roland McGrath
  2013-03-01 18:00   ` Roland McGrath
  2013-03-06  1:11   ` Joseph S. Myers
  2 siblings, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:58 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

Top line descriptive comment, please.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 14/14] arm: Add optimized add_n and sub_n
  2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
@ 2013-03-01 17:59   ` Roland McGrath
  2013-03-06  0:53   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 17:59 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

Top line descriptive comment, please.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
  2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
  2013-03-01 17:58   ` Roland McGrath
@ 2013-03-01 18:00   ` Roland McGrath
  2013-03-06  1:18     ` Joseph S. Myers
  2013-03-06  1:11   ` Joseph S. Myers
  2 siblings, 1 reply; 35+ messages in thread
From: Roland McGrath @ 2013-03-01 18:00 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports, joseph

I think the license is a non-problem since FSF is copyright owner.
But if your from-scratch code is good then I don't know there's a
strong reason to use GMP's instead, since we haven't been tracking
GMP changes in our copies for years anyway AFAIK.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 01/14] arm: Introduce and use LDST_PCREL
  2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
@ 2013-03-04 17:47   ` Joseph S. Myers
  0 siblings, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-04 17:47 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> Macro-ising the few instances where we need to distinguish between
> arm and thumb pc-relative memory operations.
> ---
>         * sysdeps/arm/sysdep.h (LDST_PCREL): New macro.
>         * sysdeps/unix/arm/sysdep.S (__syscall_error): Use LDST_PCREL.
>         Fix up gottpoff load of errno for thumb2.
>         * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h
>         (SINGLE_THREAD_P): Use LDST_PCREL.
>         (PSEUDO_PROLOGUE): Remove.
>         (PSEUDO): Don't use it.
>         * sysdeps/unix/sysv/linux/arm/sysdep.h (SYSCALL_ERROR_HANDLER):
>         Use LDST_PCREL.

OK.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros
  2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
  2013-03-01 17:57   ` Roland McGrath
@ 2013-03-05  1:42   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05  1:42 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> There are several places in which we access negative offsets from
> the thread-pointer, but thumb2 only supports positive offsets in
> memory references.
> 
> Avoid duplicating the rather large macros in which these references
> are embedded by abstracting out the operation.
> ---
>         * sysdeps/arm/sysdep.h (NEGOFF_ADJ_BASE): New macro.
>         (NEGOFF_ADJ_BASE2, NEGOFF_OFF1, NEGOFF_OFF2): New macros.
>         * sysdeps/unix/sysv/linux/arm/clone.S (__clone): Use them.
>         * sysdeps/unix/sysv/linux/arm/nptl/vfork.S: Likewise.
>         * sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S: Likewise.
>         * sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
>         Likewise.

OK.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 03/14] arm: Introduce and use GET_TLS
  2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
  2013-03-01 17:57   ` Roland McGrath
@ 2013-03-05  1:45   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05  1:45 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> Factor out the sequence needed to call kuser_get_tls, as we can't
> play subtract into pc games in thumb mode.  Prepare for hard-tp,
> pulling the save of LR into the macro.
> ---
> 	* sysdeps/arm/sysdep.h (GET_TLS): New macro.
> 	* sysdeps/arm/dl-tlsdesc.S (_dl_tlsdesc_undefweak): Use it.
> 	(_dl_tlsdesc_dynamic): Likewise.
> 	* sysdeps/unix/arm/sysdep.S (__syscall_error): Likewise.
> 	* sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): New macro.
>         * sysdeps/unix/sysv/linux/arm/clone.S (__clone): Likewise.
>         * sysdeps/unix/sysv/linux/arm/nptl/pt-vfork.S (SAVE_PID): Likewise.
>         * sysdeps/unix/sysv/linux/arm/nptl/vfork.S (SAVE_PID): Likewise.
> 	* sysdeps/unix/sysv/linux/arm/nptl/sysdep-cancel.h (SINGLE_THREAD_P):
> 	Likewise.
> 	* sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S (__aeabi_read_tp):
> 	Add thumb2 alternative.

OK.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 09/14] arm: Tidy architecture selection
  2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
  2013-03-01 17:55   ` Roland McGrath
@ 2013-03-05  2:01   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05  2:01 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> +# elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
> +       defined(__ARM_ARCH_5TEJ__)
> +#  define __ARM_ARCH 5

Or plain 5, or 5E (allowed by the architecture and -march=armv5 / 
-march=armv5e, even if GCC doesn't know of any relevant -mcpu= processors 
and such may not have existed).

> +# elif defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6ZK__) \
> +       defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
> +#  define __ARM_ARCH 6

Or plain 6, or 6Z.

My reference for possible values is the union of the architecture columns 
in GCC 4.7's arm-arches.def and arm-cores.def (given that for 4.8 and 
later you have __ARM_ARCH predefined, so only the values known to 4.7 are 
relevant here).

> +# elif defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \
> +       defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)

Or plain 7 (given that you're already covering values that aren't actually 
compatible with building glibc).

OK fixed to handle all the other __ARM_ARCH_* values GCC might define, as 
described above.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS
  2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
  2013-03-01 17:55   ` Roland McGrath
@ 2013-03-05  2:01   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05  2:01 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> 	* sysdeps/arm/sysdep.h (ARCH_HAS_HARD_TP): New macro.
> 	(GET_TLS): Use hard-tp if ARCH_HAS_HARD_TP.
> 	* sysdeps/unix/sysv/linux/arm/aeabi_read_tp.S: Likewise.
> 	* sysdeps/unix/sysv/linux/arm/sysdep.h (GET_TLS): Don't override
> 	the default definition if ARCH_HAS_HARD_TP.

OK.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 11/14] arm: Add optimized ffs for armv6t2
  2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
@ 2013-03-05  2:08   ` Joseph S. Myers
  2013-03-06 15:52     ` Richard Henderson
  0 siblings, 1 reply; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-05  2:08 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> 	* sysdeps/arm/armv6t2/ffs.S: New file.
> 	* sysdeps/arm/armv6t2/ffsll.S: New file.

OK, if ffsll has been tested for both big and little endian (or OK for 
just ffs pending big-endian ffsll testing).

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 14/14] arm: Add optimized add_n and sub_n
  2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
  2013-03-01 17:59   ` Roland McGrath
@ 2013-03-06  0:53   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06  0:53 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
> GPL 3, but tested with the GMP testsuite.
> 
> This is 250% faster than the generic code as measured on Cortex-A15,
> and the same speed as GMP on the same core, and probably everywhere.
> ---
> 	* sysdeps/arm/add_n.S: New file.
> 	* sysdeps/arm/sub_n.S: New file.

OK.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
  2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
  2013-03-01 17:58   ` Roland McGrath
  2013-03-01 18:00   ` Roland McGrath
@ 2013-03-06  1:11   ` Joseph S. Myers
  2 siblings, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06  1:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
> GPL 3, but tested with the GMP testsuite.
> 
> This is 25% faster than the generic code as measured on Cortex-A15,
> and the same speed as GMP on the same core.  It's probably slower
> than GMP on the A8 and A9 cores though.
> ---
> 	* sysdeps/arm/addmul_1.S: New file.

OK.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 13/14] arm: Add optimized submul_1
  2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
  2013-03-01 17:58   ` Roland McGrath
@ 2013-03-06  1:14   ` Joseph S. Myers
  1 sibling, 0 replies; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06  1:14 UTC (permalink / raw)
  To: Richard Henderson; +Cc: libc-ports

On Fri, 1 Mar 2013, Richard Henderson wrote:

> Written from scratch rather than copied from GMP, due to LGPL 2.1 vs
> GPL 3, but tested with the GMP testsuite.
> 
> This is 50% faster than the generic code as measured on Cortex-A15,
> and the same speed as GMP on the same core.  It's probably slower
> than GMP on the A8 and A9 cores though.
> ---
> 	* sysdeps/arm/submul_1.S: New file.

OK.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
  2013-03-01 18:00   ` Roland McGrath
@ 2013-03-06  1:18     ` Joseph S. Myers
  2013-10-25 22:13       ` Roland McGrath
  0 siblings, 1 reply; 35+ messages in thread
From: Joseph S. Myers @ 2013-03-06  1:18 UTC (permalink / raw)
  To: Roland McGrath; +Cc: Richard Henderson, libc-ports

On Fri, 1 Mar 2013, Roland McGrath wrote:

> I think the license is a non-problem since FSF is copyright owner.

My understanding was that FSF approval was needed for relicensing code 
from other FSF-owned packages (as opposed to correcting simple mistakes, 
e.g. making the license notice on a file reflect established licensing 
practice for files used in a particular way).  (E.g., when license 
exception notices were added to soft-fp for use in libgcc, that involved 
FSF approval for adding those notices.)

> But if your from-scratch code is good then I don't know there's a
> strong reason to use GMP's instead, since we haven't been tracking
> GMP changes in our copies for years anyway AFAIK.

I suspect other architectures might benefit from changes made in GMP to 
improve performance - but certainly this is code that has diverged 
significantly from the GMP versions over time.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 11/14] arm: Add optimized ffs for armv6t2
  2013-03-05  2:08   ` Joseph S. Myers
@ 2013-03-06 15:52     ` Richard Henderson
  0 siblings, 0 replies; 35+ messages in thread
From: Richard Henderson @ 2013-03-06 15:52 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: libc-ports

On 03/04/2013 06:08 PM, Joseph S. Myers wrote:
> On Fri, 1 Mar 2013, Richard Henderson wrote:
> 
>> 	* sysdeps/arm/armv6t2/ffs.S: New file.
>> 	* sysdeps/arm/armv6t2/ffsll.S: New file.
> 
> OK, if ffsll has been tested for both big and little endian (or OK for 
> just ffs pending big-endian ffsll testing).
> 

I did finally get big-endian testing done.

There appear to be no big-endian distributions extant anymore, so this required
doing the entire bootstrap gcc/glibc dance from scratch.  It had been long
enough since I'd done so that it took me a while to remember the steps.

So I've committed this entire patch set, including ffsll, and will shortly be
re-submitting the string routines previously posted.


r~

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 12/14] arm: Add optimized addmul_1
  2013-03-06  1:18     ` Joseph S. Myers
@ 2013-10-25 22:13       ` Roland McGrath
  0 siblings, 0 replies; 35+ messages in thread
From: Roland McGrath @ 2013-10-25 22:13 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: Richard Henderson, libc-ports

[A very old thread, but I still had it sitting around.]

> On Fri, 1 Mar 2013, Roland McGrath wrote:
> 
> > I think the license is a non-problem since FSF is copyright owner.
> 
> My understanding was that FSF approval was needed for relicensing code 
> from other FSF-owned packages (as opposed to correcting simple mistakes, 
> e.g. making the license notice on a file reflect established licensing 
> practice for files used in a particular way).  (E.g., when license 
> exception notices were added to soft-fp for use in libgcc, that involved 
> FSF approval for adding those notices.)

Given that we imported GMP code before and had permission, I don't think we
really need new permission for more GMP code being used for the same
purpose.  That was 20 years ago and lots of things have changed, but I
still think so.  Nonetheless, the most conservative thing would be to ask
the current FSF authorities and make it clear that it is a continuation of
a past exception rather than an entirely fresh one.

> > But if your from-scratch code is good then I don't know there's a
> > strong reason to use GMP's instead, since we haven't been tracking
> > GMP changes in our copies for years anyway AFAIK.
> 
> I suspect other architectures might benefit from changes made in GMP to 
> improve performance - but certainly this is code that has diverged 
> significantly from the GMP versions over time.

Agreed.  I think the long-term right thing is to be sharing the code with
GMP.  But that requires both verifying that the reasons for the past
libc-local changes are satisfied by new GMP code, and establishing the
relationship with the current GMP maintainers so they understand what code
we are using and what extra constraints being used in libc puts on that
code (probably just name space issues and maybe PLT issues).


Thanks,
Roland

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2013-10-25 22:13 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-01 17:36 [PATCH v2 00/14] ARM improvements Richard Henderson
2013-03-01 17:36 ` [PATCH v2 05/14] arm: Use push/pop mnemonics Richard Henderson
2013-03-01 17:36 ` [PATCH v2 02/14] arm: Introduce and use NEGOFF series of macros Richard Henderson
2013-03-01 17:57   ` Roland McGrath
2013-03-05  1:42   ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 08/14] arm: Unless arm4t, pop return address directly into pc Richard Henderson
2013-03-01 17:36 ` [PATCH v2 03/14] arm: Introduce and use GET_TLS Richard Henderson
2013-03-01 17:57   ` Roland McGrath
2013-03-05  1:45   ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 12/14] arm: Add optimized addmul_1 Richard Henderson
2013-03-01 17:58   ` Roland McGrath
2013-03-01 18:00   ` Roland McGrath
2013-03-06  1:18     ` Joseph S. Myers
2013-10-25 22:13       ` Roland McGrath
2013-03-06  1:11   ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 09/14] arm: Tidy architecture selection Richard Henderson
2013-03-01 17:55   ` Roland McGrath
2013-03-05  2:01   ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 14/14] arm: Add optimized add_n and sub_n Richard Henderson
2013-03-01 17:59   ` Roland McGrath
2013-03-06  0:53   ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 04/14] arm: Enable thumb2 mode in assembly files Richard Henderson
2013-03-01 17:36 ` [PATCH v2 07/14] arm: Commonize BX conditionals Richard Henderson
2013-03-01 17:36 ` [PATCH v2 11/14] arm: Add optimized ffs for armv6t2 Richard Henderson
2013-03-05  2:08   ` Joseph S. Myers
2013-03-06 15:52     ` Richard Henderson
2013-03-01 17:36 ` [PATCH v2 01/14] arm: Introduce and use LDST_PCREL Richard Henderson
2013-03-04 17:47   ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 10/14] arm: Implement hard-tp for GET_TLS Richard Henderson
2013-03-01 17:55   ` Roland McGrath
2013-03-05  2:01   ` Joseph S. Myers
2013-03-01 17:36 ` [PATCH v2 06/14] arm: Delete LOADREGS macro Richard Henderson
2013-03-01 17:36 ` [PATCH v2 13/14] arm: Add optimized submul_1 Richard Henderson
2013-03-01 17:58   ` Roland McGrath
2013-03-06  1:14   ` Joseph S. Myers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).