public inbox for libc-ports@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/5] alpha: put mcount prologue at correct location
@ 2012-06-06 21:52 Richard Henderson
  2012-06-06 21:52 ` [PATCH 3/5] alpha: Fix ev4 build with ev6 compiler Richard Henderson
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Richard Henderson @ 2012-06-06 21:52 UTC (permalink / raw)
  To: libc-ports

---
 ChangeLog.alpha         |    4 ++++
 sysdeps/alpha/_mcount.S |    3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index 705a020..2c107ad 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,3 +1,7 @@
+2012-06-06  Richard Henderson  <rth@twiddle.net>
+
+	* sysdeps/alpha/_mcount.S: Move .prologue after stack alloc.
+
 2012-06-05  Richard Henderson  <rth@twiddle.net>
 
 	* sysdeps/unix/sysv/linux/alpha/rt_sigaction.S: Use .cfi_signal_frame
diff --git a/sysdeps/alpha/_mcount.S b/sysdeps/alpha/_mcount.S
index 9d7c15b..67c579e 100644
--- a/sysdeps/alpha/_mcount.S
+++ b/sysdeps/alpha/_mcount.S
@@ -38,9 +38,8 @@
 	.set	noreorder
 
 LEAF(_mcount, 0xb0)
-	.prologue 0
-
 	subq	 sp, 0xb0, sp
+	.prologue 0
 	stq	 a0, 0x00(sp)
 	mov	 ra, a0		# a0 = caller-pc
 	stq	 a1, 0x08(sp)
-- 
1.7.7.6

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/5] alpha: Use cfi_startproc instead of dual procedure descriptors
  2012-06-06 21:52 [PATCH 1/5] alpha: put mcount prologue at correct location Richard Henderson
                   ` (2 preceding siblings ...)
  2012-06-06 21:52 ` [PATCH 4/5] alpha: Fix [BZ #13718] Richard Henderson
@ 2012-06-06 21:52 ` Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2012-06-06 21:52 UTC (permalink / raw)
  To: libc-ports

---
 ChangeLog.alpha                  |    9 +++++++
 sysdeps/alpha/alphaev6/stxcpy.S  |   33 +++++++-----------------
 sysdeps/alpha/alphaev6/stxncpy.S |   49 ++++++++++++-------------------------
 sysdeps/alpha/bzero.S            |   21 ++++-----------
 sysdeps/alpha/memset.S           |   25 ++++++-------------
 sysdeps/alpha/stxcpy.S           |   30 +++++++----------------
 sysdeps/alpha/stxncpy.S          |   31 +++++++-----------------
 sysdeps/unix/alpha/sysdep.h      |    6 ++++
 8 files changed, 73 insertions(+), 131 deletions(-)

diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index 2c107ad..ceee910 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,5 +1,14 @@
 2012-06-06  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/alpha/alphaev6/stxcpy.S: Use cfi markup instead of
+	dual ecoff procedure descriptors.
+	* sysdeps/alpha/alphaev6/stxncpy.S: Likewise.
+	* sysdeps/alpha/bzero.S: Likewise.
+	* sysdeps/alpha/memset.S: Likewise.
+	* sysdeps/alpha/stxcpy.S: Likewise.
+	* sysdeps/alpha/stxncpy.S: Likewise.
+	* sysdeps/unix/alpha/sysdep.h (USEPV_PROF): New.
+
 	* sysdeps/alpha/_mcount.S: Move .prologue after stack alloc.
 
 2012-06-05  Richard Henderson  <rth@twiddle.net>
diff --git a/sysdeps/alpha/alphaev6/stxcpy.S b/sysdeps/alpha/alphaev6/stxcpy.S
index 37ad737..bdc8e72 100644
--- a/sysdeps/alpha/alphaev6/stxcpy.S
+++ b/sysdeps/alpha/alphaev6/stxcpy.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000 Free Software Foundation, Inc.
+/* Copyright (C) 2000-2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson (rth@tamu.edu)
    EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
    This file is part of the GNU C Library.
@@ -41,24 +41,20 @@
 	.arch ev6
 	.set noat
 	.set noreorder
-	.text
-
-/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
-   doesn't like putting the entry point for a procedure somewhere in the
-   middle of the procedure descriptor.  Work around this by putting the
-   aligned copy in its own procedure descriptor */
 
+	.text
+	.type	__stxcpy, @function
+	.globl	__stxcpy
+	.usepv	__stxcpy, no
 
-	.ent stxcpy_aligned
-	.align 4
-stxcpy_aligned:
-	.frame sp, 0, t9
-	.prologue 0
+	cfi_startproc
+	cfi_return_column (t9)
 
 	/* On entry to this basic block:
 	   t0 == the first destination word for masking back in
 	   t1 == the first source word.  */
-
+	.align 4
+stxcpy_aligned:
 	/* Create the 1st output word and detect 0's in the 1st input word.  */
 	lda	t2, -1		# E : build a mask against false zero
 	mskqh	t2, a1, t2	# U :   detection in the src word (stall)
@@ -115,15 +111,8 @@ $a_eos:
 	nop
 	nop
 
-	.end stxcpy_aligned
-
 	.align 4
-	.ent __stxcpy
-	.globl __stxcpy
 __stxcpy:
-	.frame sp, 0, t9
-	.prologue 0
-
 	/* Are source and destination co-aligned?  */
 	xor	a0, a1, t0	# E :
 	unop			# E :
@@ -321,7 +310,5 @@ $unaligned:
 	or	t0, t1, t1	# e1    : and put it there
 	stq_u	t1, 0(a0)	# .. e0 : (stall)
 	ret	(t9)		# e1    :
-	nop
-
-	.end __stxcpy
 
+	cfi_endproc
diff --git a/sysdeps/alpha/alphaev6/stxncpy.S b/sysdeps/alpha/alphaev6/stxncpy.S
index 31f8570..d134eb8 100644
--- a/sysdeps/alpha/alphaev6/stxncpy.S
+++ b/sysdeps/alpha/alphaev6/stxncpy.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000, 2002 Free Software Foundation, Inc.
+/* Copyright (C) 2000-2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson (rth@tamu.edu)
    EV6 optimized by Rick Gorton <rick.gorton@alpha-processor.com>.
    This file is part of the GNU C Library.
@@ -49,22 +49,19 @@
 	.set noat
 	.set noreorder
 
-/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
-   doesn't like putting the entry point for a procedure somewhere in the
-   middle of the procedure descriptor.  Work around this by putting the
-   aligned copy in its own procedure descriptor */
+	.text
+	.type	__stxncpy, @function
+	.globl	__stxncpy
+	.usepv	__stxncpy, no
 
-
-	.ent stxncpy_aligned
-	.align 4
-stxncpy_aligned:
-	.frame sp, 0, t9, 0
-	.prologue 0
+	cfi_startproc
+	cfi_return_column (t9)
 
 	/* On entry to this basic block:
 	   t0 == the first destination word for masking back in
 	   t1 == the first source word.  */
-
+	.align 4
+stxncpy_aligned:
 	/* Create the 1st output word and detect 0's in the 1st input word.  */
 	lda	t2, -1		# E : build a mask against false zero
 	mskqh	t2, a1, t2	# U :   detection in the src word (stall)
@@ -111,7 +108,6 @@ $a_loop:
 	   On entry to this basic block we have:
 	   t0 == the source word containing the null
 	   t7 == the cmpbge mask that found it.  */
-
 $a_eos:
 	negq	t7, t8		# E : find low bit set
 	and	t7, t8, t8	# E : (stall)
@@ -144,15 +140,8 @@ $a_eoc:
 	nop
 	nop
 
-	.end stxncpy_aligned
-
 	.align 4
-	.ent __stxncpy
-	.globl __stxncpy
 __stxncpy:
-	.frame sp, 0, t9, 0
-	.prologue 0
-
 	/* Are source and destination co-aligned?  */
 	xor	a0, a1, t1	# E :
 	and	a0, 7, t0	# E : find dest misalignment
@@ -166,16 +155,14 @@ __stxncpy:
 
 	sll	t10, t2, t10	# U : t10 = bitmask of last count byte
 	bne	t1, $unaligned	# U :
+
 	/* We are co-aligned; take care of a partial first word.  */
 	ldq_u	t1, 0(a1)	# L : load first src word
 	addq	a1, 8, a1	# E :
-
-	beq	t0, stxncpy_aligned     # U : avoid loading dest word if not needed
+	beq	t0, stxncpy_aligned # U : avoid loading dest word if not needed
 	ldq_u	t0, 0(a0)	# L :
-	nop
-	nop
 
-	br	stxncpy_aligned	# .. e1 :
+	br	stxncpy_aligned	# U :
 	nop
 	nop
 	nop
@@ -231,7 +218,7 @@ $u_head:
 
 	extqh	t2, a1, t0	# U : position lo-bits of hi word (stall)
 	cmpbge	zero, t2, t7	# E :
-	nop			
+	nop
 	bne	t7, $u_eos	# U :
 
 	/* Unaligned copy main loop.  In order to avoid reading too much,
@@ -314,7 +301,7 @@ $u_final:
 1:	stq_u	t0, 0(a0)	# L :
 	ret	(t9)		# L0 : Latency=3
 
-        /* Got to end-of-count before end of string.  
+        /* Got to end-of-count before end of string.
            On entry to this basic block:
            t1 == the shifted high-order bits from the previous source word  */
 $u_eoc:
@@ -325,7 +312,7 @@ $u_eoc:
 
 	ldq_u	t2, 8(a1)	# L : load final src word
 	nop
-	extqh	t2, a1, t0	# U : extract low bits for last word (stall)	
+	extqh	t2, a1, t0	# U : extract low bits for last word (stall)
 	or	t1, t0, t1	# E : (stall)
 
 1:	cmpbge	zero, t1, t7	# E :
@@ -394,9 +381,5 @@ $unaligned:
 	stq_u	t0, 0(a0)	# L : (stall)
 
 	ret	(t9)		# L0 : Latency=3
-	nop
-	nop
-	nop
-
-	.end __stxncpy
 
+	cfi_endproc
diff --git a/sysdeps/alpha/bzero.S b/sysdeps/alpha/bzero.S
index 091024b..a691ff6 100644
--- a/sysdeps/alpha/bzero.S
+++ b/sysdeps/alpha/bzero.S
@@ -36,23 +36,19 @@
 	.set noreorder
 
 	.text
+	.type	__bzero, @function
+	.globl	__bzero
+	.usepv	__bzero, USEPV_PROF
 
-/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
-   doesn't like putting the entry point for a procedure somewhere in the
-   middle of the procedure descriptor.  Work around this by putting the main
-   loop in its own procedure descriptor.  */
+	cfi_startproc
 
 	/* On entry to this basic block:
 	   t3 == loop counter
 	   t4 == bytes in partial final word
 	   a0 == possibly misaligned destination pointer  */
 
-	.ent bzero_loop
 	.align 3
 bzero_loop:
-	.frame sp, 0, ra, 0
-	.prologue 0
-
 	beq	t3, $tail	#
 	blbc	t3, 0f		# skip single store if count even
 
@@ -75,16 +71,11 @@ $tail:	bne	t4, 1f		# is there a tail to do?
 	stq_u	t0, 0(a0)	#
 	ret			#
 
-	.end bzero_loop
-
-ENTRY(__bzero)
+__bzero:
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
-	.prologue 1
-#else
-	.prologue 0
 #endif
 
 	mov	a0, v0		# e0    : move return value in place
@@ -115,5 +106,5 @@ $oneq:
 
 $done:	ret
 
-	END(__bzero)
+	cfi_endproc
 weak_alias (__bzero, bzero)
diff --git a/sysdeps/alpha/memset.S b/sysdeps/alpha/memset.S
index 4ac7092..db92771 100644
--- a/sysdeps/alpha/memset.S
+++ b/sysdeps/alpha/memset.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 1996-2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson (rth@tamu.edu)
    This file is part of the GNU C Library.
 
@@ -37,11 +37,11 @@
 	.set noreorder
 
 	.text
+	.type	memset, @function
+	.globl	memset
+	.usepv	memset, USEPV_PROF
 
-/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
-   doesn't like putting the entry point for a procedure somewhere in the
-   middle of the procedure descriptor.  Work around this by putting the main
-   loop in its own procedure descriptor.  */
+	cfi_startproc
 
 	/* On entry to this basic block:
 	   t3 == loop counter
@@ -49,12 +49,8 @@
 	   a0 == possibly misaligned destination pointer
 	   a1 == replicated source character  */
 
-	.ent memset_loop
 	.align 3
 memset_loop:
-	.frame sp, 0, ra, 0
-	.prologue 0
-
 	beq	t3, $tail
 	blbc	t3, 0f		# skip single store if count even
 
@@ -80,19 +76,14 @@ $tail:	bne	t4, 1f		# is there a tail to do?
 	stq_u	t0, 0(a0)	# e0    :
 	ret			# .. e1 :
 
-	.end memset_loop
-
-ENTRY(memset)
+memset:
 #ifdef PROF
 	ldgp	gp, 0(pv)
 	lda	AT, _mcount
 	jsr	AT, (AT), _mcount
-	.prologue 1
-#else
-	.prologue 0
 #endif
 
-	zapnot	a1, 1, a1	# e0    : zero extend input character
+	and	a1, 0xff, a1	# e0    : zero extend input character
 	mov	a0, v0		# .. e1 : move return value in place
 	sll	a1, 8, t0	# e0    : begin replicating the char
 	beq	a2, $done	# .. e1 : early exit for zero-length store
@@ -132,5 +123,5 @@ $oneq:
 
 $done:	ret
 
-	END(memset)
+	cfi_endproc
 libc_hidden_builtin_def (memset)
diff --git a/sysdeps/alpha/stxcpy.S b/sysdeps/alpha/stxcpy.S
index 46f19a9..c55d005 100644
--- a/sysdeps/alpha/stxcpy.S
+++ b/sysdeps/alpha/stxcpy.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996-2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson (rth@tamu.edu)
    This file is part of the GNU C Library.
 
@@ -43,22 +43,18 @@
 	.set noreorder
 
 	.text
+	.type	__stxcpy, @function
+	.globl	__stxcpy
+	.usepv	__stxcpy, no
 
-/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
-   doesn't like putting the entry point for a procedure somewhere in the
-   middle of the procedure descriptor.  Work around this by putting the
-   aligned copy in its own procedure descriptor */
-
-	.ent stxcpy_aligned
-	.align 3
-stxcpy_aligned:
-	.frame sp, 0, t9
-	.prologue 0
+	cfi_startproc
+	cfi_return_column (t9)
 
 	/* On entry to this basic block:
 	   t0 == the first destination word for masking back in
 	   t1 == the first source word.  */
-
+	.align 3
+stxcpy_aligned:
 	/* Create the 1st output word and detect 0's in the 1st input word.  */
 	lda	t2, -1		# e1    : build a mask against false zero
 	mskqh	t2, a1, t2	# e0    :   detection in the src word
@@ -72,7 +68,6 @@ stxcpy_aligned:
 	/* On entry to this basic block:
 	   t0 == the first destination word for masking back in
 	   t1 == a source word not containing a null.  */
-
 $a_loop:
 	stq_u	t1, 0(a0)	# e0    :
 	addq	a0, 8, a0	# .. e1 :
@@ -106,15 +101,8 @@ $a_eos:
 1:	stq_u	t1, 0(a0)	# e0    :
 	ret	(t9)		# .. e1 :
 
-	.end stxcpy_aligned
-
 	.align 3
-	.ent __stxcpy
-	.globl __stxcpy
 __stxcpy:
-	.frame sp, 0, t9
-	.prologue 0
-
 	/* Are source and destination co-aligned?  */
 	xor	a0, a1, t0	# e0    :
 	unop			#       :
@@ -303,4 +291,4 @@ $unaligned:
 	stq_u	t1, 0(a0)	# .. e0 :
 	ret	(t9)
 
-	.end __stxcpy
+	cfi_endproc
diff --git a/sysdeps/alpha/stxncpy.S b/sysdeps/alpha/stxncpy.S
index 5b81ac9..f8b494a 100644
--- a/sysdeps/alpha/stxncpy.S
+++ b/sysdeps/alpha/stxncpy.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 2002 Free Software Foundation, Inc.
+/* Copyright (C) 1996-2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson (rth@tamu.edu)
    This file is part of the GNU C Library.
 
@@ -52,22 +52,18 @@
 	.set noreorder
 
 	.text
+	.type	__stxncpy, @function
+	.globl	__stxncpy
+	.usepv	__stxncpy, no
 
-/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
-   doesn't like putting the entry point for a procedure somewhere in the
-   middle of the procedure descriptor.  Work around this by putting the
-   aligned copy in its own procedure descriptor */
-
-	.ent stxncpy_aligned
-	.align 3
-stxncpy_aligned:
-	.frame sp, 0, t9, 0
-	.prologue 0
+	cfi_startproc
+	cfi_return_column (t9)
 
 	/* On entry to this basic block:
 	   t0 == the first destination word for masking back in
 	   t1 == the first source word.  */
-
+	.align 3
+stxncpy_aligned:
 	/* Create the 1st output word and detect 0's in the 1st input word.  */
 	lda	t2, -1		# e1    : build a mask against false zero
 	mskqh	t2, a1, t2	# e0    :   detection in the src word
@@ -81,7 +77,6 @@ stxncpy_aligned:
 
 	/* On entry to this basic block:
 	   t0 == a source word not containing a null.  */
-
 $a_loop:
 	stq_u	t0, 0(a0)	# e0    :
 	addq	a0, 8, a0	# .. e1 :
@@ -98,7 +93,6 @@ $a_loop:
 	   On entry to this basic block we have:
 	   t0 == the source word containing the null
 	   t7 == the cmpbge mask that found it.  */
-
 $a_eos:
 	negq	t7, t8		# e0    : find low bit set
 	and	t7, t8, t8	# e1 (stall)
@@ -126,15 +120,8 @@ $a_eoc:
 	or	t10, t7, t7
 	br	$a_eos
 
-	.end stxncpy_aligned
-
 	.align 3
-	.ent __stxncpy
-	.globl __stxncpy
 __stxncpy:
-	.frame sp, 0, t9, 0
-	.prologue 0
-
 	/* Are source and destination co-aligned?  */
 	xor	a0, a1, t1	# e0    :
 	and	a0, 7, t0	# .. e1 : find dest misalignment
@@ -359,4 +346,4 @@ $unaligned:
 	stq_u	t0, 0(a0)	# e0    :
 	ret	(t9)		# .. e1 :
 
-	.end __stxncpy
+	cfi_endproc
diff --git a/sysdeps/unix/alpha/sysdep.h b/sysdeps/unix/alpha/sysdep.h
index e17bf21..4ee0746 100644
--- a/sysdeps/unix/alpha/sysdep.h
+++ b/sysdeps/unix/alpha/sysdep.h
@@ -71,6 +71,12 @@
 	.prologue 1
 #endif /* PROF */
 
+#ifdef PROF
+# define USEPV_PROF	std
+#else
+# define USEPV_PROF	no
+#endif
+
 #if RTLD_PRIVATE_ERRNO
 # define SYSCALL_ERROR_LABEL	$syscall_error
 # define SYSCALL_ERROR_HANDLER			\
-- 
1.7.7.6

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 3/5] alpha: Fix ev4 build with ev6 compiler
  2012-06-06 21:52 [PATCH 1/5] alpha: put mcount prologue at correct location Richard Henderson
@ 2012-06-06 21:52 ` Richard Henderson
  2012-06-06 21:52 ` [PATCH 5/5] alpha: Fix end-of-count checks in strncmp Richard Henderson
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2012-06-06 21:52 UTC (permalink / raw)
  To: libc-ports

---
 ChangeLog.alpha            |    3 +++
 sysdeps/alpha/fpu/e_sqrt.c |    3 ++-
 2 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index ceee910..0598d7e 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,5 +1,8 @@
 2012-06-06  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/alpha/fpu/e_sqrt.c: Include <math_private.h> before
+	redefining __ieee754_sqrt.
+
 	* sysdeps/alpha/alphaev6/stxcpy.S: Use cfi markup instead of
 	dual ecoff procedure descriptors.
 	* sysdeps/alpha/alphaev6/stxncpy.S: Likewise.
diff --git a/sysdeps/alpha/fpu/e_sqrt.c b/sysdeps/alpha/fpu/e_sqrt.c
index 22b24b5..ad10dad 100644
--- a/sysdeps/alpha/fpu/e_sqrt.c
+++ b/sysdeps/alpha/fpu/e_sqrt.c
@@ -16,7 +16,8 @@
    License along with the GNU C Library.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <features.h>
+#include <math.h>
+#include <math_private.h>
 
 #if !defined(_IEEE_FP_INEXACT)
 
-- 
1.7.7.6

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 5/5] alpha: Fix end-of-count checks in strncmp
  2012-06-06 21:52 [PATCH 1/5] alpha: put mcount prologue at correct location Richard Henderson
  2012-06-06 21:52 ` [PATCH 3/5] alpha: Fix ev4 build with ev6 compiler Richard Henderson
@ 2012-06-06 21:52 ` Richard Henderson
  2012-06-06 21:52 ` [PATCH 4/5] alpha: Fix [BZ #13718] Richard Henderson
  2012-06-06 21:52 ` [PATCH 2/5] alpha: Use cfi_startproc instead of dual procedure descriptors Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2012-06-06 21:52 UTC (permalink / raw)
  To: libc-ports

This routine suffered from a problem similar to the one in stxncpy:
it needs to bias a potentially very large unsigned number, which can
wrap around.  This exposed a secondary problem where we didn't properly
handle the end-of-count condition for the second string input, resulting
in stratcliff failures.
---
 ChangeLog.alpha         |    3 +
 sysdeps/alpha/strncmp.S |  251 ++++++++++++++++++++++++++---------------------
 2 files changed, 144 insertions(+), 110 deletions(-)

diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index d291df9..824083c 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,5 +1,8 @@
 2012-06-06  Richard Henderson  <rth@twiddle.net>
 
+	* sysdeps/alpha/strncmp.S: Bound count to LONG_MAX at startup.
+	Re-organize checks vs s2 end-of-count.
+
 	[BZ #13718]
 	* sysdeps/alpha/stxncpy.S: Bound count to LONG_MAX at startup.
 	* sysdeps/alpha/alphaev6/stxncpy.S: Likewise.
diff --git a/sysdeps/alpha/strncmp.S b/sysdeps/alpha/strncmp.S
index c9981e1..828f1b9 100644
--- a/sysdeps/alpha/strncmp.S
+++ b/sysdeps/alpha/strncmp.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 1996-2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson (rth@tamu.edu)
    This file is part of the GNU C Library.
 
@@ -23,6 +23,15 @@
 	.set noat
 	.set noreorder
 
+/* EV6 only predicts one branch per octaword.  We'll use these to push
+   subsequent branches back to the next bundle.  This will generally add
+   a fetch+decode cycle to older machines, so skip in that case.  */
+#ifdef __alpha_fix__
+# define ev6_unop	unop
+#else
+# define ev6_unop
+#endif
+
 	.text
 
 ENTRY(strncmp)
@@ -35,128 +44,140 @@ ENTRY(strncmp)
 	.prologue 0
 #endif
 
-	xor	a0, a1, t2	# e0    : are s1 and s2 co-aligned?
-	beq	a2, $zerolength	# .. e1 :
-	ldq_u	t0, 0(a0)	# e0    : give cache time to catch up
-	ldq_u	t1, 0(a1)	# .. e1 :
-	and	t2, 7, t2	# e0    :
-	and	a0, 7, t4	# .. e1 : find s1 misalignment
-	lda	t3, -1		# e0    :
-	addq	a2, t4, a2	# .. e1 : bias count by s1 misalignment
-	and	a2, 7, t10	# e1    : ofs of last byte in last word
-	srl	a2, 3, a2	# .. e0 : remaining full words in count
-	and	a1, 7, t5	# e0    : find s2 misalignment
-	bne	t2, $unaligned	# .. e1 :
+	xor	a0, a1, t2	# are s1 and s2 co-aligned?
+	beq	a2, $zerolength
+	ldq_u	t0, 0(a0)	# load asap to give cache time to catch up
+	ldq_u	t1, 0(a1)
+	lda	t3, -1
+	and	t2, 7, t2
+	srl	t3, 1, t6
+	and	a0, 7, t4	# find s1 misalignment
+	and	a1, 7, t5	# find s2 misalignment
+	cmovlt	a2, t6, a2	# bound neg count to LONG_MAX
+	addq	a1, a2, a3	# s2+count
+	addq	a2, t4, a2	# bias count by s1 misalignment
+	and	a2, 7, t10	# ofs of last byte in s1 last word
+	srl	a2, 3, a2	# remaining full words in s1 count
+	bne	t2, $unaligned
 
 	/* On entry to this basic block:
 	   t0 == the first word of s1.
 	   t1 == the first word of s2.
 	   t3 == -1.  */
-
 $aligned:
-	mskqh	t3, a1, t3	# e0    : mask off leading garbage
-	nop			# .. e1 :
-	ornot	t1, t3, t1	# e0    :
-	ornot	t0, t3, t0	# .. e1 :
-	cmpbge	zero, t1, t7	# e0    : bits set iff null found
-	beq	a2, $eoc	# .. e1 : check end of count
-	unop			# e0    :
-	bne	t7, $eos	# .. e1 :
-	unop			# e0    :
-	beq	t10, $ant_loop	# .. e1 :
+	mskqh	t3, a1, t8	# mask off leading garbage
+	ornot	t1, t8, t1
+	ornot	t0, t8, t0
+	cmpbge	zero, t1, t7	# bits set iff null found
+	beq	a2, $eoc	# check end of count
+	bne	t7, $eos
+	beq	t10, $ant_loop
 
 	/* Aligned compare main loop.
 	   On entry to this basic block:
 	   t0 == an s1 word.
 	   t1 == an s2 word not containing a null.  */
 
+	.align 4
 $a_loop:
 	xor	t0, t1, t2	# e0	:
 	bne	t2, $wordcmp	# .. e1 (zdb)
 	ldq_u	t1, 8(a1)	# e0    :
 	ldq_u	t0, 8(a0)	# .. e1 :
+
 	subq	a2, 1, a2	# e0    :
 	addq	a1, 8, a1	# .. e1 :
 	addq	a0, 8, a0	# e0    :
 	beq	a2, $eoc	# .. e1 :
+
 	cmpbge	zero, t1, t7	# e0    :
 	beq	t7, $a_loop	# .. e1 :
-	unop			# e0    :
-	br	$eos		# .. e1 :
+
+	br	$eos
 
 	/* Alternate aligned compare loop, for when there's no trailing
 	   bytes on the count.  We have to avoid reading too much data.  */
+	.align 4
 $ant_loop:
 	xor	t0, t1, t2	# e0	:
+	ev6_unop
+	ev6_unop
 	bne	t2, $wordcmp	# .. e1 (zdb)
+
 	subq	a2, 1, a2	# e0    :
 	beq	a2, $zerolength	# .. e1 :
 	ldq_u	t1, 8(a1)	# e0    :
 	ldq_u	t0, 8(a0)	# .. e1 :
+
 	addq	a1, 8, a1	# e0    :
 	addq	a0, 8, a0	# .. e1 :
 	cmpbge	zero, t1, t7	# e0    :
 	beq	t7, $ant_loop	# .. e1 :
-	unop			# e0	:
-	br	$eos		# .. e1 :
+
+	br	$eos
 
 	/* The two strings are not co-aligned.  Align s1 and cope.  */
+	/* On entry to this basic block:
+	   t0 == the first word of s1.
+	   t1 == the first word of s2.
+	   t3 == -1.
+	   t4 == misalignment of s1.
+	   t5 == misalignment of s2.
+	  t10 == misalignment of s1 end.  */
+	.align	4
 $unaligned:
-	subq	a1, t4, a1	# e0	 :
-	unop			#        :
-
-	/* If s2 misalignment is larger than s2 misalignment, we need
+	/* If s2 misalignment is larger than s1 misalignment, we need
 	   extra startup checks to avoid SEGV.  */
+	subq	a1, t4, a1	# adjust s2 for s1 misalignment
+	cmpult	t4, t5, t9
+	subq	a3, 1, a3	# last byte of s2
+	bic	a1, 7, t8
+	mskqh	t3, t5, t7	# mask garbage in s2
+	subq	a3, t8, a3
+	ornot	t1, t7, t7
+	srl	a3, 3, a3	# remaining full words in s2 count
+	beq	t9, $u_head
+
+	/* Failing that, we need to look for both eos and eoc within the
+	   first word of s2.  If we find either, we can continue by
+	   pretending that the next word of s2 is all zeros.  */
+	lda	t2, 0		# next = zero
+	cmpeq	a3, 0, t8	# eoc in the first word of s2?
+	cmpbge	zero, t7, t7	# eos in the first word of s2?
+	or	t7, t8, t8
+	bne	t8, $u_head_nl
 
-	cmplt	t4, t5, t8	# .. e1 :
-	beq	t8, $u_head	# e1    :
-
-	mskqh	t3, t5, t3	# e0    :
-	ornot	t1, t3, t3	# e0    :
-	cmpbge	zero, t3, t7	# e1    : is there a zero?
-	beq	t7, $u_head	# e1    :
-
-	/* We've found a zero in the first partial word of s2.  Align
-	   our current s1 and s2 words and compare what we've got.  */
-
-	extql	t1, t5, t1	# e0    :
-	lda	t3, -1		# .. e1 :
-	insql	t1, a0, t1	# e0    :
-	mskqh	t3, a0, t3	# e0    :
-	ornot	t1, t3, t1	# e0    :
-	ornot	t0, t3, t0	# .. e1 :
-	cmpbge	zero, t1, t7	# e0    : find that zero again
-	beq	a2, $eoc	# .. e1 : and finish up
-	br	$eos		# e1    :
-
-	.align 3
-$u_head:
 	/* We know just enough now to be able to assemble the first
 	   full word of s2.  We can still find a zero at the end of it.
 
 	   On entry to this basic block:
 	   t0 == first word of s1
-	   t1 == first partial word of s2.  */
-
-	ldq_u	t2, 8(a1)	# e0    : load second partial s2 word
-	lda	t3, -1		# .. e1 : create leading garbage mask
-	extql	t1, a1, t1	# e0    : create first s2 word
-	mskqh	t3, a0, t3	# e0    :
-	extqh	t2, a1, t4	# e0    :
-	ornot	t0, t3, t0	# .. e1 : kill s1 garbage
-	or	t1, t4, t1	# e0    : s2 word now complete
-	ornot	t1, t3, t1	# e1    : kill s2 garbage
-	cmpbge	zero, t0, t7	# e0    : find zero in first s1 word
-	beq	a2, $eoc	# .. e1 :
-	lda	t3, -1		# e0    :
-	bne	t7, $eos	# .. e1 :
-	subq	a2, 1, a2	# e0    :
-	xor	t0, t1, t4	# .. e1 : compare aligned words
-	mskql	t3, a1, t3	# e0    : mask out s2[1] bits we have seen
-	bne	t4, $wordcmp	# .. e1 :
-	or	t2, t3, t3	# e0    :
-	cmpbge	zero, t3, t7	# e1    : find zero in high bits of s2[1]
-	bne	t7, $u_final	# e1    :
+	   t1 == first partial word of s2.
+	   t3 == -1.
+	   t10 == ofs of last byte in s1 last word.
+	   a3 == remaining full words in s2 count.  */
+$u_head:
+	ldq_u	t2, 8(a1)	# load second partial s2 word
+	subq	a3, 1, a3
+$u_head_nl:
+	extql	t1, a1, t1	# create first s2 word
+	mskqh	t3, a0, t8
+	extqh	t2, a1, t4
+	ornot	t0, t8, t0	# kill s1 garbage
+	or	t1, t4, t1	# s2 word now complete
+	cmpbge	zero, t0, t7	# find eos in first s1 word
+	ornot	t1, t8, t1	# kill s2 garbage
+	beq	a2, $eoc
+	subq	a2, 1, a2
+	bne	t7, $eos
+	mskql	t3, a1, t8	# mask out s2[1] bits we have seen
+	xor	t0, t1, t4	# compare aligned words
+	or	t2, t8, t8
+	bne	t4, $wordcmp
+	cmpbge	zero, t8, t7	# eos in high bits of s2[1]?
+	cmpeq	a3, 0, t8	# eoc in s2[1]?
+	or	t7, t8, t7
+	bne	t7, $u_final
 
 	/* Unaligned copy main loop.  In order to avoid reading too much,
 	   the loop is structured to detect zeros in aligned words from s2.
@@ -166,43 +187,54 @@ $u_head:
 	   to run as fast as possible.
 
 	   On entry to this basic block:
-	   t2 == the unshifted low-bits from the next s2 word.  */
-
-	.align 3
+	   t2 == the unshifted low-bits from the next s2 word.
+	   t10 == ofs of last byte in s1 last word.
+	   a3 == remaining full words in s2 count.  */
+	.align 4
 $u_loop:
 	extql	t2, a1, t3	# e0    :
 	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
 	ldq_u	t0, 8(a0)	# e0    : load next s1 word
 	addq	a1, 8, a1	# .. e1 :
+
 	addq	a0, 8, a0	# e0    :
-	nop			# .. e1 :
+	subq	a3, 1, a3	# .. e1 :
 	extqh	t2, a1, t1	# e0    :
-	cmpbge	zero, t0, t7	# .. e1 : find zero in current s1 word
+	cmpbge	zero, t0, t7	# .. e1 : eos in current s1 word
+
 	or	t1, t3, t1	# e0    :
-	beq	a2, $eoc	# .. e1 : check for end of count
+	beq	a2, $eoc	# .. e1 : eoc in current s1 word
 	subq	a2, 1, a2	# e0    :
+	cmpbge	zero, t2, t4	# .. e1 : eos in s2[1]
+
+	xor	t0, t1, t3	# e0    : compare the words
+	ev6_unop
+	ev6_unop
 	bne	t7, $eos	# .. e1 :
-	xor	t0, t1, t4	# e0    : compare the words
-	bne	t4, $wordcmp	# .. e1 (zdb)
-	cmpbge	zero, t2, t4	# e0    : find zero in next low bits
+
+	cmpeq	a3, 0, t5	# e0    : eoc in s2[1]
+	ev6_unop
+	ev6_unop
+	bne	t3, $wordcmp	# .. e1 :
+
+	or	t4, t5, t4	# e0    : eos or eoc in s2[1].
 	beq	t4, $u_loop	# .. e1 (zdb)
 
 	/* We've found a zero in the low bits of the last s2 word.  Get
 	   the next s1 word and align them.  */
+	.align 3
 $u_final:
-	ldq_u	t0, 8(a0)	# e1    :
-	extql	t2, a1, t1	# .. e0 :
-	cmpbge	zero, t1, t7	# e0    :
-	bne	a2, $eos	# .. e1 :
+	ldq_u	t0, 8(a0)
+	extql	t2, a1, t1
+	cmpbge	zero, t1, t7
+	bne	a2, $eos
 
 	/* We've hit end of count.  Zero everything after the count
 	   and compare whats left.  */
-
 	.align 3
 $eoc:
 	mskql	t0, t10, t0
 	mskql	t1, t10, t1
-	unop
 	cmpbge	zero, t1, t7
 
 	/* We've found a zero somewhere in a word we just read.
@@ -210,32 +242,31 @@ $eoc:
 	   t0 == s1 word
 	   t1 == s2 word
 	   t7 == cmpbge mask containing the zero.  */
-
+	.align 3
 $eos:
-	negq	t7, t6		# e0    : create bytemask of valid data
-	and	t6, t7, t8	# e1    :
-	subq	t8, 1, t6	# e0    :
-	or	t6, t8, t7	# e1    :
-	zapnot	t0, t7, t0	# e0    : kill the garbage
-	zapnot	t1, t7, t1	# .. e1 :
-	xor	t0, t1, v0	# e0    : and compare
-	beq	v0, $done	# .. e1 :
+	negq	t7, t6		# create bytemask of valid data
+	and	t6, t7, t8
+	subq	t8, 1, t6
+	or	t6, t8, t7
+	zapnot	t0, t7, t0	# kill the garbage
+	zapnot	t1, t7, t1
+	xor	t0, t1, v0	# ... and compare
+	beq	v0, $done
 
 	/* Here we have two differing co-aligned words in t0 & t1.
 	   Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
 	.align 3
 $wordcmp:
-	cmpbge	t0, t1, t2	# e0    : comparison yields bit mask of ge
-	cmpbge	t1, t0, t3	# .. e1 :
-	xor	t2, t3, t0	# e0    : bits set iff t0/t1 bytes differ
-	negq	t0, t1		# e1    : clear all but least bit
-	and	t0, t1, t0	# e0    :
-	lda	v0, -1		# .. e1 :
-	and	t0, t2, t1	# e0    : was bit set in t0 > t1?
-	cmovne	t1, 1, v0	# .. e1 (zdb)
-
+	cmpbge	t0, t1, t2	# comparison yields bit mask of ge
+	cmpbge	t1, t0, t3
+	xor	t2, t3, t0	# bits set iff t0/t1 bytes differ
+	negq	t0, t1		# clear all but least bit
+	and	t0, t1, t0
+	lda	v0, -1
+	and	t0, t2, t1	# was bit set in t0 > t1?
+	cmovne	t1, 1, v0
 $done:
-	ret			# e1    :
+	ret
 
 	.align 3
 $zerolength:
-- 
1.7.7.6

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 4/5] alpha: Fix [BZ #13718]
  2012-06-06 21:52 [PATCH 1/5] alpha: put mcount prologue at correct location Richard Henderson
  2012-06-06 21:52 ` [PATCH 3/5] alpha: Fix ev4 build with ev6 compiler Richard Henderson
  2012-06-06 21:52 ` [PATCH 5/5] alpha: Fix end-of-count checks in strncmp Richard Henderson
@ 2012-06-06 21:52 ` Richard Henderson
  2012-06-06 21:52 ` [PATCH 2/5] alpha: Use cfi_startproc instead of dual procedure descriptors Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2012-06-06 21:52 UTC (permalink / raw)
  To: libc-ports

The routines expect to be able to bias the count by a small number.
If the count is near -1ull, the count will overflow.  Since we cannot
use the whole 64-bit address space, bound the count to LONG_MAX.
---
 ChangeLog.alpha                  |    4 ++++
 sysdeps/alpha/alphaev6/stxncpy.S |   19 +++++++++++++------
 sysdeps/alpha/stxncpy.S          |   23 +++++++++++++----------
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/ChangeLog.alpha b/ChangeLog.alpha
index 0598d7e..d291df9 100644
--- a/ChangeLog.alpha
+++ b/ChangeLog.alpha
@@ -1,5 +1,9 @@
 2012-06-06  Richard Henderson  <rth@twiddle.net>
 
+	[BZ #13718]
+	* sysdeps/alpha/stxncpy.S: Bound count to LONG_MAX at startup.
+	* sysdeps/alpha/alphaev6/stxncpy.S: Likewise.
+
 	* sysdeps/alpha/fpu/e_sqrt.c: Include <math_private.h> before
 	redefining __ieee754_sqrt.
 
diff --git a/sysdeps/alpha/alphaev6/stxncpy.S b/sysdeps/alpha/alphaev6/stxncpy.S
index d134eb8..28495df 100644
--- a/sysdeps/alpha/alphaev6/stxncpy.S
+++ b/sysdeps/alpha/alphaev6/stxncpy.S
@@ -143,18 +143,25 @@ $a_eoc:
 	.align 4
 __stxncpy:
 	/* Are source and destination co-aligned?  */
+	lda	t2, -1		# E :
 	xor	a0, a1, t1	# E :
 	and	a0, 7, t0	# E : find dest misalignment
-	and	t1, 7, t1	# E : (stall)
-	addq	a2, t0, a2	# E : bias count by dest misalignment (stall)
+	nop			# E :
 
-	subq	a2, 1, a2	# E :
+	srl	t2, 1, t2	# U :
+	and	t1, 7, t1	# E :
+	cmovlt	a2, t2, a2	# E : bound count to LONG_MAX (stall)
+	nop			# E :
+
+	addq	a2, t0, a2	# E : bias count by dest misalignment
+	subq	a2, 1, a2	# E : (stall)
 	and	a2, 7, t2	# E : (stall)
-	srl	a2, 3, a2	# U : a2 = loop counter = (count - 1)/8 (stall)
-	addq	zero, 1, t10	# E :
+	lda	t10, 1		# E :
 
+	srl	a2, 3, a2	# U : a2 = loop counter = (count - 1)/8
 	sll	t10, t2, t10	# U : t10 = bitmask of last count byte
-	bne	t1, $unaligned	# U :
+	nop			# E :
+	bne	t1, $unaligned	# U : (stall)
 
 	/* We are co-aligned; take care of a partial first word.  */
 	ldq_u	t1, 0(a1)	# L : load first src word
diff --git a/sysdeps/alpha/stxncpy.S b/sysdeps/alpha/stxncpy.S
index f8b494a..d2cb9c3 100644
--- a/sysdeps/alpha/stxncpy.S
+++ b/sysdeps/alpha/stxncpy.S
@@ -123,16 +123,19 @@ $a_eoc:
 	.align 3
 __stxncpy:
 	/* Are source and destination co-aligned?  */
-	xor	a0, a1, t1	# e0    :
-	and	a0, 7, t0	# .. e1 : find dest misalignment
-	and	t1, 7, t1	# e0    :
-	addq	a2, t0, a2	# .. e1 : bias count by dest misalignment
-	subq	a2, 1, a2	# e0    :
-	and	a2, 7, t2	# e1    :
-	srl	a2, 3, a2	# e0    : a2 = loop counter = (count - 1)/8
-	addq	zero, 1, t10	# .. e1 :
-	sll	t10, t2, t10	# e0    : t10 = bitmask of last count byte
-	bne	t1, $unaligned	# .. e1 :
+	lda	t2, -1
+	xor	a0, a1, t1
+	srl	t2, 1, t2
+	and	a0, 7, t0		# find dest misalignment
+	cmovlt	a2, t2, a2		# bound neg count to LONG_MAX
+	and	t1, 7, t1
+	addq	a2, t0, a2		# bias count by dest misalignment
+	subq	a2, 1, a2
+	and	a2, 7, t2
+	srl	a2, 3, a2		# a2 = loop counter = (count - 1)/8
+	addq	zero, 1, t10
+	sll	t10, t2, t10		# t10 = bitmask of last count byte
+	bne	t1, $unaligned
 
 	/* We are co-aligned; take care of a partial first word.  */
 
-- 
1.7.7.6

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-06-06 21:52 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-06 21:52 [PATCH 1/5] alpha: put mcount prologue at correct location Richard Henderson
2012-06-06 21:52 ` [PATCH 3/5] alpha: Fix ev4 build with ev6 compiler Richard Henderson
2012-06-06 21:52 ` [PATCH 5/5] alpha: Fix end-of-count checks in strncmp Richard Henderson
2012-06-06 21:52 ` [PATCH 4/5] alpha: Fix [BZ #13718] Richard Henderson
2012-06-06 21:52 ` [PATCH 2/5] alpha: Use cfi_startproc instead of dual procedure descriptors Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).