public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] ia64 libgcc routines for binary compatibility
@ 2002-05-02 10:08 Jakub Jelinek
  2002-05-03  0:14 ` Ulrich Drepper
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2002-05-02 10:08 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: Glibc hackers

Hi!

I just skimmed shared libs/binaries and found that the following libgcc
functions are both re-exported from random shared libraries and undefined in
some other shared libs or binaries, meaning the latter resolve them from the
shared libraries which mistakenly used to export them (before .hidden was
added to all libgcc.a routines in gcc 3.1).
All of these are used internally by glibc anyway, so IMHO just exporting
them normally will make various binaries/libraries tiny bit smaller,
but if you prefer to export them as @GLIBC_2.0 symbols only (ie. nobody
will be able to link against them), so be it.

2002-05-02  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/ia64/Makefile: Add ia64libgcc in csu subdir.
	* sysdeps/ia64/Versions (__divtf3, __divdf3, __divsf3, __divdi3,
	__moddi3, __udivdi3, __umoddi3, __multi3): Export at GLIBC_2.0.
	* sysdeps/ia64/ia64libgcc.S: New file.

--- libc/sysdeps/ia64/Makefile.jj	Fri Feb  1 11:01:18 2002
+++ libc/sysdeps/ia64/Makefile	Thu May  2 17:50:59 2002
@@ -9,6 +9,13 @@ ifeq ($(subdir), csu)
 CPPFLAGS-start.S = -D__ASSEMBLY__
 sysdep_routines += hp-timing
 static-only-routines += hp-timing
+
+ifeq (yes,$(build-shared))
+# Compatibility
+sysdep_routines += ia64libgcc
+shared-only-routines += ia64libgcc
+endif
+endif
 endif
 
 ifeq ($(subdir),elf)
--- libc/sysdeps/ia64/Versions.jj	Mon Feb  4 17:35:18 2002
+++ libc/sysdeps/ia64/Versions	Thu May  2 19:08:48 2002
@@ -5,3 +5,10 @@ ld {
     _dl_function_address;
   }
 }
+libc {
+  GLIBC_2.0 {
+    # Functions from libgcc.
+    __divtf3; __divdf3; __divsf3; __divdi3; __moddi3; __udivdi3; __umoddi3;
+    __multi3;
+  }
+}
--- libc/sysdeps/ia64/ia64libgcc.S.jj	Thu May  2 17:51:11 2002
+++ libc/sysdeps/ia64/ia64libgcc.S	Thu May  2 19:03:01 2002
@@ -0,0 +1,336 @@
+/* From the Intel IA-64 Optimization Guide, choose the minimum latency
+   alternative.  */
+
+#include <sysdep.h>
+#undef ret
+
+/* __divtf3
+   Compute an 80-bit IEEE double-extended quotient.
+   farg0 holds the dividend (a).  farg1 holds the divisor (b).  */
+
+ENTRY(__divtf3)
+	cmp.eq p7, p0 = r0, r0	/* p7 = true (r0 == r0 always holds) */
+	frcpa.s0 f10, p6 = farg0, farg1	/* f10 = y0 ~ 1/b; p6 gates the refinement below */
+	;;
+(p6)	cmp.ne p7, p0 = r0, r0	/* p6 set: clear p7, so p7 == !p6 from here on */
+	.pred.rel.mutex p6, p7	/* tell the assembler p6 and p7 are never both set */
+(p6)	fnma.s1 f11 = farg1, f10, f1	/* e = 1 - b*y0 */
+(p6)	fma.s1 f12 = farg0, f10, f0	/* q0 = a*y0 */
+	;;
+(p6)	fma.s1 f13 = f11, f11, f0	/* e^2 */
+(p6)	fma.s1 f14 = f11, f11, f11	/* e^2 + e */
+	;;
+(p6)	fma.s1 f11 = f13, f13, f11	/* e^4 + e */
+(p6)	fma.s1 f13 = f14, f10, f10	/* y0*(1 + e + e^2) */
+	;;
+(p6)	fma.s1 f10 = f13, f11, f10	/* y1 = y0*(1 + e + ... + e^6) */
+(p6)	fnma.s1 f11 = farg1, f12, farg0	/* r0 = a - b*q0 */
+	;;
+(p6)	fma.s1 f11 = f11, f10, f12	/* q1 = q0 + r0*y1 */
+(p6)	fnma.s1 f12 = farg1, f10, f1	/* e1 = 1 - b*y1 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f10	/* y2 = y1 + e1*y1 */
+(p6)	fnma.s1 f12 = farg1, f11, farg0	/* r1 = a - b*q1 */
+	;;
+(p6)	fma.s0 fret0 = f12, f10, f11	/* result = q1 + r1*y2 */
+(p7)	mov fret0 = f10	/* p6 clear: return frcpa's f10 as is (NOTE(review): ISA says it is the quotient then) */
+	br.ret.sptk rp
+END(__divtf3)
+
+/* __divdf3
+   Compute a 64-bit IEEE double quotient.
+   farg0 holds the dividend (a).  farg1 holds the divisor (b).  */
+
+ENTRY(__divdf3)
+	cmp.eq p7, p0 = r0, r0	/* p7 = true (r0 == r0 always holds) */
+	frcpa.s0 f10, p6 = farg0, farg1	/* f10 = y0 ~ 1/b; p6 gates the refinement below */
+	;;
+(p6)	cmp.ne p7, p0 = r0, r0	/* p6 set: clear p7, so p7 == !p6 from here on */
+	.pred.rel.mutex p6, p7	/* tell the assembler p6 and p7 are never both set */
+(p6)	fmpy.s1 f11 = farg0, f10	/* q0 = a*y0 */
+(p6)	fnma.s1 f12 = farg1, f10, f1	/* e = 1 - b*y0 */
+	;;
+(p6)	fma.s1 f11 = f12, f11, f11	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f13 = f12, f12	/* e^2 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f10	/* y1 = y0 + e*y0 */
+(p6)	fma.s1 f11 = f13, f11, f11	/* q2 = q1 + e^2*q1 */
+	;;
+(p6)	fmpy.s1 f12 = f13, f13	/* e^4 */
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+	;;
+(p6)	fma.d.s1 f11 = f12, f11, f11	/* q3 = q2 + e^4*q2, rounded to double */
+(p6)	fma.s1 f10 = f12, f10, f10	/* y3 = y2 + e^4*y2 */
+	;;
+(p6)	fnma.d.s1 f8 = farg1, f11, farg0	/* r = a - b*q3, rounded to double */
+	;;
+(p6)	fma.d fret0 = f8, f10, f11	/* result = q3 + r*y3, rounded to double */
+(p7)	mov fret0 = f10	/* p6 clear: return frcpa's f10 as is (NOTE(review): ISA says it is the quotient then) */
+	br.ret.sptk rp
+	;;
+END(__divdf3)
+
+/* __divsf3
+   Compute a 32-bit IEEE float quotient.
+   farg0 holds the dividend (a).  farg1 holds the divisor (b).  */
+
+ENTRY(__divsf3)
+	cmp.eq p7, p0 = r0, r0	/* p7 = true (r0 == r0 always holds) */
+	frcpa.s0 f10, p6 = farg0, farg1	/* f10 = y0 ~ 1/b; p6 gates the refinement below */
+	;;
+(p6)	cmp.ne p7, p0 = r0, r0	/* p6 set: clear p7, so p7 == !p6 from here on */
+	.pred.rel.mutex p6, p7	/* tell the assembler p6 and p7 are never both set */
+(p6)	fmpy.s1 f8 = farg0, f10	/* q0 = a*y0 */
+(p6)	fnma.s1 f9 = farg1, f10, f1	/* e = 1 - b*y0 */
+	;;
+(p6)	fma.s1 f8 = f9, f8, f8	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f9 = f9, f9	/* e^2 */
+	;;
+(p6)	fma.s1 f8 = f9, f8, f8	/* q2 = q1 + e^2*q1 */
+(p6)	fmpy.s1 f9 = f9, f9	/* e^4 */
+	;;
+(p6)	fma.d.s1 f10 = f9, f8, f8	/* q3 = q2 + e^4*q2, rounded to double */
+	;;
+(p6)	fnorm.s.s0 fret0 = f10	/* result = q3 rounded to single */
+(p7)	mov fret0 = f10	/* p6 clear: return frcpa's f10 as is (NOTE(review): ISA says it is the quotient then) */
+	br.ret.sptk rp
+	;;
+END(__divsf3)
+
+/* __divdi3
+   Compute a 64-bit signed integer quotient, truncated toward zero.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__divdi3)
+	.regstk 2,0,0,0
+	/* Transfer inputs to FP registers.  */
+	setf.sig f8 = in0
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, so that they won't be treated as
+	   unsigned.  */
+	fcvt.xf f8 = f8
+	fcvt.xf f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* f10 = y0 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y0 */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y0 */
+	;;
+(p6)	fmpy.s1 f13 = f11, f11	/* e^2 */
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e*y0 */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	/* Round quotient to an integer.  */
+	fcvt.fx.trunc.s1 f10 = f10	/* unpredicated: if p6 was clear, f10 is still frcpa's output */
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10
+	br.ret.sptk rp
+	;;
+END(__divdi3)
+
+/* __moddi3
+   Compute a 64-bit signed integer modulus as a - trunc(a/b)*b.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__moddi3)
+	.regstk 2,0,0,0
+	/* Transfer inputs to FP registers.  */
+	setf.sig f14 = in0	/* f14 keeps the integer form of a for the final xma */
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, so that they won't be treated as
+	   unsigned.  */
+	fcvt.xf f8 = f14
+	fcvt.xf f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* f10 = y0 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y0 */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y0 */
+	;;
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f13 = f11, f11	/* e^2 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e*y0 */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+	sub in1 = r0, in1	/* in1 = -b, needed by the final xma */
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 (last use of the FP form of b in f9) */
+	;;
+	setf.sig f9 = in1	/* f9 now holds the integer -b */
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	fcvt.fx.trunc.s1 f10 = f10	/* truncate quotient to a signed integer q */
+	;;
+	/* r = q * (-b) + a  */
+	xma.l f10 = f10, f9, f14
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10
+	br.ret.sptk rp
+	;;
+END(__moddi3)
+
+/* __udivdi3
+   Compute a 64-bit unsigned integer quotient.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__udivdi3)
+	.regstk 2,0,0,0
+	/* Transfer inputs to FP registers.  */
+	setf.sig f8 = in0
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, to avoid FP software-assist faults.  */
+	fcvt.xuf.s1 f8 = f8
+	fcvt.xuf.s1 f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* f10 = y0 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y0 */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y0 */
+	;;
+(p6)	fmpy.s1 f13 = f11, f11	/* e^2 */
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e*y0 */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	/* Round quotient to an unsigned integer.  */
+	fcvt.fxu.trunc.s1 f10 = f10	/* unpredicated: if p6 was clear, f10 is still frcpa's output */
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10
+	br.ret.sptk rp
+	;;
+END(__udivdi3)
+
+/* __umoddi3
+   Compute a 64-bit unsigned integer modulus as a - trunc(a/b)*b.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__umoddi3)
+	.regstk 2,0,0,0
+	/* Transfer inputs to FP registers.  */
+	setf.sig f14 = in0	/* f14 keeps the integer form of a for the final xma */
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, to avoid FP software-assist faults.  */
+	fcvt.xuf.s1 f8 = f14
+	fcvt.xuf.s1 f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* f10 = y0 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y0 */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y0 */
+	;;
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f13 = f11, f11	/* e^2 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y0 + e*y0 */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+	sub in1 = r0, in1	/* in1 = -b, needed by the final xma */
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 (last use of the FP form of b in f9) */
+	;;
+	setf.sig f9 = in1	/* f9 now holds the integer -b */
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	/* Round quotient to an unsigned integer.  */
+	fcvt.fxu.trunc.s1 f10 = f10
+	;;
+	/* r = q * (-b) + a  */
+	xma.l f10 = f10, f9, f14
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10
+	br.ret.sptk rp
+	;;
+END(__umoddi3)
+
+/* __multi3
+   Compute the low 128 bits of a product of 128-bit multiplicands.
+   in0/in1 = low/high half of a, in2/in3 = low/high half of b; ret0/ret1 = low/high result.  */
+
+ENTRY(__multi3)
+	.regstk 4,0,0,0
+	setf.sig f6 = in1	/* f6 = aH */
+	movl r19 = 0xffffffff	/* r19 = mask for the low 32 bits */
+	setf.sig f7 = in2	/* f7 = bL */
+	;;
+	and r14 = r19, in0	/* a0 = low 32 bits of aL */
+	;;
+	setf.sig f10 = r14	/* f10 = a0 */
+	and r14 = r19, in2	/* b0 = low 32 bits of bL */
+	xmpy.l f9 = f6, f7	/* f9 = aH*bL (low 64 bits) */
+	;;
+	setf.sig f6 = r14	/* f6 = b0 */
+	shr.u r14 = in0, 32	/* a1 = high 32 bits of aL */
+	;;
+	setf.sig f7 = r14	/* f7 = a1 */
+	shr.u r14 = in2, 32	/* b1 = high 32 bits of bL */
+	;;
+	setf.sig f8 = r14	/* f8 = b1 */
+	xmpy.l f11 = f10, f6	/* f11 = a0*b0 */
+	xmpy.l f6 = f7, f6	/* f6 = a1*b0 */
+	;;
+	getf.sig r16 = f11	/* r16 = a0*b0 */
+	xmpy.l f7 = f7, f8	/* f7 = a1*b1 */
+	;;
+	shr.u r14 = r16, 32	/* r14 = high 32 bits of a0*b0 */
+	and r16 = r19, r16	/* r16 = low 32 bits of a0*b0 */
+	getf.sig r17 = f6	/* r17 = a1*b0 */
+	setf.sig f6 = in0	/* f6 = aL */
+	;;
+	setf.sig f11 = r14	/* f11 = high 32 bits of a0*b0 */
+	getf.sig r21 = f7	/* r21 = a1*b1 */
+	setf.sig f7 = in3	/* f7 = bH */
+	;;
+	xma.l f11 = f10, f8, f11	/* f11 = a0*b1 + hi32(a0*b0) */
+	xma.l f6 = f6, f7, f9	/* f6 = aL*bH + aH*bL (low 64 bits) */
+	;;
+	getf.sig r18 = f11
+	;;
+	add r18 = r18, r17	/* mid = a0*b1 + hi32(a0*b0) + a1*b0 (may wrap) */
+	;;
+	and r15 = r19, r18	/* r15 = low 32 bits of mid */
+	cmp.ltu p7, p6 = r18, r17	/* p7 = the add above wrapped (carry out) */
+	;;
+	getf.sig r22 = f6	/* r22 = cross terms aL*bH + aH*bL */
+(p7)	adds r14 = 1, r19	/* r14 = 0x100000000: the carry's weight in the high word */
+	;;
+(p7)	add r21 = r21, r14	/* fold the carry into a1*b1 */
+	shr.u r14 = r18, 32	/* r14 = high 32 bits of mid */
+	shl r15 = r15, 32	/* move low 32 bits of mid into bits 32..63 */
+	;;
+	add r20 = r21, r14	/* high word of aL*bL */
+	;;
+	add ret0 = r15, r16	/* low 64 bits of the product */
+	add ret1 = r22, r20	/* high 64 bits: high word of aL*bL plus cross terms */
+	br.ret.sptk rp
+	;;
+END(__multi3)

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] ia64 libgcc routines for binary compatibility
  2002-05-02 10:08 [PATCH] ia64 libgcc routines for binary compatibility Jakub Jelinek
@ 2002-05-03  0:14 ` Ulrich Drepper
  0 siblings, 0 replies; 2+ messages in thread
From: Ulrich Drepper @ 2002-05-03  0:14 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Glibc hackers

[-- Attachment #1: Type: text/plain, Size: 954 bytes --]

On Thu, 2002-05-02 at 10:08, Jakub Jelinek wrote:

> All of these are used internally by glibc anyway, so IMHO just exporting
> them normally will make various binaries/libraries tiny bit smaller,
> but if you prefer to export them as @GLIBC_2.0 symbols only (ie. nobody
> will be able to link against them), so be it.

I really hate doing this.  It never should have happened.  But the least
we can do is to limit the damage.  I don't want to track libgcc and it
should also be possible to build a legacy-free system.  This is why I've
added patches to export the code via version GLIBC_2.2.  It's untested. 
If it does not work for you (or does work) let me know so I can carry
the change over to the 2.2 branch.

-- 
---------------.                          ,-.   1325 Chesapeake Terrace
Ulrich Drepper  \    ,-------------------'   \  Sunnyvale, CA 94089 USA
Red Hat          `--' drepper at redhat.com   `------------------------

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 232 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2002-05-03  7:14 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-05-02 10:08 [PATCH] ia64 libgcc routines for binary compatibility Jakub Jelinek
2002-05-03  0:14 ` Ulrich Drepper

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).