* [PATCH] Fix powl on x86_64 [BZ #258]
@ 2004-07-14 18:17 Jakub Jelinek
2004-07-15 11:51 ` [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Jakub Jelinek
0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2004-07-14 18:17 UTC (permalink / raw)
To: Ulrich Drepper; +Cc: Glibc hackers
Hi!
i386 will need similar treatment.
Not sure if fld*/f{,u}comi{,p}/j? is faster than fcompl/fnstw/testing %ah/j?
or vice versa, maybe the comparisons could be coded more efficiently.
2004-07-14 Jakub Jelinek <jakub@redhat.com>
[BZ #258]
* math/libm-test.inc (max_value, min_value): New variables.
(initialize): Initialize them.
(pow_test): Add a couple of new tests.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Don't generate
invalid exception if |y| is >= 1L<<63. If y*log2(x) overflows
to +-inf, return still +inf/+0 instead of NaN. Replace
fldl MO(zero) with fldz and fldl MO(one) with fld1.
--- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100
+++ libc/math/libm-test.inc 2004-07-14 20:09:24.475429821 +0200
@@ -169,7 +169,7 @@ static int output_points; /* Should the
static int ignore_max_ulp; /* Should we ignore max_ulp? */
static FLOAT minus_zero, plus_zero;
-static FLOAT plus_infty, minus_infty, nan_value;
+static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value;
static FLOAT max_error, real_max_error, imag_max_error;
@@ -3593,6 +3593,25 @@ pow_test (void)
TEST_ff_f (pow, -1, plus_infty, 1);
TEST_ff_f (pow, 1, minus_infty, 1);
TEST_ff_f (pow, -1, minus_infty, 1);
+ TEST_ff_f (pow, 1, 1, 1);
+ TEST_ff_f (pow, 1, -1, 1);
+ TEST_ff_f (pow, 1, 1.25, 1);
+ TEST_ff_f (pow, 1, -1.25, 1);
+ TEST_ff_f (pow, 1, 0x1p72L, 1);
+
+ /* pow (x, +-0) == 1. */
+ TEST_ff_f (pow, plus_infty, 0, 1);
+ TEST_ff_f (pow, plus_infty, minus_zero, 1);
+ TEST_ff_f (pow, minus_infty, 0, 1);
+ TEST_ff_f (pow, minus_infty, minus_zero, 1);
+ TEST_ff_f (pow, 32.75L, 0, 1);
+ TEST_ff_f (pow, 32.75L, minus_zero, 1);
+ TEST_ff_f (pow, -32.75L, 0, 1);
+ TEST_ff_f (pow, -32.75L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p72L, 0, 1);
+ TEST_ff_f (pow, 0x1p72L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p-72L, 0, 1);
+ TEST_ff_f (pow, 0x1p-72L, minus_zero, 1);
TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION);
TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
@@ -3609,6 +3628,10 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+ TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty);
+ TEST_ff_f (pow, 10, -0x1p72L, 0);
+ TEST_ff_f (pow, max_value, max_value, plus_infty);
+ TEST_ff_f (pow, 10, -max_value, 0);
TEST_ff_f (pow, 0, 1, 0);
TEST_ff_f (pow, 0, 11, 0);
@@ -3623,6 +3646,8 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, 2, 0);
TEST_ff_f (pow, minus_zero, 11.1L, 0);
+ TEST_ff_f (pow, 0, plus_infty, 0);
+ TEST_ff_f (pow, minus_zero, plus_infty, 0);
#ifndef TEST_INLINE
/* pow (x, +inf) == +inf for |x| > 1. */
@@ -3667,6 +3692,11 @@ pow_test (void)
/* pow (-0, y) == +0 for y > 0 and not an odd integer. */
TEST_ff_f (pow, minus_zero, 4, 0.0);
+ TEST_ff_f (pow, 16, 0.25L, 2);
+ TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
+ TEST_ff_f (pow, 2, 4, 16);
+ TEST_ff_f (pow, 256, 8, 0x1p64L);
+
TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L);
#if defined TEST_DOUBLE || defined TEST_LDOUBLE
@@ -4312,12 +4342,18 @@ initialize (void)
HUGE_VALL, HUGE_VAL, HUGE_VALF);
minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF,
-HUGE_VALL, -HUGE_VAL, -HUGE_VALF);
+ max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+ LDBL_MAX, DBL_MAX, FLT_MAX);
+ min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN,
+ LDBL_MIN, DBL_MIN, FLT_MIN);
(void) &plus_zero;
(void) &nan_value;
(void) &minus_zero;
(void) &plus_infty;
(void) &minus_infty;
+ (void) &max_value;
+ (void) &min_value;
/* Clear all exceptions. From now on we must not get random exceptions. */
feclearexcept (FE_ALL_EXCEPT);
--- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200
+++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.717899272 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##(%rip)
@@ -87,6 +90,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fldl MO(p63) // 1L<<63 : y : x
+ fld %st(1) // y : 1L<<63 : y : x
+ fabs // |y| : 1L<<63 : y : x
+ fcomip %st(1), %st // 1L<<63 : y : x
+ fstp %st(0) // y : x
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -105,7 +116,7 @@ ENTRY(__ieee754_powl)
negl %eax
adcl $0, %edx
negl %edx
-4: fldl MO(one) // 1 : x
+4: fld1 // 1 : x
fxch
6: shrdl $1, %edx, %eax
@@ -123,7 +134,7 @@ ENTRY(__ieee754_powl)
/* y is ±NAN */
30: fldt 8(%rsp) // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fucomip %st(1),%st // x : y
je 31f
fxch // y : x
@@ -133,7 +144,7 @@ ENTRY(__ieee754_powl)
.align ALIGNARG(4)
2: /* y is a real number. */
fxch // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fld %st(1) // x : 1.0 : x : y
fsub %st(1) // x-1 : 1.0 : x : y
fabs // |x-1| : 1.0 : x : y
@@ -148,6 +159,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -158,11 +174,16 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fld1 // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
11: fstp %st(0) // pop y
- fldl MO(one)
+ fld1
ret
// y == ±inf
@@ -191,7 +212,7 @@ ENTRY(__ieee754_powl)
ret
.align ALIGNARG(4)
-14: fldl MO(one)
+14: fld1
ret
.align ALIGNARG(4)
@@ -275,7 +296,7 @@ ENTRY(__ieee754_powl)
jz 27f // jump if not odd
// It's an odd integer.
// Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
fchs
ret
@@ -283,7 +304,7 @@ ENTRY(__ieee754_powl)
25: fstp %st(0)
26:
27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
ret
@@ -310,7 +331,7 @@ ENTRY(__ieee754_powl)
22: fstp %st(0)
23:
-24: fldl MO(zero)
+24: fldz
ret
END(__ieee754_powl)
Jakub
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2)
2004-07-14 18:17 [PATCH] Fix powl on x86_64 [BZ #258] Jakub Jelinek
@ 2004-07-15 11:51 ` Jakub Jelinek
2004-07-16 17:09 ` Ulrich Drepper
0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2004-07-15 11:51 UTC (permalink / raw)
To: Ulrich Drepper; +Cc: Glibc hackers
On Wed, Jul 14, 2004 at 06:02:03PM +0200, Jakub Jelinek wrote:
> i386 will need similar treatment.
> Not sure if fld*/f{,u}comi{,p}/j? is faster than fcompl/fnstw/testing %ah/j?
> or vice versa, maybe the comparisons could be coded more efficiently.
Here is an updated version which fixes i386 too.
I have done also some benchmarking and e.g. on P3 pow{f,,l} is now from 22
ticks slower to 106 ticks faster depending on input, on x86_64 (Opteron) I saw
slowdowns at most 16 ticks for powl (powf and pow aren't done in assembly,
so there is no change for x86_64), on Opteron -m32 from 10 ticks slower to
29 ticks faster.
But more importantly it is correct.
2004-07-15 Jakub Jelinek <jakub@redhat.com>
[BZ #258]
* math/libm-test.inc (max_value, min_value): New variables.
(initialize): Initialize them.
(pow_test): Add a couple of new tests.
* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid
exception if |y| >= 1U<<31. Replace fldl MO(zero) with fldz and
fldl MO(one) with fld1.
* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid
exception if |y| >= 1L<<63. Replace fldl MO(zero) with fldz and
fldl MO(one) with fld1.
* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise.
--- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100
+++ libc/math/libm-test.inc 2004-07-15 12:34:17.395663150 +0200
@@ -169,7 +169,7 @@ static int output_points; /* Should the
static int ignore_max_ulp; /* Should we ignore max_ulp? */
static FLOAT minus_zero, plus_zero;
-static FLOAT plus_infty, minus_infty, nan_value;
+static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value;
static FLOAT max_error, real_max_error, imag_max_error;
@@ -3593,6 +3593,28 @@ pow_test (void)
TEST_ff_f (pow, -1, plus_infty, 1);
TEST_ff_f (pow, 1, minus_infty, 1);
TEST_ff_f (pow, -1, minus_infty, 1);
+ TEST_ff_f (pow, 1, 1, 1);
+ TEST_ff_f (pow, 1, -1, 1);
+ TEST_ff_f (pow, 1, 1.25, 1);
+ TEST_ff_f (pow, 1, -1.25, 1);
+ TEST_ff_f (pow, 1, 0x1p62L, 1);
+ TEST_ff_f (pow, 1, 0x1p63L, 1);
+ TEST_ff_f (pow, 1, 0x1p64L, 1);
+ TEST_ff_f (pow, 1, 0x1p72L, 1);
+
+ /* pow (x, +-0) == 1. */
+ TEST_ff_f (pow, plus_infty, 0, 1);
+ TEST_ff_f (pow, plus_infty, minus_zero, 1);
+ TEST_ff_f (pow, minus_infty, 0, 1);
+ TEST_ff_f (pow, minus_infty, minus_zero, 1);
+ TEST_ff_f (pow, 32.75L, 0, 1);
+ TEST_ff_f (pow, 32.75L, minus_zero, 1);
+ TEST_ff_f (pow, -32.75L, 0, 1);
+ TEST_ff_f (pow, -32.75L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p72L, 0, 1);
+ TEST_ff_f (pow, 0x1p72L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p-72L, 0, 1);
+ TEST_ff_f (pow, 0x1p-72L, minus_zero, 1);
TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION);
TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
@@ -3609,6 +3631,10 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+ TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty);
+ TEST_ff_f (pow, 10, -0x1p72L, 0);
+ TEST_ff_f (pow, max_value, max_value, plus_infty);
+ TEST_ff_f (pow, 10, -max_value, 0);
TEST_ff_f (pow, 0, 1, 0);
TEST_ff_f (pow, 0, 11, 0);
@@ -3623,6 +3649,8 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, 2, 0);
TEST_ff_f (pow, minus_zero, 11.1L, 0);
+ TEST_ff_f (pow, 0, plus_infty, 0);
+ TEST_ff_f (pow, minus_zero, plus_infty, 0);
#ifndef TEST_INLINE
/* pow (x, +inf) == +inf for |x| > 1. */
@@ -3667,6 +3695,11 @@ pow_test (void)
/* pow (-0, y) == +0 for y > 0 and not an odd integer. */
TEST_ff_f (pow, minus_zero, 4, 0.0);
+ TEST_ff_f (pow, 16, 0.25L, 2);
+ TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
+ TEST_ff_f (pow, 2, 4, 16);
+ TEST_ff_f (pow, 256, 8, 0x1p64L);
+
TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L);
#if defined TEST_DOUBLE || defined TEST_LDOUBLE
@@ -4312,12 +4345,18 @@ initialize (void)
HUGE_VALL, HUGE_VAL, HUGE_VALF);
minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF,
-HUGE_VALL, -HUGE_VAL, -HUGE_VALF);
+ max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+ LDBL_MAX, DBL_MAX, FLT_MAX);
+ min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN,
+ LDBL_MIN, DBL_MIN, FLT_MIN);
(void) &plus_zero;
(void) &nan_value;
(void) &minus_zero;
(void) &plus_infty;
(void) &minus_infty;
+ (void) &max_value;
+ (void) &min_value;
/* Clear all exceptions. From now on we must not get random exceptions. */
feclearexcept (FE_ALL_EXCEPT);
--- libc/sysdeps/i386/fpu/e_powf.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powf.S 2004-07-15 13:23:15.977438871 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p31,@object)
+p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41
+ ASM_SIZE_DIRECTIVE(p31)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +99,14 @@ ENTRY(__ieee754_powf)
fxch // y : x
+ /* fistpl raises invalid exception for |y| >= 1L<<31. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p31) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -113,7 +124,7 @@ ENTRY(__ieee754_powf)
jns 4f // y >= 0, jump
fdivrl MO(one) // 1/x (now referred to as x)
negl %edx
-4: fldl MO(one) // 1 : x
+4: fld1 // 1 : x
fxch
6: shrl $1, %edx
@@ -129,7 +140,7 @@ ENTRY(__ieee754_powf)
/* y is ±NAN */
30: flds 4(%esp) // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fucomp %st(1) // x : y
fnstsw
sahf
@@ -141,7 +152,7 @@ ENTRY(__ieee754_powf)
.align ALIGNARG(4)
2: /* y is a real number. */
fxch // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fld %st(1) // x : 1.0 : x : y
fsub %st(1) // x-1 : 1.0 : x : y
fabs // |x-1| : 1.0 : x : y
@@ -171,7 +182,7 @@ ENTRY(__ieee754_powf)
// pow(x,±0) = 1
.align ALIGNARG(4)
11: fstp %st(0) // pop y
- fldl MO(one)
+ fld1
ret
// y == ±inf
@@ -195,7 +206,7 @@ ENTRY(__ieee754_powf)
ret
.align ALIGNARG(4)
-14: fldl MO(one)
+14: fld1
ret
.align ALIGNARG(4)
@@ -276,7 +287,7 @@ ENTRY(__ieee754_powf)
jbe 27f // does not fit in mantissa bits
// It's an odd integer.
// Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
fchs
ret
@@ -284,7 +295,7 @@ ENTRY(__ieee754_powf)
25: fstp %st(0)
26: addl $4, %esp
27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
ret
@@ -314,7 +325,7 @@ ENTRY(__ieee754_powf)
22: fstp %st(0)
23: addl $4, %esp // Don't use pop.
-24: fldl MO(zero)
+24: fldz
ret
END(__ieee754_powf)
--- libc/sysdeps/i386/fpu/e_powl.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powl.S 2004-07-15 12:56:07.678735978 +0200
@@ -1,5 +1,6 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +49,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -116,7 +128,7 @@ ENTRY(__ieee754_powl)
negl %eax
adcl $0, %edx
negl %edx
-4: fldl MO(one) // 1 : x
+4: fld1 // 1 : x
fxch
6: shrdl $1, %edx, %eax
@@ -134,7 +146,7 @@ ENTRY(__ieee754_powl)
/* y is ±NAN */
30: fldt 4(%esp) // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fucomp %st(1) // x : y
fnstsw
sahf
@@ -146,7 +158,7 @@ ENTRY(__ieee754_powl)
.align ALIGNARG(4)
2: /* y is a real number. */
fxch // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fld %st(1) // x : 1.0 : x : y
fsub %st(1) // x-1 : 1.0 : x : y
fabs // |x-1| : 1.0 : x : y
@@ -161,6 +173,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -172,11 +189,17 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fld1 // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ addl $8, %esp
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
11: fstp %st(0) // pop y
- fldl MO(one)
+ fld1
ret
// y == ±inf
@@ -200,7 +223,7 @@ ENTRY(__ieee754_powl)
ret
.align ALIGNARG(4)
-14: fldl MO(one)
+14: fld1
ret
.align ALIGNARG(4)
@@ -273,7 +296,7 @@ ENTRY(__ieee754_powl)
jz 27f // jump if not odd
// It's an odd integer.
// Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
fchs
ret
@@ -281,7 +304,7 @@ ENTRY(__ieee754_powl)
25: fstp %st(0)
26: addl $8, %esp
27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
ret
@@ -309,7 +332,7 @@ ENTRY(__ieee754_powl)
22: fstp %st(0)
23: addl $8, %esp // Don't use 2 x pop
-24: fldl MO(zero)
+24: fldz
ret
END(__ieee754_powl)
--- libc/sysdeps/i386/fpu/e_pow.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_pow.S 2004-07-15 13:05:41.601640612 +0200
@@ -1,5 +1,6 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +49,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_pow)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -116,7 +128,7 @@ ENTRY(__ieee754_pow)
negl %eax
adcl $0, %edx
negl %edx
-4: fldl MO(one) // 1 : x
+4: fld1 // 1 : x
fxch
6: shrdl $1, %edx, %eax
@@ -134,7 +146,7 @@ ENTRY(__ieee754_pow)
/* y is ±NAN */
30: fldl 4(%esp) // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fucomp %st(1) // x : y
fnstsw
sahf
@@ -146,7 +158,7 @@ ENTRY(__ieee754_pow)
.align ALIGNARG(4)
2: /* y is a real number. */
fxch // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fld %st(1) // x : 1.0 : x : y
fsub %st(1) // x-1 : 1.0 : x : y
fabs // |x-1| : 1.0 : x : y
@@ -176,7 +188,7 @@ ENTRY(__ieee754_pow)
// pow(x,±0) = 1
.align ALIGNARG(4)
11: fstp %st(0) // pop y
- fldl MO(one)
+ fld1
ret
// y == ±inf
@@ -200,7 +212,7 @@ ENTRY(__ieee754_pow)
ret
.align ALIGNARG(4)
-14: fldl MO(one)
+14: fld1
ret
.align ALIGNARG(4)
@@ -283,7 +295,7 @@ ENTRY(__ieee754_pow)
jbe 27f // does not fit in mantissa bits
// It's an odd integer.
// Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
fchs
ret
@@ -291,7 +303,7 @@ ENTRY(__ieee754_pow)
25: fstp %st(0)
26: addl $8, %esp
27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
ret
@@ -322,7 +334,7 @@ ENTRY(__ieee754_pow)
22: fstp %st(0)
23: addl $8, %esp // Don't use 2 x pop
-24: fldl MO(zero)
+24: fldz
ret
END(__ieee754_pow)
--- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200
+++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.000000000 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,10 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63:
+ .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##(%rip)
@@ -87,6 +91,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fldl MO(p63) // 1L<<63 : y : x
+ fld %st(1) // y : 1L<<63 : y : x
+ fabs // |y| : 1L<<63 : y : x
+ fcomip %st(1), %st // 1L<<63 : y : x
+ fstp %st(0) // y : x
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -105,7 +117,7 @@ ENTRY(__ieee754_powl)
negl %eax
adcl $0, %edx
negl %edx
-4: fldl MO(one) // 1 : x
+4: fld1 // 1 : x
fxch
6: shrdl $1, %edx, %eax
@@ -123,7 +135,7 @@ ENTRY(__ieee754_powl)
/* y is ±NAN */
30: fldt 8(%rsp) // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fucomip %st(1),%st // x : y
je 31f
fxch // y : x
@@ -133,7 +145,7 @@ ENTRY(__ieee754_powl)
.align ALIGNARG(4)
2: /* y is a real number. */
fxch // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fld %st(1) // x : 1.0 : x : y
fsub %st(1) // x-1 : 1.0 : x : y
fabs // |x-1| : 1.0 : x : y
@@ -148,6 +160,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -158,11 +175,16 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fld1 // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
11: fstp %st(0) // pop y
- fldl MO(one)
+ fld1
ret
// y == ±inf
@@ -191,7 +213,7 @@ ENTRY(__ieee754_powl)
ret
.align ALIGNARG(4)
-14: fldl MO(one)
+14: fld1
ret
.align ALIGNARG(4)
@@ -275,7 +297,7 @@ ENTRY(__ieee754_powl)
jz 27f // jump if not odd
// It's an odd integer.
// Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
fchs
ret
@@ -283,7 +305,7 @@ ENTRY(__ieee754_powl)
25: fstp %st(0)
26:
27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
ret
@@ -310,7 +332,7 @@ ENTRY(__ieee754_powl)
22: fstp %st(0)
23:
-24: fldl MO(zero)
+24: fldz
ret
END(__ieee754_powl)
Jakub
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2)
2004-07-15 11:51 ` [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Jakub Jelinek
@ 2004-07-16 17:09 ` Ulrich Drepper
2004-07-19 10:19 ` Jakub Jelinek
0 siblings, 1 reply; 5+ messages in thread
From: Ulrich Drepper @ 2004-07-16 17:09 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Glibc hackers
Jakub Jelinek wrote:
> -4: fldl MO(one) // 1 : x
> +4: fld1 // 1 : x
I deliberately avoided the use of fld1 since all my measurements showed
it is slower. Please some some measurements of your own. As far as I
remember the only time when it paid off to use fld1 is when the fld was
the only memory operation and therefore it could be avoided to load the
PIC register.
--
⧠Ulrich Drepper ⧠Red Hat, Inc. ⧠444 Castro St ⧠Mountain View, CA â
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2)
2004-07-16 17:09 ` Ulrich Drepper
@ 2004-07-19 10:19 ` Jakub Jelinek
2004-07-20 7:07 ` Ulrich Drepper
0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2004-07-19 10:19 UTC (permalink / raw)
To: Ulrich Drepper; +Cc: Glibc hackers
On Fri, Jul 16, 2004 at 10:07:16AM -0700, Ulrich Drepper wrote:
> Jakub Jelinek wrote:
>
> > -4: fldl MO(one) // 1 : x
> > +4: fld1 // 1 : x
>
> I deliberately avoided the use of fld1 since all my measurements showed
> it is slower. Please some some measurements of your own. As far as I
> remember the only time when it paid off to use fld1 is when the fld was
> the only memory operation and therefore it could be avoided to load the
> PIC register.
If there are no other memory references, it is surely noticeably faster,
otherwise I see it the same speed (but the instruction is shorter).
With following proglet I get 10 cycles in both cases on Opteron,
42 on P3, 92 on P4. AFAIK GCC uses fld1 and fldz in the code it generates
in all cases.
#include <stdio.h>
#include <math.h>
extern long double foo1 (void);
extern long double foo2 (void);
asm (".balign 16; foo1: .section .rodata; .balign 16; mzero: .byte 0, 0, 0, 0, 0, 0, 0, 0x80; .previous; fldz; faddl mzero; fxam; fnstsw; ret;");
asm (".balign 16; foo2: .section .rodata; .balign 16; zero: .double 0.0; .previous; fldl zero; faddl mzero; fxam; fnstsw; ret;");
int main (void)
{
unsigned long long st, en, m;
int i;
m = -1ULL;
for (i = 0; i < 1000; ++i)
{
asm volatile ("rdtsc" : "=A" (st));
foo1 ();
asm volatile ("rdtsc" : "=A" (en));
en -= st;
if (en < m) m = en;
}
printf ("%lld\n", m);
m = -1ULL;
for (i = 0; i < 1000; ++i)
{
asm volatile ("rdtsc" : "=A" (st));
foo2 ();
asm volatile ("rdtsc" : "=A" (en));
en -= st;
if (en < m) m = en;
}
printf ("%lld\n", m);
return 0;
}
Anyway, below is a patch variant without fldz and fld1, those changes
weren't fixing anything and thus can be a separate change which can be
argued about independently.
2004-07-19 Jakub Jelinek <jakub@redhat.com>
[BZ #258]
* math/libm-test.inc (max_value, min_value): New variables.
(initialize): Initialize them.
(pow_test): Add a couple of new tests.
* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid
exception if |y| >= 1U<<31.
* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid
exception if |y| >= 1L<<63.
* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise.
--- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100
+++ libc/math/libm-test.inc 2004-07-15 12:34:17.395663150 +0200
@@ -169,7 +169,7 @@ static int output_points; /* Should the
static int ignore_max_ulp; /* Should we ignore max_ulp? */
static FLOAT minus_zero, plus_zero;
-static FLOAT plus_infty, minus_infty, nan_value;
+static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value;
static FLOAT max_error, real_max_error, imag_max_error;
@@ -3593,6 +3593,28 @@ pow_test (void)
TEST_ff_f (pow, -1, plus_infty, 1);
TEST_ff_f (pow, 1, minus_infty, 1);
TEST_ff_f (pow, -1, minus_infty, 1);
+ TEST_ff_f (pow, 1, 1, 1);
+ TEST_ff_f (pow, 1, -1, 1);
+ TEST_ff_f (pow, 1, 1.25, 1);
+ TEST_ff_f (pow, 1, -1.25, 1);
+ TEST_ff_f (pow, 1, 0x1p62L, 1);
+ TEST_ff_f (pow, 1, 0x1p63L, 1);
+ TEST_ff_f (pow, 1, 0x1p64L, 1);
+ TEST_ff_f (pow, 1, 0x1p72L, 1);
+
+ /* pow (x, +-0) == 1. */
+ TEST_ff_f (pow, plus_infty, 0, 1);
+ TEST_ff_f (pow, plus_infty, minus_zero, 1);
+ TEST_ff_f (pow, minus_infty, 0, 1);
+ TEST_ff_f (pow, minus_infty, minus_zero, 1);
+ TEST_ff_f (pow, 32.75L, 0, 1);
+ TEST_ff_f (pow, 32.75L, minus_zero, 1);
+ TEST_ff_f (pow, -32.75L, 0, 1);
+ TEST_ff_f (pow, -32.75L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p72L, 0, 1);
+ TEST_ff_f (pow, 0x1p72L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p-72L, 0, 1);
+ TEST_ff_f (pow, 0x1p-72L, minus_zero, 1);
TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION);
TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
@@ -3609,6 +3631,10 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+ TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty);
+ TEST_ff_f (pow, 10, -0x1p72L, 0);
+ TEST_ff_f (pow, max_value, max_value, plus_infty);
+ TEST_ff_f (pow, 10, -max_value, 0);
TEST_ff_f (pow, 0, 1, 0);
TEST_ff_f (pow, 0, 11, 0);
@@ -3623,6 +3649,8 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, 2, 0);
TEST_ff_f (pow, minus_zero, 11.1L, 0);
+ TEST_ff_f (pow, 0, plus_infty, 0);
+ TEST_ff_f (pow, minus_zero, plus_infty, 0);
#ifndef TEST_INLINE
/* pow (x, +inf) == +inf for |x| > 1. */
@@ -3667,6 +3695,11 @@ pow_test (void)
/* pow (-0, y) == +0 for y > 0 and not an odd integer. */
TEST_ff_f (pow, minus_zero, 4, 0.0);
+ TEST_ff_f (pow, 16, 0.25L, 2);
+ TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
+ TEST_ff_f (pow, 2, 4, 16);
+ TEST_ff_f (pow, 256, 8, 0x1p64L);
+
TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L);
#if defined TEST_DOUBLE || defined TEST_LDOUBLE
@@ -4312,12 +4345,18 @@ initialize (void)
HUGE_VALL, HUGE_VAL, HUGE_VALF);
minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF,
-HUGE_VALL, -HUGE_VAL, -HUGE_VALF);
+ max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+ LDBL_MAX, DBL_MAX, FLT_MAX);
+ min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN,
+ LDBL_MIN, DBL_MIN, FLT_MIN);
(void) &plus_zero;
(void) &nan_value;
(void) &minus_zero;
(void) &plus_infty;
(void) &minus_infty;
+ (void) &max_value;
+ (void) &min_value;
/* Clear all exceptions. From now on we must not get random exceptions. */
feclearexcept (FE_ALL_EXCEPT);
--- libc/sysdeps/i386/fpu/e_powf.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powf.S 2004-07-15 13:23:15.977438871 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p31,@object)
+p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41
+ ASM_SIZE_DIRECTIVE(p31)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +99,14 @@ ENTRY(__ieee754_powf)
fxch // y : x
+ /* fistpl raises invalid exception for |y| >= 1L<<31. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p31) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
--- libc/sysdeps/i386/fpu/e_powl.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powl.S 2004-07-15 12:56:07.678735978 +0200
@@ -1,5 +1,6 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +49,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -161,6 +173,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -172,6 +189,12 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fldl MO(one) // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ addl $8, %esp
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
--- libc/sysdeps/i386/fpu/e_pow.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_pow.S 2004-07-15 13:05:41.601640612 +0200
@@ -1,5 +1,6 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +49,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_pow)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
--- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200
+++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.000000000 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,10 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63:
+ .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##(%rip)
@@ -87,6 +91,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fldl MO(p63) // 1L<<63 : y : x
+ fld %st(1) // y : 1L<<63 : y : x
+ fabs // |y| : 1L<<63 : y : x
+ fcomip %st(1), %st // 1L<<63 : y : x
+ fstp %st(0) // y : x
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -148,6 +160,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -158,6 +175,11 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fldl MO(one) // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
Jakub
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2)
2004-07-19 10:19 ` Jakub Jelinek
@ 2004-07-20 7:07 ` Ulrich Drepper
0 siblings, 0 replies; 5+ messages in thread
From: Ulrich Drepper @ 2004-07-20 7:07 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Glibc hackers
Applied.
And it might very well be that fld1 nowadays is as fast as the memory
load. When I wrote the code I used an i386+i387.
--
⧠Ulrich Drepper ⧠Red Hat, Inc. ⧠444 Castro St ⧠Mountain View, CA â
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2004-07-20 7:07 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-07-14 18:17 [PATCH] Fix powl on x86_64 [BZ #258] Jakub Jelinek
2004-07-15 11:51 ` [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Jakub Jelinek
2004-07-16 17:09 ` Ulrich Drepper
2004-07-19 10:19 ` Jakub Jelinek
2004-07-20 7:07 ` Ulrich Drepper
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).