* [PATCH] Fix powl on x86_64 [BZ #258] @ 2004-07-14 18:17 Jakub Jelinek 2004-07-15 11:51 ` [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Jakub Jelinek 0 siblings, 1 reply; 5+ messages in thread From: Jakub Jelinek @ 2004-07-14 18:17 UTC (permalink / raw) To: Ulrich Drepper; +Cc: Glibc hackers Hi! i386 will need similar treatment. Not sure if fld*/f{,u}comi{,p}/j? is faster than fcompl/fnstw/testing %ah/j? or vice versa, maybe the comparisons could be coded more efficiently. 2004-07-14 Jakub Jelinek <jakub@redhat.com> [BZ #258] * math/libm-test.inc (max_value, min_value): New variables. (initialize): Initialize them. (pow_test): Add a couple of new tests. * sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Don't generate invalid exception if |y| is >= 1L<<63. If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN. Replace fldl MO(zero) with fldz and fldl MO(one) with fld1. --- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100 +++ libc/math/libm-test.inc 2004-07-14 20:09:24.475429821 +0200 @@ -169,7 +169,7 @@ static int output_points; /* Should the static int ignore_max_ulp; /* Should we ignore max_ulp? */ static FLOAT minus_zero, plus_zero; -static FLOAT plus_infty, minus_infty, nan_value; +static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value; static FLOAT max_error, real_max_error, imag_max_error; @@ -3593,6 +3593,25 @@ pow_test (void) TEST_ff_f (pow, -1, plus_infty, 1); TEST_ff_f (pow, 1, minus_infty, 1); TEST_ff_f (pow, -1, minus_infty, 1); + TEST_ff_f (pow, 1, 1, 1); + TEST_ff_f (pow, 1, -1, 1); + TEST_ff_f (pow, 1, 1.25, 1); + TEST_ff_f (pow, 1, -1.25, 1); + TEST_ff_f (pow, 1, 0x1p72L, 1); + + /* pow (x, +-0) == 1. */ + TEST_ff_f (pow, plus_infty, 0, 1); + TEST_ff_f (pow, plus_infty, minus_zero, 1); + TEST_ff_f (pow, minus_infty, 0, 1); + TEST_ff_f (pow, minus_infty, minus_zero, 1); + TEST_ff_f (pow, 32.75L, 0, 1); + TEST_ff_f (pow, 32.75L, minus_zero, 1); + TEST_ff_f (pow, -32.75L, 0, 1); + TEST_ff_f (pow, -32.75L, minus_zero, 1); + TEST_ff_f (pow, 0x1p72L, 0, 1); + TEST_ff_f (pow, 0x1p72L, minus_zero, 1); + TEST_ff_f (pow, 0x1p-72L, 0, 1); + TEST_ff_f (pow, 0x1p-72L, minus_zero, 1); TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION); TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION); @@ -3609,6 +3628,10 @@ pow_test (void) TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); + TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty); + TEST_ff_f (pow, 10, -0x1p72L, 0); + TEST_ff_f (pow, max_value, max_value, plus_infty); + TEST_ff_f (pow, 10, -max_value, 0); TEST_ff_f (pow, 0, 1, 0); TEST_ff_f (pow, 0, 11, 0); @@ -3623,6 +3646,8 @@ pow_test (void) TEST_ff_f (pow, minus_zero, 2, 0); TEST_ff_f (pow, minus_zero, 11.1L, 0); + TEST_ff_f (pow, 0, plus_infty, 0); + TEST_ff_f (pow, minus_zero, plus_infty, 0); #ifndef TEST_INLINE /* pow (x, +inf) == +inf for |x| > 1. */ @@ -3667,6 +3692,11 @@ pow_test (void) /* pow (-0, y) == +0 for y > 0 and not an odd integer. */ TEST_ff_f (pow, minus_zero, 4, 0.0); + TEST_ff_f (pow, 16, 0.25L, 2); + TEST_ff_f (pow, 0x1p64L, 0.125L, 256); + TEST_ff_f (pow, 2, 4, 16); + TEST_ff_f (pow, 256, 8, 0x1p64L); + TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L); #if defined TEST_DOUBLE || defined TEST_LDOUBLE @@ -4312,12 +4342,18 @@ initialize (void) HUGE_VALL, HUGE_VAL, HUGE_VALF); minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF, -HUGE_VALL, -HUGE_VAL, -HUGE_VALF); + max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX, + LDBL_MAX, DBL_MAX, FLT_MAX); + min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN, + LDBL_MIN, DBL_MIN, FLT_MIN); (void) &plus_zero; (void) &nan_value; (void) &minus_zero; (void) &plus_infty; (void) &minus_infty; + (void) &max_value; + (void) &min_value; /* Clear all exceptions. From now on we must not get random exceptions. */ feclearexcept (FE_ALL_EXCEPT); --- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200 +++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.717899272 +0200 @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +48,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##(%rip) @@ -87,6 +90,14 @@ ENTRY(__ieee754_powl) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fldl MO(p63) // 1L<<63 : y : x + fld %st(1) // y : 1L<<63 : y : x + fabs // |y| : 1L<<63 : y : x + fcomip %st(1), %st // 1L<<63 : y : x + fstp %st(0) // y : x + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -105,7 +116,7 @@ ENTRY(__ieee754_powl) negl %eax adcl $0, %edx negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrdl $1, %edx, %eax @@ -123,7 +134,7 @@ ENTRY(__ieee754_powl) /* y is ±NAN */ 30: fldt 8(%rsp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomip %st(1),%st // x : y je 31f fxch // y : x @@ -133,7 +144,7 @@ ENTRY(__ieee754_powl) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -148,6 +159,11 @@ ENTRY(__ieee754_powl) 7: fyl2x // log2(x) : y 8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? + je 28f fst %st(1) // y*log2(x) : y*log2(x) frndint // int(y*log2(x)) : y*log2(x) fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) @@ -158,11 +174,16 @@ ENTRY(__ieee754_powl) fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) ret +28: fstp %st(1) // y*log2(x) + fld1 // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + fstp %st(1) // 2^(y*log2(x)) + ret // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -191,7 +212,7 @@ ENTRY(__ieee754_powl) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -275,7 +296,7 @@ ENTRY(__ieee754_powl) jz 27f // jump if not odd // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -283,7 +304,7 @@ ENTRY(__ieee754_powl) 25: fstp %st(0) 26: 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -310,7 +331,7 @@ ENTRY(__ieee754_powl) 22: fstp %st(0) 23: -24: fldl MO(zero) +24: fldz ret END(__ieee754_powl) Jakub ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) 2004-07-14 18:17 [PATCH] Fix powl on x86_64 [BZ #258] Jakub Jelinek @ 2004-07-15 11:51 ` Jakub Jelinek 2004-07-16 17:09 ` Ulrich Drepper 0 siblings, 1 reply; 5+ messages in thread From: Jakub Jelinek @ 2004-07-15 11:51 UTC (permalink / raw) To: Ulrich Drepper; +Cc: Glibc hackers On Wed, Jul 14, 2004 at 06:02:03PM +0200, Jakub Jelinek wrote: > i386 will need similar treatment. > Not sure if fld*/f{,u}comi{,p}/j? is faster than fcompl/fnstw/testing %ah/j? > or vice versa, maybe the comparisons could be coded more efficiently. Here is an updated version which fixes i386 too. I have done also some benchmarking and e.g. on P3 pow{f,,l} is now from 22 ticks slower to 106 ticks faster depending on input, on x86_64 (Opteron) I saw slowdowns at most 16 ticks for powl (powf and pow aren't done in assembly, so there is no change for x86_64), on Opteron -m32 from 10 ticks slower to 29 ticks faster. But more importantly it is correct. 2004-07-15 Jakub Jelinek <jakub@redhat.com> [BZ #258] * math/libm-test.inc (max_value, min_value): New variables. (initialize): Initialize them. (pow_test): Add a couple of new tests. * sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid exception if |y| >= 1U<<31. Replace fldl MO(zero) with fldz and fldl MO(one) with fld1. * sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid exception if |y| >= 1L<<63. Replace fldl MO(zero) with fldz and fldl MO(one) with fld1. * sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise. If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN. * sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise. --- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100 +++ libc/math/libm-test.inc 2004-07-15 12:34:17.395663150 +0200 @@ -169,7 +169,7 @@ static int output_points; /* Should the static int ignore_max_ulp; /* Should we ignore max_ulp? */ static FLOAT minus_zero, plus_zero; -static FLOAT plus_infty, minus_infty, nan_value; +static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value; static FLOAT max_error, real_max_error, imag_max_error; @@ -3593,6 +3593,28 @@ pow_test (void) TEST_ff_f (pow, -1, plus_infty, 1); TEST_ff_f (pow, 1, minus_infty, 1); TEST_ff_f (pow, -1, minus_infty, 1); + TEST_ff_f (pow, 1, 1, 1); + TEST_ff_f (pow, 1, -1, 1); + TEST_ff_f (pow, 1, 1.25, 1); + TEST_ff_f (pow, 1, -1.25, 1); + TEST_ff_f (pow, 1, 0x1p62L, 1); + TEST_ff_f (pow, 1, 0x1p63L, 1); + TEST_ff_f (pow, 1, 0x1p64L, 1); + TEST_ff_f (pow, 1, 0x1p72L, 1); + + /* pow (x, +-0) == 1. */ + TEST_ff_f (pow, plus_infty, 0, 1); + TEST_ff_f (pow, plus_infty, minus_zero, 1); + TEST_ff_f (pow, minus_infty, 0, 1); + TEST_ff_f (pow, minus_infty, minus_zero, 1); + TEST_ff_f (pow, 32.75L, 0, 1); + TEST_ff_f (pow, 32.75L, minus_zero, 1); + TEST_ff_f (pow, -32.75L, 0, 1); + TEST_ff_f (pow, -32.75L, minus_zero, 1); + TEST_ff_f (pow, 0x1p72L, 0, 1); + TEST_ff_f (pow, 0x1p72L, minus_zero, 1); + TEST_ff_f (pow, 0x1p-72L, 0, 1); + TEST_ff_f (pow, 0x1p-72L, minus_zero, 1); TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION); TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION); @@ -3609,6 +3631,10 @@ pow_test (void) TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); + TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty); + TEST_ff_f (pow, 10, -0x1p72L, 0); + TEST_ff_f (pow, max_value, max_value, plus_infty); + TEST_ff_f (pow, 10, -max_value, 0); TEST_ff_f (pow, 0, 1, 0); TEST_ff_f (pow, 0, 11, 0); @@ -3623,6 +3649,8 @@ pow_test (void) TEST_ff_f (pow, minus_zero, 2, 0); TEST_ff_f (pow, minus_zero, 11.1L, 0); + TEST_ff_f (pow, 0, plus_infty, 0); + TEST_ff_f (pow, minus_zero, plus_infty, 0); #ifndef TEST_INLINE /* pow (x, +inf) == +inf for |x| > 1. */ @@ -3667,6 +3695,11 @@ pow_test (void) /* pow (-0, y) == +0 for y > 0 and not an odd integer. */ TEST_ff_f (pow, minus_zero, 4, 0.0); + TEST_ff_f (pow, 16, 0.25L, 2); + TEST_ff_f (pow, 0x1p64L, 0.125L, 256); + TEST_ff_f (pow, 2, 4, 16); + TEST_ff_f (pow, 256, 8, 0x1p64L); + TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L); #if defined TEST_DOUBLE || defined TEST_LDOUBLE @@ -4312,12 +4345,18 @@ initialize (void) HUGE_VALL, HUGE_VAL, HUGE_VALF); minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF, -HUGE_VALL, -HUGE_VAL, -HUGE_VALF); + max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX, + LDBL_MAX, DBL_MAX, FLT_MAX); + min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN, + LDBL_MIN, DBL_MIN, FLT_MIN); (void) &plus_zero; (void) &nan_value; (void) &minus_zero; (void) &plus_infty; (void) &minus_infty; + (void) &max_value; + (void) &min_value; /* Clear all exceptions. From now on we must not get random exceptions. */ feclearexcept (FE_ALL_EXCEPT); --- libc/sysdeps/i386/fpu/e_powf.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_powf.S 2004-07-15 13:23:15.977438871 +0200 @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1999, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +48,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p31,@object) +p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41 + ASM_SIZE_DIRECTIVE(p31) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +99,14 @@ ENTRY(__ieee754_powf) fxch // y : x + /* fistpl raises invalid exception for |y| >= 1L<<31. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p31) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -113,7 +124,7 @@ ENTRY(__ieee754_powf) jns 4f // y >= 0, jump fdivrl MO(one) // 1/x (now referred to as x) negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrl $1, %edx @@ -129,7 +140,7 @@ ENTRY(__ieee754_powf) /* y is ±NAN */ 30: flds 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomp %st(1) // x : y fnstsw sahf @@ -141,7 +152,7 @@ ENTRY(__ieee754_powf) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -171,7 +182,7 @@ ENTRY(__ieee754_powf) // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -195,7 +206,7 @@ ENTRY(__ieee754_powf) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -276,7 +287,7 @@ ENTRY(__ieee754_powf) jbe 27f // does not fit in mantissa bits // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -284,7 +295,7 @@ ENTRY(__ieee754_powf) 25: fstp %st(0) 26: addl $4, %esp 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -314,7 +325,7 @@ ENTRY(__ieee754_powf) 22: fstp %st(0) 23: addl $4, %esp // Don't use pop. -24: fldl MO(zero) +24: fldz ret END(__ieee754_powf) --- libc/sysdeps/i386/fpu/e_powl.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_powl.S 2004-07-15 12:56:07.678735978 +0200 @@ -1,5 +1,6 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +49,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +100,14 @@ ENTRY(__ieee754_powl) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -116,7 +128,7 @@ ENTRY(__ieee754_powl) negl %eax adcl $0, %edx negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrdl $1, %edx, %eax @@ -134,7 +146,7 @@ ENTRY(__ieee754_powl) /* y is ±NAN */ 30: fldt 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomp %st(1) // x : y fnstsw sahf @@ -146,7 +158,7 @@ ENTRY(__ieee754_powl) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -161,6 +173,11 @@ ENTRY(__ieee754_powl) 7: fyl2x // log2(x) : y 8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? + je 28f fst %st(1) // y*log2(x) : y*log2(x) frndint // int(y*log2(x)) : y*log2(x) fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) @@ -172,11 +189,17 @@ ENTRY(__ieee754_powl) fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) ret +28: fstp %st(1) // y*log2(x) + fld1 // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + addl $8, %esp + fstp %st(1) // 2^(y*log2(x)) + ret // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -200,7 +223,7 @@ ENTRY(__ieee754_powl) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -273,7 +296,7 @@ ENTRY(__ieee754_powl) jz 27f // jump if not odd // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -281,7 +304,7 @@ ENTRY(__ieee754_powl) 25: fstp %st(0) 26: addl $8, %esp 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -309,7 +332,7 @@ ENTRY(__ieee754_powl) 22: fstp %st(0) 23: addl $8, %esp // Don't use 2 x pop -24: fldl MO(zero) +24: fldz ret END(__ieee754_powl) --- libc/sysdeps/i386/fpu/e_pow.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_pow.S 2004-07-15 13:05:41.601640612 +0200 @@ -1,5 +1,6 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +49,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +100,14 @@ ENTRY(__ieee754_pow) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -116,7 +128,7 @@ ENTRY(__ieee754_pow) negl %eax adcl $0, %edx negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrdl $1, %edx, %eax @@ -134,7 +146,7 @@ ENTRY(__ieee754_pow) /* y is ±NAN */ 30: fldl 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomp %st(1) // x : y fnstsw sahf @@ -146,7 +158,7 @@ ENTRY(__ieee754_pow) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -176,7 +188,7 @@ ENTRY(__ieee754_pow) // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -200,7 +212,7 @@ ENTRY(__ieee754_pow) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -283,7 +295,7 @@ ENTRY(__ieee754_pow) jbe 27f // does not fit in mantissa bits // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -291,7 +303,7 @@ ENTRY(__ieee754_pow) 25: fstp %st(0) 26: addl $8, %esp 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -322,7 +334,7 @@ ENTRY(__ieee754_pow) 22: fstp %st(0) 23: addl $8, %esp // Don't use 2 x pop -24: fldl MO(zero) +24: fldz ret END(__ieee754_pow) --- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200 +++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.000000000 +0200 @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +48,10 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: + .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##(%rip) @@ -87,6 +91,14 @@ ENTRY(__ieee754_powl) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fldl MO(p63) // 1L<<63 : y : x + fld %st(1) // y : 1L<<63 : y : x + fabs // |y| : 1L<<63 : y : x + fcomip %st(1), %st // 1L<<63 : y : x + fstp %st(0) // y : x + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -105,7 +117,7 @@ ENTRY(__ieee754_powl) negl %eax adcl $0, %edx negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrdl $1, %edx, %eax @@ -123,7 +135,7 @@ ENTRY(__ieee754_powl) /* y is ±NAN */ 30: fldt 8(%rsp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomip %st(1),%st // x : y je 31f fxch // y : x @@ -133,7 +145,7 @@ ENTRY(__ieee754_powl) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -148,6 +160,11 @@ ENTRY(__ieee754_powl) 7: fyl2x // log2(x) : y 8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? + je 28f fst %st(1) // y*log2(x) : y*log2(x) frndint // int(y*log2(x)) : y*log2(x) fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) @@ -158,11 +175,16 @@ ENTRY(__ieee754_powl) fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) ret +28: fstp %st(1) // y*log2(x) + fld1 // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + fstp %st(1) // 2^(y*log2(x)) + ret // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -191,7 +213,7 @@ ENTRY(__ieee754_powl) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -275,7 +297,7 @@ ENTRY(__ieee754_powl) jz 27f // jump if not odd // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -283,7 +305,7 @@ ENTRY(__ieee754_powl) 25: fstp %st(0) 26: 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -310,7 +332,7 @@ ENTRY(__ieee754_powl) 22: fstp %st(0) 23: -24: fldl MO(zero) +24: fldz ret END(__ieee754_powl) Jakub ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) 2004-07-15 11:51 ` [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Jakub Jelinek @ 2004-07-16 17:09 ` Ulrich Drepper 2004-07-19 10:19 ` Jakub Jelinek 0 siblings, 1 reply; 5+ messages in thread From: Ulrich Drepper @ 2004-07-16 17:09 UTC (permalink / raw) To: Jakub Jelinek; +Cc: Glibc hackers Jakub Jelinek wrote: > -4: fldl MO(one) // 1 : x > +4: fld1 // 1 : x I deliberately avoided the use of fld1 since all my measurements showed it is slower. Please some some measurements of your own. As far as I remember the only time when it paid off to use fld1 is when the fld was the only memory operation and therefore it could be avoided to load the PIC register. -- ⧠Ulrich Drepper ⧠Red Hat, Inc. ⧠444 Castro St ⧠Mountain View, CA â ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) 2004-07-16 17:09 ` Ulrich Drepper @ 2004-07-19 10:19 ` Jakub Jelinek 2004-07-20 7:07 ` Ulrich Drepper 0 siblings, 1 reply; 5+ messages in thread From: Jakub Jelinek @ 2004-07-19 10:19 UTC (permalink / raw) To: Ulrich Drepper; +Cc: Glibc hackers On Fri, Jul 16, 2004 at 10:07:16AM -0700, Ulrich Drepper wrote: > Jakub Jelinek wrote: > > > -4: fldl MO(one) // 1 : x > > +4: fld1 // 1 : x > > I deliberately avoided the use of fld1 since all my measurements showed > it is slower. Please some some measurements of your own. As far as I > remember the only time when it paid off to use fld1 is when the fld was > the only memory operation and therefore it could be avoided to load the > PIC register. If there are no other memory references, it is surely noticeably faster, otherwise I see it the same speed (but the instruction is shorter). With following proglet I get 10 cycles in both cases on Opteron, 42 on P3, 92 on P4. AFAIK GCC uses fld1 and fldz in the code it generates in all cases. #include <stdio.h> #include <math.h> extern long double foo1 (void); extern long double foo2 (void); asm (".balign 16; foo1: .section .rodata; .balign 16; mzero: .byte 0, 0, 0, 0, 0, 0, 0, 0x80; .previous; fldz; faddl mzero; fxam; fnstsw; ret;"); asm (".balign 16; foo2: .section .rodata; .balign 16; zero: .double 0.0; .previous; fldl zero; faddl mzero; fxam; fnstsw; ret;"); int main (void) { unsigned long long st, en, m; int i; m = -1ULL; for (i = 0; i < 1000; ++i) { asm volatile ("rdtsc" : "=A" (st)); foo1 (); asm volatile ("rdtsc" : "=A" (en)); en -= st; if (en < m) m = en; } printf ("%lld\n", m); m = -1ULL; for (i = 0; i < 1000; ++i) { asm volatile ("rdtsc" : "=A" (st)); foo2 (); asm volatile ("rdtsc" : "=A" (en)); en -= st; if (en < m) m = en; } printf ("%lld\n", m); return 0; } Anyway, below is a patch variant without fldz and fld1, those changes weren't fixing anything and thus can be a separate change which can be argued about independently. 2004-07-19 Jakub Jelinek <jakub@redhat.com> [BZ #258] * math/libm-test.inc (max_value, min_value): New variables. (initialize): Initialize them. (pow_test): Add a couple of new tests. * sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid exception if |y| >= 1U<<31. * sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid exception if |y| >= 1L<<63. * sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise. If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN. * sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise. --- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100 +++ libc/math/libm-test.inc 2004-07-15 12:34:17.395663150 +0200 @@ -169,7 +169,7 @@ static int output_points; /* Should the static int ignore_max_ulp; /* Should we ignore max_ulp? */ static FLOAT minus_zero, plus_zero; -static FLOAT plus_infty, minus_infty, nan_value; +static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value; static FLOAT max_error, real_max_error, imag_max_error; @@ -3593,6 +3593,28 @@ pow_test (void) TEST_ff_f (pow, -1, plus_infty, 1); TEST_ff_f (pow, 1, minus_infty, 1); TEST_ff_f (pow, -1, minus_infty, 1); + TEST_ff_f (pow, 1, 1, 1); + TEST_ff_f (pow, 1, -1, 1); + TEST_ff_f (pow, 1, 1.25, 1); + TEST_ff_f (pow, 1, -1.25, 1); + TEST_ff_f (pow, 1, 0x1p62L, 1); + TEST_ff_f (pow, 1, 0x1p63L, 1); + TEST_ff_f (pow, 1, 0x1p64L, 1); + TEST_ff_f (pow, 1, 0x1p72L, 1); + + /* pow (x, +-0) == 1. */ + TEST_ff_f (pow, plus_infty, 0, 1); + TEST_ff_f (pow, plus_infty, minus_zero, 1); + TEST_ff_f (pow, minus_infty, 0, 1); + TEST_ff_f (pow, minus_infty, minus_zero, 1); + TEST_ff_f (pow, 32.75L, 0, 1); + TEST_ff_f (pow, 32.75L, minus_zero, 1); + TEST_ff_f (pow, -32.75L, 0, 1); + TEST_ff_f (pow, -32.75L, minus_zero, 1); + TEST_ff_f (pow, 0x1p72L, 0, 1); + TEST_ff_f (pow, 0x1p72L, minus_zero, 1); + TEST_ff_f (pow, 0x1p-72L, 0, 1); + TEST_ff_f (pow, 0x1p-72L, minus_zero, 1); TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION); TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION); @@ -3609,6 +3631,10 @@ pow_test (void) TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); + TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty); + TEST_ff_f (pow, 10, -0x1p72L, 0); + TEST_ff_f (pow, max_value, max_value, plus_infty); + TEST_ff_f (pow, 10, -max_value, 0); TEST_ff_f (pow, 0, 1, 0); TEST_ff_f (pow, 0, 11, 0); @@ -3623,6 +3649,8 @@ pow_test (void) TEST_ff_f (pow, minus_zero, 2, 0); TEST_ff_f (pow, minus_zero, 11.1L, 0); + TEST_ff_f (pow, 0, plus_infty, 0); + TEST_ff_f (pow, minus_zero, plus_infty, 0); #ifndef TEST_INLINE /* pow (x, +inf) == +inf for |x| > 1. */ @@ -3667,6 +3695,11 @@ pow_test (void) /* pow (-0, y) == +0 for y > 0 and not an odd integer. */ TEST_ff_f (pow, minus_zero, 4, 0.0); + TEST_ff_f (pow, 16, 0.25L, 2); + TEST_ff_f (pow, 0x1p64L, 0.125L, 256); + TEST_ff_f (pow, 2, 4, 16); + TEST_ff_f (pow, 256, 8, 0x1p64L); + TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L); #if defined TEST_DOUBLE || defined TEST_LDOUBLE @@ -4312,12 +4345,18 @@ initialize (void) HUGE_VALL, HUGE_VAL, HUGE_VALF); minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF, -HUGE_VALL, -HUGE_VAL, -HUGE_VALF); + max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX, + LDBL_MAX, DBL_MAX, FLT_MAX); + min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN, + LDBL_MIN, DBL_MIN, FLT_MIN); (void) &plus_zero; (void) &nan_value; (void) &minus_zero; (void) &plus_infty; (void) &minus_infty; + (void) &max_value; + (void) &min_value; /* Clear all exceptions. From now on we must not get random exceptions. */ feclearexcept (FE_ALL_EXCEPT); --- libc/sysdeps/i386/fpu/e_powf.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_powf.S 2004-07-15 13:23:15.977438871 +0200 @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1999, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +48,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p31,@object) +p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41 + ASM_SIZE_DIRECTIVE(p31) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +99,14 @@ ENTRY(__ieee754_powf) fxch // y : x + /* fistpl raises invalid exception for |y| >= 1L<<31. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p31) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x --- libc/sysdeps/i386/fpu/e_powl.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_powl.S 2004-07-15 12:56:07.678735978 +0200 @@ -1,5 +1,6 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +49,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +100,14 @@ ENTRY(__ieee754_powl) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -161,6 +173,11 @@ ENTRY(__ieee754_powl) 7: fyl2x // log2(x) : y 8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? + je 28f fst %st(1) // y*log2(x) : y*log2(x) frndint // int(y*log2(x)) : y*log2(x) fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) @@ -172,6 +189,12 @@ ENTRY(__ieee754_powl) fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) ret +28: fstp %st(1) // y*log2(x) + fldl MO(one) // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + addl $8, %esp + fstp %st(1) // 2^(y*log2(x)) + ret // pow(x,±0) = 1 .align ALIGNARG(4) --- libc/sysdeps/i386/fpu/e_pow.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_pow.S 2004-07-15 13:05:41.601640612 +0200 @@ -1,5 +1,6 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +49,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +100,14 @@ ENTRY(__ieee754_pow) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x --- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200 +++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.000000000 +0200 @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. @@ -48,6 +48,10 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: + .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##(%rip) @@ -87,6 +91,14 @@ ENTRY(__ieee754_powl) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fldl MO(p63) // 1L<<63 : y : x + fld %st(1) // y : 1L<<63 : y : x + fabs // |y| : 1L<<63 : y : x + fcomip %st(1), %st // 1L<<63 : y : x + fstp %st(0) // y : x + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -148,6 +160,11 @@ ENTRY(__ieee754_powl) 7: fyl2x // log2(x) : y 8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? + je 28f fst %st(1) // y*log2(x) : y*log2(x) frndint // int(y*log2(x)) : y*log2(x) fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) @@ -158,6 +175,11 @@ ENTRY(__ieee754_powl) fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) ret +28: fstp %st(1) // y*log2(x) + fldl MO(one) // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + fstp %st(1) // 2^(y*log2(x)) + ret // pow(x,±0) = 1 .align ALIGNARG(4) Jakub ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) 2004-07-19 10:19 ` Jakub Jelinek @ 2004-07-20 7:07 ` Ulrich Drepper 0 siblings, 0 replies; 5+ messages in thread From: Ulrich Drepper @ 2004-07-20 7:07 UTC (permalink / raw) To: Jakub Jelinek; +Cc: Glibc hackers Applied. And it might very well be that fld1 nowadays is as fast as the memory load. When I wrote the code I used an i386+i387. -- ⧠Ulrich Drepper ⧠Red Hat, Inc. ⧠444 Castro St ⧠Mountain View, CA â ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2004-07-20 7:07 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2004-07-14 18:17 [PATCH] Fix powl on x86_64 [BZ #258] Jakub Jelinek 2004-07-15 11:51 ` [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Jakub Jelinek 2004-07-16 17:09 ` Ulrich Drepper 2004-07-19 10:19 ` Jakub Jelinek 2004-07-20 7:07 ` Ulrich Drepper
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).