From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 5442 invoked by alias); 15 Jul 2004 11:51:39 -0000 Mailing-List: contact libc-hacker-help@sources.redhat.com; run by ezmlm Precedence: bulk List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-hacker-owner@sources.redhat.com Received: (qmail 5416 invoked from network); 15 Jul 2004 11:51:38 -0000 Received: from unknown (HELO sunsite.ms.mff.cuni.cz) (195.113.15.26) by sourceware.org with SMTP; 15 Jul 2004 11:51:38 -0000 Received: from sunsite.ms.mff.cuni.cz (sunsite.mff.cuni.cz [127.0.0.1]) by sunsite.ms.mff.cuni.cz (8.12.8/8.12.8) with ESMTP id i6F9Zu3j010500; Thu, 15 Jul 2004 11:35:56 +0200 Received: (from jakub@localhost) by sunsite.ms.mff.cuni.cz (8.12.8/8.12.8/Submit) id i6F9Ztl7010495; Thu, 15 Jul 2004 11:35:55 +0200 Date: Thu, 15 Jul 2004 11:51:00 -0000 From: Jakub Jelinek To: Ulrich Drepper Cc: Glibc hackers Subject: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Message-ID: <20040715093555.GA9816@sunsite.ms.mff.cuni.cz> Reply-To: Jakub Jelinek References: <20040714160203.GQ5191@sunsite.ms.mff.cuni.cz> Mime-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline Content-Transfer-Encoding: 8bit In-Reply-To: <20040714160203.GQ5191@sunsite.ms.mff.cuni.cz> User-Agent: Mutt/1.4.1i X-SW-Source: 2004-07/txt/msg00030.txt.bz2 On Wed, Jul 14, 2004 at 06:02:03PM +0200, Jakub Jelinek wrote: > i386 will need similar treatment. > Not sure if fld*/f{,u}comi{,p}/j? is faster than fcompl/fnstw/testing %ah/j? > or vice versa, maybe the comparisons could be coded more efficiently. Here is an updated version which fixes i386 too. I have done also some benchmarking and e.g. on P3 pow{f,,l} is now from 22 ticks slower to 106 ticks faster depending on input, on x86_64 (Opteron) I saw slowdowns at most 16 ticks for powl (powf and pow aren't done in assembly, so there is no change for x86_64), on Opteron -m32 from 10 ticks slower to 29 ticks faster. But more importantly it is correct. 2004-07-15 Jakub Jelinek [BZ #258] * math/libm-test.inc (max_value, min_value): New variables. (initialize): Initialize them. (pow_test): Add a couple of new tests. * sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid exception if |y| >= 1U<<31. Replace fldl MO(zero) with fldz and fldl MO(one) with fld1. * sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid exception if |y| >= 1L<<63. Replace fldl MO(zero) with fldz and fldl MO(one) with fld1. * sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise. If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN. * sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise. --- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100 +++ libc/math/libm-test.inc 2004-07-15 12:34:17.395663150 +0200 @@ -169,7 +169,7 @@ static int output_points; /* Should the static int ignore_max_ulp; /* Should we ignore max_ulp? */ static FLOAT minus_zero, plus_zero; -static FLOAT plus_infty, minus_infty, nan_value; +static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value; static FLOAT max_error, real_max_error, imag_max_error; @@ -3593,6 +3593,28 @@ pow_test (void) TEST_ff_f (pow, -1, plus_infty, 1); TEST_ff_f (pow, 1, minus_infty, 1); TEST_ff_f (pow, -1, minus_infty, 1); + TEST_ff_f (pow, 1, 1, 1); + TEST_ff_f (pow, 1, -1, 1); + TEST_ff_f (pow, 1, 1.25, 1); + TEST_ff_f (pow, 1, -1.25, 1); + TEST_ff_f (pow, 1, 0x1p62L, 1); + TEST_ff_f (pow, 1, 0x1p63L, 1); + TEST_ff_f (pow, 1, 0x1p64L, 1); + TEST_ff_f (pow, 1, 0x1p72L, 1); + + /* pow (x, +-0) == 1. */ + TEST_ff_f (pow, plus_infty, 0, 1); + TEST_ff_f (pow, plus_infty, minus_zero, 1); + TEST_ff_f (pow, minus_infty, 0, 1); + TEST_ff_f (pow, minus_infty, minus_zero, 1); + TEST_ff_f (pow, 32.75L, 0, 1); + TEST_ff_f (pow, 32.75L, minus_zero, 1); + TEST_ff_f (pow, -32.75L, 0, 1); + TEST_ff_f (pow, -32.75L, minus_zero, 1); + TEST_ff_f (pow, 0x1p72L, 0, 1); + TEST_ff_f (pow, 0x1p72L, minus_zero, 1); + TEST_ff_f (pow, 0x1p-72L, 0, 1); + TEST_ff_f (pow, 0x1p-72L, minus_zero, 1); TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION); TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION); @@ -3609,6 +3631,10 @@ pow_test (void) TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION); + TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty); + TEST_ff_f (pow, 10, -0x1p72L, 0); + TEST_ff_f (pow, max_value, max_value, plus_infty); + TEST_ff_f (pow, 10, -max_value, 0); TEST_ff_f (pow, 0, 1, 0); TEST_ff_f (pow, 0, 11, 0); @@ -3623,6 +3649,8 @@ pow_test (void) TEST_ff_f (pow, minus_zero, 2, 0); TEST_ff_f (pow, minus_zero, 11.1L, 0); + TEST_ff_f (pow, 0, plus_infty, 0); + TEST_ff_f (pow, minus_zero, plus_infty, 0); #ifndef TEST_INLINE /* pow (x, +inf) == +inf for |x| > 1. */ @@ -3667,6 +3695,11 @@ pow_test (void) /* pow (-0, y) == +0 for y > 0 and not an odd integer. */ TEST_ff_f (pow, minus_zero, 4, 0.0); + TEST_ff_f (pow, 16, 0.25L, 2); + TEST_ff_f (pow, 0x1p64L, 0.125L, 256); + TEST_ff_f (pow, 2, 4, 16); + TEST_ff_f (pow, 256, 8, 0x1p64L); + TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L); #if defined TEST_DOUBLE || defined TEST_LDOUBLE @@ -4312,12 +4345,18 @@ initialize (void) HUGE_VALL, HUGE_VAL, HUGE_VALF); minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF, -HUGE_VALL, -HUGE_VAL, -HUGE_VALF); + max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX, + LDBL_MAX, DBL_MAX, FLT_MAX); + min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN, + LDBL_MIN, DBL_MIN, FLT_MIN); (void) &plus_zero; (void) &nan_value; (void) &minus_zero; (void) &plus_infty; (void) &minus_infty; + (void) &max_value; + (void) &min_value; /* Clear all exceptions. From now on we must not get random exceptions. */ feclearexcept (FE_ALL_EXCEPT); --- libc/sysdeps/i386/fpu/e_powf.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_powf.S 2004-07-15 13:23:15.977438871 +0200 @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1999, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -48,6 +48,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p31,@object) +p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41 + ASM_SIZE_DIRECTIVE(p31) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +99,14 @@ ENTRY(__ieee754_powf) fxch // y : x + /* fistpl raises invalid exception for |y| >= 1L<<31. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p31) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -113,7 +124,7 @@ ENTRY(__ieee754_powf) jns 4f // y >= 0, jump fdivrl MO(one) // 1/x (now referred to as x) negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrl $1, %edx @@ -129,7 +140,7 @@ ENTRY(__ieee754_powf) /* y is ±NAN */ 30: flds 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomp %st(1) // x : y fnstsw sahf @@ -141,7 +152,7 @@ ENTRY(__ieee754_powf) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -171,7 +182,7 @@ ENTRY(__ieee754_powf) // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -195,7 +206,7 @@ ENTRY(__ieee754_powf) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -276,7 +287,7 @@ ENTRY(__ieee754_powf) jbe 27f // does not fit in mantissa bits // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -284,7 +295,7 @@ ENTRY(__ieee754_powf) 25: fstp %st(0) 26: addl $4, %esp 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -314,7 +325,7 @@ ENTRY(__ieee754_powf) 22: fstp %st(0) 23: addl $4, %esp // Don't use pop. -24: fldl MO(zero) +24: fldz ret END(__ieee754_powf) --- libc/sysdeps/i386/fpu/e_powl.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_powl.S 2004-07-15 12:56:07.678735978 +0200 @@ -1,5 +1,6 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -48,6 +49,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +100,14 @@ ENTRY(__ieee754_powl) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -116,7 +128,7 @@ ENTRY(__ieee754_powl) negl %eax adcl $0, %edx negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrdl $1, %edx, %eax @@ -134,7 +146,7 @@ ENTRY(__ieee754_powl) /* y is ±NAN */ 30: fldt 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomp %st(1) // x : y fnstsw sahf @@ -146,7 +158,7 @@ ENTRY(__ieee754_powl) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -161,6 +173,11 @@ ENTRY(__ieee754_powl) 7: fyl2x // log2(x) : y 8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? + je 28f fst %st(1) // y*log2(x) : y*log2(x) frndint // int(y*log2(x)) : y*log2(x) fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) @@ -172,11 +189,17 @@ ENTRY(__ieee754_powl) fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) ret +28: fstp %st(1) // y*log2(x) + fld1 // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + addl $8, %esp + fstp %st(1) // 2^(y*log2(x)) + ret // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -200,7 +223,7 @@ ENTRY(__ieee754_powl) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -273,7 +296,7 @@ ENTRY(__ieee754_powl) jz 27f // jump if not odd // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -281,7 +304,7 @@ ENTRY(__ieee754_powl) 25: fstp %st(0) 26: addl $8, %esp 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -309,7 +332,7 @@ ENTRY(__ieee754_powl) 22: fstp %st(0) 23: addl $8, %esp // Don't use 2 x pop -24: fldl MO(zero) +24: fldz ret END(__ieee754_powl) --- libc/sysdeps/i386/fpu/e_pow.S.jj 2001-07-06 06:55:53.000000000 +0200 +++ libc/sysdeps/i386/fpu/e_pow.S 2004-07-15 13:05:41.601640612 +0200 @@ -1,5 +1,6 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -48,6 +49,9 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##@GOTOFF(%ecx) @@ -96,6 +100,14 @@ ENTRY(__ieee754_pow) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -116,7 +128,7 @@ ENTRY(__ieee754_pow) negl %eax adcl $0, %edx negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrdl $1, %edx, %eax @@ -134,7 +146,7 @@ ENTRY(__ieee754_pow) /* y is ±NAN */ 30: fldl 4(%esp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomp %st(1) // x : y fnstsw sahf @@ -146,7 +158,7 @@ ENTRY(__ieee754_pow) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -176,7 +188,7 @@ ENTRY(__ieee754_pow) // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -200,7 +212,7 @@ ENTRY(__ieee754_pow) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -283,7 +295,7 @@ ENTRY(__ieee754_pow) jbe 27f // does not fit in mantissa bits // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -291,7 +303,7 @@ ENTRY(__ieee754_pow) 25: fstp %st(0) 26: addl $8, %esp 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -322,7 +334,7 @@ ENTRY(__ieee754_pow) 22: fstp %st(0) 23: addl $8, %esp // Don't use 2 x pop -24: fldl MO(zero) +24: fldz ret END(__ieee754_pow) --- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200 +++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.000000000 +0200 @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -48,6 +48,10 @@ one: .double 1.0 ASM_TYPE_DIRECTIVE(limit,@object) limit: .double 0.29 ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: + .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) #ifdef PIC #define MO(op) op##(%rip) @@ -87,6 +91,14 @@ ENTRY(__ieee754_powl) fxch // y : x + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fldl MO(p63) // 1L<<63 : y : x + fld %st(1) // y : 1L<<63 : y : x + fabs // |y| : 1L<<63 : y : x + fcomip %st(1), %st // 1L<<63 : y : x + fstp %st(0) // y : x + jnc 2f + /* First see whether `y' is a natural number. In this case we can use a more precise algorithm. */ fld %st // y : y : x @@ -105,7 +117,7 @@ ENTRY(__ieee754_powl) negl %eax adcl $0, %edx negl %edx -4: fldl MO(one) // 1 : x +4: fld1 // 1 : x fxch 6: shrdl $1, %edx, %eax @@ -123,7 +135,7 @@ ENTRY(__ieee754_powl) /* y is ±NAN */ 30: fldt 8(%rsp) // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fucomip %st(1),%st // x : y je 31f fxch // y : x @@ -133,7 +145,7 @@ ENTRY(__ieee754_powl) .align ALIGNARG(4) 2: /* y is a real number. */ fxch // x : y - fldl MO(one) // 1.0 : x : y + fld1 // 1.0 : x : y fld %st(1) // x : 1.0 : x : y fsub %st(1) // x-1 : 1.0 : x : y fabs // |x-1| : 1.0 : x : y @@ -148,6 +160,11 @@ ENTRY(__ieee754_powl) 7: fyl2x // log2(x) : y 8: fmul %st(1) // y*log2(x) : y + fxam + fnstsw + andb $0x45, %ah + cmpb $0x05, %ah // is y*log2(x) == ±inf ? + je 28f fst %st(1) // y*log2(x) : y*log2(x) frndint // int(y*log2(x)) : y*log2(x) fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) @@ -158,11 +175,16 @@ ENTRY(__ieee754_powl) fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) ret +28: fstp %st(1) // y*log2(x) + fld1 // 1 : y*log2(x) + fscale // 2^(y*log2(x)) : y*log2(x) + fstp %st(1) // 2^(y*log2(x)) + ret // pow(x,±0) = 1 .align ALIGNARG(4) 11: fstp %st(0) // pop y - fldl MO(one) + fld1 ret // y == ±inf @@ -191,7 +213,7 @@ ENTRY(__ieee754_powl) ret .align ALIGNARG(4) -14: fldl MO(one) +14: fld1 ret .align ALIGNARG(4) @@ -275,7 +297,7 @@ ENTRY(__ieee754_powl) jz 27f // jump if not odd // It's an odd integer. // Raise divide-by-zero exception and get minus infinity value. - fldl MO(one) + fld1 fdivl MO(zero) fchs ret @@ -283,7 +305,7 @@ ENTRY(__ieee754_powl) 25: fstp %st(0) 26: 27: // Raise divide-by-zero exception and get infinity value. - fldl MO(one) + fld1 fdivl MO(zero) ret @@ -310,7 +332,7 @@ ENTRY(__ieee754_powl) 22: fstp %st(0) 23: -24: fldl MO(zero) +24: fldz ret END(__ieee754_powl) Jakub