From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <libc-hacker-return-7729-listarch-libc-hacker=sources.redhat.com@sources.redhat.com>
Received: (qmail 5442 invoked by alias); 15 Jul 2004 11:51:39 -0000
Mailing-List: contact libc-hacker-help@sources.redhat.com; run by ezmlm
Precedence: bulk
List-Subscribe: <mailto:libc-hacker-subscribe@sources.redhat.com>
List-Archive: <http://sources.redhat.com/ml/libc-hacker/>
List-Post: <mailto:libc-hacker@sources.redhat.com>
List-Help: <mailto:libc-hacker-help@sources.redhat.com>, <http://sources.redhat.com/ml/#faqs>
Sender: libc-hacker-owner@sources.redhat.com
Received: (qmail 5416 invoked from network); 15 Jul 2004 11:51:38 -0000
Received: from unknown (HELO sunsite.ms.mff.cuni.cz) (195.113.15.26)
  by sourceware.org with SMTP; 15 Jul 2004 11:51:38 -0000
Received: from sunsite.ms.mff.cuni.cz (sunsite.mff.cuni.cz [127.0.0.1])
	by sunsite.ms.mff.cuni.cz (8.12.8/8.12.8) with ESMTP id i6F9Zu3j010500;
	Thu, 15 Jul 2004 11:35:56 +0200
Received: (from jakub@localhost)
	by sunsite.ms.mff.cuni.cz (8.12.8/8.12.8/Submit) id i6F9Ztl7010495;
	Thu, 15 Jul 2004 11:35:55 +0200
Date: Thu, 15 Jul 2004 11:51:00 -0000
From: Jakub Jelinek <jakub@redhat.com>
To: Ulrich Drepper <drepper@redhat.com>
Cc: Glibc hackers <libc-hacker@sources.redhat.com>
Subject: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2)
Message-ID: <20040715093555.GA9816@sunsite.ms.mff.cuni.cz>
Reply-To: Jakub Jelinek <jakub@redhat.com>
References: <20040714160203.GQ5191@sunsite.ms.mff.cuni.cz>
Mime-Version: 1.0
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline
Content-Transfer-Encoding: 8bit
In-Reply-To: <20040714160203.GQ5191@sunsite.ms.mff.cuni.cz>
User-Agent: Mutt/1.4.1i
X-SW-Source: 2004-07/txt/msg00030.txt.bz2

On Wed, Jul 14, 2004 at 06:02:03PM +0200, Jakub Jelinek wrote:
> i386 will need similar treatment.
> Not sure if fld*/f{,u}comi{,p}/j? is faster than fcompl/fnstw/testing %ah/j?
> or vice versa, maybe the comparisons could be coded more efficiently.

Here is an updated version which fixes i386 too.
I have done also some benchmarking and e.g. on P3 pow{f,,l} is now from 22
ticks slower to 106 ticks faster depending on input, on x86_64 (Opteron) I saw
slowdowns at most 16 ticks for powl (powf and pow aren't done in assembly,
so there is no change for x86_64), on Opteron -m32 from 10 ticks slower to
29 ticks faster.
But more importantly it is correct.

2004-07-15  Jakub Jelinek  <jakub@redhat.com>

	[BZ #258]
	* math/libm-test.inc (max_value, min_value): New variables.
	(initialize): Initialize them.
	(pow_test): Add a couple of new tests.
	* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid
	exception if |y| >= 1U<<31.  Replace fldl MO(zero) with fldz and
	fldl MO(one) with fld1.
	* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid
	exception if |y| >= 1L<<63.  Replace fldl MO(zero) with fldz and
	fldl MO(one) with fld1.
	* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
	If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN.
	* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise.

--- libc/math/libm-test.inc.jj	2004-02-13 12:28:07.000000000 +0100
+++ libc/math/libm-test.inc	2004-07-15 12:34:17.395663150 +0200
@@ -169,7 +169,7 @@ static int output_points;	/* Should the 
 static int ignore_max_ulp;	/* Should we ignore max_ulp?  */
 
 static FLOAT minus_zero, plus_zero;
-static FLOAT plus_infty, minus_infty, nan_value;
+static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value;
 
 static FLOAT max_error, real_max_error, imag_max_error;
 
@@ -3593,6 +3593,28 @@ pow_test (void)
   TEST_ff_f (pow, -1, plus_infty, 1);
   TEST_ff_f (pow, 1, minus_infty, 1);
   TEST_ff_f (pow, -1, minus_infty, 1);
+  TEST_ff_f (pow, 1, 1, 1);
+  TEST_ff_f (pow, 1, -1, 1);
+  TEST_ff_f (pow, 1, 1.25, 1);
+  TEST_ff_f (pow, 1, -1.25, 1);
+  TEST_ff_f (pow, 1, 0x1p62L, 1);
+  TEST_ff_f (pow, 1, 0x1p63L, 1);
+  TEST_ff_f (pow, 1, 0x1p64L, 1);
+  TEST_ff_f (pow, 1, 0x1p72L, 1);
+
+  /* pow (x, +-0) == 1.  */
+  TEST_ff_f (pow, plus_infty, 0, 1);
+  TEST_ff_f (pow, plus_infty, minus_zero, 1);
+  TEST_ff_f (pow, minus_infty, 0, 1);
+  TEST_ff_f (pow, minus_infty, minus_zero, 1);
+  TEST_ff_f (pow, 32.75L, 0, 1);
+  TEST_ff_f (pow, 32.75L, minus_zero, 1);
+  TEST_ff_f (pow, -32.75L, 0, 1);
+  TEST_ff_f (pow, -32.75L, minus_zero, 1);
+  TEST_ff_f (pow, 0x1p72L, 0, 1);
+  TEST_ff_f (pow, 0x1p72L, minus_zero, 1);
+  TEST_ff_f (pow, 0x1p-72L, 0, 1);
+  TEST_ff_f (pow, 0x1p-72L, minus_zero, 1);
 
   TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION);
   TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
@@ -3609,6 +3631,10 @@ pow_test (void)
   TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
 
+  TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty);
+  TEST_ff_f (pow, 10, -0x1p72L, 0);
+  TEST_ff_f (pow, max_value, max_value, plus_infty);
+  TEST_ff_f (pow, 10, -max_value, 0);
 
   TEST_ff_f (pow, 0, 1, 0);
   TEST_ff_f (pow, 0, 11, 0);
@@ -3623,6 +3649,8 @@ pow_test (void)
 
   TEST_ff_f (pow, minus_zero, 2, 0);
   TEST_ff_f (pow, minus_zero, 11.1L, 0);
+  TEST_ff_f (pow, 0, plus_infty, 0);
+  TEST_ff_f (pow, minus_zero, plus_infty, 0);
 
 #ifndef TEST_INLINE
   /* pow (x, +inf) == +inf for |x| > 1.  */
@@ -3667,6 +3695,11 @@ pow_test (void)
   /* pow (-0, y) == +0 for y > 0 and not an odd integer.  */
   TEST_ff_f (pow, minus_zero, 4, 0.0);
 
+  TEST_ff_f (pow, 16, 0.25L, 2);
+  TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
+  TEST_ff_f (pow, 2, 4, 16);
+  TEST_ff_f (pow, 256, 8, 0x1p64L);
+
   TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L);
 
 #if defined TEST_DOUBLE || defined TEST_LDOUBLE
@@ -4312,12 +4345,18 @@ initialize (void)
 		       HUGE_VALL, HUGE_VAL, HUGE_VALF);
   minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF,
 			-HUGE_VALL, -HUGE_VAL, -HUGE_VALF);
+  max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+		      LDBL_MAX, DBL_MAX, FLT_MAX);
+  min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN,
+		      LDBL_MIN, DBL_MIN, FLT_MIN);
 
   (void) &plus_zero;
   (void) &nan_value;
   (void) &minus_zero;
   (void) &plus_infty;
   (void) &minus_infty;
+  (void) &max_value;
+  (void) &min_value;
 
   /* Clear all exceptions.  From now on we must not get random exceptions.  */
   feclearexcept (FE_ALL_EXCEPT);
--- libc/sysdeps/i386/fpu/e_powf.S.jj	2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powf.S	2004-07-15 13:23:15.977438871 +0200
@@ -1,5 +1,5 @@
 /* ix87 specific implementation of pow function.
-   Copyright (C) 1996, 1997, 1999, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1999, 2001, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -48,6 +48,9 @@ one:	.double 1.0
 	ASM_TYPE_DIRECTIVE(limit,@object)
 limit:	.double 0.29
 	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p31,@object)
+p31:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41
+	ASM_SIZE_DIRECTIVE(p31)
 
 #ifdef PIC
 #define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +99,14 @@ ENTRY(__ieee754_powf)
 
 	fxch			// y : x
 
+	/* fistpl raises invalid exception for |y| >= 1L<<31.  */
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p31)		// y : x
+	fnstsw
+	sahf
+	jnc	2f
+
 	/* First see whether `y' is a natural number.  In this case we
 	   can use a more precise algorithm.  */
 	fld	%st		// y : y : x
@@ -113,7 +124,7 @@ ENTRY(__ieee754_powf)
 	jns	4f		// y >= 0, jump
 	fdivrl	MO(one)		// 1/x		(now referred to as x)
 	negl	%edx
-4:	fldl	MO(one)		// 1 : x
+4:	fld1			// 1 : x
 	fxch
 
 6:	shrl	$1, %edx
@@ -129,7 +140,7 @@ ENTRY(__ieee754_powf)
 
 	/* y is ±NAN */
 30:	flds	4(%esp)		// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fucomp	%st(1)		// x : y
 	fnstsw
 	sahf
@@ -141,7 +152,7 @@ ENTRY(__ieee754_powf)
 	.align ALIGNARG(4)
 2:	/* y is a real number.  */
 	fxch			// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fld	%st(1)		// x : 1.0 : x : y
 	fsub	%st(1)		// x-1 : 1.0 : x : y
 	fabs			// |x-1| : 1.0 : x : y
@@ -171,7 +182,7 @@ ENTRY(__ieee754_powf)
 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
 11:	fstp	%st(0)		// pop y
-	fldl	MO(one)
+	fld1
 	ret
 
 	// y == ±inf
@@ -195,7 +206,7 @@ ENTRY(__ieee754_powf)
 	ret
 
 	.align ALIGNARG(4)
-14:	fldl	MO(one)
+14:	fld1
 	ret
 
 	.align ALIGNARG(4)
@@ -276,7 +287,7 @@ ENTRY(__ieee754_powf)
 	jbe	27f		// does not fit in mantissa bits
 	// It's an odd integer.
 	// Raise divide-by-zero exception and get minus infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	fchs
 	ret
@@ -284,7 +295,7 @@ ENTRY(__ieee754_powf)
 25:	fstp	%st(0)
 26:	addl	$4, %esp
 27:	// Raise divide-by-zero exception and get infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	ret
 
@@ -314,7 +325,7 @@ ENTRY(__ieee754_powf)
 
 22:	fstp	%st(0)
 23:	addl	$4, %esp	// Don't use pop.
-24:	fldl	MO(zero)
+24:	fldz
 	ret
 
 END(__ieee754_powf)
--- libc/sysdeps/i386/fpu/e_powl.S.jj	2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powl.S	2004-07-15 12:56:07.678735978 +0200
@@ -1,5 +1,6 @@
 /* ix87 specific implementation of pow function.
-   Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -48,6 +49,9 @@ one:	.double 1.0
 	ASM_TYPE_DIRECTIVE(limit,@object)
 limit:	.double 0.29
 	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p63,@object)
+p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+	ASM_SIZE_DIRECTIVE(p63)
 
 #ifdef PIC
 #define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_powl)
 
 	fxch			// y : x
 
+	/* fistpll raises invalid exception for |y| >= 1L<<63.  */
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p63)		// y : x
+	fnstsw
+	sahf
+	jnc	2f
+
 	/* First see whether `y' is a natural number.  In this case we
 	   can use a more precise algorithm.  */
 	fld	%st		// y : y : x
@@ -116,7 +128,7 @@ ENTRY(__ieee754_powl)
 	negl	%eax
 	adcl	$0, %edx
 	negl	%edx
-4:	fldl	MO(one)		// 1 : x
+4:	fld1			// 1 : x
 	fxch
 
 6:	shrdl	$1, %edx, %eax
@@ -134,7 +146,7 @@ ENTRY(__ieee754_powl)
 
 	/* y is ±NAN */
 30:	fldt	4(%esp)		// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fucomp	%st(1)		// x : y
 	fnstsw
 	sahf
@@ -146,7 +158,7 @@ ENTRY(__ieee754_powl)
 	.align ALIGNARG(4)
 2:	/* y is a real number.  */
 	fxch			// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fld	%st(1)		// x : 1.0 : x : y
 	fsub	%st(1)		// x-1 : 1.0 : x : y
 	fabs			// |x-1| : 1.0 : x : y
@@ -161,6 +173,11 @@ ENTRY(__ieee754_powl)
 
 7:	fyl2x			// log2(x) : y
 8:	fmul	%st(1)		// y*log2(x) : y
+	fxam
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x05, %ah	// is y*log2(x) == ±inf ?
+	je	28f
 	fst	%st(1)		// y*log2(x) : y*log2(x)
 	frndint			// int(y*log2(x)) : y*log2(x)
 	fsubr	%st, %st(1)	// int(y*log2(x)) : fract(y*log2(x))
@@ -172,11 +189,17 @@ ENTRY(__ieee754_powl)
 	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
 	ret
 
+28:	fstp	%st(1)		// y*log2(x)
+	fld1			// 1 : y*log2(x)
+	fscale			// 2^(y*log2(x)) : y*log2(x)
+	addl	$8, %esp
+	fstp	%st(1)		// 2^(y*log2(x))
+	ret
 
 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
 11:	fstp	%st(0)		// pop y
-	fldl	MO(one)
+	fld1
 	ret
 
 	// y == ±inf
@@ -200,7 +223,7 @@ ENTRY(__ieee754_powl)
 	ret
 
 	.align ALIGNARG(4)
-14:	fldl	MO(one)
+14:	fld1
 	ret
 
 	.align ALIGNARG(4)
@@ -273,7 +296,7 @@ ENTRY(__ieee754_powl)
 	jz	27f		// jump if not odd
 	// It's an odd integer.
 	// Raise divide-by-zero exception and get minus infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	fchs
 	ret
@@ -281,7 +304,7 @@ ENTRY(__ieee754_powl)
 25:	fstp	%st(0)
 26:	addl	$8, %esp
 27:	// Raise divide-by-zero exception and get infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	ret
 
@@ -309,7 +332,7 @@ ENTRY(__ieee754_powl)
 
 22:	fstp	%st(0)
 23:	addl	$8, %esp	// Don't use 2 x pop
-24:	fldl	MO(zero)
+24:	fldz
 	ret
 
 END(__ieee754_powl)
--- libc/sysdeps/i386/fpu/e_pow.S.jj	2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_pow.S	2004-07-15 13:05:41.601640612 +0200
@@ -1,5 +1,6 @@
 /* ix87 specific implementation of pow function.
-   Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -48,6 +49,9 @@ one:	.double 1.0
 	ASM_TYPE_DIRECTIVE(limit,@object)
 limit:	.double 0.29
 	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p63,@object)
+p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+	ASM_SIZE_DIRECTIVE(p63)
 
 #ifdef PIC
 #define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_pow)
 
 	fxch			// y : x
 
+	/* fistpll raises invalid exception for |y| >= 1L<<63.  */
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p63)		// y : x
+	fnstsw
+	sahf
+	jnc	2f
+
 	/* First see whether `y' is a natural number.  In this case we
 	   can use a more precise algorithm.  */
 	fld	%st		// y : y : x
@@ -116,7 +128,7 @@ ENTRY(__ieee754_pow)
 	negl	%eax
 	adcl	$0, %edx
 	negl	%edx
-4:	fldl	MO(one)		// 1 : x
+4:	fld1			// 1 : x
 	fxch
 
 6:	shrdl	$1, %edx, %eax
@@ -134,7 +146,7 @@ ENTRY(__ieee754_pow)
 
 	/* y is ±NAN */
 30:	fldl	4(%esp)		// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fucomp	%st(1)		// x : y
 	fnstsw
 	sahf
@@ -146,7 +158,7 @@ ENTRY(__ieee754_pow)
 	.align ALIGNARG(4)
 2:	/* y is a real number.  */
 	fxch			// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fld	%st(1)		// x : 1.0 : x : y
 	fsub	%st(1)		// x-1 : 1.0 : x : y
 	fabs			// |x-1| : 1.0 : x : y
@@ -176,7 +188,7 @@ ENTRY(__ieee754_pow)
 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
 11:	fstp	%st(0)		// pop y
-	fldl	MO(one)
+	fld1
 	ret
 
 	// y == ±inf
@@ -200,7 +212,7 @@ ENTRY(__ieee754_pow)
 	ret
 
 	.align ALIGNARG(4)
-14:	fldl	MO(one)
+14:	fld1
 	ret
 
 	.align ALIGNARG(4)
@@ -283,7 +295,7 @@ ENTRY(__ieee754_pow)
 	jbe	27f		// does not fit in mantissa bits
 	// It's an odd integer.
 	// Raise divide-by-zero exception and get minus infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	fchs
 	ret
@@ -291,7 +303,7 @@ ENTRY(__ieee754_pow)
 25:	fstp	%st(0)
 26:	addl	$8, %esp
 27:	// Raise divide-by-zero exception and get infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	ret
 
@@ -322,7 +334,7 @@ ENTRY(__ieee754_pow)
 
 22:	fstp	%st(0)
 23:	addl	$8, %esp	// Don't use 2 x pop
-24:	fldl	MO(zero)
+24:	fldz
 	ret
 
 END(__ieee754_pow)
--- libc/sysdeps/x86_64/fpu/e_powl.S.jj	2001-09-19 12:24:08.000000000 +0200
+++ libc/sysdeps/x86_64/fpu/e_powl.S	2004-07-14 20:02:06.000000000 +0200
@@ -1,5 +1,5 @@
 /* ix87 specific implementation of pow function.
-   Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -48,6 +48,10 @@ one:	.double 1.0
 	ASM_TYPE_DIRECTIVE(limit,@object)
 limit:	.double 0.29
 	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p63,@object)
+p63:
+	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+	ASM_SIZE_DIRECTIVE(p63)
 
 #ifdef PIC
 #define MO(op) op##(%rip)
@@ -87,6 +91,14 @@ ENTRY(__ieee754_powl)
 
 	fxch			// y : x
 
+	/* fistpll raises invalid exception for |y| >= 1L<<63.  */
+	fldl	MO(p63)		// 1L<<63 : y : x
+	fld	%st(1)		// y : 1L<<63 : y : x
+	fabs			// |y| : 1L<<63 : y : x
+	fcomip	%st(1), %st	// 1L<<63 : y : x
+	fstp	%st(0)		// y : x
+	jnc	2f
+
 	/* First see whether `y' is a natural number.  In this case we
 	   can use a more precise algorithm.  */
 	fld	%st		// y : y : x
@@ -105,7 +117,7 @@ ENTRY(__ieee754_powl)
 	negl	%eax
 	adcl	$0, %edx
 	negl	%edx
-4:	fldl	MO(one)		// 1 : x
+4:	fld1			// 1 : x
 	fxch
 
 6:	shrdl	$1, %edx, %eax
@@ -123,7 +135,7 @@ ENTRY(__ieee754_powl)
 
 	/* y is ±NAN */
 30:	fldt	8(%rsp)		// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fucomip	%st(1),%st	// x : y
 	je	31f
 	fxch			// y : x
@@ -133,7 +145,7 @@ ENTRY(__ieee754_powl)
 	.align ALIGNARG(4)
 2:	/* y is a real number.  */
 	fxch			// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fld	%st(1)		// x : 1.0 : x : y
 	fsub	%st(1)		// x-1 : 1.0 : x : y
 	fabs			// |x-1| : 1.0 : x : y
@@ -148,6 +160,11 @@ ENTRY(__ieee754_powl)
 
 7:	fyl2x			// log2(x) : y
 8:	fmul	%st(1)		// y*log2(x) : y
+	fxam
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x05, %ah      // is y*log2(x) == ±inf ?
+	je	28f
 	fst	%st(1)		// y*log2(x) : y*log2(x)
 	frndint			// int(y*log2(x)) : y*log2(x)
 	fsubr	%st, %st(1)	// int(y*log2(x)) : fract(y*log2(x))
@@ -158,11 +175,16 @@ ENTRY(__ieee754_powl)
 	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
 	ret
 
+28:	fstp	%st(1)		// y*log2(x)
+	fld1			// 1 : y*log2(x)
+	fscale			// 2^(y*log2(x)) : y*log2(x)
+	fstp	%st(1)		// 2^(y*log2(x))
+	ret
 
 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
 11:	fstp	%st(0)		// pop y
-	fldl	MO(one)
+	fld1
 	ret
 
 	// y == ±inf
@@ -191,7 +213,7 @@ ENTRY(__ieee754_powl)
 	ret
 
 	.align ALIGNARG(4)
-14:	fldl	MO(one)
+14:	fld1
 	ret
 
 	.align ALIGNARG(4)
@@ -275,7 +297,7 @@ ENTRY(__ieee754_powl)
 	jz	27f		// jump if not odd
 	// It's an odd integer.
 	// Raise divide-by-zero exception and get minus infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	fchs
 	ret
@@ -283,7 +305,7 @@ ENTRY(__ieee754_powl)
 25:	fstp	%st(0)
 26:
 27:	// Raise divide-by-zero exception and get infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	ret
 
@@ -310,7 +332,7 @@ ENTRY(__ieee754_powl)
 
 22:	fstp	%st(0)
 23:
-24:	fldl	MO(zero)
+24:	fldz
 	ret
 
 END(__ieee754_powl)

	Jakub