From: Jakub Jelinek <jakub@redhat.com>
To: Ulrich Drepper <drepper@redhat.com>
Cc: Glibc hackers <libc-hacker@sources.redhat.com>
Subject: Re: [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2)
Date: Mon, 19 Jul 2004 10:19:00 -0000 [thread overview]
Message-ID: <20040719080353.GL30497@sunsite.ms.mff.cuni.cz> (raw)
In-Reply-To: <40F80B44.9020305@redhat.com>
On Fri, Jul 16, 2004 at 10:07:16AM -0700, Ulrich Drepper wrote:
> Jakub Jelinek wrote:
>
> > -4: fldl MO(one) // 1 : x
> > +4: fld1 // 1 : x
>
> I deliberately avoided the use of fld1 since all my measurements showed
> it is slower. Please some some measurements of your own. As far as I
> remember the only time when it paid off to use fld1 is when the fld was
> the only memory operation and therefore it could be avoided to load the
> PIC register.
If there are no other memory references, it is surely noticeably faster,
otherwise I see it the same speed (but the instruction is shorter).
With following proglet I get 10 cycles in both cases on Opteron,
42 on P3, 92 on P4. AFAIK GCC uses fld1 and fldz in the code it generates
in all cases.
#include <stdio.h>
#include <math.h>
extern long double foo1 (void);
extern long double foo2 (void);
asm (".balign 16; foo1: .section .rodata; .balign 16; mzero: .byte 0, 0, 0, 0, 0, 0, 0, 0x80; .previous; fldz; faddl mzero; fxam; fnstsw; ret;");
asm (".balign 16; foo2: .section .rodata; .balign 16; zero: .double 0.0; .previous; fldl zero; faddl mzero; fxam; fnstsw; ret;");
int main (void)
{
unsigned long long st, en, m;
int i;
m = -1ULL;
for (i = 0; i < 1000; ++i)
{
asm volatile ("rdtsc" : "=A" (st));
foo1 ();
asm volatile ("rdtsc" : "=A" (en));
en -= st;
if (en < m) m = en;
}
printf ("%lld\n", m);
m = -1ULL;
for (i = 0; i < 1000; ++i)
{
asm volatile ("rdtsc" : "=A" (st));
foo2 ();
asm volatile ("rdtsc" : "=A" (en));
en -= st;
if (en < m) m = en;
}
printf ("%lld\n", m);
return 0;
}
Anyway, below is a patch variant without fldz and fld1, those changes
weren't fixing anything and thus can be a separate change which can be
argued about independently.
2004-07-19 Jakub Jelinek <jakub@redhat.com>
[BZ #258]
* math/libm-test.inc (max_value, min_value): New variables.
(initialize): Initialize them.
(pow_test): Add a couple of new tests.
* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid
exception if |y| >= 1U<<31.
* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid
exception if |y| >= 1L<<63.
* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise.
--- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100
+++ libc/math/libm-test.inc 2004-07-15 12:34:17.395663150 +0200
@@ -169,7 +169,7 @@ static int output_points; /* Should the
static int ignore_max_ulp; /* Should we ignore max_ulp? */
static FLOAT minus_zero, plus_zero;
-static FLOAT plus_infty, minus_infty, nan_value;
+static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value;
static FLOAT max_error, real_max_error, imag_max_error;
@@ -3593,6 +3593,28 @@ pow_test (void)
TEST_ff_f (pow, -1, plus_infty, 1);
TEST_ff_f (pow, 1, minus_infty, 1);
TEST_ff_f (pow, -1, minus_infty, 1);
+ TEST_ff_f (pow, 1, 1, 1);
+ TEST_ff_f (pow, 1, -1, 1);
+ TEST_ff_f (pow, 1, 1.25, 1);
+ TEST_ff_f (pow, 1, -1.25, 1);
+ TEST_ff_f (pow, 1, 0x1p62L, 1);
+ TEST_ff_f (pow, 1, 0x1p63L, 1);
+ TEST_ff_f (pow, 1, 0x1p64L, 1);
+ TEST_ff_f (pow, 1, 0x1p72L, 1);
+
+ /* pow (x, +-0) == 1. */
+ TEST_ff_f (pow, plus_infty, 0, 1);
+ TEST_ff_f (pow, plus_infty, minus_zero, 1);
+ TEST_ff_f (pow, minus_infty, 0, 1);
+ TEST_ff_f (pow, minus_infty, minus_zero, 1);
+ TEST_ff_f (pow, 32.75L, 0, 1);
+ TEST_ff_f (pow, 32.75L, minus_zero, 1);
+ TEST_ff_f (pow, -32.75L, 0, 1);
+ TEST_ff_f (pow, -32.75L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p72L, 0, 1);
+ TEST_ff_f (pow, 0x1p72L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p-72L, 0, 1);
+ TEST_ff_f (pow, 0x1p-72L, minus_zero, 1);
TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION);
TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
@@ -3609,6 +3631,10 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+ TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty);
+ TEST_ff_f (pow, 10, -0x1p72L, 0);
+ TEST_ff_f (pow, max_value, max_value, plus_infty);
+ TEST_ff_f (pow, 10, -max_value, 0);
TEST_ff_f (pow, 0, 1, 0);
TEST_ff_f (pow, 0, 11, 0);
@@ -3623,6 +3649,8 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, 2, 0);
TEST_ff_f (pow, minus_zero, 11.1L, 0);
+ TEST_ff_f (pow, 0, plus_infty, 0);
+ TEST_ff_f (pow, minus_zero, plus_infty, 0);
#ifndef TEST_INLINE
/* pow (x, +inf) == +inf for |x| > 1. */
@@ -3667,6 +3695,11 @@ pow_test (void)
/* pow (-0, y) == +0 for y > 0 and not an odd integer. */
TEST_ff_f (pow, minus_zero, 4, 0.0);
+ TEST_ff_f (pow, 16, 0.25L, 2);
+ TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
+ TEST_ff_f (pow, 2, 4, 16);
+ TEST_ff_f (pow, 256, 8, 0x1p64L);
+
TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L);
#if defined TEST_DOUBLE || defined TEST_LDOUBLE
@@ -4312,12 +4345,18 @@ initialize (void)
HUGE_VALL, HUGE_VAL, HUGE_VALF);
minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF,
-HUGE_VALL, -HUGE_VAL, -HUGE_VALF);
+ max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+ LDBL_MAX, DBL_MAX, FLT_MAX);
+ min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN,
+ LDBL_MIN, DBL_MIN, FLT_MIN);
(void) &plus_zero;
(void) &nan_value;
(void) &minus_zero;
(void) &plus_infty;
(void) &minus_infty;
+ (void) &max_value;
+ (void) &min_value;
/* Clear all exceptions. From now on we must not get random exceptions. */
feclearexcept (FE_ALL_EXCEPT);
--- libc/sysdeps/i386/fpu/e_powf.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powf.S 2004-07-15 13:23:15.977438871 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p31,@object)
+p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41
+ ASM_SIZE_DIRECTIVE(p31)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +99,14 @@ ENTRY(__ieee754_powf)
fxch // y : x
+ /* fistpl raises invalid exception for |y| >= 1L<<31. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p31) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
--- libc/sysdeps/i386/fpu/e_powl.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_powl.S 2004-07-15 12:56:07.678735978 +0200
@@ -1,5 +1,6 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +49,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -161,6 +173,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -172,6 +189,12 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fldl MO(one) // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ addl $8, %esp
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
--- libc/sysdeps/i386/fpu/e_pow.S.jj 2001-07-06 06:55:53.000000000 +0200
+++ libc/sysdeps/i386/fpu/e_pow.S 2004-07-15 13:05:41.601640612 +0200
@@ -1,5 +1,6 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +49,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##@GOTOFF(%ecx)
@@ -96,6 +100,14 @@ ENTRY(__ieee754_pow)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fld %st // y : y : x
+ fabs // |y| : y : x
+ fcompl MO(p63) // y : x
+ fnstsw
+ sahf
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
--- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200
+++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.000000000 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,10 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63:
+ .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##(%rip)
@@ -87,6 +91,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fldl MO(p63) // 1L<<63 : y : x
+ fld %st(1) // y : 1L<<63 : y : x
+ fabs // |y| : 1L<<63 : y : x
+ fcomip %st(1), %st // 1L<<63 : y : x
+ fstp %st(0) // y : x
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -148,6 +160,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -158,6 +175,11 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fldl MO(one) // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
Jakub
next prev parent reply other threads:[~2004-07-19 10:19 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-07-14 18:17 [PATCH] Fix powl on x86_64 [BZ #258] Jakub Jelinek
2004-07-15 11:51 ` [PATCH] Fix powl on i386/x86_64 [BZ #258] (take 2) Jakub Jelinek
2004-07-16 17:09 ` Ulrich Drepper
2004-07-19 10:19 ` Jakub Jelinek [this message]
2004-07-20 7:07 ` Ulrich Drepper
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040719080353.GL30497@sunsite.ms.mff.cuni.cz \
--to=jakub@redhat.com \
--cc=drepper@redhat.com \
--cc=libc-hacker@sources.redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).