* [PATCH] Fix ppc{,64} nextafterl and fpclassifyl
@ 2007-06-05 15:54 Jakub Jelinek
2007-06-07 15:50 ` Steven Munroe
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2007-06-05 15:54 UTC (permalink / raw)
To: Ulrich Drepper, Steven Munroe; +Cc: Glibc hackers
[-- Attachment #1: Type: text/plain, Size: 446 bytes --]
Hi!
Attached is a fixed version of nextafterl and fpclassifyl fixes Steven
sent privately a few days ago, together with a couple of fixes on top
of that from me.
I have used attached tester to test whether nextafterl works correctly,
not sure if we want that in glibc testsuite (only tested on ppc/ppc64
as it is ldbl-128ibm specific) or not, perhaps with smaller number of
tests. The 10 million iterations take ~ 70 seconds on my G5.
Jakub
[-- Attachment #2: P --]
[-- Type: text/plain, Size: 7600 bytes --]
2007-06-04 Jakub Jelinek <jakub@redhat.com>
* sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c (nextafterl): Remove
unused ily variable. Fix nextafterl on +-__LDBL_MAX__ and +-Inf.
Remove unreachable code at the end.
2007-06-01 Steven Munroe <sjmunroe@us.ibm.com>
* sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c: Correct description of
ldbl-128ibm in comment.
(fpclassifyl): Correct classification of denormals.
* sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c (nextafterl): Correct
return value for MIN denormal. Rewrite using long double math to
correctly handle denormals and canonicalize the results.
--- libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c.jj 2007-05-24 16:41:25.000000000 +0200
+++ libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c 2007-06-04 17:30:21.000000000 +0200
@@ -35,7 +35,7 @@ static char rcsid[] = "$NetBSD: $";
long double x,y;
#endif
{
- int64_t hx,hy,ihx,ihy,ilx,ily;
+ int64_t hx,hy,ihx,ihy,ilx;
u_int64_t lx,ly;
GET_LDOUBLE_WORDS64(hx,lx,x);
@@ -43,7 +43,6 @@ static char rcsid[] = "$NetBSD: $";
ihx = hx&0x7fffffffffffffffLL; /* |hx| */
ilx = lx&0x7fffffffffffffffLL; /* |lx| */
ihy = hy&0x7fffffffffffffffLL; /* |hy| */
- ily = ly&0x7fffffffffffffffLL; /* |ly| */
if((((ihx&0x7ff0000000000000LL)==0x7ff0000000000000LL)&&
((ihx&0x000fffffffffffffLL)!=0)) || /* x is nan */
@@ -54,54 +53,66 @@ static char rcsid[] = "$NetBSD: $";
return y; /* x=y, return y */
if(ihx == 0 && ilx == 0) { /* x == 0 */
long double u;
- SET_LDOUBLE_WORDS64(x,hy&0x8000000000000000ULL,1);/* return +-minsubnormal */
- u = math_opt_barrier (u);
+ hy = (hy & 0x8000000000000000ULL) | 1;
+ SET_LDOUBLE_WORDS64(x,hy,0ULL);/* return +-minsubnormal */
+ u = math_opt_barrier (x);
u = u * u;
math_force_eval (u); /* raise underflow flag */
return x;
}
- if(ihx>=0) { /* x > 0 */
- if(ihx>ihy||((ihx==ihy)&&(ilx>ily))) { /* x > y, x -= ulp */
-
- if(ilx==0)
- hx--;
- else
- lx--;
- } else { /* x < y, x += ulp */
- if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL))
- {
- SET_LDOUBLE_WORDS64(x,0x7ff0000000000000,0x8000000000000000);
- return x;
- }
- else if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL))
- {
- SET_LDOUBLE_WORDS64(x,0xfff0000000000000,0x8000000000000000);
- return x;
- }
- else if((lx&0x7fffffffffffffff)==0) hx++;
- else
- lx++;
+
+ long double u;
+ if(x > y) { /* x > y, x -= ulp */
+ if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL))
+ return x+x; /* overflow, return -inf */
+ if (hx >= 0x7ff0000000000000LL) {
+ SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL);
+ return u;
}
- } else { /* x < 0 */
- if(ihy>=0||ihx>ihy||((ihx==ihy)&&(ilx>ily))){/* x < y, x -= ulp */
- if((lx&0x7fffffffffffffff)==0)
- hx--;
- else
- lx--;
- } else { /* x > y, x += ulp */
- if((lx&0x7fffffffffffffff)==0) hx++;
- else
- lx++;
+ if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */
+ u = math_opt_barrier (x);
+ x -= __LDBL_DENORM_MIN__;
+ if (ihx < 0x0360000000000000LL
+ || (hx > 0 && (int64_t) lx <= 0)
+ || (hx < 0 && (int64_t) lx > 1)) {
+ u = u * u;
+ math_force_eval (u); /* raise underflow flag */
+ }
+ return x;
}
+ if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
+ SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+ u *= 0x1.0000000000000p-105L;
+ } else
+ SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+ return x - u;
+ } else { /* x < y, x += ulp */
+ if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL))
+ return x+x; /* overflow, return +inf */
+ if ((u_int64_t) hx >= 0xfff0000000000000ULL) {
+ SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL);
+ return u;
+ }
+ if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */
+ u = math_opt_barrier (x);
+ x += __LDBL_DENORM_MIN__;
+ if (ihx < 0x0360000000000000LL
+ || (hx > 0 && (int64_t) lx < 0 && lx != 0x8000000000000001LL)
+ || (hx < 0 && (int64_t) lx >= 0)) {
+ u = u * u;
+ math_force_eval (u); /* raise underflow flag */
+ }
+ if (x == 0.0L) /* handle negative __LDBL_DENORM_MIN__ case */
+ x = -0.0L;
+ return x;
+ }
+ if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
+ SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+ u *= 0x1.0000000000000p-105L;
+ } else
+ SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+ return x + u;
}
- hy = hx&0x7ff0000000000000LL;
- if(hy==0x7ff0000000000000LL) return x+x;/* overflow */
- if(hy==0) {
- long double u = x * x; /* underflow */
- math_force_eval (u); /* raise underflow flag */
- }
- SET_LDOUBLE_WORDS64(x,hx,lx);
- return x;
}
strong_alias (__nextafterl, __nexttowardl)
long_double_symbol (libm, __nextafterl, nextafterl);
--- libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c.jj 2006-01-28 01:07:25.000000000 +0100
+++ libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c 2007-06-04 20:06:21.000000000 +0200
@@ -1,5 +1,5 @@
/* Return classification value corresponding to argument.
- Copyright (C) 1997,1999,2002,2004,2006 Free Software Foundation, Inc.
+ Copyright (C) 1997,1999,2002,2004,2006,2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997 and
Jakub Jelinek <jj@ultra.linux.cz>, 1999.
@@ -30,14 +30,16 @@
* -NaN fffn nnnn nnnn nnnn xxxx xxxx xxxx xxxx
* +Inf 7ff0 0000 0000 0000 xxxx xxxx xxxx xxxx
* -Inf fff0 0000 0000 0000 xxxx xxxx xxxx xxxx
- * +0 0000 0000 0000 0000
- * -0 8000 0000 0000 0000
- * +normal 001n nnnn nnnn nnnn (smallest)
- * -normal 801n nnnn nnnn nnnn (smallest)
- * +normal 7fen nnnn nnnn nnnn (largest)
- * -normal ffen nnnn nnnn nnnn (largest)
- * +denorm 000n nnnn nnnn nnnn
- * -denorm 800n nnnn nnnn nnnn
+ * +0 0000 0000 0000 0000 xxxx xxxx xxxx xxxx
+ * -0 8000 0000 0000 0000 xxxx xxxx xxxx xxxx
+ * +normal 0360 0000 0000 0000 0000 0000 0000 0000 (smallest)
+ * -normal 8360 0000 0000 0000 0000 0000 0000 0000 (smallest)
+ * +normal 7fef ffff ffff ffff 7c8f ffff ffff fffe (largest)
+ * -normal ffef ffff ffff ffff fc8f ffff ffff fffe (largest)
+ * +denorm 0360 0000 0000 0000 8000 0000 0000 0001 (largest)
+ * -denorm 8360 0000 0000 0000 0000 0000 0000 0001 (largest)
+ * +denorm 000n nnnn nnnn nnnn xxxx xxxx xxxx xxxx
+ * -denorm 800n nnnn nnnn nnnn xxxx xxxx xxxx xxxx
*/
int
@@ -59,12 +61,23 @@ ___fpclassifyl (long double x)
/* +/-zero or +/- normal or +/- denormal */
if (hx & 0x7fffffffffffffffULL) {
/* +/- normal or +/- denormal */
- if ((hx & 0x7ff0000000000000ULL) >= 0x0360000000000000ULL) {
+ if ((hx & 0x7ff0000000000000ULL) > 0x0360000000000000ULL) {
/* +/- normal */
retval = FP_NORMAL;
} else {
- /* +/- denormal */
- retval = FP_SUBNORMAL;
+ if ((hx & 0x7ff0000000000000ULL) == 0x0360000000000000ULL) {
+ if ((lx & 0x7fffffffffffffff) /* lower is non-zero */
+ && ((lx^hx) & 0x8000000000000000ULL)) { /* and sign differs */
+ /* +/- denormal */
+ retval = FP_SUBNORMAL;
+ } else {
+ /* +/- normal */
+ retval = FP_NORMAL;
+ }
+ } else {
+ /* +/- denormal */
+ retval = FP_SUBNORMAL;
+ }
}
} else {
/* +/- zero */
[-- Attachment #3: test-ldbl128-ibm-nextafterl.c --]
[-- Type: text/plain, Size: 4265 bytes --]
#define _GNU_SOURCE
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Randomized tester for nextafterl.

   Each iteration builds a random hexadecimal floating-point literal in
   BUF, parses it with sscanf into U.L, independently computes the text
   of the expected neighbour in BUF2 using integer arithmetic on the
   mantissa digits, and compares that with the "%La" rendering of what
   nextafterl actually returned (BUF3).  Four special cases around
   +-Inf and the largest finite values are checked after the loop.

   Returns 0 if everything matched, 1 if at least one mismatch was
   reported, and 4 if sscanf failed to parse a generated literal.  */
int
main (void)
{
  int i;
  union { long double l; long long x[2]; } u, u2;
  char buf[128], buf2[128], buf3[128];
  int result = 0;

  for (i = 0; i < 10000000; i++)
    {
      /* v[0]/v[1] hold the fraction digits of the input (14 and 13 hex
	 digits respectively); v[2]/v[3] hold those of the expected
	 result.  */
      long long v[4];
      /* Bit 0 of FLAGS selects a negative input, bit 1 selects whether
	 the magnitude is incremented (set) or decremented (clear).  */
      int expon, expon2, flags = random () & 3;
      /* IM/IM2 are the digit printed before the radix point of the
	 input and of the expected result.  */
      int im = 1, im2;
      /* Bit position, within v[1], of one unit in the last place.  */
      int ulp_shift = 3;

      v[0] = (random () & 0xfffffffLL) << 28;
      v[0] |= random () & 0xfffffffLL;
      v[1] = (random () & 0xffffffLL) << 28;
      v[1] |= random () & 0xffffff8LL;
      expon = (((unsigned long) random ()) % 2047) - 1023;
      if (expon == -1023)
	{
	  /* Generate a denormal: leading digit 0 at exponent -1022.  */
	  im = 0;
	  expon = -1022;
	  ulp_shift = 56;
	  v[1] = 0;
	  v[0] &= 0xfffffffffffff0LL;
	}
      else if (expon < -1018)
	{
	  v[1] = 0;
	  v[0] &= 0xffffffffffffffLL << (-1018 - expon);
	  ulp_shift = 52 + (-1018 - expon);
	}
      else if (expon < -969)
	{
	  ulp_shift = (-969 + 3 - expon);
	  v[1] &= 0xffffffffffffLL << ulp_shift;
	}
      else if (expon == 1023 && v[0] >= 0xfffffffffffff7LL)
	{
	  if (v[0] > 0xfffffffffffff7LL
	      || (v[1] == 0xffffffffffff8LL && (flags == 2 || flags == 1)))
	    /* Special cases, test separately.  */
	    continue;
	}

      /* Render the input literal.  The values are long long, so the
	 length modifier has to be "ll"; "L" is only defined for
	 floating-point conversions.  */
      int n = sprintf (buf, "%s0x%d.%014llx%013llx", (flags & 1) ? "-" : "",
		       im, v[0], v[1]);
      /* Strip trailing zeros (and a then-dangling radix point) so the
	 text has the same shape as "%La" output.  */
      while (buf[n - 1] == '0')
	n--;
      if (buf[n - 1] == '.')
	n--;
      if (expon == -1022 && n == 3 && memcmp (buf, "0x0", 3) == 0)
	strcpy (buf + n, "p+0");
      else
	sprintf (buf + n, "p%s%d", expon >= 0 ? "+" : "", expon);
      if (sscanf (buf, "%La", &u.l) != 1)
	return 4;

      /* Start the expected result from the input and step it by one
	 ulp in the selected direction.  */
      v[3] = v[1];
      v[2] = v[0];
      expon2 = expon;
      im2 = im;
      if (flags & 2)
	{
	  if (ulp_shift > 52)
	    {
	      v[2] += (1LL << (ulp_shift - 52));
	      goto test_inc;
	    }
	  v[3] += (1LL << ulp_shift);
	  if (v[3] & ~0xffffffffffff8LL)
	    {
	      v[3] &= 0xffffffffffff8LL;
	      v[2]++;
	    test_inc:
	      if (v[2] & ~0xffffffffffffffLL)
		{
		  v[2] &= 0xffffffffffffffLL;
		  /* Carry out of the fraction: a denormal becomes the
		     smallest normal (leading digit 0 -> 1), anything
		     else moves up one exponent.  The expected string is
		     built from IM2, so that is what must be updated.  */
		  if (im2 == 0)
		    im2 = 1;
		  else
		    expon2++;
		}
	    }
	}
      else
	{
	  if (ulp_shift > 52)
	    {
	      v[2] -= (1LL << (ulp_shift - 52));
	      goto test_dec;
	    }
	  v[3] -= (1LL << ulp_shift);
	  if (v[3] & ~0xffffffffffff8LL)
	    {
	      v[3] &= 0xffffffffffff8LL;
	      v[2]--;
	    test_dec:
	      if (v[2] & ~0xffffffffffffffLL)
		{
		  v[2] &= 0xffffffffffffffLL;
		  /* Borrow out of the fraction: at the minimum exponent
		     the result is a denormal (leading digit 1 -> 0),
		     otherwise it moves down one exponent.  As above,
		     IM2 is the digit that is printed.
		     NOTE(review): when ulp_shift > 3 the fraction is
		     not re-aligned after expon2 is decremented, so the
		     expected string looks like it would be half an ulp
		     off in that (very rare) case -- confirm.  */
		  if (expon2 == -1022)
		    im2 = 0;
		  else
		    expon2--;
		}
	    }
	}

      /* Render the expected result the same way as the input.  */
      n = sprintf (buf2, "%s0x%d.%014llx%013llx", (flags & 1) ? "-" : "",
		   im2, v[2], v[3]);
      while (buf2[n - 1] == '0')
	n--;
      if (buf2[n - 1] == '.')
	n--;
      if (expon2 == -1022 && n == 3 && memcmp (buf2, "0x0", 3) == 0)
	strcpy (buf2 + n, "p+0");
      else
	sprintf (buf2 + n, "p%s%d", expon2 >= 0 ? "+" : "", expon2);

      /* Growing a positive magnitude or shrinking a negative one both
	 mean stepping toward +Inf; the other two combinations step
	 toward -Inf.  */
      u2.l = nextafterl (u.l, (flags == 2 || flags == 1)
			      ? __builtin_infl () : -__builtin_infl ());
      sprintf (buf3, "%La", u2.l);
      if (strcmp (buf2, buf3) != 0)
	{
	  printf ("buf is %s u.l %La buf2 %s u2.l %La buf3 %s "
		  "%016llx %016llx - %016llx %016llx\n", buf, u.l, buf2,
		  u2.l, buf3, u.x[0], u.x[1], u2.x[0], u2.x[1]);
	  result = 1;
	}
    }

  /* Special case 1: stepping down from +Inf yields the largest finite
     value.  */
  if (sscanf ("+Inf", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, -__builtin_infl ());
  sprintf (buf3, "%La", u2.l);
  if (strcmp ("0x1.fffffffffffff7ffffffffffff8p+1023", buf3) != 0)
    {
      puts ("Special case 1 failed");
      result = 1;
    }

  /* Special case 2: stepping up from -Inf yields the most negative
     finite value.  */
  if (sscanf ("-Inf", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, __builtin_infl ());
  sprintf (buf3, "%La", u2.l);
  if (strcmp ("-0x1.fffffffffffff7ffffffffffff8p+1023", buf3) != 0)
    {
      puts ("Special case 2 failed");
      result = 1;
    }

  /* Special case 3: stepping up from the largest finite value yields
     +Inf.  */
  if (sscanf ("0x1.fffffffffffff7ffffffffffff8p+1023", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, __builtin_infl ());
  if (fpclassify (u2.l) != FP_INFINITE || u2.l <= 0)
    {
      puts ("Special case 3 failed");
      result = 1;
    }

  /* Special case 4: stepping down from the most negative finite value
     yields -Inf.  */
  if (sscanf ("-0x1.fffffffffffff7ffffffffffff8p+1023", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, -__builtin_infl ());
  if (fpclassify (u2.l) != FP_INFINITE || u2.l >= 0)
    {
      puts ("Special case 4 failed");
      result = 1;
    }

  return result;
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Fix ppc{,64} nextafterl and fpclassifyl
2007-06-05 15:54 [PATCH] Fix ppc{,64} nextafterl and fpclassifyl Jakub Jelinek
@ 2007-06-07 15:50 ` Steven Munroe
0 siblings, 0 replies; 2+ messages in thread
From: Steven Munroe @ 2007-06-07 15:50 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Ulrich Drepper, Glibc hackers
Jakub Jelinek wrote:
> Hi!
>
> Attached is a fixed version of nextafterl and fpclassifyl fixes Steven
> sent privately a few days ago, together with a couple of fixes on top
> of that from me.
>
This looks good, tested on powerpc32-nofpu, powerpc32, and powerpc64.
Thanks again!
> I have used attached tester to test whether nextafterl works correctly,
> not sure if we want that in glibc testsuite (only tested on ppc/ppc64
> as it is ldbl-128ibm specific) or not, perhaps with smaller number of
> tests. The 10 million iterations take ~ 70 seconds on my G5.
>
>
Additional testing is clearly needed, but where would this fit into the
tree? Is it only ppc? I thought other platforms were at least looking at
ldbl-128ibm?
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2007-06-07 15:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-05 15:54 [PATCH] Fix ppc{,64} nextafterl and fpclassifyl Jakub Jelinek
2007-06-07 15:50 ` Steven Munroe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).