From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 10601 invoked by alias); 5 Jun 2007 15:54:02 -0000 Received: (qmail 10549 invoked by uid 22791); 5 Jun 2007 15:54:01 -0000 X-Spam-Check-By: sourceware.org Received: from sunsite.ms.mff.cuni.cz (HELO sunsite.mff.cuni.cz) (195.113.15.26) by sourceware.org (qpsmtpd/0.31) with ESMTP; Tue, 05 Jun 2007 15:53:58 +0000 Received: from sunsite.mff.cuni.cz (localhost.localdomain [127.0.0.1]) by sunsite.mff.cuni.cz (8.13.8/8.13.8) with ESMTP id l55Fud1f030834; Tue, 5 Jun 2007 17:56:39 +0200 Received: (from jakub@localhost) by sunsite.mff.cuni.cz (8.13.8/8.13.8/Submit) id l55Fud2I030799; Tue, 5 Jun 2007 17:56:39 +0200 Date: Tue, 05 Jun 2007 15:54:00 -0000 From: Jakub Jelinek To: Ulrich Drepper , Steven Munroe Cc: Glibc hackers Subject: [PATCH] Fix ppc{,64} nextafterl and fpclassifyl Message-ID: <20070605155638.GO3081@sunsite.mff.cuni.cz> Reply-To: Jakub Jelinek Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="jI8keyz6grp/JLjh" Content-Disposition: inline User-Agent: Mutt/1.4.2.2i Mailing-List: contact libc-hacker-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-hacker-owner@sourceware.org X-SW-Source: 2007-06/txt/msg00004.txt.bz2 --jI8keyz6grp/JLjh Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-length: 446 Hi! Attached is a fixed version of nextafterl and fpclassifyl fixes Steven sent privately a few days ago, together with a couple of fixes on top of that from me. I have used attached tester to test whether nextafterl works correctly, not sure if we want that in glibc testsuite (only tested on ppc/ppc64 as it is ldbl-128ibm specific) or not, perhaps with smaller number of tests. The 10 million iterations take ~ 70 seconds on my G5. 
Jakub --jI8keyz6grp/JLjh Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename=P Content-length: 7600 2007-06-04 Jakub Jelinek * sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c (nextafterl): Remove unused ily variable. Fix nextafterl on +-__LDBL_MAX__ and +-Inf. Remove unreachable code at the end. 2007-06-01 Steven Munroe * sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c: Correct description of ldbl-128ibm in comment. (fpclassifyl): Correct classification of denormals. * sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c (nextafterl): Correct return value for MIN denormal. Rewrite using long double math to correctly handle denormals and canonicalize the results. --- libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c.jj 2007-05-24 16:41:25.000000000 +0200 +++ libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c 2007-06-04 17:30:21.000000000 +0200 @@ -35,7 +35,7 @@ static char rcsid[] = "$NetBSD: $"; long double x,y; #endif { - int64_t hx,hy,ihx,ihy,ilx,ily; + int64_t hx,hy,ihx,ihy,ilx; u_int64_t lx,ly; GET_LDOUBLE_WORDS64(hx,lx,x); @@ -43,7 +43,6 @@ static char rcsid[] = "$NetBSD: $"; ihx = hx&0x7fffffffffffffffLL; /* |hx| */ ilx = lx&0x7fffffffffffffffLL; /* |lx| */ ihy = hy&0x7fffffffffffffffLL; /* |hy| */ - ily = ly&0x7fffffffffffffffLL; /* |ly| */ if((((ihx&0x7ff0000000000000LL)==0x7ff0000000000000LL)&& ((ihx&0x000fffffffffffffLL)!=0)) || /* x is nan */ @@ -54,54 +53,66 @@ static char rcsid[] = "$NetBSD: $"; return y; /* x=y, return y */ if(ihx == 0 && ilx == 0) { /* x == 0 */ long double u; - SET_LDOUBLE_WORDS64(x,hy&0x8000000000000000ULL,1);/* return +-minsubnormal */ - u = math_opt_barrier (u); + hy = (hy & 0x8000000000000000ULL) | 1; + SET_LDOUBLE_WORDS64(x,hy,0ULL);/* return +-minsubnormal */ + u = math_opt_barrier (x); u = u * u; math_force_eval (u); /* raise underflow flag */ return x; } - if(ihx>=0) { /* x > 0 */ - if(ihx>ihy||((ihx==ihy)&&(ilx>ily))) { /* x > y, x -= ulp */ - - if(ilx==0) - hx--; - else - lx--; - } else { /* x < y, x += ulp */ - 
if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL)) - { - SET_LDOUBLE_WORDS64(x,0x7ff0000000000000,0x8000000000000000); - return x; - } - else if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL)) - { - SET_LDOUBLE_WORDS64(x,0xfff0000000000000,0x8000000000000000); - return x; - } - else if((lx&0x7fffffffffffffff)==0) hx++; - else - lx++; + + long double u; + if(x > y) { /* x > y, x -= ulp */ + if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL)) + return x+x; /* overflow, return -inf */ + if (hx >= 0x7ff0000000000000LL) { + SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL); + return u; } - } else { /* x < 0 */ - if(ihy>=0||ihx>ihy||((ihx==ihy)&&(ilx>ily))){/* x < y, x -= ulp */ - if((lx&0x7fffffffffffffff)==0) - hx--; - else - lx--; - } else { /* x > y, x += ulp */ - if((lx&0x7fffffffffffffff)==0) hx++; - else - lx++; + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ + u = math_opt_barrier (x); + x -= __LDBL_DENORM_MIN__; + if (ihx < 0x0360000000000000LL + || (hx > 0 && (int64_t) lx <= 0) + || (hx < 0 && (int64_t) lx > 1)) { + u = u * u; + math_force_eval (u); /* raise underflow flag */ + } + return x; } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); + u *= 0x1.0000000000000p-105L; + } else + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); + return x - u; + } else { /* x < y, x += ulp */ + if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL)) + return x+x; /* overflow, return +inf */ + if ((u_int64_t) hx >= 0xfff0000000000000ULL) { + SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL); + return u; + } + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ + u = math_opt_barrier (x); + x += __LDBL_DENORM_MIN__; + if (ihx < 0x0360000000000000LL + || (hx > 0 && (int64_t) lx < 0 && lx != 0x8000000000000001LL) + || (hx < 0 && (int64_t) lx >= 0)) { + u = u * u; + math_force_eval (u); /* raise underflow flag */ + } + 
if (x == 0.0L) /* handle negative __LDBL_DENORM_MIN__ case */ + x = -0.0L; + return x; + } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); + u *= 0x1.0000000000000p-105L; + } else + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); + return x + u; } - hy = hx&0x7ff0000000000000LL; - if(hy==0x7ff0000000000000LL) return x+x;/* overflow */ - if(hy==0) { - long double u = x * x; /* underflow */ - math_force_eval (u); /* raise underflow flag */ - } - SET_LDOUBLE_WORDS64(x,hx,lx); - return x; } strong_alias (__nextafterl, __nexttowardl) long_double_symbol (libm, __nextafterl, nextafterl); --- libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c.jj 2006-01-28 01:07:25.000000000 +0100 +++ libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c 2007-06-04 20:06:21.000000000 +0200 @@ -1,5 +1,5 @@ /* Return classification value corresponding to argument. - Copyright (C) 1997,1999,2002,2004,2006 Free Software Foundation, Inc. + Copyright (C) 1997,1999,2002,2004,2006,2007 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997 and Jakub Jelinek , 1999. 
@@ -30,14 +30,16 @@ * -NaN fffn nnnn nnnn nnnn xxxx xxxx xxxx xxxx * +Inf 7ff0 0000 0000 0000 xxxx xxxx xxxx xxxx * -Inf fff0 0000 0000 0000 xxxx xxxx xxxx xxxx - * +0 0000 0000 0000 0000 - * -0 8000 0000 0000 0000 - * +normal 001n nnnn nnnn nnnn (smallest) - * -normal 801n nnnn nnnn nnnn (smallest) - * +normal 7fen nnnn nnnn nnnn (largest) - * -normal ffen nnnn nnnn nnnn (largest) - * +denorm 000n nnnn nnnn nnnn - * -denorm 800n nnnn nnnn nnnn + * +0 0000 0000 0000 0000 xxxx xxxx xxxx xxxx + * -0 8000 0000 0000 0000 xxxx xxxx xxxx xxxx + * +normal 0360 0000 0000 0000 0000 0000 0000 0000 (smallest) + * -normal 8360 0000 0000 0000 0000 0000 0000 0000 (smallest) + * +normal 7fef ffff ffff ffff 7c8f ffff ffff fffe (largest) + * -normal ffef ffff ffff ffff fc8f ffff ffff fffe (largest) + * +denorm 0360 0000 0000 0000 8000 0000 0000 0001 (largest) + * -denorm 8360 0000 0000 0000 0000 0000 0000 0001 (largest) + * +denorm 000n nnnn nnnn nnnn xxxx xxxx xxxx xxxx + * -denorm 800n nnnn nnnn nnnn xxxx xxxx xxxx xxxx */ int @@ -59,12 +61,23 @@ ___fpclassifyl (long double x) /* +/-zero or +/- normal or +/- denormal */ if (hx & 0x7fffffffffffffffULL) { /* +/- normal or +/- denormal */ - if ((hx & 0x7ff0000000000000ULL) >= 0x0360000000000000ULL) { + if ((hx & 0x7ff0000000000000ULL) > 0x0360000000000000ULL) { /* +/- normal */ retval = FP_NORMAL; } else { - /* +/- denormal */ - retval = FP_SUBNORMAL; + if ((hx & 0x7ff0000000000000ULL) == 0x0360000000000000ULL) { + if ((lx & 0x7fffffffffffffff) /* lower is non-zero */ + && ((lx^hx) & 0x8000000000000000ULL)) { /* and sign differs */ + /* +/- denormal */ + retval = FP_SUBNORMAL; + } else { + /* +/- normal */ + retval = FP_NORMAL; + } + } else { + /* +/- denormal */ + retval = FP_SUBNORMAL; + } } } else { /* +/- zero */ --jI8keyz6grp/JLjh Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="test-ldbl128-ibm-nextafterl.c" Content-length: 4265 #define _GNU_SOURCE #include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> int 
main (void)
{
  /* Randomized check of nextafterl.  Each iteration builds a random value
     as a hexadecimal floating-point string, derives the textual form of
     its expected neighbour by integer arithmetic on the mantissa digits,
     and compares that text with nextafterl's result printed via %La.
     Four boundary cases around +-Inf and the largest finite values are
     checked afterwards.

     Returns 0 on success, 1 if any comparison failed, and 4 if sscanf
     could not parse one of the generated strings.  */
  int i;
  union { long double l; long long x[2]; } u, u2;
  char buf[128], buf2[128], buf3[128];
  int result = 0;

  for (i = 0; i < 10000000; i++)
    {
      /* v[0]/v[1] hold the 56 high and 52 low fraction bits of the input
	 (printed as 14 + 13 hex digits); v[2]/v[3] hold the same for the
	 expected result.  */
      long long v[4];
      /* flags bit 0: the value is negative; flags bit 1: the magnitude is
	 incremented (otherwise decremented).  */
      int expon, expon2, flags = random () & 3;
      /* im/im2: integer digit (0 or 1) of the input/expected mantissa.  */
      int im = 1, im2;
      /* Bit position, within v[1] (or 52 + position within v[0]), of one
	 unit in the last place for this exponent.  */
      int ulp_shift = 3;
      int n;

      v[0] = (random () & 0xfffffffLL) << 28;
      v[0] |= random () & 0xfffffffLL;
      v[1] = (random () & 0xffffffLL) << 28;
      v[1] |= random () & 0xffffff8LL;
      /* random () is non-negative, so do the reduction in signed
	 arithmetic instead of relying on unsigned wraparound.  */
      expon = (int) (random () % 2047) - 1023;
      if (expon == -1023)
	{
	  /* Zero integer digit at the minimum exponent.  */
	  im = 0;
	  expon = -1022;
	  ulp_shift = 56;
	  v[1] = 0;
	  v[0] &= 0xfffffffffffff0LL;
	}
      else if (expon < -1018)
	{
	  v[1] = 0;
	  v[0] &= 0xffffffffffffffLL << (-1018 - expon);
	  ulp_shift = 52 + (-1018 - expon);
	}
      else if (expon < -969)
	{
	  ulp_shift = (-969 + 3 - expon);
	  /* Clear the bits below one ulp.  ulp_shift is in [4, 52] here;
	     shifting a 48-bit signed constant left by that much would
	     overflow long long, so build the mask from its complement.  */
	  v[1] &= ~((1LL << ulp_shift) - 1);
	}
      else if (expon == 1023 && v[0] >= 0xfffffffffffff7LL)
	{
	  if (v[0] > 0xfffffffffffff7LL
	      || (v[1] == 0xffffffffffff8LL
		  && (flags == 2 || flags == 1)))
	    /* Special cases, test separately.  */
	    continue;
	}

      /* Format the input value, dropping trailing zero digits (and a then
	 dangling '.') before appending the exponent.  */
      n = snprintf (buf, sizeof buf, "%s0x%d.%014llx%013llx",
		    (flags & 1) ? "-" : "", im, v[0], v[1]);
      while (buf[n - 1] == '0')
	n--;
      if (buf[n - 1] == '.')
	n--;
      if (expon == -1022 && n == 3 && memcmp (buf, "0x0", 3) == 0)
	snprintf (buf + n, sizeof buf - n, "p+0");
      else
	snprintf (buf + n, sizeof buf - n, "p%s%d",
		  expon >= 0 ? "+" : "", expon);
      if (sscanf (buf, "%La", &u.l) != 1)
	return 4;

      /* Compute the expected neighbour in v[2], v[3], im2 and expon2.  */
      v[3] = v[1];
      v[2] = v[0];
      expon2 = expon;
      im2 = im;
      if (flags & 2)
	{
	  if (ulp_shift > 52)
	    {
	      /* The ulp lies in the high word.  */
	      v[2] += (1LL << (ulp_shift - 52));
	      goto test_inc;
	    }
	  v[3] += (1LL << ulp_shift);
	  if (v[3] & ~0xffffffffffff8LL)
	    {
	      /* Carry out of the low word.  */
	      v[3] &= 0xffffffffffff8LL;
	      v[2]++;
	    test_inc:
	      if (v[2] & ~0xffffffffffffffLL)
		{
		  /* Carry out of the fraction: the integer digit goes from
		     0 to 1, or the exponent is bumped.  The expected
		     string is printed from im2, so im2 must be updated.  */
		  v[2] &= 0xffffffffffffffLL;
		  if (im2 == 0)
		    im2 = 1;
		  else
		    expon2++;
		}
	    }
	}
      else
	{
	  if (ulp_shift > 52)
	    {
	      v[2] -= (1LL << (ulp_shift - 52));
	      goto test_dec;
	    }
	  v[3] -= (1LL << ulp_shift);
	  if (v[3] & ~0xffffffffffff8LL)
	    {
	      /* Borrow from the high word.  */
	      v[3] &= 0xffffffffffff8LL;
	      v[2]--;
	    test_dec:
	      if (v[2] & ~0xffffffffffffffLL)
		{
		  /* Borrow out of the fraction: at the minimum exponent
		     the integer digit drops to 0, otherwise the exponent
		     is lowered.  */
		  v[2] &= 0xffffffffffffffLL;
		  if (expon2 == -1022)
		    im2 = 0;
		  else
		    expon2--;
		}
	    }
	}

      n = snprintf (buf2, sizeof buf2, "%s0x%d.%014llx%013llx",
		    (flags & 1) ? "-" : "", im2, v[2], v[3]);
      while (buf2[n - 1] == '0')
	n--;
      if (buf2[n - 1] == '.')
	n--;
      if (expon2 == -1022 && n == 3 && memcmp (buf2, "0x0", 3) == 0)
	snprintf (buf2 + n, sizeof buf2 - n, "p+0");
      else
	snprintf (buf2 + n, sizeof buf2 - n, "p%s%d",
		  expon2 >= 0 ? "+" : "", expon2);

      /* Positive with growing magnitude (flags == 2) and negative with
	 shrinking magnitude (flags == 1) both step towards +Inf.  */
      u2.l = nextafterl (u.l,
			 (flags == 2 || flags == 1)
			 ? __builtin_infl () : -__builtin_infl ());
      snprintf (buf3, sizeof buf3, "%La", u2.l);
      if (strcmp (buf2, buf3) != 0)
	{
	  printf ("buf is %s u.l %La buf2 %s u2.l %La buf3 %s "
		  "%016llx %016llx - %016llx %016llx\n",
		  buf, u.l, buf2, u2.l, buf3,
		  u.x[0], u.x[1], u2.x[0], u2.x[1]);
	  result = 1;
	}
    }

  /* +Inf towards -Inf must give the largest finite value.  */
  if (sscanf ("+Inf", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, -__builtin_infl ());
  snprintf (buf3, sizeof buf3, "%La", u2.l);
  if (strcmp ("0x1.fffffffffffff7ffffffffffff8p+1023", buf3) != 0)
    {
      puts ("Special case 1 failed");
      result = 1;
    }

  /* -Inf towards +Inf must give the most negative finite value.  */
  if (sscanf ("-Inf", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, __builtin_infl ());
  snprintf (buf3, sizeof buf3, "%La", u2.l);
  if (strcmp ("-0x1.fffffffffffff7ffffffffffff8p+1023", buf3) != 0)
    {
      puts ("Special case 2 failed");
      result = 1;
    }

  /* The largest finite value towards +Inf must overflow to +Inf.  */
  if (sscanf ("0x1.fffffffffffff7ffffffffffff8p+1023", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, __builtin_infl ());
  if (fpclassify (u2.l) != FP_INFINITE || u2.l <= 0)
    {
      puts ("Special case 3 failed");
      result = 1;
    }

  /* The most negative finite value towards -Inf must overflow to -Inf.  */
  if (sscanf ("-0x1.fffffffffffff7ffffffffffff8p+1023", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, -__builtin_infl ());
  if (fpclassify (u2.l) != FP_INFINITE || u2.l >= 0)
    {
      puts ("Special case 4 failed");
      result = 1;
    }

  return result;
}
--jI8keyz6grp/JLjh--