[PATCH] Fix ppc{,64} nextafterl and fpclassifyl

public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed

* [PATCH] Fix ppc{,64} nextafterl and fpclassifyl
@ 2007-06-05 15:54 Jakub Jelinek
  2007-06-07 15:50 ` Steven Munroe
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2007-06-05 15:54 UTC (permalink / raw)
  To: Ulrich Drepper, Steven Munroe; +Cc: Glibc hackers

[-- Attachment #1: Type: text/plain, Size: 446 bytes --]

Hi!

Attached is a fixed version of nextafterl and fpclassifyl fixes Steven
sent privately a few days ago, together with a couple of fixes on top
of that from me.

I have used attached tester to test whether nextafterl works correctly,
not sure if we want that in glibc testsuite (only tested on ppc/ppc64
as it is ldbl-128ibm specific) or not, perhaps with smaller number of
tests.  The 10 million iterations take ~ 70 seconds on my G5.

	Jakub

[-- Attachment #2: P --]
[-- Type: text/plain, Size: 7600 bytes --]

2007-06-04  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c (nextafterl): Remove
	unused ily variable.  Fix nextafterl on +-__LDBL_MAX__ and +-Inf.
	Remove unreachable code at the end.

2007-06-01  Steven Munroe  <sjmunroe@us.ibm.com>

	* sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c: Correct description of
	ldbl-128ibm in comment.
	(fpclassifyl): Correct classification of denormals.
	* sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c (nextafterl): Correct
	return value for MIN denormal. Rewrite using long double math to
	correctly handle denormals and canonicalize the results.

--- libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c.jj	2007-05-24 16:41:25.000000000 +0200
+++ libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c	2007-06-04 17:30:21.000000000 +0200
@@ -35,7 +35,7 @@ static char rcsid[] = "$NetBSD: $";
 	long double x,y;
 #endif
 {
-	int64_t hx,hy,ihx,ihy,ilx,ily;
+	int64_t hx,hy,ihx,ihy,ilx;
 	u_int64_t lx,ly;
 
 	GET_LDOUBLE_WORDS64(hx,lx,x);
@@ -43,7 +43,6 @@ static char rcsid[] = "$NetBSD: $";
 	ihx = hx&0x7fffffffffffffffLL;		/* |hx| */
 	ilx = lx&0x7fffffffffffffffLL;		/* |lx| */
 	ihy = hy&0x7fffffffffffffffLL;		/* |hy| */
-	ily = ly&0x7fffffffffffffffLL;		/* |ly| */
 
 	if((((ihx&0x7ff0000000000000LL)==0x7ff0000000000000LL)&&
 	    ((ihx&0x000fffffffffffffLL)!=0)) ||   /* x is nan */
@@ -54,54 +53,66 @@ static char rcsid[] = "$NetBSD: $";
 	    return y;		/* x=y, return y */
 	if(ihx == 0 && ilx == 0) {			/* x == 0 */
 	    long double u;
-	    SET_LDOUBLE_WORDS64(x,hy&0x8000000000000000ULL,1);/* return +-minsubnormal */
-	    u = math_opt_barrier (u);
+	    hy = (hy & 0x8000000000000000ULL) | 1;
+	    SET_LDOUBLE_WORDS64(x,hy,0ULL);/* return +-minsubnormal */
+	    u = math_opt_barrier (x);
 	    u = u * u;
 	    math_force_eval (u);		/* raise underflow flag */
 	    return x;
 	}
-	if(ihx>=0) {			/* x > 0 */
-	    if(ihx>ihy||((ihx==ihy)&&(ilx>ily))) {	/* x > y, x -= ulp */
-
-	        if(ilx==0)
-		    hx--;
-		else
-		    lx--;
-	    } else {				/* x < y, x += ulp */
-	        if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL))
-		  {
-		    SET_LDOUBLE_WORDS64(x,0x7ff0000000000000,0x8000000000000000);
-		    return x;
-		  }
-	        else if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL))
-		  {
-		    SET_LDOUBLE_WORDS64(x,0xfff0000000000000,0x8000000000000000);
-		    return x;
-		  }
-		else if((lx&0x7fffffffffffffff)==0) hx++;
-		else
-		  lx++;
+	
+	long double u;
+	if(x > y) {	/* x > y, x -= ulp */
+	    if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL))
+	      return x+x;	/* overflow, return -inf */
+	    if (hx >= 0x7ff0000000000000LL) {
+	      SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL);
+	      return u;
 	    }
-	} else {				/* x < 0 */
-	    if(ihy>=0||ihx>ihy||((ihx==ihy)&&(ilx>ily))){/* x < y, x -= ulp */
-		if((lx&0x7fffffffffffffff)==0)
-		    hx--;
-		else
-		    lx--;
-	    } else {				/* x > y, x += ulp */
-		if((lx&0x7fffffffffffffff)==0) hx++;
-		else
-		  lx++;
+	    if(ihx <= 0x0360000000000000LL) {  /* x <= LDBL_MIN */
+	      u = math_opt_barrier (x);
+	      x -= __LDBL_DENORM_MIN__;
+	      if (ihx < 0x0360000000000000LL
+		  || (hx > 0 && (int64_t) lx <= 0)
+		  || (hx < 0 && (int64_t) lx > 1)) {
+		u = u * u;
+		math_force_eval (u);		/* raise underflow flag */
+	      }
+	      return x;
 	    }
+	    if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
+	      SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+	      u *= 0x1.0000000000000p-105L;
+	    } else
+	      SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+	    return x - u;
+	} else {				/* x < y, x += ulp */
+	    if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL))
+	      return x+x;	/* overflow, return +inf */
+	    if ((u_int64_t) hx >= 0xfff0000000000000ULL) {
+	      SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL);
+	      return u;
+	    }
+	    if(ihx <= 0x0360000000000000LL) {  /* x <= LDBL_MIN */
+	      u = math_opt_barrier (x);
+	      x += __LDBL_DENORM_MIN__;
+	      if (ihx < 0x0360000000000000LL
+		  || (hx > 0 && (int64_t) lx < 0 && lx != 0x8000000000000001LL)
+		  || (hx < 0 && (int64_t) lx >= 0)) {
+		u = u * u;
+		math_force_eval (u);		/* raise underflow flag */
+	      }
+	      if (x == 0.0L)	/* handle negative __LDBL_DENORM_MIN__ case */
+		x = -0.0L;
+	      return x;
+	    }
+	    if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
+	      SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+	      u *= 0x1.0000000000000p-105L;
+	    } else
+	      SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+	    return x + u;
 	}
-	hy = hx&0x7ff0000000000000LL;
-	if(hy==0x7ff0000000000000LL) return x+x;/* overflow  */
-	if(hy==0) {
-	    long double u = x * x;		/* underflow */
-	    math_force_eval (u);		/* raise underflow flag */
-	}
-	SET_LDOUBLE_WORDS64(x,hx,lx);
-	return x;
 }
 strong_alias (__nextafterl, __nexttowardl)
 long_double_symbol (libm, __nextafterl, nextafterl);
--- libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c.jj	2006-01-28 01:07:25.000000000 +0100
+++ libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c	2007-06-04 20:06:21.000000000 +0200
@@ -1,5 +1,5 @@
 /* Return classification value corresponding to argument.
-   Copyright (C) 1997,1999,2002,2004,2006 Free Software Foundation, Inc.
+   Copyright (C) 1997,1999,2002,2004,2006,2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997 and
    		  Jakub Jelinek <jj@ultra.linux.cz>, 1999.
@@ -30,14 +30,16 @@
    * -NaN	fffn nnnn nnnn nnnn xxxx xxxx xxxx xxxx
    * +Inf	7ff0 0000 0000 0000 xxxx xxxx xxxx xxxx
    * -Inf	fff0 0000 0000 0000 xxxx xxxx xxxx xxxx
-   * +0		0000 0000 0000 0000
-   * -0		8000 0000 0000 0000
-   * +normal	001n nnnn nnnn nnnn (smallest)
-   * -normal	801n nnnn nnnn nnnn (smallest)
-   * +normal	7fen nnnn nnnn nnnn (largest)
-   * -normal	ffen nnnn nnnn nnnn (largest)
-   * +denorm	000n nnnn nnnn nnnn
-   * -denorm	800n nnnn nnnn nnnn
+   * +0		0000 0000 0000 0000 xxxx xxxx xxxx xxxx
+   * -0		8000 0000 0000 0000 xxxx xxxx xxxx xxxx
+   * +normal	0360 0000 0000 0000 0000 0000 0000 0000 (smallest)
+   * -normal	8360 0000 0000 0000 0000 0000 0000 0000 (smallest)
+   * +normal	7fef ffff ffff ffff 7c8f ffff ffff fffe (largest)
+   * -normal	ffef ffff ffff ffff fc8f ffff ffff fffe (largest)
+   * +denorm	0360 0000 0000 0000 8000 0000 0000 0001 (largest)
+   * -denorm	8360 0000 0000 0000 0000 0000 0000 0001 (largest)
+   * +denorm	000n nnnn nnnn nnnn xxxx xxxx xxxx xxxx
+   * -denorm	800n nnnn nnnn nnnn xxxx xxxx xxxx xxxx
    */
 
 int
@@ -59,12 +61,23 @@ ___fpclassifyl (long double x)
       /* +/-zero or +/- normal or +/- denormal */
       if (hx & 0x7fffffffffffffffULL) {
 	  /* +/- normal or +/- denormal */
-	  if ((hx & 0x7ff0000000000000ULL) >= 0x0360000000000000ULL) {
+	  if ((hx & 0x7ff0000000000000ULL) > 0x0360000000000000ULL) {
 	      /* +/- normal */
 	      retval = FP_NORMAL;
 	  } else {
-	      /* +/- denormal */
-	      retval = FP_SUBNORMAL;
+	      if ((hx & 0x7ff0000000000000ULL) == 0x0360000000000000ULL) {
+		  if ((lx & 0x7fffffffffffffff)	/* lower is non-zero */
+		  && ((lx^hx) & 0x8000000000000000ULL)) { /* and sign differs */
+		      /* +/- denormal */
+		      retval = FP_SUBNORMAL;
+		  } else {
+		      /* +/- normal */
+		      retval = FP_NORMAL;
+		  }
+	      } else {
+		  /* +/- denormal */
+		  retval = FP_SUBNORMAL;
+	      }
 	  }
       } else {
 	  /* +/- zero */

[-- Attachment #3: test-ldbl128-ibm-nextafterl.c --]
[-- Type: text/plain, Size: 4265 bytes --]

#define _GNU_SOURCE
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Randomized test of nextafterl.

   Each iteration builds a random value as a hexadecimal floating-point
   string (sign, implicit mantissa bit, 56 + 52 explicit mantissa bits and
   a binary exponent), parses it with sscanf ("%La"), and independently
   computes the string of the neighbouring value by adding or subtracting
   one unit in the last place on the integer mantissa words.  The result
   of nextafterl towards +Inf or -Inf is printed with "%La" and compared
   textually with the expected string.  Four fixed special cases around
   +-Inf and the largest finite value are checked after the loop.

   The mantissa layout and exponent thresholds used here are the ones the
   accompanying message describes as ldbl-128ibm specific.

   Returns 0 if everything matched, 1 if any comparison failed, and 4 if
   sscanf could not parse one of the generated strings.  */
int
main (void)
{
  int i;
  union { long double l; long long x[2]; } u, u2;
  char buf[128], buf2[128], buf3[128];
  int result = 0;
  for (i = 0; i < 10000000; i++)
    {
      /* v[0]/v[1] hold the high (56 bit) and low (52 bit) mantissa words
	 of the input, v[2]/v[3] the same for the expected result.  */
      long long v[4];
      /* flags bit 0: input is negative; bit 1: step the magnitude up.  */
      int expon, expon2, flags = random () & 3;
      /* im/im2 are the digit printed before the hex point (0 for
	 denormals) for the input and the expected result.  */
      int im = 1, im2;
      /* Bit position of one ulp within v[1] (or, when > 52, within v[0]
	 at ulp_shift - 52).  */
      int ulp_shift = 3;
      v[0] = (random () & 0xfffffffLL) << 28;
      v[0] |= random () & 0xfffffffLL;
      v[1] = (random () & 0xffffffLL) << 28;
      v[1] |= random () & 0xffffff8LL;
      expon = (((unsigned long) random ()) % 2047) - 1023;
      if (expon == -1023)
	{
	  /* Denormal input: no implicit bit, fixed exponent.  */
	  im = 0;
	  expon = -1022;
	  ulp_shift = 56;
	  v[1] = 0;
	  v[0] &= 0xfffffffffffff0LL;
	}
      else if (expon < -1018)
	{
	  /* Small normal values: the low word carries no bits and the ulp
	     lies inside the high word.  */
	  v[1] = 0;
	  v[0] &= 0xffffffffffffffLL << (-1018 - expon);
	  ulp_shift = 52 + (-1018 - expon);
	}
      else if (expon < -969)
	{
	  /* Only part of the low word is representable.  */
	  ulp_shift = (-969 + 3 - expon);
	  v[1] &= 0xffffffffffffLL << ulp_shift;
	}
      else if (expon == 1023 && v[0] >= 0xfffffffffffff7LL)
	{
	  if (v[0] > 0xfffffffffffff7LL
	      || (v[1] == 0xffffffffffff8LL && (flags == 2 || flags == 1)))
	    /* Special cases, test separately.  */
	    continue;
	}
      /* long long arguments need the "ll" length modifier; "L" is only
	 defined for floating-point conversions in ISO C.  */
      int n = sprintf (buf, "%s0x%d.%014llx%013llx", (flags & 1) ? "-" : "",
		       im, (unsigned long long) v[0],
		       (unsigned long long) v[1]);
      /* Strip trailing zeros (and a then-dangling point) so the string
	 has the same shape as "%La" output.  */
      while (buf[n - 1] == '0')
	n--;
      if (buf[n - 1] == '.')
	n--;
      if (expon == -1022 && n == 3 && memcmp (buf, "0x0", 3) == 0)
	strcpy (buf + n, "p+0");
      else
	sprintf (buf + n, "p%s%d", expon >= 0 ? "+" : "", expon);
      if (sscanf (buf, "%La", &u.l) != 1)
	return 4;
      v[3] = v[1];
      v[2] = v[0];
      expon2 = expon;
      im2 = im;
      if (flags & 2)
	{
	  /* Expected result: magnitude plus one ulp.  */
	  if (ulp_shift > 52)
	    {
	      v[2] += (1LL << (ulp_shift - 52));
	      goto test_inc;
	    }
	  v[3] += (1LL << ulp_shift);
	  if (v[3] & ~0xffffffffffff8LL)
	    {
	      /* Carry out of the low word.  */
	      v[3] &= 0xffffffffffff8LL;
	      v[2]++;
	    test_inc:
	      if (v[2] & ~0xffffffffffffffLL)
		{
		  /* Carry out of the high word: a denormal becomes the
		     smallest normal, anything else moves up a binade.
		     The update must go to im2, which is what buf2 is
		     formatted from.  */
		  v[2] &= 0xffffffffffffffLL;
		  if (im2 == 0)
		    im2 = 1;
		  else
		    expon2++;
		}
	    }
	}
      else
	{
	  /* Expected result: magnitude minus one ulp.  */
	  if (ulp_shift > 52)
	    {
	      v[2] -= (1LL << (ulp_shift - 52));
	      goto test_dec;
	    }
	  v[3] -= (1LL << ulp_shift);
	  if (v[3] & ~0xffffffffffff8LL)
	    {
	      /* Borrow from the high word.  */
	      v[3] &= 0xffffffffffff8LL;
	      v[2]--;
	    test_dec:
	      if (v[2] & ~0xffffffffffffffLL)
		{
		  /* Borrow out of the high word: at the minimum exponent
		     the result is denormal, otherwise it moves down a
		     binade.  As above, im2 is the variable to update.  */
		  v[2] &= 0xffffffffffffffLL;
		  if (expon2 == -1022)
		    im2 = 0;
		  else
		    expon2--;
		}
	    }
	}
      n = sprintf (buf2, "%s0x%d.%014llx%013llx", (flags & 1) ? "-" : "",
		   im2, (unsigned long long) v[2],
		   (unsigned long long) v[3]);
      while (buf2[n - 1] == '0')
	n--;
      if (buf2[n - 1] == '.')
	n--;
      if (expon2 == -1022 && n == 3 && memcmp (buf2, "0x0", 3) == 0)
	strcpy (buf2 + n, "p+0");
      else
	sprintf (buf2 + n, "p%s%d", expon2 >= 0 ? "+" : "", expon2);
      /* flags 1 (negative, magnitude down) and 2 (positive, magnitude up)
	 both step towards +Inf; flags 0 and 3 step towards -Inf.  */
      u2.l = nextafterl (u.l, (flags == 2 || flags == 1)
			 ? __builtin_infl () : -__builtin_infl ());
      sprintf (buf3, "%La", u2.l);
      if (strcmp (buf2, buf3) != 0)
	{
	  printf ("buf is %s u.l %La buf2 %s u2.l %La buf3 %s "
		  "%016llx %016llx - %016llx %016llx\n", buf, u.l, buf2,
		  u2.l, buf3, (unsigned long long) u.x[0],
		  (unsigned long long) u.x[1],
		  (unsigned long long) u2.x[0],
		  (unsigned long long) u2.x[1]);
	  result = 1;
	}
    }

  /* Stepping down from +Inf must give the largest finite value.  */
  if (sscanf ("+Inf", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, -__builtin_infl ());
  sprintf (buf3, "%La", u2.l);
  if (strcmp ("0x1.fffffffffffff7ffffffffffff8p+1023", buf3) != 0)
    {
      puts ("Special case 1 failed");
      result = 1;
    }
  /* Stepping up from -Inf must give the most negative finite value.  */
  if (sscanf ("-Inf", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, __builtin_infl ());
  sprintf (buf3, "%La", u2.l);
  if (strcmp ("-0x1.fffffffffffff7ffffffffffff8p+1023", buf3) != 0)
    {
      puts ("Special case 2 failed");
      result = 1;
    }
  /* Stepping up from the largest finite value must give +Inf.  */
  if (sscanf ("0x1.fffffffffffff7ffffffffffff8p+1023", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, __builtin_infl ());
  if (fpclassify (u2.l) != FP_INFINITE || u2.l <= 0)
    {
      puts ("Special case 3 failed");
      result = 1;
    }
  /* Stepping down from the most negative finite value must give -Inf.  */
  if (sscanf ("-0x1.fffffffffffff7ffffffffffff8p+1023", "%La", &u.l) != 1)
    return 4;
  u2.l = nextafterl (u.l, -__builtin_infl ());
  if (fpclassify (u2.l) != FP_INFINITE || u2.l >= 0)
    {
      puts ("Special case 4 failed");
      result = 1;
    }

  return result;
}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix ppc{,64} nextafterl and fpclassifyl
  2007-06-05 15:54 [PATCH] Fix ppc{,64} nextafterl and fpclassifyl Jakub Jelinek
@ 2007-06-07 15:50 ` Steven Munroe
  0 siblings, 0 replies; 2+ messages in thread
From: Steven Munroe @ 2007-06-07 15:50 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Ulrich Drepper, Glibc hackers

Jakub Jelinek wrote:
> Hi!
>
> Attached is a fixed version of nextafterl and fpclassifyl fixes Steven
> sent privately a few days ago, together with a couple of fixes on top
> of that from me.
>   

This looks good, tested on powerpc32-nofpu, powerpc32, and powerpc64.
Thanks again!
> I have used attached tester to test whether nextafterl works correctly,
> not sure if we want that in glibc testsuite (only tested on ppc/ppc64
> as it is ldbl-128ibm specific) or not, perhaps with smaller number of
> tests.  The 10 million iterations take ~ 70 seconds on my G5.
>
>   

Additional testing is clearly needed, but where would this fit into the
tree? Is it only ppc? I thought other platforms were at least looking at
ldbl-128ibm?

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-06-07 15:50 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-05 15:54 [PATCH] Fix ppc{,64} nextafterl and fpclassifyl Jakub Jelinek
2007-06-07 15:50 ` Steven Munroe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).