[glibc/maskray/grte] [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction

public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed

* [glibc/maskray/grte] [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
@ 2021-08-28  0:34 Fangrui Song
  0 siblings, 0 replies; 4+ messages in thread
From: Fangrui Song @ 2021-08-28  0:34 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=76f9784421e988df5a05876a7f256716dd7e412d

commit 76f9784421e988df5a05876a7f256716dd7e412d
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:33:13 2018 +0100

    [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
    
    This patch improves the accuracy of the range reduction.  When the input is
    large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is not
    enough.  Improve range reduction accuracy to 136 bits.  As a result the special
    checks for results close to zero can be removed.  The ULP of the polynomials is
    at worst 0.55ULP, so there is no reason for the slow functions, and they can be
    removed.
    
            * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
            reduce_sincos, improve accuracy to 136 bits.
            (do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
            (__sin): Use improved reduction and simplified do_sincos calculation.
            (__cos): Likewise.
            * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.

Diff:
---
 sysdeps/ieee754/dbl-64/s_sin.c    | 94 ++++++++++++++++++---------------------
 sysdeps/ieee754/dbl-64/s_sincos.c |  6 +--
 2 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index c86fb9f2aa..b8c366a6f0 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant)
   return retval;
 }
 
+/* Reduce range of x to within PI/2 with abs (x) < 105414350.  The high part
+   is written to *a, the low part to *da.  Range reduction is accurate to 136
+   bits so that when x is large and *a very close to zero, all 53 bits of *a
+   are correct.  */
 static inline int4
 __always_inline
-reduce_sincos_1 (double x, double *a, double *da)
+reduce_sincos (double x, double *a, double *da)
 {
   mynumber v;
 
@@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da)
   v.x = t;
   double y = (x - xn * mp1) - xn * mp2;
   int4 n = v.i[LOW_HALF] & 3;
-  double db = xn * mp3;
-  double b = y - db;
-  db = (y - b) - db;
+
+  double b, db, t1, t2;
+  t1 = xn * pp3;
+  t2 = y - t1;
+  db = (y - t2) - t1;
+
+  t1 = xn * pp4;
+  b = t2 - t1;
+  db += (t2 - b) - t1;
 
   *a = b;
   *da = db;
-
   return n;
 }
 
-/* Compute sin (A + DA).  cos can be computed by passing SHIFT_QUADRANT as
-   true, which results in shifting the quadrant N clockwise.  */
+/* Compute sin or cos (A + DA) for the given quadrant N.  */
 static double
 __always_inline
-do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
+do_sincos (double a, double da, int4 n)
 {
-  double xx, retval, res, cor;
-  double eps = fabs (x) * 1.2e-30;
+  double retval, cor;
 
-  int k1 = (n + shift_quadrant) & 3;
-  switch (k1)
-    {			/* quarter of unit circle */
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      xx = a * a;
+  if (n & 1)
+    /* Max ULP is 0.513.  */
+    retval = do_cos (a, da, &cor);
+  else
+    {
+      double xx = a * a;
+      /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
       if (xx < 0.01588)
-	{
-	  /* Taylor series.  */
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (eps, cor);
-	  retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant);
-	}
+	retval = TAYLOR_SIN (xx, a, da, cor);
       else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (eps, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : sloww1 (a, da, x, shift_quadrant));
-	}
-      break;
-
-    case 1:
-    case 3:
-      res = do_cos (a, da, &cor);
-      cor = 1.025 * cor + __copysign (eps, cor);
-      retval = ((res == res + cor) ? ((n & 2) ? -res : res)
-		: sloww2 (a, da, x, n));
-      break;
+	retval = __copysign (do_sin (a, da, &cor), a);
     }
 
-  return retval;
+  return (n & 2) ? -retval : retval;
 }
 
+
 /*******************************************************************/
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
@@ -374,13 +361,18 @@ SECTION
 #endif
 __sin (double x)
 {
-  double xx, t, cor;
+#ifndef IN_SINCOS
+  double xx, t, a, da, cor;
   mynumber u;
-  int4 k, m;
+  int4 k, m, n;
   double retval = 0;
 
-#ifndef IN_SINCOS
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
+#else
+  double xx, t, cor;
+  mynumber u;
+  int4 k, m;
+  double retval = 0;
 #endif
 
   u.x = x;
@@ -419,9 +411,8 @@ __sin (double x)
 /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
   else if (k < 0x419921FB)
     {
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, false);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n);
     }				/*   else  if (k <  0x419921FB )    */
 
 /* --------------------105414350 <|x| <2^1024------------------------------*/
@@ -456,7 +447,11 @@ __cos (double x)
 {
   double y, xx, cor, a, da;
   mynumber u;
+#ifndef IN_SINCOS
+  int4 k, m, n;
+#else
   int4 k, m;
+#endif
 
   double retval = 0;
 
@@ -496,9 +491,8 @@ __cos (double x)
 #ifndef IN_SINCOS
   else if (k < 0x419921FB)
     {				/* 2.426265<|x|< 105414350 */
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, true);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n + 1);
     }				/*   else  if (k <  0x419921FB )    */
 
   /* 105414350 <|x| <2^1024 */
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index a9af8ce526..4f032d2e42 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx)
   if (k < 0x419921FB)
     {
       double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
+      int4 n = reduce_sincos (x, &a, &da);
 
-      *sinx = do_sincos_1 (a, da, x, n, false);
-      *cosx = do_sincos_1 (a, da, x, n, true);
+      *sinx = do_sincos (a, da, n);
+      *cosx = do_sincos (a, da, n + 1);
 
       return;
     }


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [glibc/maskray/grte] [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
@ 2021-08-28  0:29 Fangrui Song
  0 siblings, 0 replies; 4+ messages in thread
From: Fangrui Song @ 2021-08-28  0:29 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=5b1fbde93bf42e0daa191f3962381906f920dfae

commit 5b1fbde93bf42e0daa191f3962381906f920dfae
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:33:13 2018 +0100

    [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
    
    This patch improves the accuracy of the range reduction.  When the input is
    large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is not
    enough.  Improve range reduction accuracy to 136 bits.  As a result the special
    checks for results close to zero can be removed.  The ULP of the polynomials is
    at worst 0.55ULP, so there is no reason for the slow functions, and they can be
    removed.
    
            * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
            reduce_sincos, improve accuracy to 136 bits.
            (do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
            (__sin): Use improved reduction and simplified do_sincos calculation.
            (__cos): Likewise.
            * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.

Diff:
---
 sysdeps/ieee754/dbl-64/s_sin.c    | 94 ++++++++++++++++++---------------------
 sysdeps/ieee754/dbl-64/s_sincos.c |  6 +--
 2 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index c86fb9f2aa..b8c366a6f0 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant)
   return retval;
 }
 
+/* Reduce range of x to within PI/2 with abs (x) < 105414350.  The high part
+   is written to *a, the low part to *da.  Range reduction is accurate to 136
+   bits so that when x is large and *a very close to zero, all 53 bits of *a
+   are correct.  */
 static inline int4
 __always_inline
-reduce_sincos_1 (double x, double *a, double *da)
+reduce_sincos (double x, double *a, double *da)
 {
   mynumber v;
 
@@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da)
   v.x = t;
   double y = (x - xn * mp1) - xn * mp2;
   int4 n = v.i[LOW_HALF] & 3;
-  double db = xn * mp3;
-  double b = y - db;
-  db = (y - b) - db;
+
+  double b, db, t1, t2;
+  t1 = xn * pp3;
+  t2 = y - t1;
+  db = (y - t2) - t1;
+
+  t1 = xn * pp4;
+  b = t2 - t1;
+  db += (t2 - b) - t1;
 
   *a = b;
   *da = db;
-
   return n;
 }
 
-/* Compute sin (A + DA).  cos can be computed by passing SHIFT_QUADRANT as
-   true, which results in shifting the quadrant N clockwise.  */
+/* Compute sin or cos (A + DA) for the given quadrant N.  */
 static double
 __always_inline
-do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
+do_sincos (double a, double da, int4 n)
 {
-  double xx, retval, res, cor;
-  double eps = fabs (x) * 1.2e-30;
+  double retval, cor;
 
-  int k1 = (n + shift_quadrant) & 3;
-  switch (k1)
-    {			/* quarter of unit circle */
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      xx = a * a;
+  if (n & 1)
+    /* Max ULP is 0.513.  */
+    retval = do_cos (a, da, &cor);
+  else
+    {
+      double xx = a * a;
+      /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
       if (xx < 0.01588)
-	{
-	  /* Taylor series.  */
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (eps, cor);
-	  retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant);
-	}
+	retval = TAYLOR_SIN (xx, a, da, cor);
       else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (eps, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : sloww1 (a, da, x, shift_quadrant));
-	}
-      break;
-
-    case 1:
-    case 3:
-      res = do_cos (a, da, &cor);
-      cor = 1.025 * cor + __copysign (eps, cor);
-      retval = ((res == res + cor) ? ((n & 2) ? -res : res)
-		: sloww2 (a, da, x, n));
-      break;
+	retval = __copysign (do_sin (a, da, &cor), a);
     }
 
-  return retval;
+  return (n & 2) ? -retval : retval;
 }
 
+
 /*******************************************************************/
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
@@ -374,13 +361,18 @@ SECTION
 #endif
 __sin (double x)
 {
-  double xx, t, cor;
+#ifndef IN_SINCOS
+  double xx, t, a, da, cor;
   mynumber u;
-  int4 k, m;
+  int4 k, m, n;
   double retval = 0;
 
-#ifndef IN_SINCOS
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
+#else
+  double xx, t, cor;
+  mynumber u;
+  int4 k, m;
+  double retval = 0;
 #endif
 
   u.x = x;
@@ -419,9 +411,8 @@ __sin (double x)
 /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
   else if (k < 0x419921FB)
     {
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, false);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n);
     }				/*   else  if (k <  0x419921FB )    */
 
 /* --------------------105414350 <|x| <2^1024------------------------------*/
@@ -456,7 +447,11 @@ __cos (double x)
 {
   double y, xx, cor, a, da;
   mynumber u;
+#ifndef IN_SINCOS
+  int4 k, m, n;
+#else
   int4 k, m;
+#endif
 
   double retval = 0;
 
@@ -496,9 +491,8 @@ __cos (double x)
 #ifndef IN_SINCOS
   else if (k < 0x419921FB)
     {				/* 2.426265<|x|< 105414350 */
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, true);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n + 1);
     }				/*   else  if (k <  0x419921FB )    */
 
   /* 105414350 <|x| <2^1024 */
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index a9af8ce526..4f032d2e42 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx)
   if (k < 0x419921FB)
     {
       double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
+      int4 n = reduce_sincos (x, &a, &da);
 
-      *sinx = do_sincos_1 (a, da, x, n, false);
-      *cosx = do_sincos_1 (a, da, x, n, true);
+      *sinx = do_sincos (a, da, n);
+      *cosx = do_sincos (a, da, n + 1);
 
       return;
     }


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [glibc/maskray/grte] [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
@ 2021-08-27 23:48 Fangrui Song
  0 siblings, 0 replies; 4+ messages in thread
From: Fangrui Song @ 2021-08-27 23:48 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=e74311c37db0268cd75bbdedbb21872cd58b3f76

commit e74311c37db0268cd75bbdedbb21872cd58b3f76
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:33:13 2018 +0100

    [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
    
    This patch improves the accuracy of the range reduction.  When the input is
    large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is not
    enough.  Improve range reduction accuracy to 136 bits.  As a result the special
    checks for results close to zero can be removed.  The ULP of the polynomials is
    at worst 0.55ULP, so there is no reason for the slow functions, and they can be
    removed.
    
            * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
            reduce_sincos, improve accuracy to 136 bits.
            (do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
            (__sin): Use improved reduction and simplified do_sincos calculation.
            (__cos): Likewise.
            * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.

Diff:
---
 sysdeps/ieee754/dbl-64/s_sin.c    | 94 ++++++++++++++++++---------------------
 sysdeps/ieee754/dbl-64/s_sincos.c |  6 +--
 2 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index c86fb9f2aa..b8c366a6f0 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant)
   return retval;
 }
 
+/* Reduce range of x to within PI/2 with abs (x) < 105414350.  The high part
+   is written to *a, the low part to *da.  Range reduction is accurate to 136
+   bits so that when x is large and *a very close to zero, all 53 bits of *a
+   are correct.  */
 static inline int4
 __always_inline
-reduce_sincos_1 (double x, double *a, double *da)
+reduce_sincos (double x, double *a, double *da)
 {
   mynumber v;
 
@@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da)
   v.x = t;
   double y = (x - xn * mp1) - xn * mp2;
   int4 n = v.i[LOW_HALF] & 3;
-  double db = xn * mp3;
-  double b = y - db;
-  db = (y - b) - db;
+
+  double b, db, t1, t2;
+  t1 = xn * pp3;
+  t2 = y - t1;
+  db = (y - t2) - t1;
+
+  t1 = xn * pp4;
+  b = t2 - t1;
+  db += (t2 - b) - t1;
 
   *a = b;
   *da = db;
-
   return n;
 }
 
-/* Compute sin (A + DA).  cos can be computed by passing SHIFT_QUADRANT as
-   true, which results in shifting the quadrant N clockwise.  */
+/* Compute sin or cos (A + DA) for the given quadrant N.  */
 static double
 __always_inline
-do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
+do_sincos (double a, double da, int4 n)
 {
-  double xx, retval, res, cor;
-  double eps = fabs (x) * 1.2e-30;
+  double retval, cor;
 
-  int k1 = (n + shift_quadrant) & 3;
-  switch (k1)
-    {			/* quarter of unit circle */
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      xx = a * a;
+  if (n & 1)
+    /* Max ULP is 0.513.  */
+    retval = do_cos (a, da, &cor);
+  else
+    {
+      double xx = a * a;
+      /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
       if (xx < 0.01588)
-	{
-	  /* Taylor series.  */
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (eps, cor);
-	  retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant);
-	}
+	retval = TAYLOR_SIN (xx, a, da, cor);
       else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (eps, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : sloww1 (a, da, x, shift_quadrant));
-	}
-      break;
-
-    case 1:
-    case 3:
-      res = do_cos (a, da, &cor);
-      cor = 1.025 * cor + __copysign (eps, cor);
-      retval = ((res == res + cor) ? ((n & 2) ? -res : res)
-		: sloww2 (a, da, x, n));
-      break;
+	retval = __copysign (do_sin (a, da, &cor), a);
     }
 
-  return retval;
+  return (n & 2) ? -retval : retval;
 }
 
+
 /*******************************************************************/
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
@@ -374,13 +361,18 @@ SECTION
 #endif
 __sin (double x)
 {
-  double xx, t, cor;
+#ifndef IN_SINCOS
+  double xx, t, a, da, cor;
   mynumber u;
-  int4 k, m;
+  int4 k, m, n;
   double retval = 0;
 
-#ifndef IN_SINCOS
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
+#else
+  double xx, t, cor;
+  mynumber u;
+  int4 k, m;
+  double retval = 0;
 #endif
 
   u.x = x;
@@ -419,9 +411,8 @@ __sin (double x)
 /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
   else if (k < 0x419921FB)
     {
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, false);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n);
     }				/*   else  if (k <  0x419921FB )    */
 
 /* --------------------105414350 <|x| <2^1024------------------------------*/
@@ -456,7 +447,11 @@ __cos (double x)
 {
   double y, xx, cor, a, da;
   mynumber u;
+#ifndef IN_SINCOS
+  int4 k, m, n;
+#else
   int4 k, m;
+#endif
 
   double retval = 0;
 
@@ -496,9 +491,8 @@ __cos (double x)
 #ifndef IN_SINCOS
   else if (k < 0x419921FB)
     {				/* 2.426265<|x|< 105414350 */
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, true);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n + 1);
     }				/*   else  if (k <  0x419921FB )    */
 
   /* 105414350 <|x| <2^1024 */
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index a9af8ce526..4f032d2e42 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx)
   if (k < 0x419921FB)
     {
       double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
+      int4 n = reduce_sincos (x, &a, &da);
 
-      *sinx = do_sincos_1 (a, da, x, n, false);
-      *cosx = do_sincos_1 (a, da, x, n, true);
+      *sinx = do_sincos (a, da, n);
+      *cosx = do_sincos (a, da, n + 1);
 
       return;
     }


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [glibc/maskray/grte] [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
@ 2021-08-27 23:46 Fangrui Song
  0 siblings, 0 replies; 4+ messages in thread
From: Fangrui Song @ 2021-08-27 23:46 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=ed2a36663a6e3edf6e3b0f4045c3befec7f5bca5

commit ed2a36663a6e3edf6e3b0f4045c3befec7f5bca5
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:33:13 2018 +0100

    [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
    
    This patch improves the accuracy of the range reduction.  When the input is
    large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is not
    enough.  Improve range reduction accuracy to 136 bits.  As a result the special
    checks for results close to zero can be removed.  The ULP of the polynomials is
    at worst 0.55ULP, so there is no reason for the slow functions, and they can be
    removed.
    
            * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
            reduce_sincos, improve accuracy to 136 bits.
            (do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
            (__sin): Use improved reduction and simplified do_sincos calculation.
            (__cos): Likewise.
            * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.

Diff:
---
 sysdeps/ieee754/dbl-64/s_sin.c    | 94 ++++++++++++++++++---------------------
 sysdeps/ieee754/dbl-64/s_sincos.c |  6 +--
 2 files changed, 47 insertions(+), 53 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index c86fb9f2aa..b8c366a6f0 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant)
   return retval;
 }
 
+/* Reduce range of x to within PI/2 with abs (x) < 105414350.  The high part
+   is written to *a, the low part to *da.  Range reduction is accurate to 136
+   bits so that when x is large and *a very close to zero, all 53 bits of *a
+   are correct.  */
 static inline int4
 __always_inline
-reduce_sincos_1 (double x, double *a, double *da)
+reduce_sincos (double x, double *a, double *da)
 {
   mynumber v;
 
@@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da)
   v.x = t;
   double y = (x - xn * mp1) - xn * mp2;
   int4 n = v.i[LOW_HALF] & 3;
-  double db = xn * mp3;
-  double b = y - db;
-  db = (y - b) - db;
+
+  double b, db, t1, t2;
+  t1 = xn * pp3;
+  t2 = y - t1;
+  db = (y - t2) - t1;
+
+  t1 = xn * pp4;
+  b = t2 - t1;
+  db += (t2 - b) - t1;
 
   *a = b;
   *da = db;
-
   return n;
 }
 
-/* Compute sin (A + DA).  cos can be computed by passing SHIFT_QUADRANT as
-   true, which results in shifting the quadrant N clockwise.  */
+/* Compute sin or cos (A + DA) for the given quadrant N.  */
 static double
 __always_inline
-do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
+do_sincos (double a, double da, int4 n)
 {
-  double xx, retval, res, cor;
-  double eps = fabs (x) * 1.2e-30;
+  double retval, cor;
 
-  int k1 = (n + shift_quadrant) & 3;
-  switch (k1)
-    {			/* quarter of unit circle */
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      xx = a * a;
+  if (n & 1)
+    /* Max ULP is 0.513.  */
+    retval = do_cos (a, da, &cor);
+  else
+    {
+      double xx = a * a;
+      /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
       if (xx < 0.01588)
-	{
-	  /* Taylor series.  */
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (eps, cor);
-	  retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant);
-	}
+	retval = TAYLOR_SIN (xx, a, da, cor);
       else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (eps, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : sloww1 (a, da, x, shift_quadrant));
-	}
-      break;
-
-    case 1:
-    case 3:
-      res = do_cos (a, da, &cor);
-      cor = 1.025 * cor + __copysign (eps, cor);
-      retval = ((res == res + cor) ? ((n & 2) ? -res : res)
-		: sloww2 (a, da, x, n));
-      break;
+	retval = __copysign (do_sin (a, da, &cor), a);
     }
 
-  return retval;
+  return (n & 2) ? -retval : retval;
 }
 
+
 /*******************************************************************/
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
@@ -374,13 +361,18 @@ SECTION
 #endif
 __sin (double x)
 {
-  double xx, t, cor;
+#ifndef IN_SINCOS
+  double xx, t, a, da, cor;
   mynumber u;
-  int4 k, m;
+  int4 k, m, n;
   double retval = 0;
 
-#ifndef IN_SINCOS
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
+#else
+  double xx, t, cor;
+  mynumber u;
+  int4 k, m;
+  double retval = 0;
 #endif
 
   u.x = x;
@@ -419,9 +411,8 @@ __sin (double x)
 /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
   else if (k < 0x419921FB)
     {
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, false);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n);
     }				/*   else  if (k <  0x419921FB )    */
 
 /* --------------------105414350 <|x| <2^1024------------------------------*/
@@ -456,7 +447,11 @@ __cos (double x)
 {
   double y, xx, cor, a, da;
   mynumber u;
+#ifndef IN_SINCOS
+  int4 k, m, n;
+#else
   int4 k, m;
+#endif
 
   double retval = 0;
 
@@ -496,9 +491,8 @@ __cos (double x)
 #ifndef IN_SINCOS
   else if (k < 0x419921FB)
     {				/* 2.426265<|x|< 105414350 */
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, true);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n + 1);
     }				/*   else  if (k <  0x419921FB )    */
 
   /* 105414350 <|x| <2^1024 */
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index a9af8ce526..4f032d2e42 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx)
   if (k < 0x419921FB)
     {
       double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
+      int4 n = reduce_sincos (x, &a, &da);
 
-      *sinx = do_sincos_1 (a, da, x, n, false);
-      *cosx = do_sincos_1 (a, da, x, n, true);
+      *sinx = do_sincos (a, da, n);
+      *cosx = do_sincos (a, da, n + 1);
 
       return;
     }


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-08-28  0:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-28  0:34 [glibc/maskray/grte] [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction Fangrui Song
  -- strict thread matches above, loose matches on Subject: below --
2021-08-28  0:29 Fangrui Song
2021-08-27 23:48 Fangrui Song
2021-08-27 23:46 Fangrui Song

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).