public inbox for newlib@sourceware.org
* [PATCH 1/6] Fix code style and comments of new math code
  2018-07-05 16:01 [PATCH 0/6 v2] Updates to the new math code Szabolcs Nagy
@ 2018-07-05 16:01 ` Szabolcs Nagy
  2018-07-05 16:02 ` [PATCH 2/6] Move __HAVE_FAST_FMA to math_config.h Szabolcs Nagy
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2018-07-05 16:01 UTC (permalink / raw)
  To: newlib; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 0 bytes --]



[-- Attachment #2: 0001-Fix-code-style-and-comments-of-new-math-code.patch --]
[-- Type: text/x-patch, Size: 20044 bytes --]

From a4e4e4057b31a65a033f7cdbe6bcedb527c41918 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue, 3 Jul 2018 12:54:36 +0100
Subject: [PATCH 1/6] Fix code style and comments of new math code

Synchronize code style and comments with Arm Optimized Routines; there
are no code changes in this patch.  This ensures different projects using
the same code have consistent code style so bug fix patches can be applied
more easily.
---
 newlib/libm/common/exp.c         | 22 +++++++++++++------
 newlib/libm/common/exp2.c        | 18 +++++++++++-----
 newlib/libm/common/log.c         | 46 ++++++++++++++++++++++------------------
 newlib/libm/common/log2.c        | 31 ++++++++++++++-------------
 newlib/libm/common/math_config.h | 34 ++++++++++++++++++++++++-----
 newlib/libm/common/pow.c         | 29 +++++++++++++++++++------
 newlib/libm/common/sincosf.c     | 16 +++++++-------
 newlib/libm/common/sinf.c        | 12 +++++------
 8 files changed, 134 insertions(+), 74 deletions(-)

diff --git a/newlib/libm/common/exp.c b/newlib/libm/common/exp.c
index f295b6173..157473de8 100644
--- a/newlib/libm/common/exp.c
+++ b/newlib/libm/common/exp.c
@@ -45,6 +45,13 @@
 #define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
 #define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
 
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflown into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.  */
 static inline double
 specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
 {
@@ -83,6 +90,7 @@ specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
   return check_uflow (y);
 }
 
+/* Top 12 bits of a double (sign and exponent bits).  */
 static inline uint32_t
 top12 (double x)
 {
@@ -136,29 +144,29 @@ exp (double x)
   ki = asuint64 (kd);
   kd -= Shift;
 #endif
-  r = x + kd*NegLn2hiN + kd*NegLn2loN;
+  r = x + kd * NegLn2hiN + kd * NegLn2loN;
   /* 2^(k/N) ~= scale * (1 + tail).  */
-  idx = 2*(ki % N);
+  idx = 2 * (ki % N);
   top = ki << (52 - EXP_TABLE_BITS);
   tail = asdouble (T[idx]);
   /* This is only a valid scale when -1023*N < k < 1024*N.  */
   sbits = T[idx + 1] + top;
   /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
   /* Evaluation is optimized assuming superscalar pipelined execution.  */
-  r2 = r*r;
+  r2 = r * r;
   /* Without fma the worst case error is 0.25/N ulp larger.  */
   /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
 #if EXP_POLY_ORDER == 4
-  tmp = tail + r + r2*C2 + r*r2*(C3 + r*C4);
+  tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
 #elif EXP_POLY_ORDER == 5
-  tmp = tail + r + r2*(C2 + r*C3) + r2*r2*(C4 + r*C5);
+  tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
 #elif EXP_POLY_ORDER == 6
-  tmp = tail + r + r2*(0.5 + r*C3) + r2*r2*(C4 + r*C5 + r2*C6);
+  tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
 #endif
   if (unlikely (abstop == 0))
     return specialcase (tmp, sbits, ki);
   scale = asdouble (sbits);
-  /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+  /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-739, so there
      is no spurious underflow here even without fma.  */
   return scale + scale * tmp;
 }
diff --git a/newlib/libm/common/exp2.c b/newlib/libm/common/exp2.c
index 6579e16fd..d4883d8f6 100644
--- a/newlib/libm/common/exp2.c
+++ b/newlib/libm/common/exp2.c
@@ -43,6 +43,13 @@
 #define C5 __exp_data.exp2_poly[4]
 #define C6 __exp_data.exp2_poly[5]
 
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflown into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.  */
 static inline double
 specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
 {
@@ -81,6 +88,7 @@ specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
   return check_uflow (y);
 }
 
+/* Top 12 bits of a double (sign and exponent bits).  */
 static inline uint32_t
 top12 (double x)
 {
@@ -125,22 +133,22 @@ exp2 (double x)
   kd -= Shift; /* k/N for int k.  */
   r = x - kd;
   /* 2^(k/N) ~= scale * (1 + tail).  */
-  idx = 2*(ki % N);
+  idx = 2 * (ki % N);
   top = ki << (52 - EXP_TABLE_BITS);
   tail = asdouble (T[idx]);
   /* This is only a valid scale when -1023*N < k < 1024*N.  */
   sbits = T[idx + 1] + top;
   /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */
   /* Evaluation is optimized assuming superscalar pipelined execution.  */
-  r2 = r*r;
+  r2 = r * r;
   /* Without fma the worst case error is 0.5/N ulp larger.  */
   /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
 #if EXP2_POLY_ORDER == 4
-  tmp = tail + r*C1 + r2*C2 + r*r2*(C3 + r*C4);
+  tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
 #elif EXP2_POLY_ORDER == 5
-  tmp = tail + r*C1 + r2*(C2 + r*C3) + r2*r2*(C4 + r*C5);
+  tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
 #elif EXP2_POLY_ORDER == 6
-  tmp = tail + r*C1 + r2*(0.5 + r*C3) + r2*r2*(C4 + r*C5 + r2*C6);
+  tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
 #endif
   if (unlikely (abstop == 0))
     return specialcase (tmp, sbits, ki);
diff --git a/newlib/libm/common/log.c b/newlib/libm/common/log.c
index 32752f55c..2e350749f 100644
--- a/newlib/libm/common/log.c
+++ b/newlib/libm/common/log.c
@@ -42,6 +42,7 @@
 #define N (1 << LOG_TABLE_BITS)
 #define OFF 0x3fe6000000000000
 
+/* Top 16 bits of a double.  */
 static inline uint32_t
 top16 (double x)
 {
@@ -74,44 +75,44 @@ log (double x)
       if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
 	return 0;
       r = x - 1.0;
-      r2 = r*r;
-      r3 = r*r2;
+      r2 = r * r;
+      r3 = r * r2;
 #if LOG_POLY1_ORDER == 10
       /* Worst-case error is around 0.516 ULP.  */
-      y = r3*(B[1] + r*B[2] + r2*B[3]
-	      + r3*(B[4] + r*B[5] + r2*B[6] + r3*(B[7] + r*B[8])));
-      w = B[0]*r2; /* B[0] == -0.5.  */
+      y = r3 * (B[1] + r * B[2] + r2 * B[3]
+		+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
+      w = B[0] * r2; /* B[0] == -0.5.  */
       hi = r + w;
       y += r - hi + w;
       y += hi;
 #elif LOG_POLY1_ORDER == 11
       /* Worst-case error is around 0.516 ULP.  */
-      y = r3*(B[1] + r*B[2]
-	      + r2*(B[3] + r*B[4] + r2*B[5]
-		    + r3*(B[6] + r*B[7] + r2*B[8] + r3*B[9])));
-      w = B[0]*r2; /* B[0] == -0.5.  */
+      y = r3 * (B[1] + r * B[2]
+		+ r2 * (B[3] + r * B[4] + r2 * B[5]
+			+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
+      w = B[0] * r2; /* B[0] == -0.5.  */
       hi = r + w;
       y += r - hi + w;
       y += hi;
 #elif LOG_POLY1_ORDER == 12
-      y = r3*(B[1] + r*B[2] + r2*B[3]
-	      + r3*(B[4] + r*B[5] + r2*B[6]
-		    + r3*(B[7] + r*B[8] + r2*B[9] + r3*B[10])));
+      y = r3 * (B[1] + r * B[2] + r2 * B[3]
+		+ r3 * (B[4] + r * B[5] + r2 * B[6]
+			+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
 # if N <= 64
       /* Worst-case error is around 0.532 ULP.  */
-      w = B[0]*r2; /* B[0] == -0.5.  */
+      w = B[0] * r2; /* B[0] == -0.5.  */
       hi = r + w;
       y += r - hi + w;
       y += hi;
 # else
       /* Worst-case error is around 0.507 ULP.  */
-      w = r*0x1p27;
+      w = r * 0x1p27;
       double_t rhi = r + w - w;
       double_t rlo = r - rhi;
-      w = rhi*rhi*B[0]; /* B[0] == -0.5.  */
+      w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
       hi = r + w;
       lo = r - hi + w;
-      lo += B[0]*rlo*(rhi + r);
+      lo += B[0] * rlo * (rhi + r);
       y += lo;
       y += hi;
 # endif
@@ -150,14 +151,14 @@ log (double x)
   r = fma (z, invc, -1.0);
 #else
   /* rounding error: 0x1p-55/N + 0x1p-66.  */
-  r = (z - T2[i].chi - T2[i].clo)*invc;
+  r = (z - T2[i].chi - T2[i].clo) * invc;
 #endif
   kd = (double_t) k;
 
   /* hi + lo = r + log(c) + k*Ln2.  */
-  w = kd*Ln2hi + logc;
+  w = kd * Ln2hi + logc;
   hi = w + r;
-  lo = w - hi + r + kd*Ln2lo;
+  lo = w - hi + r + kd * Ln2lo;
 
   /* log(x) = lo + (log1p(r) - r) + hi.  */
   r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
@@ -166,9 +167,12 @@ log (double x)
      Worst case error if |y| > 0x1p-4:
      0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma).  */
 #if LOG_POLY_ORDER == 6
-  y = lo + r2*A[0] + r*r2*(A[1] + r*A[2] + r2*(A[3] + r*A[4])) + hi;
+  y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
 #elif LOG_POLY_ORDER == 7
-  y = lo + r2*(A[0] + r*A[1] + r2*(A[2] + r*A[3]) + r2*r2*(A[4] + r*A[5])) + hi;
+  y = lo
+      + r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+	      + r2 * r2 * (A[4] + r * A[5]))
+      + hi;
 #endif
   return y;
 }
diff --git a/newlib/libm/common/log2.c b/newlib/libm/common/log2.c
index 64617de6c..a2da93e74 100644
--- a/newlib/libm/common/log2.c
+++ b/newlib/libm/common/log2.c
@@ -42,6 +42,7 @@
 #define N (1 << LOG2_TABLE_BITS)
 #define OFF 0x3fe6000000000000
 
+/* Top 16 bits of a double.  */
 static inline uint32_t
 top16 (double x)
 {
@@ -72,24 +73,24 @@ double
 	return 0;
       r = x - 1.0;
 #if __HAVE_FAST_FMA
-      hi = r*InvLn2hi;
-      lo = r*InvLn2lo + fma (r, InvLn2hi, -hi);
+      hi = r * InvLn2hi;
+      lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
 #else
       double_t rhi, rlo;
-      rhi = asdouble (asuint64 (r) & -1ULL<<32);
+      rhi = asdouble (asuint64 (r) & -1ULL << 32);
       rlo = r - rhi;
-      hi = rhi*InvLn2hi;
-      lo = rlo*InvLn2hi + r*InvLn2lo;
+      hi = rhi * InvLn2hi;
+      lo = rlo * InvLn2hi + r * InvLn2lo;
 #endif
       r2 = r * r; /* rounding error: 0x1p-62.  */
       r4 = r2 * r2;
 #if LOG2_POLY1_ORDER == 11
       /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
-      p = r2*(B[0] + r*B[1]);
+      p = r2 * (B[0] + r * B[1]);
       y = hi + p;
       lo += hi - y + p;
-      lo += r4*(B[2] + r*B[3] + r2*(B[4] + r*B[5])
-		+ r4*(B[6] + r*B[7] + r2*(B[8] + r*B[9])));
+      lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+		  + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
       y += lo;
 #endif
       return y;
@@ -125,16 +126,16 @@ double
 #if __HAVE_FAST_FMA
   /* rounding error: 0x1p-55/N.  */
   r = fma (z, invc, -1.0);
-  t1 = r*InvLn2hi;
-  t2 = r*InvLn2lo + fma (r, InvLn2hi, -t1);
+  t1 = r * InvLn2hi;
+  t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
 #else
   double_t rhi, rlo;
   /* rounding error: 0x1p-55/N + 0x1p-65.  */
-  r = (z - T2[i].chi - T2[i].clo)*invc;
+  r = (z - T2[i].chi - T2[i].clo) * invc;
   rhi = asdouble (asuint64 (r) & -1ULL << 32);
   rlo = r - rhi;
-  t1 = rhi*InvLn2hi;
-  t2 = rlo*InvLn2hi + r*InvLn2lo;
+  t1 = rhi * InvLn2hi;
+  t2 = rlo * InvLn2hi + r * InvLn2lo;
 #endif
 
   /* hi + lo = r/ln2 + log2(c) + k.  */
@@ -149,8 +150,8 @@ double
 #if LOG2_POLY_ORDER == 7
   /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
      ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
-  p = A[0] + r*A[1] + r2*(A[2] + r*A[3]) + r4*(A[4] + r*A[5]);
-  y = lo + r2*p + hi;
+  p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
+  y = lo + r2 * p + hi;
 #endif
   return y;
 }
diff --git a/newlib/libm/common/math_config.h b/newlib/libm/common/math_config.h
index 3a8cebbe3..aec9cd0d6 100644
--- a/newlib/libm/common/math_config.h
+++ b/newlib/libm/common/math_config.h
@@ -233,28 +233,48 @@ eval_as_double (double x)
 # define unlikely(x) (x)
 #endif
 
-/* Error handling tail calls for special cases, with sign argument.  */
+/* Error handling tail calls for special cases, with a sign argument.
+   The sign of the return value is set if the argument is non-zero.  */
+
+/* The result overflows.  */
 HIDDEN float __math_oflowf (uint32_t);
+/* The result underflows to 0 in nearest rounding mode.  */
 HIDDEN float __math_uflowf (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only.  */
 HIDDEN float __math_may_uflowf (uint32_t);
+/* Division by zero.  */
 HIDDEN float __math_divzerof (uint32_t);
+/* The result overflows.  */
 HIDDEN double __math_oflow (uint32_t);
+/* The result underflows to 0 in nearest rounding mode.  */
 HIDDEN double __math_uflow (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only.  */
 HIDDEN double __math_may_uflow (uint32_t);
+/* Division by zero.  */
 HIDDEN double __math_divzero (uint32_t);
+
 /* Error handling using input checking.  */
+
+/* Invalid input unless it is a quiet NaN.  */
 HIDDEN float __math_invalidf (float);
+/* Invalid input unless it is a quiet NaN.  */
 HIDDEN double __math_invalid (double);
+
 /* Error handling using output checking, only for errno setting.  */
+
+/* Check if the result overflowed to infinity.  */
 HIDDEN double __math_check_oflow (double);
+/* Check if the result underflowed to 0.  */
 HIDDEN double __math_check_uflow (double);
 
+/* Check if the result overflowed to infinity.  */
 static inline double
 check_oflow (double x)
 {
   return WANT_ERRNO ? __math_check_oflow (x) : x;
 }
 
+/* Check if the result underflowed to 0.  */
 static inline double
 check_uflow (double x)
 {
@@ -324,7 +344,8 @@ extern const struct powf_log2_data
 #define EXP_USE_TOINT_NARROW 0
 #define EXP2_POLY_ORDER 5
 #define EXP2_POLY_WIDE 0
-extern const struct exp_data {
+extern const struct exp_data
+{
   double invln2N;
   double shift;
   double negln2hiN;
@@ -338,7 +359,8 @@ extern const struct exp_data {
 #define LOG_TABLE_BITS 7
 #define LOG_POLY_ORDER 6
 #define LOG_POLY1_ORDER 12
-extern const struct log_data {
+extern const struct log_data
+{
   double ln2hi;
   double ln2lo;
   double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
@@ -352,7 +374,8 @@ extern const struct log_data {
 #define LOG2_TABLE_BITS 6
 #define LOG2_POLY_ORDER 7
 #define LOG2_POLY1_ORDER 11
-extern const struct log2_data {
+extern const struct log2_data
+{
   double invln2hi;
   double invln2lo;
   double poly[LOG2_POLY_ORDER - 1];
@@ -365,7 +388,8 @@ extern const struct log2_data {
 
 #define POW_LOG_TABLE_BITS 7
 #define POW_LOG_POLY_ORDER 8
-extern const struct pow_log_data {
+extern const struct pow_log_data
+{
   double ln2hi;
   double ln2lo;
   double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
diff --git a/newlib/libm/common/pow.c b/newlib/libm/common/pow.c
index da7081cb4..7d8060751 100644
--- a/newlib/libm/common/pow.c
+++ b/newlib/libm/common/pow.c
@@ -46,12 +46,16 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
 #define N (1 << POW_LOG_TABLE_BITS)
 #define OFF 0x3fe6955500000000
 
+/* Top 12 bits of a double (sign and exponent bits).  */
 static inline uint32_t
 top12 (double x)
 {
   return asuint64 (x) >> 52;
 }
 
+/* Compute y+tail = log(x) where the rounded result is y and tail has about
+   15 bits of additional precision.  The bit representation of x is in ix, but
+   normalized in the subnormal range using the sign bit for the exponent too.  */
 static inline double_t
 log_inline (uint64_t ix, double_t *tail)
 {
@@ -111,7 +115,8 @@ log_inline (uint64_t ix, double_t *tail)
 #endif
   /* p = log1p(r) - r - A[0]*r*r.  */
 #if POW_LOG_POLY_ORDER == 8
-  p = ar3*(A[1] + r*A[2] + ar2*(A[3] + r*A[4] + ar2*(A[5] + r*A[6])));
+  p = (ar3
+       * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
 #endif
   lo = lo1 + lo2 + lo3 + lo4 + p;
   y = hi + lo;
@@ -133,6 +138,13 @@ log_inline (uint64_t ix, double_t *tail)
 #define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
 #define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
 
+/* Handle cases that may overflow or underflow when computing the result that
+   is scale*(1+TMP) without intermediate rounding.  The bit representation of
+   scale is in SBITS, however it has a computed exponent that may have
+   overflown into the sign bit so that needs to be adjusted before using it as
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
+   adjustment of scale, positive k here means the result may overflow and
+   negative k means the result may underflow.  */
 static inline double
 specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
 {
@@ -176,6 +188,8 @@ specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
 
 #define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
 
+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.  */
 static inline double
 exp_inline (double x, double xtail, uint32_t sign_bias)
 {
@@ -223,26 +237,26 @@ exp_inline (double x, double xtail, uint32_t sign_bias)
   ki = asuint64 (kd);
   kd -= Shift;
 #endif
-  r = x + kd*NegLn2hiN + kd*NegLn2loN;
+  r = x + kd * NegLn2hiN + kd * NegLn2loN;
   /* The code assumes 2^-200 < |xtail| < 2^-8/N.  */
   r += xtail;
   /* 2^(k/N) ~= scale * (1 + tail).  */
-  idx = 2*(ki % N);
+  idx = 2 * (ki % N);
   top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
   tail = asdouble (T[idx]);
   /* This is only a valid scale when -1023*N < k < 1024*N.  */
   sbits = T[idx + 1] + top;
   /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1).  */
   /* Evaluation is optimized assuming superscalar pipelined execution.  */
-  r2 = r*r;
+  r2 = r * r;
   /* Without fma the worst case error is 0.25/N ulp larger.  */
   /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp.  */
 #if EXP_POLY_ORDER == 4
-  tmp = tail + r + r2*C2 + r*r2*(C3 + r*C4);
+  tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
 #elif EXP_POLY_ORDER == 5
-  tmp = tail + r + r2*(C2 + r*C3) + r2*r2*(C4 + r*C5);
+  tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
 #elif EXP_POLY_ORDER == 6
-  tmp = tail + r + r2*(0.5 + r*C3) + r2*r2*(C4 + r*C5 + r2*C6);
+  tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
 #endif
   if (unlikely (abstop == 0))
     return specialcase (tmp, sbits, ki);
@@ -268,6 +282,7 @@ checkint (uint64_t iy)
   return 2;
 }
 
+/* Returns 1 if input is the bit representation of 0, infinity or nan.  */
 static inline int
 zeroinfnan (uint64_t i)
 {
diff --git a/newlib/libm/common/sincosf.c b/newlib/libm/common/sincosf.c
index 85f12642f..2ead353e8 100644
--- a/newlib/libm/common/sincosf.c
+++ b/newlib/libm/common/sincosf.c
@@ -50,14 +50,14 @@ sincosf (float y, float *sinp, float *cosp)
       double x2 = x * x;
 
       if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
-      {
-	if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
-	  /* Force underflow for tiny y.  */
-	  force_eval_float (x2);
-	*sinp = y;
-	*cosp = 1.0f;
-	return;
-      }
+	{
+	  if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
+	    /* Force underflow for tiny y.  */
+	    force_eval_float (x2);
+	  *sinp = y;
+	  *cosp = 1.0f;
+	  return;
+	}
 
       sincosf_poly (x, x2, p, 0, sinp, cosp);
     }
diff --git a/newlib/libm/common/sinf.c b/newlib/libm/common/sinf.c
index c1baf4237..715bdc8d0 100644
--- a/newlib/libm/common/sinf.c
+++ b/newlib/libm/common/sinf.c
@@ -49,12 +49,12 @@ sinf (float y)
       s = x * x;
 
       if (unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
-      {
-	if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
-	  /* Force underflow for tiny y.  */
-	  force_eval_float (s);
-	return y;
-      }
+	{
+	  if (unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
+	    /* Force underflow for tiny y.  */
+	    force_eval_float (s);
+	  return y;
+	}
 
       return sinf_poly (x, s, p, 0);
     }
-- 
2.14.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 0/6 v2] Updates to the new math code
@ 2018-07-05 16:01 Szabolcs Nagy
  2018-07-05 16:01 ` [PATCH 1/6] Fix code style and comments of " Szabolcs Nagy
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2018-07-05 16:01 UTC (permalink / raw)
  To: newlib; +Cc: nd

v2:
- Made HAVE_FAST_FMA macros consistent with other HAVE_* macros.
- Removed TOINT_RINT and TOINT_SHIFT.
- Include sinf/cosf namespace fix in the patch set.

There are some modifications and bug fixes in the Arm Optimized
Routines repo and I'd like to sync newlib with it.

Szabolcs Nagy (6):
   Fix code style and comments of new math code
   Move __HAVE_FAST_FMA to math_config.h
   Remove unused TOINT_RINT and TOINT_SHIFT macros
   Change the return type of converttoint and document the semantics
   Fix large ulp error in pow without fma very near 1.0
   Fix namespace issues in sinf, cosf and sincosf

  newlib/libc/include/machine/ieeefp.h | 23 -------------
  newlib/libm/common/cosf.c            |  6 ++--
  newlib/libm/common/exp.c             | 22 +++++++++----
  newlib/libm/common/exp2.c            | 18 ++++++++---
  newlib/libm/common/log.c             | 48 ++++++++++++++-------------
  newlib/libm/common/log2.c            | 35 ++++++++++----------
  newlib/libm/common/log2_data.c       |  4 +--
  newlib/libm/common/log_data.c        |  4 +--
  newlib/libm/common/math_config.h     | 63 ++++++++++++++++++++++++++++--------
  newlib/libm/common/pow.c             | 41 ++++++++++++++++-------
  newlib/libm/common/sf_exp.c          |  5 +--
  newlib/libm/common/sincosf.c         | 22 ++++++-------
  newlib/libm/common/sincosf.h         | 15 +++++----
  newlib/libm/common/sincosf_data.c    |  4 +--
  newlib/libm/common/sinf.c            | 18 +++++------
  15 files changed, 188 insertions(+), 140 deletions(-)

--
2.14.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/6] Move __HAVE_FAST_FMA to math_config.h
  2018-07-05 16:01 [PATCH 0/6 v2] Updates to the new math code Szabolcs Nagy
  2018-07-05 16:01 ` [PATCH 1/6] Fix code style and comments of " Szabolcs Nagy
@ 2018-07-05 16:02 ` Szabolcs Nagy
  2018-07-05 16:03 ` [PATCH 4/6] Change the return type of converttoint and document the semantics Szabolcs Nagy
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2018-07-05 16:02 UTC (permalink / raw)
  To: newlib; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0002-Move-__HAVE_FAST_FMA-to-math_config.h.patch --]
[-- Type: text/x-patch, Size: 6692 bytes --]

From c3b27ce0c388601e9537e9621969de8f36b84df7 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu, 5 Jul 2018 12:37:25 +0100
Subject: [PATCH 2/6] Move __HAVE_FAST_FMA to math_config.h

Define it consistently with other HAVE_* macros that only affect code
using math_config.h.  This is also closer to the Arm Optimized Routines
code.
---
 newlib/libc/include/machine/ieeefp.h | 23 -----------------------
 newlib/libm/common/log.c             |  2 +-
 newlib/libm/common/log2.c            |  4 ++--
 newlib/libm/common/log2_data.c       |  4 ++--
 newlib/libm/common/log_data.c        |  4 ++--
 newlib/libm/common/math_config.h     | 13 +++++++++++--
 newlib/libm/common/pow.c             |  6 +++---
 7 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/newlib/libc/include/machine/ieeefp.h b/newlib/libc/include/machine/ieeefp.h
index 23a365ea6..a40975248 100644
--- a/newlib/libc/include/machine/ieeefp.h
+++ b/newlib/libc/include/machine/ieeefp.h
@@ -65,17 +65,6 @@
 	double and single precision arithmetics has similar latency and it
 	has no legacy SVID matherr support, only POSIX errno and fenv
 	exception based error handling.
-
-   __HAVE_FAST_FMA_DEFAULT
-
-	Default value for __HAVE_FAST_FMA if that's not set by the user.
-	It should be set here based on predefined feature macros.
-
-   __HAVE_FAST_FMA
-
-	It should be set to 1 if the compiler can inline an fma call as a
-	single instruction.  Some math code has a separate faster code
-	path assuming the target has single instruction fma.
 */
 
 #if (defined(__arm__) || defined(__thumb__)) && !defined(__MAVERICK__)
@@ -91,9 +80,6 @@
 # endif
 # if __ARM_FP & 0x8
 #  define __OBSOLETE_MATH_DEFAULT 0
-#  if __ARM_FEATURE_FMA
-#   define __HAVE_FAST_FMA_DEFAULT 1
-#  endif
 # endif
 #else
 # define __IEEE_BIG_ENDIAN
@@ -110,7 +96,6 @@
 #define __IEEE_BIG_ENDIAN
 #endif
 #define __OBSOLETE_MATH_DEFAULT 0
-#define __HAVE_FAST_FMA_DEFAULT 1
 #endif
 
 #ifdef __epiphany__
@@ -479,14 +464,6 @@
 #define __OBSOLETE_MATH __OBSOLETE_MATH_DEFAULT
 #endif
 
-#ifndef __HAVE_FAST_FMA_DEFAULT
-/* Assume slow fma by default.  */
-#define __HAVE_FAST_FMA_DEFAULT 0
-#endif
-#ifndef __HAVE_FAST_FMA
-#define __HAVE_FAST_FMA __HAVE_FAST_FMA_DEFAULT
-#endif
-
 #ifndef __IEEE_BIG_ENDIAN
 #ifndef __IEEE_LITTLE_ENDIAN
 #error Endianess not declared!!
diff --git a/newlib/libm/common/log.c b/newlib/libm/common/log.c
index 2e350749f..35329392d 100644
--- a/newlib/libm/common/log.c
+++ b/newlib/libm/common/log.c
@@ -146,7 +146,7 @@ log (double x)
 
   /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
   /* r ~= z/c - 1, |r| < 1/(2*N).  */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
   /* rounding error: 0x1p-55/N.  */
   r = fma (z, invc, -1.0);
 #else
diff --git a/newlib/libm/common/log2.c b/newlib/libm/common/log2.c
index a2da93e74..00eb406b2 100644
--- a/newlib/libm/common/log2.c
+++ b/newlib/libm/common/log2.c
@@ -72,7 +72,7 @@ double
       if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
 	return 0;
       r = x - 1.0;
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
       hi = r * InvLn2hi;
       lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
 #else
@@ -123,7 +123,7 @@ double
 
   /* log2(x) = log2(z/c) + log2(c) + k.  */
   /* r ~= z/c - 1, |r| < 1/(2*N).  */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
   /* rounding error: 0x1p-55/N.  */
   r = fma (z, invc, -1.0);
   t1 = r * InvLn2hi;
diff --git a/newlib/libm/common/log2_data.c b/newlib/libm/common/log2_data.c
index c3e5fa688..ee9efcc2a 100644
--- a/newlib/libm/common/log2_data.c
+++ b/newlib/libm/common/log2_data.c
@@ -134,7 +134,7 @@ const struct log2_data __log2_data = {
 {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
 #endif
 },
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
 .tab2 = {
 # if N == 64
 {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
@@ -203,6 +203,6 @@ const struct log2_data __log2_data = {
 {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
 # endif
 },
-#endif /* !__HAVE_FAST_FMA */
+#endif /* !HAVE_FAST_FMA */
 };
 #endif /* __OBSOLETE_MATH */
diff --git a/newlib/libm/common/log_data.c b/newlib/libm/common/log_data.c
index ef62677ca..26b9c3f44 100644
--- a/newlib/libm/common/log_data.c
+++ b/newlib/libm/common/log_data.c
@@ -307,7 +307,7 @@ const struct log_data __log_data = {
 {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
 #endif
 },
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
 .tab2 = {
 # if N == 64
 {0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
@@ -505,6 +505,6 @@ const struct log_data __log_data = {
 {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
 #endif
 },
-#endif /* !__HAVE_FAST_FMA */
+#endif /* !HAVE_FAST_FMA */
 };
 #endif /* __OBSOLETE_MATH */
diff --git a/newlib/libm/common/math_config.h b/newlib/libm/common/math_config.h
index aec9cd0d6..1f83756ab 100644
--- a/newlib/libm/common/math_config.h
+++ b/newlib/libm/common/math_config.h
@@ -61,6 +61,15 @@
 # endif
 #endif
 
+/* Compiler can inline fma as a single instruction.  */
+#ifndef HAVE_FAST_FMA
+# if __aarch64__ || __ARM_FEATURE_FMA
+#   define HAVE_FAST_FMA 1
+# else
+#   define HAVE_FAST_FMA 0
+# endif
+#endif
+
 #if HAVE_FAST_ROUND
 # define TOINT_INTRINSICS 1
 
@@ -366,7 +375,7 @@ extern const struct log_data
   double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
   double poly1[LOG_POLY1_ORDER - 1];
   struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
   struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
 #endif
 } __log_data HIDDEN;
@@ -381,7 +390,7 @@ extern const struct log2_data
   double poly[LOG2_POLY_ORDER - 1];
   double poly1[LOG2_POLY1_ORDER - 1];
   struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
   struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
 #endif
 } __log2_data HIDDEN;
diff --git a/newlib/libm/common/pow.c b/newlib/libm/common/pow.c
index 7d8060751..11964e343 100644
--- a/newlib/libm/common/pow.c
+++ b/newlib/libm/common/pow.c
@@ -80,7 +80,7 @@ log_inline (uint64_t ix, double_t *tail)
   logctail = T[i].logctail;
 
   /* r = z/c - 1, arranged to be exact.  */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
   r = fma (z, invc, -1.0);
 #else
   double_t zhi = asdouble (iz & (-1ULL << 32));
@@ -102,7 +102,7 @@ log_inline (uint64_t ix, double_t *tail)
   ar2 = r * ar;
   ar3 = r * ar2;
   /* k*Ln2 + log(c) + r + A[0]*r*r.  */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
   hi = t2 + ar2;
   lo3 = fma (ar, r, -ar2);
   lo4 = t2 - hi + ar2;
@@ -376,7 +376,7 @@ pow (double x, double y)
   double_t lo;
   double_t hi = log_inline (ix, &lo);
   double_t ehi, elo;
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
   ehi = y * hi;
   elo = y * lo + fma (y, hi, -ehi);
 #else
-- 
2.14.1
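
As context for what the macro guards: HAVE_FAST_FMA selects between an fma
based path and a bit-mask split path wherever an exact hi+lo product is
needed.  Below is a self-contained sketch of both paths, mirroring the log2.c
hunks in the patch above (the *_ex helpers and function names are invented
for this illustration, and InvLn2hi is assumed to be stored with enough
trailing zero bits, as the table data does):

#include <math.h>
#include <stdint.h>
#include <string.h>

static uint64_t
asuint64_ex (double x)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);
  return u;
}

static double
asdouble_ex (uint64_t u)
{
  double x;
  memcpy (&x, &u, sizeof x);
  return x;
}

/* Fast fma path: hi is the rounded product and fma recovers its exact
   rounding error, so hi + lo carries r/ln2 well beyond double precision.  */
void
mul_hilo_fma (double r, double InvLn2hi, double InvLn2lo,
	      double *hi, double *lo)
{
  *hi = r * InvLn2hi;
  *lo = r * InvLn2lo + fma (r, InvLn2hi, -*hi);
}

/* Fallback path: mask r down to 21 significant bits so rhi * InvLn2hi is
   exact, then account for the discarded low part separately.  */
void
mul_hilo_split (double r, double InvLn2hi, double InvLn2lo,
		double *hi, double *lo)
{
  double rhi = asdouble_ex (asuint64_ex (r) & (-1ULL << 32));
  double rlo = r - rhi;
  *hi = rhi * InvLn2hi;
  *lo = rlo * InvLn2hi + r * InvLn2lo;
}

Both variants produce hi + lo ~= r/ln2 with the low order error preserved;
the patch only changes the name of the macro that chooses between them.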


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3/6] Remove unused TOINT_RINT and TOINT_SHIFT macros
  2018-07-05 16:01 [PATCH 0/6 v2] Updates to the new math code Szabolcs Nagy
                   ` (2 preceding siblings ...)
  2018-07-05 16:03 ` [PATCH 4/6] Change the return type of converttoint and document the semantics Szabolcs Nagy
@ 2018-07-05 16:03 ` Szabolcs Nagy
  2018-07-05 16:04 ` [PATCH 5/6] Fix large ulp error in pow without fma very near 1.0 Szabolcs Nagy
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2018-07-05 16:03 UTC (permalink / raw)
  To: newlib; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0003-Remove-unused-TOINT_RINT-and-TOINT_SHIFT-macros.patch --]
[-- Type: text/x-patch, Size: 1398 bytes --]

From 2801b0ad036d74ca45a89df8b9210207b81961e7 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu, 5 Jul 2018 12:42:13 +0100
Subject: [PATCH 3/6] Remove unused TOINT_RINT and TOINT_SHIFT macros

Only have separate code paths for TOINT_INTRINSICS and !TOINT_INTRINSICS.
---
 newlib/libm/common/math_config.h | 6 ------
 newlib/libm/common/sf_exp.c      | 5 +----
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/newlib/libm/common/math_config.h b/newlib/libm/common/math_config.h
index 1f83756ab..b46e44e51 100644
--- a/newlib/libm/common/math_config.h
+++ b/newlib/libm/common/math_config.h
@@ -93,12 +93,6 @@ converttoint (double_t x)
 #ifndef TOINT_INTRINSICS
 # define TOINT_INTRINSICS 0
 #endif
-#ifndef TOINT_RINT
-# define TOINT_RINT 0
-#endif
-#ifndef TOINT_SHIFT
-# define TOINT_SHIFT 1
-#endif
 
 static inline uint32_t
 asuint (float f)
diff --git a/newlib/libm/common/sf_exp.c b/newlib/libm/common/sf_exp.c
index 79ec62bf5..5d72c3451 100644
--- a/newlib/libm/common/sf_exp.c
+++ b/newlib/libm/common/sf_exp.c
@@ -88,10 +88,7 @@ expf (float x)
 #if TOINT_INTRINSICS
   kd = roundtoint (z);
   ki = converttoint (z);
-#elif TOINT_RINT
-  kd = rint (z);
-  ki = (long) kd;
-#elif TOINT_SHIFT
+#else
 # define SHIFT __exp2f_data.shift
   kd = (double) (z + SHIFT); /* Rounding to double precision is required.  */
   ki = asuint64 (kd);
-- 
2.14.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 4/6] Change the return type of converttoint and document the semantics
  2018-07-05 16:01 [PATCH 0/6 v2] Updates to the new math code Szabolcs Nagy
  2018-07-05 16:01 ` [PATCH 1/6] Fix code style and comments of " Szabolcs Nagy
  2018-07-05 16:02 ` [PATCH 2/6] Move __HAVE_FAST_FMA to math_config.h Szabolcs Nagy
@ 2018-07-05 16:03 ` Szabolcs Nagy
  2018-07-05 16:03 ` [PATCH 3/6] Remove unused TOINT_RINT and TOINT_SHIFT macros Szabolcs Nagy
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2018-07-05 16:03 UTC (permalink / raw)
  To: newlib; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0004-Change-the-return-type-of-converttoint-and-document-.patch --]
[-- Type: text/x-patch, Size: 2249 bytes --]

From bd3ab2b60800eb26e3fc7f2f2e6709c6651ac3d9 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed, 4 Jul 2018 11:09:39 +0100
Subject: [PATCH 4/6] Change the return type of converttoint and document the
 semantics

The roundtoint and converttoint internal functions are only called with small
values, so a 32-bit result is enough for converttoint, and since it is a signed
int conversion the natural return type is int32_t.

The original idea was to help the compiler keep the result in uint64_t: then
it is clear that no sign extension is needed and there is no accidental
undefined or implementation-defined signed int arithmetic.

But it turns out gcc does a good job with inlining, so changing the type has
no overhead and the semantics of the conversion are less surprising this way.
Since we want to allow the asuint64 (x + 0x1.8p52) style conversion, the top
bits were never usable and the existing code ensures that only the bottom
32 bits of the conversion result are used.

In newlib with default settings only aarch64 is affected and there is no
significant code generation change with gcc after the patch.
---
 newlib/libm/common/math_config.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/newlib/libm/common/math_config.h b/newlib/libm/common/math_config.h
index b46e44e51..d5896bbdd 100644
--- a/newlib/libm/common/math_config.h
+++ b/newlib/libm/common/math_config.h
@@ -71,15 +71,23 @@
 #endif
 
 #if HAVE_FAST_ROUND
+/* When set, the roundtoint and converttoint functions are provided with
+   the semantics documented below.  */
 # define TOINT_INTRINSICS 1
 
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+   consistently with converttoint so the results match.  If the result
+   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
 static inline double_t
 roundtoint (double_t x)
 {
   return round (x);
 }
 
-static inline uint64_t
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint.  If the result is not representable in an
+   int32_t then the semantics is unspecified.  */
+static inline int32_t
 converttoint (double_t x)
 {
 # if HAVE_FAST_LROUND
-- 
2.14.1
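
To make the "bottom 32 bits" remark above concrete, here is a standalone
sketch, not taken from the patch, of the asuint64 (x + 0x1.8p52) style
conversion next to the intrinsics style whose semantics the patch documents
(the *_ex names are invented):

#include <math.h>
#include <stdint.h>
#include <string.h>

static uint64_t
asuint64_ex (double x)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);
  return u;
}

/* Shift style: adding 0x1.8p52 forces x to be rounded to an integer in the
   current rounding mode; for |x| < 2^31 that integer ends up in the low
   32 bits of the bit pattern in two's complement form, so only those bits
   are usable and an int32_t result loses nothing (this relies on the usual
   wrap-around conversion to int32_t).  */
int32_t
toint_shift_ex (double x)
{
  return (int32_t) asuint64_ex (x + 0x1.8p52);
}

/* Intrinsics style with the semantics the patch documents: nearest int in
   all rounding modes, unspecified if the value does not fit in int32_t.  */
int32_t
toint_intrinsics_ex (double x)
{
  return (int32_t) lround (x);
}

The existing callers only ever use the low bits of the conversion result
(table index and exponent shift), which is why the narrower return type
costs nothing.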


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 5/6] Fix large ulp error in pow without fma very near 1.0
  2018-07-05 16:01 [PATCH 0/6 v2] Updates to the new math code Szabolcs Nagy
                   ` (3 preceding siblings ...)
  2018-07-05 16:03 ` [PATCH 3/6] Remove unused TOINT_RINT and TOINT_SHIFT macros Szabolcs Nagy
@ 2018-07-05 16:04 ` Szabolcs Nagy
  2018-07-05 18:45 ` [PATCH 6/6] Fix namespace issues in sinf, cosf and sincosf Szabolcs Nagy
  2018-07-11  3:37 ` [PATCH 0/6 v2] Updates to the new math code Corinna Vinschen
  6 siblings, 0 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2018-07-05 16:04 UTC (permalink / raw)
  To: newlib; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0005-Fix-large-ulp-error-in-pow-without-fma-very-near-1.0.patch --]
[-- Type: text/x-patch, Size: 1506 bytes --]

From b8aea235537a696ee05a84d56963a4b5537d8084 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue, 3 Jul 2018 13:05:31 +0100
Subject: [PATCH 5/6] Fix large ulp error in pow without fma very near 1.0

The !HAVE_FAST_FMA code path split r = z/c - 1 into r = rhi + rlo such
that when z = 1-tiny and c = 1, rlo and rhi could have much larger
magnitude than r, which later caused large rounding errors.

So do a nearest rounding instead of truncation at the split.

In newlib with default settings this was observable on some arm targets
that enable the new math code but have no fma.
---
 newlib/libm/common/pow.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/newlib/libm/common/pow.c b/newlib/libm/common/pow.c
index 11964e343..a5504e5e9 100644
--- a/newlib/libm/common/pow.c
+++ b/newlib/libm/common/pow.c
@@ -79,11 +79,13 @@ log_inline (uint64_t ix, double_t *tail)
   logc = T[i].logc;
   logctail = T[i].logctail;
 
-  /* r = z/c - 1, arranged to be exact.  */
+  /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable.  */
 #if HAVE_FAST_FMA
   r = fma (z, invc, -1.0);
 #else
-  double_t zhi = asdouble (iz & (-1ULL << 32));
+  /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
+  double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
   double_t zlo = z - zhi;
   double_t rhi = zhi * invc - 1.0;
   double_t rlo = zlo * invc;
-- 
2.14.1
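
The effect is easy to reproduce in isolation; below is a small standalone
sketch, not part of the patch (the *_ex helpers are invented), comparing the
truncating and the rounding split for z just below 1.0 with c = 1:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t
asuint64_ex (double x)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);
  return u;
}

static double
asdouble_ex (uint64_t u)
{
  double x;
  memcpy (&x, &u, sizeof x);
  return x;
}

int
main (void)
{
  double invc = 1.0;        /* c = 1, so r = z - 1.  */
  double z = 1.0 - 0x1p-52; /* z = 1 - tiny, the case described above.  */
  uint64_t iz = asuint64_ex (z);

  /* Old split: truncate the low 32 bits.  New split: round them to nearest.  */
  double zhi_trunc = asdouble_ex (iz & (-1ULL << 32));
  double zhi_round = asdouble_ex ((iz + (1ULL << 31)) & (-1ULL << 32));

  printf ("r       = %a\n", z * invc - 1.0);         /* -0x1p-52 */
  printf ("rhi old = %a\n", zhi_trunc * invc - 1.0); /* -0x1p-21 */
  printf ("rhi new = %a\n", zhi_round * invc - 1.0); /* 0x0p+0   */
  return 0;
}

With truncation, rhi is about 2^31 times larger in magnitude than r itself
and the subsequent hi+lo recombination cancels badly; rounding the split
keeps |rlo| <= |r|, as the new comment in the patch states.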


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 6/6] Fix namespace issues in sinf, cosf and sincosf
  2018-07-05 16:01 [PATCH 0/6 v2] Updates to the new math code Szabolcs Nagy
                   ` (4 preceding siblings ...)
  2018-07-05 16:04 ` [PATCH 5/6] Fix large ulp error in pow without fma very near 1.0 Szabolcs Nagy
@ 2018-07-05 18:45 ` Szabolcs Nagy
  2018-07-11  3:37 ` [PATCH 0/6 v2] Updates to the new math code Corinna Vinschen
  6 siblings, 0 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2018-07-05 18:45 UTC (permalink / raw)
  To: newlib; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0006-Fix-namespace-issues-in-sinf-cosf-and-sincosf.patch --]
[-- Type: text/x-patch, Size: 6222 bytes --]

From 9d90f90f60290134cf32ec5710594ab249f12876 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed, 4 Jul 2018 17:52:36 +0100
Subject: [PATCH 6/6] Fix namespace issues in sinf, cosf and sincosf

Use const sincos_t for clarity instead of making the typedef const.
Use __inv_pio4 and __sincosf_table to avoid namespace issues with
static linking.
---
 newlib/libm/common/cosf.c         |  6 +++---
 newlib/libm/common/sincosf.c      |  6 +++---
 newlib/libm/common/sincosf.h      | 15 ++++++++-------
 newlib/libm/common/sincosf_data.c |  4 ++--
 newlib/libm/common/sinf.c         |  6 +++---
 5 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/newlib/libm/common/cosf.c b/newlib/libm/common/cosf.c
index aac0a9aee..f87186c68 100644
--- a/newlib/libm/common/cosf.c
+++ b/newlib/libm/common/cosf.c
@@ -43,7 +43,7 @@ cosf (float y)
   double x = y;
   double s;
   int n;
-  sincos_t *p = &sincosf_table[0];
+  const sincos_t *p = &__sincosf_table[0];
 
   if (abstop12 (y) < abstop12 (pio4))
     {
@@ -62,7 +62,7 @@ cosf (float y)
       s = p->sign[n & 3];
 
       if (n & 2)
-	p = &sincosf_table[1];
+	p = &__sincosf_table[1];
 
       return sinf_poly (x * s, x * x, p, n ^ 1);
     }
@@ -77,7 +77,7 @@ cosf (float y)
       s = p->sign[(n + sign) & 3];
 
       if ((n + sign) & 2)
-	p = &sincosf_table[1];
+	p = &__sincosf_table[1];
 
       return sinf_poly (x * s, x * x, p, n ^ 1);
     }
diff --git a/newlib/libm/common/sincosf.c b/newlib/libm/common/sincosf.c
index 2ead353e8..65dd05e6e 100644
--- a/newlib/libm/common/sincosf.c
+++ b/newlib/libm/common/sincosf.c
@@ -43,7 +43,7 @@ sincosf (float y, float *sinp, float *cosp)
   double x = y;
   double s;
   int n;
-  sincos_t *p = &sincosf_table[0];
+  const sincos_t *p = &__sincosf_table[0];
 
   if (abstop12 (y) < abstop12 (pio4))
     {
@@ -69,7 +69,7 @@ sincosf (float y, float *sinp, float *cosp)
       s = p->sign[n & 3];
 
       if (n & 2)
-	p = &sincosf_table[1];
+	p = &__sincosf_table[1];
 
       sincosf_poly (x * s, x * x, p, n, sinp, cosp);
     }
@@ -84,7 +84,7 @@ sincosf (float y, float *sinp, float *cosp)
       s = p->sign[(n + sign) & 3];
 
       if ((n + sign) & 2)
-	p = &sincosf_table[1];
+	p = &__sincosf_table[1];
 
       sincosf_poly (x * s, x * x, p, n, sinp, cosp);
     }
diff --git a/newlib/libm/common/sincosf.h b/newlib/libm/common/sincosf.h
index 955a73312..27f24ffe3 100644
--- a/newlib/libm/common/sincosf.h
+++ b/newlib/libm/common/sincosf.h
@@ -33,15 +33,15 @@ static const double pi64 = 0x1.921FB54442D18p-62;
 /* PI / 4.  */
 static const double pio4 = 0x1.921FB54442D18p-1;
 
-typedef const struct
+typedef struct
 {
   double sign[4];
   double hpi_inv, hpi, c0, c1, c2, c3, c4, s1, s2, s3;
 } sincos_t;
 
-extern sincos_t sincosf_table[2] HIDDEN;
+extern const sincos_t __sincosf_table[2] HIDDEN;
 
-extern const uint32_t inv_pio4[] HIDDEN;
+extern const uint32_t __inv_pio4[] HIDDEN;
 
 /* abstop12 assumes floating point reinterpret is fast by default.
    If floating point comparisons are faster, define PREFER_FLOAT_COMPARISON.  */
@@ -63,7 +63,8 @@ abstop12 (float x)
    polynomial P and store the results in SINP and COSP.  N is the quadrant,
    if odd the cosine and sine polynomials are swapped.  */
 static inline void
-sincosf_poly (double x, double x2, sincos_t *p, int n, float *sinp, float *cosp)
+sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp,
+	      float *cosp)
 {
   double x3, x4, x5, x6, s, c, c1, c2, s1;
 
@@ -91,7 +92,7 @@ sincosf_poly (double x, double x2, sincos_t *p, int n, float *sinp, float *cosp)
 /* Return the sine of inputs X and X2 (X squared) using the polynomial P.
    N is the quadrant, and if odd the cosine polynomial is used.  */
 static inline float
-sinf_poly (double x, double x2, sincos_t *p, int n)
+sinf_poly (double x, double x2, const sincos_t *p, int n)
 {
   double x3, x4, x6, x7, s, c, c1, c2, s1;
 
@@ -126,7 +127,7 @@ sinf_poly (double x, double x2, sincos_t *p, int n)
    Use round/lround if inlined, otherwise convert to int.  To avoid inaccuracies
    introduced by truncating negative values, compute the quadrant * 2^24.  */
 static inline double
-reduce_fast (double x, sincos_t *p, int *np)
+reduce_fast (double x, const sincos_t *p, int *np)
 {
   double r;
 #if TOINT_INTRINSICS
@@ -151,7 +152,7 @@ reduce_fast (double x, sincos_t *p, int *np)
 static inline double
 reduce_large (uint32_t xi, int *np)
 {
-  const uint32_t *arr = &inv_pio4[(xi >> 26) & 15];
+  const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
   int shift = (xi >> 23) & 7;
   uint64_t n, res0, res1, res2;
 
diff --git a/newlib/libm/common/sincosf_data.c b/newlib/libm/common/sincosf_data.c
index 069ee7bf6..47fe960c1 100644
--- a/newlib/libm/common/sincosf_data.c
+++ b/newlib/libm/common/sincosf_data.c
@@ -34,7 +34,7 @@
 
 /* The constants and polynomials for sine and cosine.  The 2nd entry
    computes -cos (x) rather than cos (x) to get negation for free.  */
-sincos_t sincosf_table[2] =
+const sincos_t __sincosf_table[2] =
 {
   {
     { 1.0, -1.0, -1.0, 1.0 },
@@ -74,7 +74,7 @@ sincos_t sincosf_table[2] =
 
 /* Table with 4/PI to 192 bit precision.  To avoid unaligned accesses
    only 8 new bits are added per entry, making the table 4 times larger.  */
-const uint32_t inv_pio4[24] =
+const uint32_t __inv_pio4[24] =
 {
   0xa2,       0xa2f9,	  0xa2f983,   0xa2f9836e,
   0xf9836e4e, 0x836e4e44, 0x6e4e4415, 0x4e441529,
diff --git a/newlib/libm/common/sinf.c b/newlib/libm/common/sinf.c
index 715bdc8d0..c2e61039b 100644
--- a/newlib/libm/common/sinf.c
+++ b/newlib/libm/common/sinf.c
@@ -42,7 +42,7 @@ sinf (float y)
   double x = y;
   double s;
   int n;
-  sincos_t *p = &sincosf_table[0];
+  const sincos_t *p = &__sincosf_table[0];
 
   if (abstop12 (y) < abstop12 (pio4))
     {
@@ -66,7 +66,7 @@ sinf (float y)
       s = p->sign[n & 3];
 
       if (n & 2)
-	p = &sincosf_table[1];
+	p = &__sincosf_table[1];
 
       return sinf_poly (x * s, x * x, p, n);
     }
@@ -81,7 +81,7 @@ sinf (float y)
       s = p->sign[(n + sign) & 3];
 
       if ((n + sign) & 2)
-	p = &sincosf_table[1];
+	p = &__sincosf_table[1];
 
       return sinf_poly (x * s, x * x, p, n);
     }
-- 
2.14.1
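
For readers wondering what the static linking issue is: sincosf_table and
inv_pio4 are not reserved identifiers, so a conforming program may define
them for its own purposes.  A hypothetical sketch of the clash the
double-underscore names avoid (the program below is invented for
illustration):

/* user.c: conforming user code that happens to reuse the old, unreserved
   names.  */
extern void sincosf (float, float *, float *);

double sincosf_table[4]; /* legal: the identifier is not reserved */

int
main (void)
{
  float s, c;
  /* Linking this statically pulls in the libm object that implements
     sincosf.  If the library's coefficient table were the unreserved global
     sincosf_table, it could be resolved against (or collide with) the user
     array above; with the reserved name __sincosf_table no conforming
     program can clash with it.  */
  sincosf (0.5f, &s, &c);
  return (int) (s + c) + (int) sincosf_table[0];
}

Identifiers starting with two underscores are reserved for the
implementation, so the renamed symbols cannot conflict with user code.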


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/6 v2] Updates to the new math code
  2018-07-05 16:01 [PATCH 0/6 v2] Updates to the new math code Szabolcs Nagy
                   ` (5 preceding siblings ...)
  2018-07-05 18:45 ` [PATCH 6/6] Fix namespace issues in sinf, cosf and sincosf Szabolcs Nagy
@ 2018-07-11  3:37 ` Corinna Vinschen
  6 siblings, 0 replies; 8+ messages in thread
From: Corinna Vinschen @ 2018-07-11  3:37 UTC (permalink / raw)
  To: Szabolcs Nagy; +Cc: newlib, nd

[-- Attachment #1: Type: text/plain, Size: 1753 bytes --]

On Jul  5 16:58, Szabolcs Nagy wrote:
> v2:
> - Made HAVE_FAST_FMA macros consistent with other HAVE_* macros.
> - Removed TOINT_RINT and TOINT_SHIFT.
> - Include sinf/cosf namespace fix in the patch set.
> 
> There are some modifications and bug fixes in the Arm Optimized
> Routines repo and I'd like to sync newlib with it.
> 
> Szabolcs Nagy (6):
>   Fix code style and comments of new math code
>   Move __HAVE_FAST_FMA to math_config.h
>   Remove unused TOINT_RINT and TOINT_SHIFT macros
>   Change the return type of converttoint and document the semantics
>   Fix large ulp error in pow without fma very near 1.0
>   Fix namespace issues in sinf, cosf and sincosf
> 
>  newlib/libc/include/machine/ieeefp.h | 23 -------------
>  newlib/libm/common/cosf.c            |  6 ++--
>  newlib/libm/common/exp.c             | 22 +++++++++----
>  newlib/libm/common/exp2.c            | 18 ++++++++---
>  newlib/libm/common/log.c             | 48 ++++++++++++++-------------
>  newlib/libm/common/log2.c            | 35 ++++++++++----------
>  newlib/libm/common/log2_data.c       |  4 +--
>  newlib/libm/common/log_data.c        |  4 +--
>  newlib/libm/common/math_config.h     | 63 ++++++++++++++++++++++++++++--------
>  newlib/libm/common/pow.c             | 41 ++++++++++++++++-------
>  newlib/libm/common/sf_exp.c          |  5 +--
>  newlib/libm/common/sincosf.c         | 22 ++++++-------
>  newlib/libm/common/sincosf.h         | 15 +++++----
>  newlib/libm/common/sincosf_data.c    |  4 +--
>  newlib/libm/common/sinf.c            | 18 +++++------
>  15 files changed, 188 insertions(+), 140 deletions(-)
> 
> --
> 2.14.1

Pushed.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread
