[PATCH] math: Add new exp10 implementation

public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed

* [PATCH] math: Add new exp10 implementation
@ 2023-11-28 13:05 Joe Ramsay
  2023-11-28 14:09 ` Paul Zimmermann
  2023-11-28 16:30 ` Adhemerval Zanella Netto
  0 siblings, 2 replies; 5+ messages in thread
From: Joe Ramsay @ 2023-11-28 13:05 UTC (permalink / raw)
  To: libc-alpha; +Cc: Joe Ramsay

New implementation is based on the existing exp/exp2, with different
reduction constants and polynomial.

The default selection has worst-case error of 0.501 ULP. For targets
without rounding intrinsics a wider polynomial can be enabled to
retain 1 ULP of accuracy in non-nearest rounding modes - this would be
enabled by toggling EXP10_POLY_WIDE=1.

The exp/exp2 shared table is reused for exp10 - .rodata size of
e_exp_data increases by 64 bytes.

As for exp/exp2, targets with single-instruction rounding/conversion
intrinsics can use them by toggling TOINT_INTRINSICS=1 and adding the
necessary code to their math_private.h.

Improvements on Neoverse V1 compared to current GLIBC master:
exp10 thruput: 4.1x in [-9.9, 9.9]
exp10 latency: 1.6x in [-9.9, 9.9]
exp10 thruput: 4.1x in [0.5, 1]
exp10 latency: 1.6x in [0.5, 1]

Tested on:
    aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and
    x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction)
---
Thanks,
Joe
 sysdeps/ieee754/dbl-64/e_exp10.c     | 145 ++++++++++++++++++++++-----
 sysdeps/ieee754/dbl-64/e_exp_data.c  |  21 ++++
 sysdeps/ieee754/dbl-64/math_config.h |   7 ++
 3 files changed, 149 insertions(+), 24 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/e_exp10.c b/sysdeps/ieee754/dbl-64/e_exp10.c
index fa47f4f922..3f3a65d06f 100644
--- a/sysdeps/ieee754/dbl-64/e_exp10.c
+++ b/sysdeps/ieee754/dbl-64/e_exp10.c
@@ -16,36 +16,133 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <math.h>
+#include <math-barriers.h>
+#include <math-narrow-eval.h>
 #include <math_private.h>
 #include <float.h>
 #include <libm-alias-finite.h>
+#include "math_config.h"
 
-static const double log10_high = 0x2.4d7637p0;
-static const double log10_low = 0x7.6aaa2b05ba95cp-28;
+#define N (1 << EXP_TABLE_BITS)
+#define IndexMask (N - 1)
+#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX).  */
+#define UFlowBound -350
+#define SmallTop 0x3c6 /* top12(0x1p-57).  */
+#define BigTop 0x407   /* top12(0x1p8).  */
+#define Thresh 0x41    /* BigTop - SmallTop.  */
+#define Shift __exp_data.shift
+#define C(i) __exp_data.exp10_poly[i]
 
+static double __attribute__ ((noinline))
+special_case (uint64_t sbits, double_t tmp, uint64_t ki)
+{
+  double_t scale, y;
+
+  if (ki - (1ull << 16) < 0x80000000)
+    {
+      /* The exponent of scale might have overflowed by 1.  */
+      sbits -= 1ull << 52;
+      scale = asdouble (sbits);
+      y = 2 * (scale + scale * tmp);
+      return check_oflow (y);
+    }
+
+  /* n < 0, need special care in the subnormal range.  */
+  sbits += 1022ull << 52;
+  scale = asdouble (sbits);
+  y = scale + scale * tmp;
+
+  if (y < 1.0)
+    {
+      /* Round y to the right precision before scaling it into the subnormal
+	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	 E is the worst-case ulp error outside the subnormal range.  So this
+	 is only useful if the goal is better than 1 ulp worst-case error.  */
+      double_t lo = scale - y + scale * tmp;
+      double_t hi = 1.0 + y;
+      lo = 1.0 - hi + y + lo;
+      y = math_narrow_eval (hi + lo) - 1.0;
+      /* Avoid -0.0 with downward rounding.  */
+      if (WANT_ROUNDING && y == 0.0)
+	y = 0.0;
+      /* The underflow exception needs to be signaled explicitly.  */
+      math_force_eval (math_opt_barrier (0x1p-1022) * 0x1p-1022);
+    }
+  y = 0x1p-1022 * y;
+
+  return check_uflow (y);
+}
+
+/* Double-precision 10^x approximation. Largest observed error is ~0.501 ULP.
+ */
 double
-__ieee754_exp10 (double arg)
+__ieee754_exp10 (double x)
 {
-  int32_t lx;
-  double arg_high, arg_low;
-  double exp_high, exp_low;
-
-  if (!isfinite (arg))
-    return __ieee754_exp (arg);
-  if (arg < DBL_MIN_10_EXP - DBL_DIG - 10)
-    return DBL_MIN * DBL_MIN;
-  else if (arg > DBL_MAX_10_EXP + 1)
-    return DBL_MAX * DBL_MAX;
-  else if (fabs (arg) < 0x1p-56)
-    return 1.0;
-
-  GET_LOW_WORD (lx, arg);
-  lx &= 0xf8000000;
-  arg_high = arg;
-  SET_LOW_WORD (arg_high, lx);
-  arg_low = arg - arg_high;
-  exp_high = arg_high * log10_high;
-  exp_low = arg_high * log10_low + arg_low * M_LN10;
-  return __ieee754_exp (exp_high) * __ieee754_exp (exp_low);
+  uint64_t ix = asuint64 (x);
+  uint32_t abstop = (ix >> 52) & 0x7ff;
+
+  if (__glibc_unlikely (abstop - SmallTop >= Thresh))
+    {
+      if (abstop - SmallTop >= 0x80000000)
+	/* Avoid spurious underflow for tiny x.
+	   Note: 0 is common input.  */
+	return x + 1;
+      if (abstop == 0x7ff)
+	return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
+      if (x >= OFlowBound)
+	return __math_oflow (0);
+      if (x < UFlowBound)
+	return __math_uflow (0);
+
+      /* Large x is special-cased below.  */
+      abstop = 0;
+    }
+
+  /* Reduce x: z = x * N / log10(2), k = round(z).  */
+  double_t z = __exp_data.invlog10_2N * x;
+  double_t kd;
+  int64_t ki;
+#if TOINT_INTRINSICS
+  kd = roundtoint (z);
+  ki = converttoint (z);
+#else
+  kd = math_narrow_eval (z + Shift);
+  kd -= Shift;
+  ki = kd;
+#endif
+
+  /* r = x - k * log10(2), r in [-0.5, 0.5].  */
+  double_t r = x;
+  r = __exp_data.neglog10_2hiN * kd + r;
+  r = __exp_data.neglog10_2loN * kd + r;
+
+  /* exp10(x) = 2^(k/N) * 2^(r/N).
+     Approximate the two components separately.  */
+
+  /* s = 2^(k/N), using lookup table.  */
+  uint64_t e = ki << (52 - EXP_TABLE_BITS);
+  uint64_t i = (ki & IndexMask) * 2;
+  uint64_t u = __exp_data.tab[i + 1];
+  uint64_t sbits = u + e;
+
+  double_t tail = asdouble (__exp_data.tab[i]);
+
+  /* 2^(r/N) ~= 1 + r * Poly(r).  */
+  double_t r2 = r * r;
+  double_t p = C (0) + r * C (1);
+  double_t y = C (2) + r * C (3);
+  y = y + r2 * C (4);
+  y = p + r2 * y;
+  y = tail + y * r;
+
+  if (__glibc_unlikely (abstop == 0))
+    return special_case (sbits, y, ki);
+
+  /* Assemble components:
+     y  = 2^(r/N) * 2^(k/N)
+       ~= (y + 1) * s.  */
+  double_t s = asdouble (sbits);
+  return s * y + s;
 }
+
 libm_alias_finite (__ieee754_exp10, __exp10)
diff --git a/sysdeps/ieee754/dbl-64/e_exp_data.c b/sysdeps/ieee754/dbl-64/e_exp_data.c
index d498b8bc3b..c81f6dd9b1 100644
--- a/sysdeps/ieee754/dbl-64/e_exp_data.c
+++ b/sysdeps/ieee754/dbl-64/e_exp_data.c
@@ -58,6 +58,27 @@ const struct exp_data __exp_data = {
 0x1.5d7e09b4e3a84p-10,
 #endif
 },
+.invlog10_2N = 0x1.a934f0979a371p1 * N,
+.neglog10_2hiN = -0x1.3441350ap-2 / N,
+.neglog10_2loN = 0x1.0c0219dc1da99p-39 / N,
+.exp10_poly = {
+#if EXP10_POLY_WIDE
+/* Range is wider if using shift-based reduction: coeffs generated
+   using Remez in [-log10(2)/128, log10(2)/128 ].  */
+0x1.26bb1bbb55515p1,
+0x1.53524c73cd32bp1,
+0x1.0470591e1a108p1,
+0x1.2bd77b12fe9a8p0,
+0x1.14289fef24b78p-1
+#else
+/* Coeffs generated using Remez in [-log10(2)/256, log10(2)/256 ].  */
+0x1.26bb1bbb55516p1,
+0x1.53524c73ce9fep1,
+0x1.0470591ce4b26p1,
+0x1.2bd76577fe684p0,
+0x1.1446eeccd0efbp-1
+#endif
+},
 // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
 // tab[2*k] = asuint64(T[k])
 // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h
index 19af33fd86..3db595a296 100644
--- a/sysdeps/ieee754/dbl-64/math_config.h
+++ b/sysdeps/ieee754/dbl-64/math_config.h
@@ -192,6 +192,9 @@ check_uflow (double x)
 #define EXP_TABLE_BITS 7
 #define EXP_POLY_ORDER 5
 #define EXP2_POLY_ORDER 5
+/* Wider exp10 polynomial necessary for good precision in non-nearest rounding
+   and !TOINT_INTRINSICS.  */
+#define EXP10_POLY_WIDE 0
 extern const struct exp_data
 {
   double invln2N;
@@ -201,6 +204,10 @@ extern const struct exp_data
   double poly[4]; /* Last four coefficients.  */
   double exp2_shift;
   double exp2_poly[EXP2_POLY_ORDER];
+  double invlog10_2N;
+  double neglog10_2hiN;
+  double neglog10_2loN;
+  double exp10_poly[5];
   uint64_t tab[2*(1 << EXP_TABLE_BITS)];
 } __exp_data attribute_hidden;
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] math: Add new exp10 implementation
  2023-11-28 13:05 [PATCH] math: Add new exp10 implementation Joe Ramsay
@ 2023-11-28 14:09 ` Paul Zimmermann
  2023-11-28 16:30 ` Adhemerval Zanella Netto
  1 sibling, 0 replies; 5+ messages in thread
From: Paul Zimmermann @ 2023-11-28 14:09 UTC (permalink / raw)
  To: Joe Ramsay; +Cc: libc-alpha, Joe.Ramsay

       Hi Joe,

> The default selection has worst-case error of 0.501 ULP.

how do you get that value? With the check_sample.c program at [1] I can find
in a few minutes an error of 0.513ULP:

NEW exp10 0 28 -0x1.56113379e8deap-7 [1] [0.513] 0.512211 0.5122107111676528

I suggest you use this program to check the claims for worst-case errors,
with -threshold at least 1000000.

Best regards,
Paul

PS: 0.513ULP (if confirmed) is still much better than the 2.01ULP of the
current version, and it would outperform the best library for exp10, namely
the Apple library (0.512ULP) [2].

[1] https://gitlab.inria.fr/zimmerma/math_accuracy/-/blob/master/binary64/check_sample.c?ref_type=heads
[2] https://members.loria.fr/PZimmermann/papers/accuracy.pdf

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] math: Add new exp10 implementation
  2023-11-28 13:05 [PATCH] math: Add new exp10 implementation Joe Ramsay
  2023-11-28 14:09 ` Paul Zimmermann
@ 2023-11-28 16:30 ` Adhemerval Zanella Netto
  1 sibling, 0 replies; 5+ messages in thread
From: Adhemerval Zanella Netto @ 2023-11-28 16:30 UTC (permalink / raw)
  To: Joe Ramsay, libc-alpha



On 28/11/23 10:05, Joe Ramsay wrote:
> New implementation is based on the existing exp/exp2, with different
> reduction constants and polynomial.
> 
> The default selection has worst-case error of 0.501 ULP. For targets
> without rounding intrinsics a wider polynomial can be enabled to
> retain 1 ULP of accuracy in non-nearest rounding modes - this would be
> enabled by toggling EXP10_POLY_WIDE=1.

Does it really required for glibc? It is deep buried in the code, without
any comment (besides the commit itself), and only useful for non-default
rounding mode.  I think it is unlikely that an architecture maintainer
will eventually enable it.

> 
> The exp/exp2 shared table is reused for exp10 - .rodata size of
> e_exp_data increases by 64 bytes.
> 
> As for exp/exp2, targets with single-instruction rounding/conversion
> intrinsics can use them by toggling TOINT_INTRINSICS=1 and adding the
> necessary code to their math_private.h.
> 
> Improvements on Neoverse V1 compared to current GLIBC master:
> exp10 thruput: 4.1x in [-9.9, 9.9]
> exp10 latency: 1.6x in [-9.9, 9.9]
> exp10 thruput: 4.1x in [0.5, 1]
> exp10 latency: 1.6x in [0.5, 1]
> 

Can you provide a benchtest with these inputs? It should be straightforward
by defining some random inputs in the range.

Also, you might check on remove the SVID compat layer from exp10 (similar
to be668a8d782ab6bf363d4cdd7086295b5eebb8ea for exp10f).


> Tested on:
>     aarch64-linux-gnu (TOINT_INTRINSICS, fma contraction) and
>     x86_64-linux-gnu (!TOINT_INTRINSICS, no fma contraction)
> ---
> Thanks,
> Joe
>  sysdeps/ieee754/dbl-64/e_exp10.c     | 145 ++++++++++++++++++++++-----
>  sysdeps/ieee754/dbl-64/e_exp_data.c  |  21 ++++
>  sysdeps/ieee754/dbl-64/math_config.h |   7 ++
>  3 files changed, 149 insertions(+), 24 deletions(-)
> 
> diff --git a/sysdeps/ieee754/dbl-64/e_exp10.c b/sysdeps/ieee754/dbl-64/e_exp10.c
> index fa47f4f922..3f3a65d06f 100644
> --- a/sysdeps/ieee754/dbl-64/e_exp10.c
> +++ b/sysdeps/ieee754/dbl-64/e_exp10.c
> @@ -16,36 +16,133 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <math.h>
> +#include <math-barriers.h>
> +#include <math-narrow-eval.h>
>  #include <math_private.h>
>  #include <float.h>
>  #include <libm-alias-finite.h>
> +#include "math_config.h"
>  
> -static const double log10_high = 0x2.4d7637p0;
> -static const double log10_low = 0x7.6aaa2b05ba95cp-28;
> +#define N (1 << EXP_TABLE_BITS)
> +#define IndexMask (N - 1)
> +#define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX).  */

Usually for new code we prefer constants to be on float hex format.

> +#define UFlowBound -350
> +#define SmallTop 0x3c6 /* top12(0x1p-57).  */
> +#define BigTop 0x407   /* top12(0x1p8).  */
> +#define Thresh 0x41    /* BigTop - SmallTop.  */
> +#define Shift __exp_data.shift
> +#define C(i) __exp_data.exp10_poly[i]
>  
> +static double __attribute__ ((noinline))

I guess that noinline is generating better code here, maybe because it decreases
__ieee754_exp10 size and put less icache pressure.  In any case, it would be good
to add a comment on why noinline is preferable here. 

> +special_case (uint64_t sbits, double_t tmp, uint64_t ki)
> +{
> +  double_t scale, y;
> +
> +  if (ki - (1ull << 16) < 0x80000000)
> +    {
> +      /* The exponent of scale might have overflowed by 1.  */
> +      sbits -= 1ull << 52;
> +      scale = asdouble (sbits);
> +      y = 2 * (scale + scale * tmp);
> +      return check_oflow (y);
> +    }
> +
> +  /* n < 0, need special care in the subnormal range.  */
> +  sbits += 1022ull << 52;
> +  scale = asdouble (sbits);
> +  y = scale + scale * tmp;
> +
> +  if (y < 1.0)
> +    {
> +      /* Round y to the right precision before scaling it into the subnormal
> +	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
> +	 E is the worst-case ulp error outside the subnormal range.  So this
> +	 is only useful if the goal is better than 1 ulp worst-case error.  */
> +      double_t lo = scale - y + scale * tmp;
> +      double_t hi = 1.0 + y;
> +      lo = 1.0 - hi + y + lo;
> +      y = math_narrow_eval (hi + lo) - 1.0;
> +      /* Avoid -0.0 with downward rounding.  */
> +      if (WANT_ROUNDING && y == 0.0)
> +	y = 0.0;
> +      /* The underflow exception needs to be signaled explicitly.  */
> +      math_force_eval (math_opt_barrier (0x1p-1022) * 0x1p-1022);
> +    }
> +  y = 0x1p-1022 * y;
> +
> +  return check_uflow (y);
> +}
> +
> +/* Double-precision 10^x approximation. Largest observed error is ~0.501 ULP.
> + */
>  double
> -__ieee754_exp10 (double arg)
> +__ieee754_exp10 (double x)
>  {
> -  int32_t lx;
> -  double arg_high, arg_low;
> -  double exp_high, exp_low;
> -
> -  if (!isfinite (arg))
> -    return __ieee754_exp (arg);
> -  if (arg < DBL_MIN_10_EXP - DBL_DIG - 10)
> -    return DBL_MIN * DBL_MIN;
> -  else if (arg > DBL_MAX_10_EXP + 1)
> -    return DBL_MAX * DBL_MAX;
> -  else if (fabs (arg) < 0x1p-56)
> -    return 1.0;
> -
> -  GET_LOW_WORD (lx, arg);
> -  lx &= 0xf8000000;
> -  arg_high = arg;
> -  SET_LOW_WORD (arg_high, lx);
> -  arg_low = arg - arg_high;
> -  exp_high = arg_high * log10_high;
> -  exp_low = arg_high * log10_low + arg_low * M_LN10;
> -  return __ieee754_exp (exp_high) * __ieee754_exp (exp_low);
> +  uint64_t ix = asuint64 (x);
> +  uint32_t abstop = (ix >> 52) & 0x7ff;
> +
> +  if (__glibc_unlikely (abstop - SmallTop >= Thresh))
> +    {
> +      if (abstop - SmallTop >= 0x80000000)
> +	/* Avoid spurious underflow for tiny x.
> +	   Note: 0 is common input.  */
> +	return x + 1;
> +      if (abstop == 0x7ff)
> +	return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
> +      if (x >= OFlowBound)
> +	return __math_oflow (0);
> +      if (x < UFlowBound)
> +	return __math_uflow (0);
> +
> +      /* Large x is special-cased below.  */
> +      abstop = 0;
> +    }
> +
> +  /* Reduce x: z = x * N / log10(2), k = round(z).  */
> +  double_t z = __exp_data.invlog10_2N * x;
> +  double_t kd;
> +  int64_t ki;
> +#if TOINT_INTRINSICS
> +  kd = roundtoint (z);
> +  ki = converttoint (z);
> +#else
> +  kd = math_narrow_eval (z + Shift);
> +  kd -= Shift;
> +  ki = kd;
> +#endif
> +
> +  /* r = x - k * log10(2), r in [-0.5, 0.5].  */
> +  double_t r = x;
> +  r = __exp_data.neglog10_2hiN * kd + r;
> +  r = __exp_data.neglog10_2loN * kd + r;
> +
> +  /* exp10(x) = 2^(k/N) * 2^(r/N).
> +     Approximate the two components separately.  */
> +
> +  /* s = 2^(k/N), using lookup table.  */
> +  uint64_t e = ki << (52 - EXP_TABLE_BITS);
> +  uint64_t i = (ki & IndexMask) * 2;
> +  uint64_t u = __exp_data.tab[i + 1];
> +  uint64_t sbits = u + e;
> +
> +  double_t tail = asdouble (__exp_data.tab[i]);
> +
> +  /* 2^(r/N) ~= 1 + r * Poly(r).  */
> +  double_t r2 = r * r;
> +  double_t p = C (0) + r * C (1);
> +  double_t y = C (2) + r * C (3);
> +  y = y + r2 * C (4);
> +  y = p + r2 * y;
> +  y = tail + y * r;
> +
> +  if (__glibc_unlikely (abstop == 0))
> +    return special_case (sbits, y, ki);
> +
> +  /* Assemble components:
> +     y  = 2^(r/N) * 2^(k/N)
> +       ~= (y + 1) * s.  */
> +  double_t s = asdouble (sbits);
> +  return s * y + s;
>  }
> +
>  libm_alias_finite (__ieee754_exp10, __exp10)
> diff --git a/sysdeps/ieee754/dbl-64/e_exp_data.c b/sysdeps/ieee754/dbl-64/e_exp_data.c
> index d498b8bc3b..c81f6dd9b1 100644
> --- a/sysdeps/ieee754/dbl-64/e_exp_data.c
> +++ b/sysdeps/ieee754/dbl-64/e_exp_data.c
> @@ -58,6 +58,27 @@ const struct exp_data __exp_data = {
>  0x1.5d7e09b4e3a84p-10,
>  #endif
>  },
> +.invlog10_2N = 0x1.a934f0979a371p1 * N,
> +.neglog10_2hiN = -0x1.3441350ap-2 / N,
> +.neglog10_2loN = 0x1.0c0219dc1da99p-39 / N,
> +.exp10_poly = {
> +#if EXP10_POLY_WIDE
> +/* Range is wider if using shift-based reduction: coeffs generated
> +   using Remez in [-log10(2)/128, log10(2)/128 ].  */
> +0x1.26bb1bbb55515p1,
> +0x1.53524c73cd32bp1,
> +0x1.0470591e1a108p1,
> +0x1.2bd77b12fe9a8p0,
> +0x1.14289fef24b78p-1
> +#else
> +/* Coeffs generated using Remez in [-log10(2)/256, log10(2)/256 ].  */
> +0x1.26bb1bbb55516p1,
> +0x1.53524c73ce9fep1,
> +0x1.0470591ce4b26p1,
> +0x1.2bd76577fe684p0,
> +0x1.1446eeccd0efbp-1
> +#endif
> +},
>  // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
>  // tab[2*k] = asuint64(T[k])
>  // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
> diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h
> index 19af33fd86..3db595a296 100644
> --- a/sysdeps/ieee754/dbl-64/math_config.h
> +++ b/sysdeps/ieee754/dbl-64/math_config.h
> @@ -192,6 +192,9 @@ check_uflow (double x)
>  #define EXP_TABLE_BITS 7
>  #define EXP_POLY_ORDER 5
>  #define EXP2_POLY_ORDER 5
> +/* Wider exp10 polynomial necessary for good precision in non-nearest rounding
> +   and !TOINT_INTRINSICS.  */
> +#define EXP10_POLY_WIDE 0
>  extern const struct exp_data
>  {
>    double invln2N;
> @@ -201,6 +204,10 @@ extern const struct exp_data
>    double poly[4]; /* Last four coefficients.  */
>    double exp2_shift;
>    double exp2_poly[EXP2_POLY_ORDER];
> +  double invlog10_2N;
> +  double neglog10_2hiN;
> +  double neglog10_2loN;
> +  double exp10_poly[5];
>    uint64_t tab[2*(1 << EXP_TABLE_BITS)];
>  } __exp_data attribute_hidden;
>  

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] math: Add new exp10 implementation
  2023-11-28 17:12 Wilco Dijkstra
@ 2023-11-28 18:43 ` Adhemerval Zanella Netto
  0 siblings, 0 replies; 5+ messages in thread
From: Adhemerval Zanella Netto @ 2023-11-28 18:43 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: 'GNU C Library', Joe Ramsay



On 28/11/23 14:12, Wilco Dijkstra wrote:
> Hi Adhemerval,
> 
>> Also, you might check on remove the SVID compat layer from exp10 (similar
>> to be668a8d782ab6bf363d4cdd7086295b5eebb8ea for exp10f).
> 
> Yes I was already looking at that. Would it be almost exactly like exp10f or
> more like modf (16439f419b270184ec501c531bf20d83b6745fb0) though?
> The latter handles all the long double special cases and looks newer/better.

Indeed it would be more like modf because for exp10f one I hadn't need to handle
the NO_LONG_DOUBLE/LONG_DOUBLE_COMPAT ABIs.

> 
> I also noticed that math/w_exp10f_compat.c directly calls __exp10f while all
> the other compat functions use __ieee754_<func>. Maybe some of this is
> because exp10f was never in the sysdeps/ieee754/flt_32 directory?

Yes, before 6e98983c0991433fec8cef8702e2028fa6bef12d exp10f was implemented
on top of __ieee754_exp.

> 
> All this alias magic is magical...

Fell free to simplify this, all the w_*/s_*/e_* made some sense when SVID
supported was added, but currently I think it ends up only adding excessive
complexity.

> 
> Cheers,
> Wilco

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] math: Add new exp10 implementation
@ 2023-11-28 17:12 Wilco Dijkstra
  2023-11-28 18:43 ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 5+ messages in thread
From: Wilco Dijkstra @ 2023-11-28 17:12 UTC (permalink / raw)
  To: Adhemerval Zanella; +Cc: 'GNU C Library', Joe Ramsay

Hi Adhemerval,

> Also, you might check on remove the SVID compat layer from exp10 (similar
> to be668a8d782ab6bf363d4cdd7086295b5eebb8ea for exp10f).

Yes I was already looking at that. Would it be almost exactly like exp10f or
more like modf (16439f419b270184ec501c531bf20d83b6745fb0) though?
The latter handles all the long double special cases and looks newer/better.

I also noticed that math/w_exp10f_compat.c directly calls __exp10f while all
the other compat functions use __ieee754_<func>. Maybe some of this is
because exp10f was never in the sysdeps/ieee754/flt_32 directory?

All this alias magic is magical...

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-11-28 18:43 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-28 13:05 [PATCH] math: Add new exp10 implementation Joe Ramsay
2023-11-28 14:09 ` Paul Zimmermann
2023-11-28 16:30 ` Adhemerval Zanella Netto
2023-11-28 17:12 Wilco Dijkstra
2023-11-28 18:43 ` Adhemerval Zanella Netto

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).