diff --git a/NEWS b/NEWS index 79bee8ee6b..04a0e89010 100644 --- a/NEWS +++ b/NEWS @@ -16,7 +16,7 @@ Major new features: to set the install root if you wish to install into a non-default configured location. -* Optimized generic exp, exp2, log, sinf, cosf, sincosf and tanf. +* Optimized generic exp, exp2, log, log2, sinf, cosf, sincosf and tanf. * The reallocarray function is now declared under _DEFAULT_SOURCE, not just for _GNU_SOURCE, to match BSD environments. diff --git a/math/Makefile b/math/Makefile index 8bfbebc4d0..2537b2a9ad 100644 --- a/math/Makefile +++ b/math/Makefile @@ -127,7 +127,8 @@ type-ldouble-yes := ldouble type-double-suffix := type-double-routines := branred doasin dosincos mpa mpatan2 \ k_rem_pio2 mpatan mpsqrt mptan sincos32 \ - sincostab math_err e_exp_data e_log_data + sincostab math_err e_exp_data e_log_data \ + e_log2_data # float support type-float-suffix := f diff --git a/sysdeps/i386/fpu/e_log2_data.c b/sysdeps/i386/fpu/e_log2_data.c new file mode 100644 index 0000000000..1cc8931700 --- /dev/null +++ b/sysdeps/i386/fpu/e_log2_data.c @@ -0,0 +1 @@ +/* Not needed. */ diff --git a/sysdeps/ia64/fpu/e_log2_data.c b/sysdeps/ia64/fpu/e_log2_data.c new file mode 100644 index 0000000000..1cc8931700 --- /dev/null +++ b/sysdeps/ia64/fpu/e_log2_data.c @@ -0,0 +1 @@ +/* Not needed. */ diff --git a/sysdeps/ieee754/dbl-64/e_log2.c b/sysdeps/ieee754/dbl-64/e_log2.c index e4a6aff9a3..916eb466f8 100644 --- a/sysdeps/ieee754/dbl-64/e_log2.c +++ b/sysdeps/ieee754/dbl-64/e_log2.c @@ -1,133 +1,141 @@ -/* Adapted for log2 by Ulrich Drepper . */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. 
- * ==================================================== - */ +/* Double-precision log2(x) function. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -/* __ieee754_log2(x) - * Return the logarithm to base 2 of x - * - * Method : - * 1. Argument Reduction: find k and f such that - * x = 2^k * (1+f), - * where sqrt(2)/2 < 1+f < sqrt(2) . - * - * 2. Approximation of log(1+f). - * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) - * = 2s + 2/3 s**3 + 2/5 s**5 + ....., - * = 2s + s*R - * We use a special Reme algorithm on [0,0.1716] to generate - * a polynomial of degree 14 to approximate R The maximum error - * of this polynomial approximation is bounded by 2**-58.45. In - * other words, - * 2 4 6 8 10 12 14 - * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s - * (the values of Lg1 to Lg7 are listed in the program) - * and - * | 2 14 | -58.45 - * | Lg1*s +...+Lg7*s - R(z) | <= 2 - * | | - * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. - * In order to guarantee error in log below 1ulp, we compute log - * by - * log(1+f) = f - s*(f - R) (if f is not too large) - * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) - * - * 3. Finally, log(x) = k + log(1+f). - * = k+(f-(hfsq-(s*(hfsq+R)))) - * - * Special cases: - * log2(x) is NaN with signal if x < 0 (including -INF) ; - * log2(+INF) is +INF; log(0) is -INF with signal; - * log2(NaN) is that NaN with no signal. - * - * Constants: - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. - */ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <math.h> -#include <math_private.h> -#include <fix-int-fp-convert-zero.h> +#include <stdint.h> +#include "math_config.h" -static const double ln2 = 0.69314718055994530942; -static const double two54 = 1.80143985094819840000e+16; /* 43500000 00000000 */ -static const double Lg1 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ -static const double Lg2 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ -static const double Lg3 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ -static const double Lg4 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ -static const double Lg5 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ -static const double Lg6 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ -static const double Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ +#define T __log2_data.tab +#define T2 __log2_data.tab2 +#define B __log2_data.poly1 +#define A __log2_data.poly +#define InvLn2hi __log2_data.invln2hi +#define InvLn2lo __log2_data.invln2lo +#define N (1 << LOG2_TABLE_BITS) +#define OFF 0x3fe6000000000000 -static const double zero = 0.0; +/* Top 16 bits of a double. */ +static inline uint32_t +top16 (double x) +{ + return asuint64 (x) >> 48; +} double __ieee754_log2 (double x) { - double hfsq, f, s, z, R, w, t1, t2, dk; - int32_t k, hx, i, j; - uint32_t lx; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. 
*/ + double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; + uint64_t ix, iz, tmp; + uint32_t top; + int k, i; - EXTRACT_WORDS (hx, lx, x); + ix = asuint64 (x); + top = top16 (x); - k = 0; - if (hx < 0x00100000) - { /* x < 2**-1022 */ - if (__glibc_unlikely (((hx & 0x7fffffff) | lx) == 0)) - return -two54 / fabs (x); /* log(+-0)=-inf */ - if (__glibc_unlikely (hx < 0)) - return (x - x) / (x - x); /* log(-#) = NaN */ - k -= 54; - x *= two54; /* subnormal number, scale up x */ - GET_HIGH_WORD (hx, x); - } - if (__glibc_unlikely (hx >= 0x7ff00000)) - return x + x; - k += (hx >> 20) - 1023; - hx &= 0x000fffff; - i = (hx + 0x95f64) & 0x100000; - SET_HIGH_WORD (x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ - k += (i >> 20); - dk = (double) k; - f = x - 1.0; - if ((0x000fffff & (2 + hx)) < 3) - { /* |f| < 2**-20 */ - if (f == zero) - { - if (FIX_INT_FP_CONVERT_ZERO && dk == 0.0) - dk = 0.0; - return dk; - } - R = f * f * (0.5 - 0.33333333333333333 * f); - return dk - (R - f) / ln2; - } - s = f / (2.0 + f); - z = s * s; - i = hx - 0x6147a; - w = z * z; - j = 0x6b851 - hx; - t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); - t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); - i |= j; - R = t2 + t1; - if (i > 0) +#define LO asuint64 (1.0 - 0x1.5b51p-5) +#define HI asuint64 (1.0 + 0x1.6ab2p-5) + if (__glibc_unlikely (ix - LO < HI - LO)) { - hfsq = 0.5 * f * f; - return dk - ((hfsq - (s * (hfsq + R))) - f) / ln2; + /* Handle close to 1.0 inputs separately. */ + /* Fix sign of zero with downward rounding when x==1. */ + if (WANT_ROUNDING && __glibc_unlikely (ix == asuint64 (1.0))) + return 0; + r = x - 1.0; +#ifdef __FP_FAST_FMA + hi = r * InvLn2hi; + lo = r * InvLn2lo + __builtin_fma (r, InvLn2hi, -hi); +#else + double_t rhi, rlo; + rhi = asdouble (asuint64 (r) & -1ULL << 32); + rlo = r - rhi; + hi = rhi * InvLn2hi; + lo = rlo * InvLn2hi + r * InvLn2lo; +#endif + r2 = r * r; /* rounding error: 0x1p-62. 
*/ + r4 = r2 * r2; + /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ + p = r2 * (B[0] + r * B[1]); + y = hi + p; + lo += hi - y + p; + lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); + y += lo; + return y; } - else + if (__glibc_unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) { - return dk - ((s * (f - R)) - f) / ln2; + /* x < 0x1p-1022 or inf or nan. */ + if (ix * 2 == 0) + return __math_divzero (1); + if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ + return x; + if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) + return __math_invalid (x); + /* x is subnormal, normalize it. */ + ix = asuint64 (x * 0x1p52); + ix -= 52ULL << 52; } -} + /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; + k = (int64_t) tmp >> 52; /* arithmetic shift */ + iz = ix - (tmp & 0xfffULL << 52); + invc = T[i].invc; + logc = T[i].logc; + z = asdouble (iz); + kd = (double_t) k; + + /* log2(x) = log2(z/c) + log2(c) + k. */ + /* r ~= z/c - 1, |r| < 1/(2*N). */ +#ifdef __FP_FAST_FMA + /* rounding error: 0x1p-55/N. */ + r = __builtin_fma (z, invc, -1.0); + t1 = r * InvLn2hi; + t2 = r * InvLn2lo + __builtin_fma (r, InvLn2hi, -t1); +#else + double_t rhi, rlo; + /* rounding error: 0x1p-55/N + 0x1p-65. */ + r = (z - T2[i].chi - T2[i].clo) * invc; + rhi = asdouble (asuint64 (r) & -1ULL << 32); + rlo = r - rhi; + t1 = rhi * InvLn2hi; + t2 = rlo * InvLn2hi + r * InvLn2lo; +#endif + + /* hi + lo = r/ln2 + log2(c) + k. */ + t3 = kd + logc; + hi = t3 + t1; + lo = t3 - hi + t1 + t2; + + /* log2(r+1) = r/ln2 + r^2*poly(r). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + r2 = r * r; /* rounding error: 0x1p-54/N^2. */ + r4 = r2 * r2; + /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). 
+ ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */ + p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); + y = lo + r2 * p + hi; + return y; +} +#ifndef __ieee754_log2 strong_alias (__ieee754_log2, __log2_finite) +#endif diff --git a/sysdeps/ieee754/dbl-64/e_log2_data.c b/sysdeps/ieee754/dbl-64/e_log2_data.c new file mode 100644 index 0000000000..f650072421 --- /dev/null +++ b/sysdeps/ieee754/dbl-64/e_log2_data.c @@ -0,0 +1,220 @@ +/* Data for log2. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "math_config.h" + +#define N (1 << LOG2_TABLE_BITS) + +const struct log2_data __log2_data = { +// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0 +.invln2hi = 0x1.7154765200000p+0, +.invln2lo = 0x1.705fc2eefa200p-33, +.poly1 = { +#if LOG2_POLY1_ORDER == 11 +// relative error: 0x1.2fad8188p-63 +// in -0x1.5b51p-5 0x1.6ab2p-5 +-0x1.71547652b82fep-1, +0x1.ec709dc3a03f7p-2, +-0x1.71547652b7c3fp-2, +0x1.2776c50f05be4p-2, +-0x1.ec709dd768fe5p-3, +0x1.a61761ec4e736p-3, +-0x1.7153fbc64a79bp-3, +0x1.484d154f01b4ap-3, +-0x1.289e4a72c383cp-3, +0x1.0b32f285aee66p-3, +#endif +}, +.poly = { +#if N == 64 && LOG2_POLY_ORDER == 7 +// relative error: 0x1.a72c2bf8p-58 +// abs error: 0x1.67a552c8p-66 +// in -0x1.f45p-8 0x1.f45p-8 +-0x1.71547652b8339p-1, +0x1.ec709dc3a04bep-2, +-0x1.7154764702ffbp-2, +0x1.2776c50034c48p-2, +-0x1.ec7b328ea92bcp-3, +0x1.a6225e117f92ep-3, +#endif +}, +/* Algorithm: + + x = 2^k z + log2(x) = k + log2(c) + log2(z/c) + log2(z/c) = poly(z/c - 1) + +where z is in [0x1.6p-1; 0x1.6p0) which is split into N subintervals and z falls +into the ith one, then table entries are computed as + + tab[i].invc = 1/c + tab[i].logc = (double)log2(c) + tab2[i].chi = (double)c + tab2[i].clo = (double)(c - (double)c) + +where c is near the center of the subinterval and is chosen by trying +-2^29 +floating point invc candidates around 1/center and selecting one for which + + 1) the rounding error in 0x1.8p10 + logc is 0, + 2) the rounding error in z - chi - clo is < 0x1p-64 and + 3) the rounding error in (double)log2(c) is minimized (< 0x1p-68). + +Note: 1) ensures that k + logc can be computed without rounding error, 2) +ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a +single rounding error when there is no fast fma for z*invc - 1, 3) ensures +that logc + poly(z/c - 1) has small error, however near x == 1 when +|log2(x)| < 0x1p-4, this is not enough so that is special cased. 
*/ +.tab = { +#if N == 64 +{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, +{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1}, +{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1}, +{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2}, +{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2}, +{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2}, +{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2}, +{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2}, +{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2}, +{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2}, +{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2}, +{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2}, +{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2}, +{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2}, +{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2}, +{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2}, +{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2}, +{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2}, +{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2}, +{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2}, +{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3}, +{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3}, +{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3}, +{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3}, +{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3}, +{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3}, +{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3}, +{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3}, +{0x1.19453847f2200p+0, -0x1.162595afdc000p-3}, +{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4}, +{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4}, +{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4}, +{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4}, +{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4}, +{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4}, +{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5}, +{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5}, +{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6}, +{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6}, +{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8}, +{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7}, 
+{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5}, +{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5}, +{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4}, +{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4}, +{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4}, +{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3}, +{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3}, +{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3}, +{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3}, +{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3}, +{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3}, +{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2}, +{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2}, +{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2}, +{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2}, +{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2}, +{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2}, +{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2}, +{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2}, +{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2}, +{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2}, +{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2}, +{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}, +#endif +}, +#ifndef __FP_FAST_FMA +.tab2 = { +# if N == 64 +{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55}, +{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57}, +{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55}, +{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55}, +{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55}, +{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56}, +{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56}, +{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57}, +{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55}, +{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57}, +{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55}, +{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55}, +{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56}, +{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56}, +{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56}, +{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55}, +{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57}, +{0x1.a5fffde04ff95p-1, 
0x1.67da98ce9b26bp-55}, +{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55}, +{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58}, +{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55}, +{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58}, +{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56}, +{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56}, +{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57}, +{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56}, +{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56}, +{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55}, +{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58}, +{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56}, +{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55}, +{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56}, +{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55}, +{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56}, +{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55}, +{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55}, +{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55}, +{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59}, +{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58}, +{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55}, +{0x1.0200004292367p+0, 0x1.b7ff365324681p-54}, +{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55}, +{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58}, +{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54}, +{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55}, +{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54}, +{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54}, +{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54}, +{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55}, +{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55}, +{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56}, +{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54}, +{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56}, +{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54}, +{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56}, +{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54}, +{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56}, +{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55}, +{0x1.4a00004569f89p+0, 
-0x1.9fa5c904fbcd2p-55}, +{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56}, +{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54}, +{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55}, +{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55}, +{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}, +# endif +}, +#endif /* __FP_FAST_FMA */ +}; diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h index 2eb793d4c8..9c3ea1d436 100644 --- a/sysdeps/ieee754/dbl-64/math_config.h +++ b/sysdeps/ieee754/dbl-64/math_config.h @@ -149,4 +149,20 @@ extern const struct log_data #endif } __log_data attribute_hidden; +#define LOG2_TABLE_BITS 6 +#define LOG2_POLY_ORDER 7 +#define LOG2_POLY1_ORDER 11 +extern const struct log2_data +{ + double invln2hi; + double invln2lo; + double poly[LOG2_POLY_ORDER - 1]; + double poly1[LOG2_POLY1_ORDER - 1]; + /* See e_log2_data.c for details. */ + struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS]; +#ifndef __FP_FAST_FMA + struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS]; +#endif +} __log2_data attribute_hidden; + #endif diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c deleted file mode 100644 index f08d5b337d..0000000000 --- a/sysdeps/ieee754/dbl-64/wordsize-64/e_log2.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -/* __ieee754_log2(x) - * Return the logarithm to base 2 of x - * - * Method : - * 1. Argument Reduction: find k and f such that - * x = 2^k * (1+f), - * where sqrt(2)/2 < 1+f < sqrt(2) . - * - * 2. Approximation of log(1+f). 
- * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) - * = 2s + 2/3 s**3 + 2/5 s**5 + ....., - * = 2s + s*R - * We use a special Reme algorithm on [0,0.1716] to generate - * a polynomial of degree 14 to approximate R The maximum error - * of this polynomial approximation is bounded by 2**-58.45. In - * other words, - * 2 4 6 8 10 12 14 - * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s - * (the values of Lg1 to Lg7 are listed in the program) - * and - * | 2 14 | -58.45 - * | Lg1*s +...+Lg7*s - R(z) | <= 2 - * | | - * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. - * In order to guarantee error in log below 1ulp, we compute log - * by - * log(1+f) = f - s*(f - R) (if f is not too large) - * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) - * - * 3. Finally, log(x) = k + log(1+f). - * = k+(f-(hfsq-(s*(hfsq+R)))) - * - * Special cases: - * log2(x) is NaN with signal if x < 0 (including -INF) ; - * log2(+INF) is +INF; log(0) is -INF with signal; - * log2(NaN) is that NaN with no signal. - * - * Constants: - * The hexadecimal values are the intended ones for the following - * constants. The decimal values may be used, provided that the - * compiler will convert from decimal to binary accurately enough - * to produce the hexadecimal values shown. 
- */ - -#include -#include - -static const double ln2 = 0.69314718055994530942; -static const double two54 = 1.80143985094819840000e+16; /* 4350000000000000 */ -static const double Lg1 = 6.666666666666735130e-01; /* 3FE5555555555593 */ -static const double Lg2 = 3.999999999940941908e-01; /* 3FD999999997FA04 */ -static const double Lg3 = 2.857142874366239149e-01; /* 3FD2492494229359 */ -static const double Lg4 = 2.222219843214978396e-01; /* 3FCC71C51D8E78AF */ -static const double Lg5 = 1.818357216161805012e-01; /* 3FC7466496CB03DE */ -static const double Lg6 = 1.531383769920937332e-01; /* 3FC39A09D078C69F */ -static const double Lg7 = 1.479819860511658591e-01; /* 3FC2F112DF3E5244 */ - -static const double zero = 0.0; - -double -__ieee754_log2 (double x) -{ - double hfsq, f, s, z, R, w, t1, t2, dk; - int64_t hx, i, j; - int32_t k; - - EXTRACT_WORDS64 (hx, x); - - k = 0; - if (hx < INT64_C(0x0010000000000000)) - { /* x < 2**-1022 */ - if (__glibc_unlikely ((hx & UINT64_C(0x7fffffffffffffff)) == 0)) - return -two54 / fabs (x); /* log(+-0)=-inf */ - if (__glibc_unlikely (hx < 0)) - return (x - x) / (x - x); /* log(-#) = NaN */ - k -= 54; - x *= two54; /* subnormal number, scale up x */ - EXTRACT_WORDS64 (hx, x); - } - if (__glibc_unlikely (hx >= UINT64_C(0x7ff0000000000000))) - return x + x; - k += (hx >> 52) - 1023; - hx &= UINT64_C(0x000fffffffffffff); - i = (hx + UINT64_C(0x95f6400000000)) & UINT64_C(0x10000000000000); - /* normalize x or x/2 */ - INSERT_WORDS64 (x, hx | (i ^ UINT64_C(0x3ff0000000000000))); - k += (i >> 52); - dk = (double) k; - f = x - 1.0; - if ((UINT64_C(0x000fffffffffffff) & (2 + hx)) < 3) - { /* |f| < 2**-20 */ - if (f == zero) - return dk; - R = f * f * (0.5 - 0.33333333333333333 * f); - return dk - (R - f) / ln2; - } - s = f / (2.0 + f); - z = s * s; - i = hx - UINT64_C(0x6147a00000000); - w = z * z; - j = UINT64_C(0x6b85100000000) - hx; - t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); - t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); - i |= j; - R 
= t2 + t1; - if (i > 0) - { - hfsq = 0.5 * f * f; - return dk - ((hfsq - (s * (hfsq + R))) - f) / ln2; - } - else - { - return dk - ((s * (f - R)) - f) / ln2; - } -} - -strong_alias (__ieee754_log2, __log2_finite) diff --git a/sysdeps/m68k/m680x0/fpu/e_log2_data.c b/sysdeps/m68k/m680x0/fpu/e_log2_data.c new file mode 100644 index 0000000000..1cc8931700 --- /dev/null +++ b/sysdeps/m68k/m680x0/fpu/e_log2_data.c @@ -0,0 +1 @@ +/* Not needed. */