diff --git a/sysdeps/aarch64/fpu/libmvec_d_sincos2.c b/sysdeps/aarch64/fpu/libmvec_d_sincos2.c index e69de29..f92291f 100644 --- a/sysdeps/aarch64/fpu/libmvec_d_sincos2.c +++ b/sysdeps/aarch64/fpu/libmvec_d_sincos2.c @@ -0,0 +1,21 @@ + +/* We need the includes before the defines so that the define of __sin + and __cos do not mess up the bits/mathcalls.h declaration of __sin + and __cos. */ + +#include +#include "endian.h" +#include "mydefs.h" +#include "usncs.h" +#include "MathLib.h" +#include +#include +#include +#include + +#define VEC_SIZE 2 +#define OP_TYPE __Float64x2_t +#define __sin _ZGVnN2v_sin +#define __cos _ZGVnN2v_cos + +#include "sysdeps/ieee754/dbl-64/s_sin.c" diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c index 8c589cb..e1db599 100644 --- a/sysdeps/ieee754/dbl-64/s_sin.c +++ b/sysdeps/ieee754/dbl-64/s_sin.c @@ -55,6 +55,18 @@ #include #include +#ifndef VEC_SIZE +# define VEC_SIZE 1 +#endif + +#if VEC_SIZE != 1 && VEC_SIZE != 2 +# error "This file must be updated to support other vector lengths." +#endif + +#ifndef OP_TYPE +# define OP_TYPE double +#endif + /* Helper macros to compute sin of the input values. */ #define POLYNOMIAL2(xx) ((((s5 * (xx) + s4) * (xx) + s3) * (xx) + s2) * (xx)) @@ -67,13 +79,25 @@ The constants s1, s2, s3, etc. are pre-computed values of 1/3!, 1/5! and so on. The result is returned to LHS and correction in COR. */ -#define TAYLOR_SIN(xx, a, da, cor) \ -({ \ - double t = ((POLYNOMIAL (xx) * (a) - 0.5 * (da)) * (xx) + (da)); \ - double res = (a) + t; \ - (cor) = ((a) - res) + t; \ - res; \ -}) +static inline double +__always_inline +taylor_sin_scalar (double xx, double a, double da, double *cor) +{ + double t = (POLYNOMIAL (xx) * a - 0.5 * da) * xx + da; + double res = a + t; + *cor = (a - res) + t; + return res; +} + +static inline OP_TYPE +__always_inline +taylor_sin_vector (OP_TYPE xx, OP_TYPE a, OP_TYPE da, OP_TYPE *cor) +{ + OP_TYPE t = (POLYNOMIAL (xx) * a - 0.5 * da) * xx + da; + OP_TYPE res = a + t; + *cor = (a - res) + t; + return res; +} /* This is again a variation of the Taylor series expansion with the term x^3/3! expanded into the following for better accuracy: @@ -126,10 +150,11 @@ static const double static const double t22 = 0x1.8p22; -void __dubsin (double x, double dx, double w[]); -void __docos (double x, double dx, double w[]); -double __mpsin (double x, double dx, bool reduce_range); -double __mpcos (double x, double dx, bool reduce_range); +extern void __dubsin (double x, double dx, double w[]); +extern void __docos (double x, double dx, double w[]); +extern double __mpsin (double x, double dx, bool reduce_range); +extern double __mpcos (double x, double dx, bool reduce_range); + static double slow (double x); static double slow1 (double x); static double slow2 (double x); @@ -148,7 +173,7 @@ static double cslow2 (double x); get the result in RES and a correction value in COR. */ static inline double __always_inline -do_cos (double x, double dx, double *corp) +do_cos_scalar (double x, double dx, double *corp) { mynumber u; @@ -170,6 +195,45 @@ do_cos (double x, double dx, double *corp) return res; } +static inline OP_TYPE +__always_inline +do_cos_vector (OP_TYPE x, OP_TYPE dx, OP_TYPE *corp) +{ +#if VEC_SIZE == 1 + mynumber u; + if (x < 0) + dx = -dx; + u.x = big + fabs (x); + x = fabs (x) - (u.x - big) + dx; +#else + mynumber u0, u1; + if (x[0] < 0) + dx[0] = -dx[0]; + if (x[1] < 0) + dx[1] = -dx[1]; + u0.x = big + fabs (x[0]); + u1.x = big + fabs (x[1]); + x[0] = fabs (x[0]) - (u0.x - big) + dx[0]; + x[1] = fabs (x[1]) - (u1.x - big) + dx[1]; +#endif + + OP_TYPE xx, s, sn, ssn, c, cs, ccs, res, cor; + xx = x * x; + s = x + x * xx * (sn3 + xx * sn5); + c = xx * (cs2 + xx * (cs4 + xx * cs6)); +#if VEC_SIZE == 1 + SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs); +#else + SINCOS_TABLE_LOOKUP (u0, sn[0], ssn[0], cs[0], ccs[0]); + SINCOS_TABLE_LOOKUP (u1, sn[1], ssn[1], cs[1], ccs[1]); +#endif + cor = (ccs - s * ssn - cs * c) - sn * s; + res = cs + cor; + cor = (cs - res) + cor; + *corp = cor; + return res; +} + /* A more precise variant of DO_COS. EPS is the adjustment to the correction COR. */ static inline double @@ -210,7 +274,7 @@ do_cos_slow (double x, double dx, double eps, double *corp) the result in RES and a correction value in COR. */ static inline double __always_inline -do_sin (double x, double dx, double *corp) +do_sin_scalar (double x, double dx, double *corp) { mynumber u; @@ -231,6 +295,45 @@ do_sin (double x, double dx, double *corp) return res; } +static inline OP_TYPE +__always_inline +do_sin_vector (OP_TYPE x, OP_TYPE dx, OP_TYPE *corp) +{ +#if VEC_SIZE == 1 + mynumber u; + if (x <= 0) + dx = -dx; + u.x = big + fabs (x); + x = fabs (x) - (u.x - big); +#else + mynumber u0, u1; + if (x[0] <= 0) + dx[0] = -dx[0]; + if (x[1] <= 0) + dx[1] = -dx[1]; + u0.x = big + fabs (x[0]); + u1.x = big + fabs (x[1]); + x[0] = fabs (x[0]) - (u0.x - big); + x[1] = fabs (x[1]) - (u1.x - big); +#endif + + OP_TYPE xx, s, sn, ssn, c, cs, ccs, cor, res; + xx = x * x; + s = x + (dx + x * xx * (sn3 + xx * sn5)); + c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6)); +#if VEC_SIZE == 1 + SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs); +#else + SINCOS_TABLE_LOOKUP (u0, sn[0], ssn[0], cs[0], ccs[0]); + SINCOS_TABLE_LOOKUP (u1, sn[1], ssn[1], cs[1], ccs[1]); +#endif + cor = (ssn + s * ccs - sn * c) + cs * s; + res = sn + cor; + cor = (sn - res) + cor; + *corp = cor; + return res; +} + /* A more precise variant of DO_SIN. EPS is the adjustment to the correction COR. */ static inline double @@ -337,13 +440,13 @@ do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant) if (xx < 0.01588) { /* Taylor series. */ - res = TAYLOR_SIN (xx, a, da, cor); + res = taylor_sin_scalar (xx, a, da, &cor); cor = 1.02 * cor + __copysign (eps, cor); retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant); } else { - res = do_sin (a, da, &cor); + res = do_sin_scalar (a, da, &cor); cor = 1.035 * cor + __copysign (eps, cor); retval = ((res == res + cor) ? __copysign (res, a) : sloww1 (a, da, x, shift_quadrant)); @@ -352,7 +455,7 @@ do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant) case 1: case 3: - res = do_cos (a, da, &cor); + res = do_cos_scalar (a, da, &cor); cor = 1.025 * cor + __copysign (eps, cor); retval = ((res == res + cor) ? ((n & 2) ? -res : res) : sloww2 (a, da, x, n)); @@ -411,13 +514,13 @@ do_sincos_2 (double a, double da, double x, int4 n, bool shift_quadrant) if (xx < 0.01588) { /* Taylor series. */ - res = TAYLOR_SIN (xx, a, da, cor); + res = taylor_sin_scalar (xx, a, da, &cor); cor = 1.02 * cor + __copysign (eps, cor); retval = (res == res + cor) ? res : bsloww (a, da, x, n); } else { - res = do_sin (a, da, &cor); + res = do_sin_scalar (a, da, &cor); cor = 1.035 * cor + __copysign (eps, cor); retval = ((res == res + cor) ? __copysign (res, a) : bsloww1 (a, da, x, n)); @@ -426,7 +529,7 @@ do_sincos_2 (double a, double da, double x, int4 n, bool shift_quadrant) case 1: case 3: - res = do_cos (a, da, &cor); + res = do_cos_scalar (a, da, &cor); cor = 1.025 * cor + __copysign (eps, cor); retval = ((res == res + cor) ? ((n & 2) ? -res : res) : bsloww2 (a, da, x, n)); @@ -436,33 +539,84 @@ do_sincos_2 (double a, double da, double x, int4 n, bool shift_quadrant) return retval; } +static double +__always_inline +do_sincos_tail (double x, int4 k, int4 kl, bool shift_quadrant) +{ + int4 n; + double retval; + if (k < 0x419921FB) + { + double a, da; + n = reduce_sincos_1 (x, &a, &da); + retval = do_sincos_1 (a, da, x, n, shift_quadrant); + } + else if (k < 0x42F00000) + { + double a, da; + n = reduce_sincos_2 (x, &a, &da); + retval = do_sincos_2 (a, da, x, n, shift_quadrant); + } + else if (k < 0x7ff00000) + { + retval = reduce_and_compute (x, shift_quadrant); + } + else + { + if (k == 0x7ff00000 && kl == 0) + __set_errno (EDOM); + retval = x / x; + } + return retval; +} + /*******************************************************************/ /* An ultimate sin routine. Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of sin(x) */ /*******************************************************************/ #ifdef IN_SINCOS -static double +static OP_TYPE #else -double +OP_TYPE SECTION #endif -__sin (double x) +__sin (OP_TYPE x) { - double xx, res, t, cor; - mynumber u; - int4 k, m; - double retval = 0; + OP_TYPE xx, res, res2, t, cor; + OP_TYPE retval; #ifndef IN_SINCOS SET_RESTORE_ROUND_53BIT (FE_TONEAREST); #endif +#if VEC_SIZE == 1 + mynumber u; + int4 m, k; + OP_TYPE zero = 0.0; u.x = x; m = u.i[HIGH_HALF]; k = 0x7fffffff & m; /* no sign */ +#else /* VEC_SIZE == 2 */ + mynumber u0, u1; + int4 m0, m1, k0, k1, k; + OP_TYPE zero = {0.0, 0.0}; + u0.x = x[0]; + u1.x = x[1]; + m0 = u0.i[HIGH_HALF]; + m1 = u1.i[HIGH_HALF]; + k0 = 0x7fffffff & m0; + k1 = 0x7fffffff & m1; + k = k0 > k1 ? k0 : k1; +#endif + if (k < 0x3e500000) /* if x->0 =>sin(x)=x */ { +#if VEC_SIZE == 1 math_check_force_underflow (x); +#else + math_check_force_underflow (x[0]); + math_check_force_underflow (x[1]); +#endif retval = x; } /*---------------------------- 2^-26 < |x|< 0.25 ----------------------*/ @@ -473,54 +627,64 @@ __sin (double x) t = POLYNOMIAL (xx) * (xx * x); res = x + t; cor = (x - res) + t; - retval = (res == res + 1.07 * cor) ? res : slow (x); + res2 = res + 1.07 * cor; +#if VEC_SIZE == 1 + retval = (res == res2) ? res : slow (x); +#else + retval[0] = (res[0] == res2[0]) ? res[0] : slow (x[0]); + retval[1] = (res[1] == res2[1]) ? res[1] : slow (x[1]); +#endif } /* else if (k < 0x3fd00000) */ + /*---------------------------- 0.25<|x|< 0.855469---------------------- */ else if (k < 0x3feb6000) { - res = do_sin (x, 0, &cor); - retval = (res == res + 1.096 * cor) ? res : slow1 (x); + res = do_sin_vector (x, zero, &cor); + res2 = res + 1.096 * cor; +#if VEC_SIZE == 1 + retval = (res == res2) ? res : slow1 (x); retval = __copysign (retval, x); +#else + retval[0] = (res[0] == res2[0]) ? res[0] : slow1 (x[0]); + retval[1] = (res[1] == res2[1]) ? res[1] : slow1 (x[1]); + retval[0] = __copysign (retval[0], x[0]); + retval[1] = __copysign (retval[1], x[1]); +#endif } /* else if (k < 0x3feb6000) */ /*----------------------- 0.855469 <|x|<2.426265 ----------------------*/ else if (k < 0x400368fd) { - +#if VEC_SIZE == 1 t = hp0 - fabs (x); - res = do_cos (t, hp1, &cor); - retval = (res == res + 1.020 * cor) ? res : slow2 (x); + res = do_cos_vector (t, hp1, &cor); + res2 = res + 1.020 * cor; + retval = (res == res2) ? res : slow2 (x); retval = __copysign (retval, x); +#else + OP_TYPE hp1_vec; + t[0] = hp0 - fabs (x[0]); + t[1] = hp0 - fabs (x[1]); + hp1_vec[0] = hp1_vec[1] = hp1; + res = do_cos_vector (t, hp1_vec, &cor); + res2 = res + 1.020 * cor; + retval[0] = (res[0] == res2[0]) ? res[0] : slow2 (x[0]); + retval[1] = (res[1] == res2[1]) ? res[1] : slow2 (x[1]); + retval[0] = __copysign (retval[0], x[0]); + retval[1] = __copysign (retval[1], x[1]); +#endif } /* else if (k < 0x400368fd) */ #ifndef IN_SINCOS /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/ - else if (k < 0x419921FB) - { - double a, da; - int4 n = reduce_sincos_1 (x, &a, &da); - retval = do_sincos_1 (a, da, x, n, false); - } /* else if (k < 0x419921FB ) */ - -/*---------------------105414350 <|x|< 281474976710656 --------------------*/ - else if (k < 0x42F00000) - { - double a, da; - - int4 n = reduce_sincos_2 (x, &a, &da); - retval = do_sincos_2 (a, da, x, n, false); - } /* else if (k < 0x42F00000 ) */ - -/* -----------------281474976710656 <|x| <2^1024----------------------------*/ - else if (k < 0x7ff00000) - retval = reduce_and_compute (x, false); - -/*--------------------- |x| > 2^1024 ----------------------------------*/ else { - if (k == 0x7ff00000 && u.i[LOW_HALF] == 0) - __set_errno (EDOM); - retval = x / x; +#if VEC_SIZE == 1 + retval = do_sincos_tail (x, k, u.i[LOW_HALF], false); +#else + retval[0] = do_sincos_tail (x[0], k0, u0.i[LOW_HALF], false); + retval[1] = do_sincos_tail (x[1], k1, u1.i[LOW_HALF], false); +#endif } #endif @@ -534,85 +698,122 @@ __sin (double x) /*******************************************************************/ #ifdef IN_SINCOS -static double +static OP_TYPE #else -double +OP_TYPE SECTION #endif -__cos (double x) +__cos (OP_TYPE x) { - double y, xx, res, cor, a, da; - mynumber u; - int4 k, m; - - double retval = 0; + OP_TYPE y, xx, res, res2, a, da, cor; + OP_TYPE retval; #ifndef IN_SINCOS SET_RESTORE_ROUND_53BIT (FE_TONEAREST); #endif +#if VEC_SIZE == 1 + mynumber u; + int4 m, k; + OP_TYPE zero = 0.0; + OP_TYPE one = 1.0; u.x = x; m = u.i[HIGH_HALF]; - k = 0x7fffffff & m; + k = 0x7fffffff & m; /* no sign */ +#else /* VEC_SIZE == 2 */ + mynumber u0, u1; + int4 m0, m1, k0, k1, k; + OP_TYPE zero = {0.0, 0.0}; + OP_TYPE one = {1.0, 1.0}; + u0.x = x[0]; + u1.x = x[1]; + m0 = u0.i[HIGH_HALF]; + m1 = u1.i[HIGH_HALF]; + k0 = 0x7fffffff & m0; + k1 = 0x7fffffff & m1; + k = k0 > k1 ? k0 : k1; +#endif /* |x|<2^-27 => cos(x)=1 */ if (k < 0x3e400000) - retval = 1.0; + retval = one; else if (k < 0x3feb6000) { /* 2^-27 < |x| < 0.855469 */ - res = do_cos (x, 0, &cor); - retval = (res == res + 1.020 * cor) ? res : cslow2 (x); + res = do_cos_vector(x, zero, &cor); + res2 = res + 1.020 * cor; +#if VEC_SIZE == 1 + retval = (res == res2) ? res : cslow2 (x); +#else + retval[0] = (res[0] == res2[0]) ? res[0] : cslow2 (x[0]); + retval[1] = (res[1] == res2[1]) ? res[1] : cslow2 (x[1]); +#endif } /* else if (k < 0x3feb6000) */ else if (k < 0x400368fd) { /* 0.855469 <|x|<2.426265 */ ; +#if VEC_SIZE == 1 y = hp0 - fabs (x); a = y + hp1; da = (y - a) + hp1; xx = a * a; if (xx < 0.01588) { - res = TAYLOR_SIN (xx, a, da, cor); + res = taylor_sin_scalar (xx, a, da, &cor); cor = 1.02 * cor + __copysign (1.0e-31, cor); retval = (res == res + cor) ? res : sloww (a, da, x, true); } else { - res = do_sin (a, da, &cor); + res = do_sin_scalar (a, da, &cor); cor = 1.035 * cor + __copysign (1.0e-31, cor); retval = ((res == res + cor) ? __copysign (res, a) : sloww1 (a, da, x, true)); } - +#else + OP_TYPE hp1_vec, cors; + hp1_vec[0] = hp1_vec[1] = hp1; + y[0] = hp0 - fabs (x[0]); + y[1] = hp0 - fabs (x[1]); + a = y + hp1_vec; + da = (y - a) + hp1_vec; + xx = a * a; + if (xx[0] < 0.01588 && x[1] < 0.01588) + { + res = taylor_sin_vector (xx, a, da, &cor); + cors[0] = __copysign (1.0e-31, cor[0]); + cors[1] = __copysign (1.0e-31, cor[1]); + cor = 1.02 * cor + cors; + res2 = res + cor; + retval[0] = (res[0] == res2[0]) ? res[0] + : sloww (a[0], da[0], x[0], true); + retval[1] = (res[1] == res2[1]) ? res[1] + : sloww (a[1], da[1], x[1], true); + } + else + { + res = do_sin_vector (a, da, &cor); + cors[0] = __copysign (1.0e-31, cor[0]); + cors[1] = __copysign (1.0e-31, cor[1]); + cor = 1.035 * cor + cors; + res2 = res + cor; + retval[0] = __copysign (res[0], a[0]); + retval[1] = __copysign (res[1], a[1]); + retval[0] = (res[0] == res2[0]) ? retval[0] : sloww1 (a[0], da[0], x[0], true); + retval[1] = (res[1] == res2[1]) ? retval[1] : sloww1 (a[1], da[1], x[1], true); + } +#endif } /* else if (k < 0x400368fd) */ - #ifndef IN_SINCOS - else if (k < 0x419921FB) - { /* 2.426265<|x|< 105414350 */ - double a, da; - int4 n = reduce_sincos_1 (x, &a, &da); - retval = do_sincos_1 (a, da, x, n, true); - } /* else if (k < 0x419921FB ) */ - - else if (k < 0x42F00000) - { - double a, da; - - int4 n = reduce_sincos_2 (x, &a, &da); - retval = do_sincos_2 (a, da, x, n, true); - } /* else if (k < 0x42F00000 ) */ - - /* 281474976710656 <|x| <2^1024 */ - else if (k < 0x7ff00000) - retval = reduce_and_compute (x, true); - else { - if (k == 0x7ff00000 && u.i[LOW_HALF] == 0) - __set_errno (EDOM); - retval = x / x; /* |x| > 2^1024 */ +#if VEC_SIZE == 1 + retval = do_sincos_tail (x, k, u.i[LOW_HALF], true); +#else + retval[0] = do_sincos_tail (x[0], k0, u0.i[LOW_HALF], true); + retval[1] = do_sincos_tail (x[1], k1, u1.i[LOW_HALF], true); +#endif } #endif