From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 78481 invoked by alias); 21 Mar 2018 17:52:39 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Received: (qmail 74329 invoked by uid 89); 21 Mar 2018 17:52:32 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-24.6 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,KAM_ASCII_DIVIDERS,RCVD_IN_DNSWL_NONE,SPF_HELO_PASS,SPF_PASS autolearn=ham version=3.3.2 spammy=ux, abs X-HELO: EUR01-HE1-obe.outbound.protection.outlook.com From: Wilco Dijkstra To: "libc-alpha@sourceware.org" CC: nd Subject: [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction Date: Wed, 21 Mar 2018 17:52:00 -0000 Message-ID: authentication-results: spf=none (sender IP is ) smtp.mailfrom=Wilco.Dijkstra@arm.com; x-ms-publictraffictype: Email x-microsoft-exchange-diagnostics: 1;DB6PR0801MB2039;7:JmHXXDVARtCN09KiYeGLuSCwh5zR6pyfM5lKUSaqyIrpgaKrvcvYNXPFebHtZGprQWZkpMVFzP/5jR3affbmY1qsxA1nwn7958te4hzEU1A8PmUl8DFqIbMulr732edJHmjhN2twjuZQk68MktFOkULaO+8H3KTK66RAz564PRMz7hlvq+70GOI/H0wNsYri66X6J36qef2u34a4rB78aS1ueKRSKQGi385FZ3U8LztU8Gg9GQn88aEumtvd1maT x-ms-exchange-antispam-srfa-diagnostics: SOS; x-ms-office365-filtering-ht: Tenant x-ms-office365-filtering-correlation-id: cbbc067c-36ed-4442-7008-08d58f54824c x-microsoft-antispam: UriScan:;BCL:0;PCL:0;RULEID:(7020095)(4652020)(48565401081)(5600026)(4604075)(3008032)(2017052603328)(7153060)(7193020);SRVR:DB6PR0801MB2039; x-ms-traffictypediagnostic: DB6PR0801MB2039: nodisclaimer: True x-microsoft-antispam-prvs: x-exchange-antispam-report-test: UriScan:(180628864354917); x-exchange-antispam-report-cfa-test: BCL:0;PCL:0;RULEID:(8211001083)(6040522)(2401047)(5005006)(8121501046)(3231221)(944501325)(52105095)(93006095)(93001095)(3002001)(10201501046)(6055026)(6041310)(20161123564045)(20161123558120)(20161123560045)(201703131423095)(201702281528075)(20161123555045)(201703061421075)(201703061406153)(20161123562045)(6072148)(201708071742011);SRVR:DB6PR0801MB2039;BCL:0;PCL:0;RULEID:;SRVR:DB6PR0801MB2039; x-forefront-prvs: 0618E4E7E1 x-forefront-antispam-report: SFV:NSPM;SFS:(10009020)(396003)(376002)(39380400002)(366004)(346002)(39860400002)(377424004)(189003)(199004)(54534003)(4326008)(9686003)(33656002)(14454004)(2501003)(3846002)(6916009)(6116002)(5250100002)(25786009)(53936002)(2906002)(55016002)(105586002)(66066001)(6436002)(5640700003)(8676002)(81166006)(81156014)(5660300001)(102836004)(7696005)(3280700002)(8936002)(3660700001)(86362001)(106356001)(2900100001)(99286004)(316002)(97736004)(26005)(2351001)(305945005)(7736002)(72206003)(74316002)(68736007)(478600001)(59450400001)(6506007);DIR:OUT;SFP:1101;SCL:1;SRVR:DB6PR0801MB2039;H:DB6PR0801MB2053.eurprd08.prod.outlook.com;FPR:;SPF:None;PTR:InfoNoRecords;MX:1;A:1;LANG:en; received-spf: None (protection.outlook.com: arm.com does not designate permitted sender hosts) x-microsoft-antispam-message-info: 8udvnrhhtDkn7ksAl/P74A9RlVn2cyae7DVqyZ9JOgSlFNLHzxHh4fygRuZMyVgwAsnrBUM1DVndYlJZIYEoedkd8Hr/3IgeRk6WzscNqBFfI6u8GP7U1cXwZpis1uAZ8qh87Frklh9VEW9pdi9Rr60Cep8GghfxGgkuffIXpCu/SNPSp7fqYPBbqgWaYG+Crv/Ajf5JPclhbZ+J7/HyQnnxup6TSe19QugL65so60SCLEcxIg+qxu1sxYG3g10dpgytwlFG5eur87diNJ/UtctHLFEyGTbHiT8a2BgSOHYX8BLgFIkcIg9VADPGKPqFSNkj5RfGeckTRs7edzZskQ== spamdiagnosticoutput: 1:99 spamdiagnosticmetadata: NSPM Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 X-OriginatorOrg: arm.com X-MS-Exchange-CrossTenant-Network-Message-Id: cbbc067c-36ed-4442-7008-08d58f54824c X-MS-Exchange-CrossTenant-originalarrivaltime: 21 Mar 2018 17:52:26.8554 (UTC) X-MS-Exchange-CrossTenant-fromentityheader: Hosted X-MS-Exchange-CrossTenant-id: f34e5979-57d9-4aaa-ad4d-b122a662184d X-MS-Exchange-Transport-CrossTenantHeadersStamped: DB6PR0801MB2039 X-SW-Source: 2018-03/txt/msg00506.txt.bz2 This patch improves the accuracy of the range reduction. When the input is large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is = not enough. Improve range reduction accuracy to 136 bits. As a result the spe= cial checks for results close to zero can be removed. The ULP of the polynomial= s is at worst 0.55ULP, so there is no reason for the slow functions, and they ca= n be removed. ChangeLog: 2018-03-20 Wilco Dijkstra * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to sincos_1, improve accuracy to 136 bits. (do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions. (__sin): Use improved reduction and simplified do_sincos calculation. (__cos): Likewise. * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise. -- diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c index c86fb9f2aa9f18418defc522830a7b8f85c1dfae..b8c366a6f05ef6b6632302fac96= cd19af518f1fe 100644 --- a/sysdeps/ieee754/dbl-64/s_sin.c +++ b/sysdeps/ieee754/dbl-64/s_sin.c @@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant) return retval; } =20 +/* Reduce range of x to within PI/2 with abs (x) < 105414350. The high pa= rt + is written to *a, the low part to *da. Range reduction is accurate to = 136 + bits so that when x is large and *a very close to zero, all 53 bits of = *a + are correct. */ static inline int4 __always_inline -reduce_sincos_1 (double x, double *a, double *da) +reduce_sincos (double x, double *a, double *da) { mynumber v; =20 @@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da) v.x =3D t; double y =3D (x - xn * mp1) - xn * mp2; int4 n =3D v.i[LOW_HALF] & 3; - double db =3D xn * mp3; - double b =3D y - db; - db =3D (y - b) - db; + + double b, db, t1, t2; + t1 =3D xn * pp3; + t2 =3D y - t1; + db =3D (y - t2) - t1; + + t1 =3D xn * pp4; + b =3D t2 - t1; + db +=3D (t2 - b) - t1; =20 *a =3D b; *da =3D db; - return n; } =20 -/* Compute sin (A + DA). cos can be computed by passing SHIFT_QUADRANT as - true, which results in shifting the quadrant N clockwise. */ +/* Compute sin or cos (A + DA) for the given quadrant N. */ static double __always_inline -do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant) +do_sincos (double a, double da, int4 n) { - double xx, retval, res, cor; - double eps =3D fabs (x) * 1.2e-30; + double retval, cor; =20 - int k1 =3D (n + shift_quadrant) & 3; - switch (k1) - { /* quarter of unit circle */ - case 2: - a =3D -a; - da =3D -da; - /* Fall through. */ - case 0: - xx =3D a * a; + if (n & 1) + /* Max ULP is 0.513. */ + retval =3D do_cos (a, da, &cor); + else + { + double xx =3D a * a; + /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */ if (xx < 0.01588) - { - /* Taylor series. */ - res =3D TAYLOR_SIN (xx, a, da, cor); - cor =3D 1.02 * cor + __copysign (eps, cor); - retval =3D (res =3D=3D res + cor) ? res : sloww (a, da, x, shift_quadra= nt); - } + retval =3D TAYLOR_SIN (xx, a, da, cor); else - { - res =3D do_sin (a, da, &cor); - cor =3D 1.035 * cor + __copysign (eps, cor); - retval =3D ((res =3D=3D res + cor) ? __copysign (res, a) - : sloww1 (a, da, x, shift_quadrant)); - } - break; - - case 1: - case 3: - res =3D do_cos (a, da, &cor); - cor =3D 1.025 * cor + __copysign (eps, cor); - retval =3D ((res =3D=3D res + cor) ? ((n & 2) ? -res : res) - : sloww2 (a, da, x, n)); - break; + retval =3D __copysign (do_sin (a, da, &cor), a); } =20 - return retval; + return (n & 2) ? -retval : retval; } =20 + /*******************************************************************/ /* An ultimate sin routine. Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of sin(x) */ @@ -374,13 +361,18 @@ SECTION #endif __sin (double x) { - double xx, t, cor; +#ifndef IN_SINCOS + double xx, t, a, da, cor; mynumber u; - int4 k, m; + int4 k, m, n; double retval =3D 0; =20 -#ifndef IN_SINCOS SET_RESTORE_ROUND_53BIT (FE_TONEAREST); +#else + double xx, t, cor; + mynumber u; + int4 k, m; + double retval =3D 0; #endif =20 u.x =3D x; @@ -419,9 +411,8 @@ __sin (double x) /*-------------------------- 2.426265<|x|< 105414350 ---------------------= -*/ else if (k < 0x419921FB) { - double a, da; - int4 n =3D reduce_sincos_1 (x, &a, &da); - retval =3D do_sincos_1 (a, da, x, n, false); + n =3D reduce_sincos (x, &a, &da); + retval =3D do_sincos (a, da, n); } /* else if (k < 0x419921FB ) */ =20 /* --------------------105414350 <|x| <2^1024-----------------------------= -*/ @@ -456,7 +447,11 @@ __cos (double x) { double y, xx, cor, a, da; mynumber u; +#ifndef IN_SINCOS + int4 k, m, n; +#else int4 k, m; +#endif =20 double retval =3D 0; =20 @@ -496,9 +491,8 @@ __cos (double x) #ifndef IN_SINCOS else if (k < 0x419921FB) { /* 2.426265<|x|< 105414350 */ - double a, da; - int4 n =3D reduce_sincos_1 (x, &a, &da); - retval =3D do_sincos_1 (a, da, x, n, true); + n =3D reduce_sincos (x, &a, &da); + retval =3D do_sincos (a, da, n + 1); } /* else if (k < 0x419921FB ) */ =20 /* 105414350 <|x| <2^1024 */ diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_s= incos.c index a9af8ce526bfe78c06cfafa65de0815ec69585c5..4f032d2e42593ccde22169b3747= 28386dd8fca8e 100644 --- a/sysdeps/ieee754/dbl-64/s_sincos.c +++ b/sysdeps/ieee754/dbl-64/s_sincos.c @@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx) if (k < 0x419921FB) { double a, da; - int4 n =3D reduce_sincos_1 (x, &a, &da); + int4 n =3D reduce_sincos (x, &a, &da); =20 - *sinx =3D do_sincos_1 (a, da, x, n, false); - *cosx =3D do_sincos_1 (a, da, x, n, true); + *sinx =3D do_sincos (a, da, n); + *cosx =3D do_sincos (a, da, n + 1); =20 return; }