From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 45871 invoked by alias); 21 Mar 2018 17:57:35 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Received: (qmail 45503 invoked by uid 89); 21 Mar 2018 17:57:34 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-25.3 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,RCVD_IN_DNSWL_NONE,SPF_HELO_PASS,SPF_PASS autolearn=ham version=3.3.2 spammy=Hx-languages-length:4694 X-HELO: EUR01-DB5-obe.outbound.protection.outlook.com From: Wilco Dijkstra To: "libc-alpha@sourceware.org" CC: nd Subject: [PATCH 7/7] sin/cos slow paths: refactor sincos implementation Date: Wed, 21 Mar 2018 17:57:00 -0000 Message-ID: authentication-results: spf=none (sender IP is ) smtp.mailfrom=Wilco.Dijkstra@arm.com; x-ms-publictraffictype: Email x-microsoft-exchange-diagnostics: 1;DB6PR0801MB2039;7:mnznwPlr6uLIuVJZAEBu4giADXFLm1osbPc12ZxUZ21OQSVmqshj0fVgDTooMmVVProApYIMXhZV4qoUCEjkR3JmfRqN4yKJceWVXA2JYHBiX6rBQOIp5i/e1qNerbREXExU5eRFfBNejN2QpMuyy6PpzFYV8x25ZjE+xA+RUv7cKPqLpqd1pkCGdvU19Am+ORnvq0vDMOJaqO3ll87Qn4t1wajq93vsDdqedV0xnIYrDxnNWmPdwsKpK34l0g7w x-ms-exchange-antispam-srfa-diagnostics: SOS; x-ms-office365-filtering-ht: Tenant x-ms-office365-filtering-correlation-id: 7bf121ca-3d03-4baf-ac30-08d58f5536f7 x-microsoft-antispam: UriScan:;BCL:0;PCL:0;RULEID:(7020095)(4652020)(48565401081)(5600026)(4604075)(3008032)(2017052603328)(7153060)(7193020);SRVR:DB6PR0801MB2039; x-ms-traffictypediagnostic: DB6PR0801MB2039: nodisclaimer: True x-microsoft-antispam-prvs: x-exchange-antispam-report-test: UriScan:(180628864354917); x-exchange-antispam-report-cfa-test: 
BCL:0;PCL:0;RULEID:(8211001083)(6040522)(2401047)(5005006)(8121501046)(3231221)(944501325)(52105095)(93006095)(93001095)(3002001)(10201501046)(6055026)(6041310)(20161123564045)(20161123558120)(20161123560045)(201703131423095)(201702281528075)(20161123555045)(201703061421075)(201703061406153)(20161123562045)(6072148)(201708071742011);SRVR:DB6PR0801MB2039;BCL:0;PCL:0;RULEID:;SRVR:DB6PR0801MB2039; x-forefront-prvs: 0618E4E7E1 x-forefront-antispam-report: SFV:NSPM;SFS:(10009020)(396003)(376002)(39380400002)(366004)(346002)(39860400002)(377424004)(189003)(199004)(54534003)(4326008)(9686003)(33656002)(14454004)(2501003)(3846002)(6916009)(6116002)(5250100002)(25786009)(53936002)(2906002)(55016002)(105586002)(66066001)(6436002)(5640700003)(8676002)(81166006)(81156014)(5660300001)(102836004)(7696005)(3280700002)(8936002)(3660700001)(86362001)(106356001)(2900100001)(99286004)(316002)(575784001)(97736004)(26005)(2351001)(305945005)(7736002)(72206003)(74316002)(68736007)(478600001)(59450400001)(6506007);DIR:OUT;SFP:1101;SCL:1;SRVR:DB6PR0801MB2039;H:DB6PR0801MB2053.eurprd08.prod.outlook.com;FPR:;SPF:None;PTR:InfoNoRecords;MX:1;A:1;LANG:en; received-spf: None (protection.outlook.com: arm.com does not designate permitted sender hosts) x-microsoft-antispam-message-info: /V2RcgPVRIsP/OfLJHnuFXRFT3xfPR69subgv83ZCt3b8q0QmgoiWrIVvm5iVFzonr/XBB6L2Of4B5tM6Z//KpufZ8JD4UmebWyhgFQuh0GcyXWXqQyvPT89Ul5wnxQ/wt1QCiouKc8aMnm6HwsQlpcETQ6WRNBxUWh8XiNbO7rC7+pAeMtcOgJ7fBSXPW3uR5GZQqiIf5CQI2FGHUm8HYgG89cQH1mNvBUVNMsRj0IlsWzrNRy0y8H6T7AogkjsYhQQOzTgO1hm2U/wMNe3eutPeY94CmQ81M65TQStEQkPbUZggpQvSWNf6TCa3oKwlqcSA2KRycQN2wl5UziDNQ== spamdiagnosticoutput: 1:99 spamdiagnosticmetadata: NSPM Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 X-OriginatorOrg: arm.com X-MS-Exchange-CrossTenant-Network-Message-Id: 7bf121ca-3d03-4baf-ac30-08d58f5536f7 X-MS-Exchange-CrossTenant-originalarrivaltime: 21 Mar 2018 17:57:29.9517 (UTC) 
X-MS-Exchange-CrossTenant-fromentityheader: Hosted X-MS-Exchange-CrossTenant-id: f34e5979-57d9-4aaa-ad4d-b122a662184d X-MS-Exchange-Transport-CrossTenantHeadersStamped: DB6PR0801MB2039 X-SW-Source: 2018-03/txt/msg00510.txt.bz2 Refactor the sincos implementation - rather than rely on odd partial inlining of preprocessed portions from sin and cos, explicitly write out the cases. This makes sincos much easier to maintain and provides an additional 16-20% speedup between 0 and 2^27. The overall speedup of sincos is 48% over this range. Between 0 and PI it is 66% faster. ChangeLog: 2018-03-20 Wilco Dijkstra * sysdeps/ieee754/dbl-64/s_sin.c (__sin): Cleanup ifdefs. (__cos): Likewise. * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Refactor using the same logic as sin and cos. -- diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c index e4a2153bb8d010d72d898c0d08e9253f4173f51d..2fde7713ee340aa8e3ce143db72= 54d0b57f1ab5d 100644 --- a/sysdeps/ieee754/dbl-64/s_sin.c +++ b/sysdeps/ieee754/dbl-64/s_sin.c @@ -197,27 +197,17 @@ do_sincos (double a, double da, int4 n) /* An ultimate sin routine. 
Given an IEEE double machine number x */ /* it computes the correctly rounded (to nearest) value of sin(x) */ /*******************************************************************/ -#ifdef IN_SINCOS -static double -#else +#ifndef IN_SINCOS double SECTION -#endif __sin (double x) { -#ifndef IN_SINCOS double t, a, da; mynumber u; int4 k, m, n; double retval =3D 0; =20 SET_RESTORE_ROUND_53BIT (FE_TONEAREST); -#else - double xx, t, cor; - mynumber u; - int4 k, m; - double retval =3D 0; -#endif =20 u.x =3D x; m =3D u.i[HIGH_HALF]; @@ -242,7 +232,6 @@ __sin (double x) retval =3D __copysign (do_cos (t, hp1), x); } /* else if (k < 0x400368fd) */ =20 -#ifndef IN_SINCOS /*-------------------------- 2.426265<|x|< 105414350 ---------------------= -*/ else if (k < 0x419921FB) { @@ -263,7 +252,6 @@ __sin (double x) __set_errno (EDOM); retval =3D x / x; } -#endif =20 return retval; } @@ -274,27 +262,17 @@ __sin (double x) /* it computes the correctly rounded (to nearest) value of cos(x) */ /*******************************************************************/ =20 -#ifdef IN_SINCOS -static double -#else double SECTION -#endif __cos (double x) { double y, a, da; mynumber u; -#ifndef IN_SINCOS int4 k, m, n; -#else - int4 k, m; -#endif =20 double retval =3D 0; =20 -#ifndef IN_SINCOS SET_RESTORE_ROUND_53BIT (FE_TONEAREST); -#endif =20 u.x =3D x; m =3D u.i[HIGH_HALF]; @@ -320,8 +298,6 @@ __cos (double x) retval =3D do_sin (a, da); } /* else if (k < 0x400368fd) */ =20 - -#ifndef IN_SINCOS else if (k < 0x419921FB) { /* 2.426265<|x|< 105414350 */ n =3D reduce_sincos (x, &a, &da); @@ -341,7 +317,6 @@ __cos (double x) __set_errno (EDOM); retval =3D x / x; /* |x| > 2^1024 */ } -#endif =20 return retval; } @@ -352,3 +327,5 @@ libm_alias_double (__cos, cos) #ifndef __sin libm_alias_double (__sin, sin) #endif + +#endif diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_s= incos.c index 4335ecbba3c9894e61c087ac970b392fa73abfab..c7460371e44a02c99522f265efa= 7e5e66a121b1e 
100644 --- a/sysdeps/ieee754/dbl-64/s_sincos.c +++ b/sysdeps/ieee754/dbl-64/s_sincos.c @@ -23,9 +23,7 @@ #include #include =20 -#define __sin __sin_local -#define __cos __cos_local -#define IN_SINCOS 1 +#define IN_SINCOS #include "s_sin.c" =20 void @@ -37,31 +35,63 @@ __sincos (double x, double *sinx, double *cosx) SET_RESTORE_ROUND_53BIT (FE_TONEAREST); =20 u.x =3D x; - k =3D 0x7fffffff & u.i[HIGH_HALF]; + k =3D u.i[HIGH_HALF] & 0x7fffffff; =20 if (k < 0x400368fd) { - *sinx =3D __sin_local (x); - *cosx =3D __cos_local (x); - return; - } - if (k < 0x419921FB) - { - double a, da; - int4 n =3D reduce_sincos (x, &a, &da); - - *sinx =3D do_sincos (a, da, n); - *cosx =3D do_sincos (a, da, n + 1); + double a, da, y; + /* |x| < 2^-27 =3D> cos (x) =3D 1, sin (x) =3D x. */ + if (k < 0x3e400000) + { + if (k < 0x3e500000) + math_check_force_underflow (x); + *sinx =3D x; + *cosx =3D 1.0; + return; + } + /* |x| < 0.855469. */ + else if (k < 0x3feb6000) + { + *sinx =3D do_sin (x, 0); + *cosx =3D do_cos (x, 0); + return; + } =20 + /* |x| < 2.426265. */ + y =3D hp0 - fabs (x); + a =3D y + hp1; + da =3D (y - a) + hp1; + *sinx =3D __copysign (do_cos (a, da), x); + *cosx =3D do_sin (a, da); return; } + /* |x| < 2^1024. */ if (k < 0x7ff00000) { - double a, da; - int4 n =3D __branred (x, &a, &da); + double a, da, xx; + unsigned int n; =20 - *sinx =3D do_sincos (a, da, n); - *cosx =3D do_sincos (a, da, n + 1); + /* If |x| < 105414350 use simple range reduction. */ + n =3D k < 0x419921FB ? reduce_sincos (x, &a, &da) : __branred (x, &a= , &da); + n =3D n & 3; + + if (n =3D=3D 1 || n =3D=3D 2) + { + a =3D -a; + da =3D -da; + } + + if (n & 1) + { + double *temp =3D cosx; + cosx =3D sinx; + sinx =3D temp; + } + + *sinx =3D do_sin (a, da); + xx =3D do_cos (a, da); + *cosx =3D (n & 2) ? -xx : xx; + return; } =20 if (isinf (x))