From: "Stefan Kanthak" <stefan.kanthak@nexgo.de>
To: "Szabolcs Nagy" <szabolcs.nagy@arm.com>,
"Adhemerval Zanella" <adhemerval.zanella@linaro.org>
Cc: <libc-help@sourceware.org>, <libc-alpha@sourceware.org>
Subject: [PATCH] Re: nextafter() about an order of magnitude slower than trivial implementation
Date: Thu, 19 Aug 2021 19:57:28 +0200 [thread overview]
Message-ID: <F0A4F1FEA8CE4002A11FD63115925377@H270> (raw)
In-Reply-To: <741b2c48-a754-51c5-cb72-a2f97795e30f@linaro.org>
[-- Attachment #1: Type: text/plain, Size: 3645 bytes --]
Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote:
> On 18/08/2021 14:11, Stefan Kanthak wrote:
>> Szabolcs Nagy <szabolcs.nagy@arm.com> wrote:
>>
>>> The 08/16/2021 18:03, Stefan Kanthak wrote:
>>>> Testing and benchmarking an exponential function that consumes
>>>> about 10ns/call on an AMD EPYC 7262, I noticed that nextafter()
>>>> itself is DEAD SLOW: it consumes about 8.5ns/call!
>>>>
>>>> The following (quick&dirty) implementation consumes 0.7ns/call,
>>>> i.e. is about an order of magnitude faster:
>>>
>>> correctly measuring latency on a modern x86_64 core:
>>>
>>> musl: 3.16 ns
>>> glibc: 5.68 ns
>>> your: 5.72 ns
>
> Thanks for bring this up, if you want to contribute a patch
See attachment.
For completeness sake and to ease an initial review, I post the full
source here too: without ALTERNATE defined, it uses integer arithmetic
to the extent possible, like the original.
Choose whatever you like best or gives better throughput/latency.
Stefan
JFTR: .../sysdeps/ieee754/flt-32/s_nextafterf.c as well as
.../sysdeps/i386/fpu/s_nextafterl.c will need the same changes.
--- .../math/s_nextafter.c ---
/* IEEE functions
* nextafter(x,y)
* return the next machine floating-point number of x in the
* direction toward y.
* Special cases:
*/
/* Ugly hack so that the aliasing works. */
#define __nexttoward __internal___nexttoward
#define nexttoward __internal_nexttoward
#include <errno.h>
#include <math.h>
#include <math-barriers.h>
#include <math_private.h>
#include <float.h>
#include <libm-alias-double.h>
double __nextafter(double x, double y)
{
union {
double d;
int64_t s;
uint64_t u;
} xx = {x}, yy = {y};
uint64_t ax = xx.u + xx.u; /* |x|<<1 */
uint64_t ay = yy.u + yy.u; /* |y|<<1 */
if(ax > 0x7ffull << 53) /* x is nan */
return x;
if(ay > 0x7ffull << 53) /* y is nan */
return y;
if(x == y) return y; /* x=y, return y */
if(ax == 0ull) { /* x == 0 */
xx.u = yy.s < 0 ? 0x8000000000000001ull : 1ull;
xx.d = math_opt_barrier (xx.d);
xx.d += 0.0;
math_force_eval (xx.d); /* raise underflow flag */
return xx.d; /* return +-minsubnormal */
}
#ifdef ALTERNATE
if((x < y) == (x < 0.0))
--xx.u;
else
++xx.u;
#else
if(xx.s < 0) { /* x < 0 */
if(yy.s < 0 && xx.s < yy.s)
++xx.s; /* x < 0, x > y, x += ulp */
else
--xx.s; /* x < 0, x < y, x -= ulp */
} else { /* x > 0 */
if(xx.s > yy.s)
--xx.s; /* x > 0, x > y, x -= ulp */
else
++xx.s; /* x > 0, x < y, x += ulp */
}
#endif
ax = xx.u + xx.u; /* |x|<<1 */
if((ax >= 0x7ffull << 53) /* overflow */
|| (ax < 0x001ull << 53)) { /* underflow */
xx.d = math_opt_barrier (xx.d);
xx.d += 0.0;
math_force_eval (xx.d); /* raise overflow/underflow flag */
__set_errno (ERANGE);
}
return xx.d;
}
libm_alias_double (__nextafter, nextafter)
#ifdef NO_LONG_DOUBLE
strong_alias (__nextafter, __nexttowardl)
weak_alias (__nexttowardl, nexttowardl)
#undef __nexttoward
strong_alias (__nextafter, __nexttoward)
#undef nexttoward
weak_alias (__nextafter, nexttoward)
#endif
--- EOF ---
[-- Attachment #2: glibc.patch --]
[-- Type: application/octet-stream, Size: 3533 bytes --]
--- -/math/s_nextafter.c
+++ +/math/s_nextafter.c
@@ -1,19 +1,3 @@
-/* @(#)s_nextafter.c 5.1 93/09/24 */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_nextafter.c,v 1.8 1995/05/10 20:47:58 jtc Exp $";
-#endif
-
/* IEEE functions
* nextafter(x,y)
* return the next machine floating-point number of x in the
@@ -34,55 +18,53 @@
double __nextafter(double x, double y)
{
- int32_t hx,hy,ix,iy;
- uint32_t lx,ly;
+ union {
+ double d;
+ int64_t s;
+ uint64_t u;
+ } xx = {x}, yy = {y};
- EXTRACT_WORDS(hx,lx,x);
- EXTRACT_WORDS(hy,ly,y);
- ix = hx&0x7fffffff; /* |x| */
- iy = hy&0x7fffffff; /* |y| */
+ uint64_t ax = xx.u + xx.u; /* |x|<<1 */
+ uint64_t ay = yy.u + yy.u; /* |y|<<1 */
- if(((ix>=0x7ff00000)&&((ix-0x7ff00000)|lx)!=0) || /* x is nan */
- ((iy>=0x7ff00000)&&((iy-0x7ff00000)|ly)!=0)) /* y is nan */
- return x+y;
- if(x==y) return y; /* x=y, return y */
- if((ix|lx)==0) { /* x == 0 */
- double u;
- INSERT_WORDS(x,hy&0x80000000,1); /* return +-minsubnormal */
- u = math_opt_barrier (x);
- u = u*u;
- math_force_eval (u); /* raise underflow flag */
+ if(ax > 0x7ffull << 53) /* x is nan */
return x;
+ if(ay > 0x7ffull << 53) /* y is nan */
+ return y;
+ if(x == y) return y; /* x=y, return y */
+ if(ax == 0ull) { /* x == 0 */
+ xx.u = yy.s < 0 ? 0x8000000000000001ull : 1ull;
+ xx.d = math_opt_barrier (xx.d);
+ xx.d += 0.0;
+ math_force_eval (xx.d); /* raise underflow flag */
+ return xx.d; /* return +-minsubnormal */
}
- if(hx>=0) { /* x > 0 */
- if(hx>hy||((hx==hy)&&(lx>ly))) { /* x > y, x -= ulp */
- if(lx==0) hx -= 1;
- lx -= 1;
- } else { /* x < y, x += ulp */
- lx += 1;
- if(lx==0) hx += 1;
- }
- } else { /* x < 0 */
- if(hy>=0||hx>hy||((hx==hy)&&(lx>ly))){/* x < y, x -= ulp */
- if(lx==0) hx -= 1;
- lx -= 1;
- } else { /* x > y, x += ulp */
- lx += 1;
- if(lx==0) hx += 1;
- }
+#ifdef ALTERNATE
+ if ((x < y) == (x < 0.0))
+ --xx.u;
+ else
+ ++xx.u;
+#else
+ if(xx.s < 0) { /* x < 0 */
+ if(yy.s < 0 && xx.s < yy.s)
+ ++xx.s; /* x < 0, x > y, x += ulp */
+ else
+ --xx.s; /* x < 0, x < y, x -= ulp */
+ } else { /* x > 0 */
+ if(xx.s > yy.s)
+ --xx.s; /* x > 0, x > y, x -= ulp */
+ else
+ ++xx.s; /* x > 0, x < y, x += ulp */
}
- hy = hx&0x7ff00000;
- if(hy>=0x7ff00000) {
- double u = x+x; /* overflow */
- math_force_eval (u);
+#endif
+ ax = xx.u + xx.u; /* |x|<<1 */
+ if((ax >= 0x7ffull << 53) /* overflow */
+ || (ax < 0x001ull << 53)) { /* underflow */
+ xx.d = math_opt_barrier (xx.d);
+ xx.d += 0.0;
+ math_force_eval (xx.d); /* raise overflow/underflow flag */
__set_errno (ERANGE);
}
- if(hy<0x00100000) {
- double u = x*x; /* underflow */
- math_force_eval (u); /* raise underflow flag */
- __set_errno (ERANGE);
- }
- INSERT_WORDS(x,hx,lx);
return xx.d;
}
libm_alias_double (__nextafter, nextafter)
next prev parent reply other threads:[~2021-08-19 18:00 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-16 16:03 Stefan Kanthak
2021-08-18 12:51 ` Szabolcs Nagy
2021-08-18 17:11 ` Stefan Kanthak
2021-08-19 11:20 ` Adhemerval Zanella
2021-08-19 17:57 ` Stefan Kanthak [this message]
2021-08-20 9:52 ` [PATCH/2nd version] " Stefan Kanthak
2021-08-20 16:55 ` Joseph Myers
2021-08-20 20:19 ` Stefan Kanthak
2021-08-20 21:03 ` Joseph Myers
2021-08-23 12:50 ` Adhemerval Zanella
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=F0A4F1FEA8CE4002A11FD63115925377@H270 \
--to=stefan.kanthak@nexgo.de \
--cc=adhemerval.zanella@linaro.org \
--cc=libc-alpha@sourceware.org \
--cc=libc-help@sourceware.org \
--cc=szabolcs.nagy@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).