From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: libc-alpha@sourceware.org
Subject: [PATCH 8/9] i386: Move hypot implementation to C
Date: Wed, 6 Oct 2021 15:05:56 -0300 [thread overview]
Message-ID: <20211006180557.933826-9-adhemerval.zanella@linaro.org> (raw)
In-Reply-To: <20211006180557.933826-1-adhemerval.zanella@linaro.org>
The generic hypotf is slightly slower, mostly due to the tricks the assembly
uses to optimize the isinf/isnan/issignaling checks. Results on a Ryzen 5900X
with gcc 10.3.1:
master:
"hypotf": {
"workload-random": {
"duration": 3.76493e+09,
"iterations": 6.8e+07,
"reciprocal-throughput": 38.4243,
"latency": 72.309,
"max-throughput": 2.60252e+07,
"min-throughput": 1.38295e+07
}
}
patched:
"hypotf": {
"workload-random": {
"duration": 3.78098e+09,
"iterations": 6.8e+07,
"reciprocal-throughput": 28.9206,
"latency": 82.2848,
"max-throughput": 3.45774e+07,
"min-throughput": 1.21529e+07
}
}
The generic hypot is much slower, since the optimized implementation
uses the i386 default excess precision to issue the operation
directly. A similar implementation is provided instead of using
the generic implementation:
master:
"hypot": {
"workload-random": {
"duration": 3.7452e+09,
"iterations": 6.6e+07,
"reciprocal-throughput": 40.3203,
"latency": 73.1707,
"max-throughput": 2.48014e+07,
"min-throughput": 1.36667e+07
}
}
patched:
./benchtests/bench-hypot
"hypot": {
"workload-random": {
"duration": 3.72606e+09,
"iterations": 7.6e+07,
"reciprocal-throughput": 25.9437,
"latency": 72.1104,
"max-throughput": 3.8545e+07,
"min-throughput": 1.38676e+07
}
}
Checked on i686-linux-gnu.
---
sysdeps/i386/fpu/e_hypot.S | 75 -------------------------------------
sysdeps/i386/fpu/e_hypot.c | 42 +++++++++++++++++++++
sysdeps/i386/fpu/e_hypotf.S | 64 -------------------------------
3 files changed, 42 insertions(+), 139 deletions(-)
delete mode 100644 sysdeps/i386/fpu/e_hypot.S
create mode 100644 sysdeps/i386/fpu/e_hypot.c
delete mode 100644 sysdeps/i386/fpu/e_hypotf.S
diff --git a/sysdeps/i386/fpu/e_hypot.S b/sysdeps/i386/fpu/e_hypot.S
deleted file mode 100644
index f2c956b77a..0000000000
--- a/sysdeps/i386/fpu/e_hypot.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Compute the hypothenuse of X and Y.
- Copyright (C) 1998-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-DEFINE_DBL_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%edx)
-#else
-# define MO(op) op
-#endif
-
- .text
-ENTRY(__ieee754_hypot)
-#ifdef PIC
- LOAD_PIC_REG (dx)
-#endif
- fldl 4(%esp) // x
- fxam
- fnstsw
- fldl 12(%esp) // y : x
- movb %ah, %ch
- fxam
- fnstsw
- movb %ah, %al
- orb %ch, %ah
- sahf
- jc 1f
- fmul %st(0) // y * y : x
- fxch // x : y * y
- fmul %st(0) // x * x : y * y
- faddp // x * x + y * y
- fsqrt
- DBL_NARROW_EVAL_UFLOW_NONNEG
-2: ret
-
- // We have to test whether any of the parameters is Inf.
- // In this case the result is infinity.
-1: andb $0x45, %al
- cmpb $5, %al
- je 3f // jump if y is Inf
- andb $0x45, %ch
- cmpb $5, %ch
- jne 4f // jump if x is not Inf
- fxch
-3: fstp %st(1)
- fabs
- jmp 2b
-
-4: testb $1, %al
- jnz 5f // y is NaN
- fxch
-5: fstp %st(1)
- jmp 2b
-
-END(__ieee754_hypot)
-libm_alias_finite (__ieee754_hypot, __hypot)
diff --git a/sysdeps/i386/fpu/e_hypot.c b/sysdeps/i386/fpu/e_hypot.c
new file mode 100644
index 0000000000..4920a8cb49
--- /dev/null
+++ b/sysdeps/i386/fpu/e_hypot.c
@@ -0,0 +1,42 @@
+/* Euclidean distance function. Double/Binary64 i386 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include <math_private.h>
+#include <math-underflow.h>
+#include <math-narrow-eval.h>
+#include <libm-alias-finite.h>
+#include <math_config.h>
+
+/* Return sqrt (x*x + y*y). The i386 default excess precision is used so the
+ multiplications and the sqrt are carried out in the 80-bit FP type.
+ NOTE(review): presumably this requires x87 (not SSE) evaluation -- confirm. */
+double
+__ieee754_hypot (double x, double y)
+{
+ if ((isinf (x) || isinf (y)) /* At least one operand is +-Inf... */
+ && !issignaling (x) && !issignaling (y)) /* ...and neither is an sNaN. */
+ return INFINITY;
+ if (isnan (x) || isnan (y)) /* Remaining cases with a NaN operand. */
+ return x + y; /* A sum with a NaN operand is itself a NaN. */
+
+ double r = math_narrow_eval (sqrt (x * x + y * y)); /* Presumably rounds the wide result to double -- confirm. */
+ math_check_force_underflow_nonneg (r); /* Presumably forces the underflow exception for tiny r -- confirm. */
+ return r;
+}
+libm_alias_finite (__ieee754_hypot, __hypot)
diff --git a/sysdeps/i386/fpu/e_hypotf.S b/sysdeps/i386/fpu/e_hypotf.S
deleted file mode 100644
index cec5d15403..0000000000
--- a/sysdeps/i386/fpu/e_hypotf.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Compute the hypothenuse of X and Y.
- Copyright (C) 1998-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
- .text
-ENTRY(__ieee754_hypotf)
- flds 4(%esp) // x
- fxam
- fnstsw
- flds 8(%esp) // y : x
- movb %ah, %ch
- fxam
- fnstsw
- movb %ah, %al
- orb %ch, %ah
- sahf
- jc 1f
- fmul %st(0) // y * y : x
- fxch // x : y * y
- fmul %st(0) // x * x : y * y
- faddp // x * x + y * y
- fsqrt
- FLT_NARROW_EVAL
-2: ret
-
- // We have to test whether any of the parameters is Inf.
- // In this case the result is infinity.
-1: andb $0x45, %al
- cmpb $5, %al
- je 3f // jump if y is Inf
- andb $0x45, %ch
- cmpb $5, %ch
- jne 4f // jump if x is not Inf
- fxch
-3: fstp %st(1)
- fabs
- jmp 2b
-
-4: testb $1, %al
- jnz 5f // y is NaN
- fxch
-5: fstp %st(1)
- jmp 2b
-
-END(__ieee754_hypotf)
-libm_alias_finite (__ieee754_hypotf, __hypotf)
--
2.30.2
next prev parent reply other threads:[~2021-10-06 18:15 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-06 18:05 [PATCH 0/9] Improve hypot() Adhemerval Zanella
2021-10-06 18:05 ` [PATCH 1/9] benchtests: Make hypot input random Adhemerval Zanella
2021-10-07 9:48 ` Paul Zimmermann
2021-10-07 11:20 ` Adhemerval Zanella
2021-10-07 17:08 ` Joseph Myers
2021-10-06 18:05 ` [PATCH 2/9] benchtests: Add hypotf Adhemerval Zanella
2021-10-07 10:40 ` Paul Zimmermann
2021-10-06 18:05 ` [PATCH 3/9] math: Simplify hypotf implementation Adhemerval Zanella
2021-10-07 9:44 ` Paul Zimmermann
2021-10-07 11:37 ` Adhemerval Zanella
2021-10-07 12:08 ` Paul Zimmermann
2021-10-06 18:05 ` [PATCH 4/9] math: Use an improved algorithm for hypot (dbl-64) Adhemerval Zanella
2021-10-06 18:05 ` [PATCH 5/9] math: Use an improved algorithm for hypotl (ldbl-96) Adhemerval Zanella
2021-10-06 18:05 ` [PATCH 6/9] math: Use an improved algorithm for hypotl (ldbl-128) Adhemerval Zanella
2021-10-06 18:05 ` [PATCH 7/9] math: Remove powerpc e_hypot Adhemerval Zanella
2021-10-06 19:43 ` Paul E Murphy
2021-10-06 19:46 ` Adhemerval Zanella
2021-10-06 19:51 ` Paul E Murphy
2021-10-12 12:17 ` Paul A. Clarke
2021-10-06 18:05 ` Adhemerval Zanella [this message]
2021-10-06 18:37 ` [PATCH 8/9] i386: Move hypot implementation to C Joseph Myers
2021-10-06 19:19 ` Adhemerval Zanella
2021-10-06 19:20 ` Adhemerval Zanella
2021-10-06 19:52 ` Joseph Myers
2021-10-07 8:28 ` Paul Zimmermann
2021-10-07 17:05 ` Joseph Myers
2021-10-06 18:05 ` [PATCH 9/9] math: Remove the error handling wrapper from hypot and hypotf Adhemerval Zanella
2021-10-06 18:38 ` Joseph Myers
2021-10-06 19:23 ` Adhemerval Zanella
2021-10-07 11:03 ` [PATCH 0/9] Improve hypot() Paul Zimmermann
2021-10-07 12:39 ` Adhemerval Zanella
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211006180557.933826-9-adhemerval.zanella@linaro.org \
--to=adhemerval.zanella@linaro.org \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).