From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: libc-alpha@sourceware.org
Cc: Joseph Myers <josmyers@redhat.com>,
Paul Zimmermann <Paul.Zimmermann@inria.fr>,
Alexei Sibidanov <sibid@uvic.ca>, DJ Delorie <dj@redhat.com>
Subject: [PATCH 1/3] math: Consolidate acosf and asinf internal tables
Date: Mon, 3 Feb 2025 18:24:17 -0300 [thread overview]
Message-ID: <20250203212546.911043-2-adhemerval.zanella@linaro.org> (raw)
In-Reply-To: <20250203212546.911043-1-adhemerval.zanella@linaro.org>
The libm size improvement built with "--enable-stack-protector=strong
--enable-bind-now=yes --enable-fortify-source=2":
From:
text data bss dec hex filename
587896 860 12 588768 8fbe0 aarch64-linux-gnu-master/math/libm.so
963175 1068 12 964255 eb69f x86_64-linux-gnu-master/math/libm.so
1191542 5544 368 1197454 12458e powerpc64le-linux-gnu-master/math/libm.so
To:
text data bss dec hex filename
587304 860 12 588176 8f990 aarch64-linux-gnu/math/libm.so
962855 1068 12 963935 eb55f x86_64-linux-gnu/math/libm.so
1191222 5544 368 1197134 12444e powerpc64le-linux-gnu/math/libm.so
There are no code changes for x86_64 and powerpc64le, but on aarch64
with gcc-14 I see slightly better code generation due to the usage of
ldq for floating point constant loading.
---
math/Makefile | 1 +
sysdeps/ieee754/flt-32/e_acosf.c | 36 ++++------------
sysdeps/ieee754/flt-32/e_asincosf_data.c | 53 ++++++++++++++++++++++++
sysdeps/ieee754/flt-32/e_asincosf_data.h | 37 +++++++++++++++++
sysdeps/ieee754/flt-32/e_asinf.c | 38 ++++-------------
5 files changed, 106 insertions(+), 59 deletions(-)
create mode 100644 sysdeps/ieee754/flt-32/e_asincosf_data.c
create mode 100644 sysdeps/ieee754/flt-32/e_asincosf_data.h
diff --git a/math/Makefile b/math/Makefile
index f24cee5c39..a6da38a135 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -362,6 +362,7 @@ type-double-routines := \
# float support
type-float-suffix := f
type-float-routines := \
+ e_asincosf_data \
e_exp2f_data \
e_log2f_data \
e_logf_data \
diff --git a/sysdeps/ieee754/flt-32/e_acosf.c b/sysdeps/ieee754/flt-32/e_acosf.c
index a5a4de4fc2..90196ac61e 100644
--- a/sysdeps/ieee754/flt-32/e_acosf.c
+++ b/sysdeps/ieee754/flt-32/e_acosf.c
@@ -30,6 +30,7 @@ SOFTWARE.
#include <libm-alias-finite.h>
#include <math-barriers.h>
#include "math_config.h"
+#include "e_asincosf_data.h"
static __attribute__ ((noinline)) float
as_special (float x)
@@ -77,15 +78,6 @@ __ieee754_acosf (float x)
return as_special (x);
if (__glibc_likely (ax < 0x7ec2a1dcu)) /* |x| < 0x1.c2a1dcp-1 */
{
- static const double b[] =
- {
- 0x1.fffffffd9ccb8p-1, 0x1.5555c94838007p-3, 0x1.32ded4b7c20fap-4,
- 0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6, 0x1.56fbb04998344p-1,
- -0x1.403d8e4c49f52p+2, 0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6,
- 0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9, 0x1.a558a59cc19d3p+9,
- -0x1.aca4b6a529ffp+9, 0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7,
- 0x1.5c2018c0c0105p+5
- };
/* Avoid spurious underflow exception. */
if (__glibc_unlikely (ax <= 0x40000000u)) /* |x| < 2^-63 */
/* GCC <= 11 wrongly assumes the rounding is to nearest and
@@ -97,11 +89,11 @@ __ieee754_acosf (float x)
double z4 = z2 * z2;
double z8 = z4 * z4;
double z16 = z8 * z8;
- r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3]))
- + z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7])))
- + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11]))
+ r = z * ((((B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3]))
+ + z8 * ((B[4] + z2 * B[5]) + z4 * (B[6] + z2 * B[7])))
+ + z16 * (((B[8] + z2 * B[9]) + z4 * (B[10] + z2 * B[11]))
+ z8
- * ((b[12] + z2 * b[13])+ z4 * (b[14] + z2 * b[15]))));
+ * ((B[12] + z2 * B[13])+ z4 * (B[14] + z2 * B[15]))));
float ub = 0x1.921fb54574191p+0 - r;
float lb = 0x1.921fb543118ap+0 - r;
if (ub == lb)
@@ -110,33 +102,19 @@ __ieee754_acosf (float x)
/* accurate path */
if (ax < (0x7eu << 24))
{
- static const double c[] =
- {
- 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5,
- 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6,
- 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8,
- 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5
- };
if (t == 0x328885a3u)
return 0x1.921fb6p+0f + 0x1p-25;
if (t == 0x39826222u)
return 0x1.920f6ap+0f + 0x1p-25;
double x2 = xs * xs;
- r = (pi2 - xs) - (xs * x2) * poly12 (x2, c);
+ r = (pi2 - xs) - (xs * x2) * poly12 (x2, C0);
}
else
{
- static const double c[] =
- {
- 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6,
- 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10,
- 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15,
- 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16
- };
double bx = fabs (xs);
double z = 1.0 - bx;
double s = copysign (sqrt (z), xs);
- r = o[t >> 31] + s * poly12 (z, c);
+ r = o[t >> 31] + s * poly12 (z, C1);
}
return r;
}
diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.c b/sysdeps/ieee754/flt-32/e_asincosf_data.c
new file mode 100644
index 0000000000..2ffc2c28f3
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/e_asincosf_data.c
@@ -0,0 +1,53 @@
+/* Common data for asinf/acosf implementations.
+
+Copyright (c) 2022-2025 Alexei Sibidanov.
+
+The original version of this file was copied from the CORE-MATH
+project (src/binary32/sinpi/sinpif.c, revision f786e13).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "e_asincosf_data.h"
+
+const double __asincosf_b[] =
+ {
+ 0x1.fffffffd9ccb8p-1, 0x1.5555c94838007p-3, 0x1.32ded4b7c20fap-4,
+ 0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6, 0x1.56fbb04998344p-1,
+ -0x1.403d8e4c49f52p+2, 0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6,
+ 0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9, 0x1.a558a59cc19d3p+9,
+ -0x1.aca4b6a529ffp+9, 0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7,
+ 0x1.5c2018c0c0105p+5
+ };
+
+const double __asincosf_c0[] =
+ {
+ 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5,
+ 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6,
+ 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8,
+ 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5
+ };
+
+const double __asincosf_c1[] =
+ {
+ 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6,
+ 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10,
+ 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15,
+ 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16
+ };
diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.h b/sysdeps/ieee754/flt-32/e_asincosf_data.h
new file mode 100644
index 0000000000..7dffb00dbe
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/e_asincosf_data.h
@@ -0,0 +1,37 @@
+/* Common data for asinf/acosf implementations.
+
+Copyright (c) 2022-2025 Alexei Sibidanov.
+
+The original version of this file was copied from the CORE-MATH
+project (src/binary32/sinpi/sinpif.c, revision f786e13).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#ifndef _ASINCOSF_DATAH
+#define _ASINCOSF_DATAH
+
+extern const double __asincosf_b[] attribute_hidden;
+extern const double __asincosf_c0[] attribute_hidden;
+extern const double __asincosf_c1[] attribute_hidden;
+#define B __asincosf_b
+#define C0 __asincosf_c0
+#define C1 __asincosf_c1
+
+#endif
diff --git a/sysdeps/ieee754/flt-32/e_asinf.c b/sysdeps/ieee754/flt-32/e_asinf.c
index 944bf6f5ce..854466361e 100644
--- a/sysdeps/ieee754/flt-32/e_asinf.c
+++ b/sysdeps/ieee754/flt-32/e_asinf.c
@@ -28,6 +28,7 @@ SOFTWARE.
#include <errno.h>
#include <libm-alias-finite.h>
#include "math_config.h"
+#include "e_asincosf_data.h"
static __attribute__ ((noinline)) float
as_special (float x)
@@ -69,6 +70,8 @@ __ieee754_asinf (float x)
{
if (__glibc_unlikely (ax < 115 << 24))
return fmaf (x, 0x1p-25, x);
+ /* Keep this table local: its coefficients differ from __asincosf_b,
+ the table used by the acosf fast path. */
static const double b[] =
{
0x1.0000000000005p+0, 0x1.55557aeca105dp-3, 0x1.3314ec3db7d12p-4,
@@ -95,16 +98,9 @@ __ieee754_asinf (float x)
}
if (ax < (0x7eu << 24))
{
- static const double c[] =
- {
- 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5,
- 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6,
- 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8,
- 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5
- };
double z = xs;
double z2 = z * z;
- double c0 = poly12 (z2, c);
+ double c0 = poly12 (z2, C0);
r = z + (z * z2) * c0;
}
else
@@ -116,14 +112,7 @@ __ieee754_asinf (float x)
double bx = fabs (xs);
double z = 1.0 - bx;
double s = sqrt (z);
- static const double c[] =
- {
- 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6,
- 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10,
- 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15,
- 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16
- };
- r = pi2 - s * poly12 (z, c);
+ r = pi2 - s * poly12 (z, C1);
r = copysign (r, xs);
}
return r;
--
2.43.0
next prev parent reply other threads:[~2025-02-03 21:25 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-03 21:24 [PATCH 0/3] Optimize CORE-MATH " Adhemerval Zanella
2025-02-03 21:24 ` Adhemerval Zanella [this message]
2025-02-10 20:55 ` [PATCH 1/3] math: Consolidate acosf and asinf " Andreas K. Huettel
2025-02-11 21:02 ` Adhemerval Zanella Netto
2025-02-03 21:24 ` [PATCH 2/3] math: Consolidate acoshf and asinhf " Adhemerval Zanella
2025-02-10 21:24 ` Andreas K. Huettel
2025-02-03 21:24 ` [PATCH 3/3] math: Consolidate coshf and sinhf " Adhemerval Zanella
2025-02-11 19:53 ` Andreas K. Huettel
2025-02-04 6:32 ` [PATCH 0/3] Optimize CORE-MATH " Paul Zimmermann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250203212546.911043-2-adhemerval.zanella@linaro.org \
--to=adhemerval.zanella@linaro.org \
--cc=Paul.Zimmermann@inria.fr \
--cc=dj@redhat.com \
--cc=josmyers@redhat.com \
--cc=libc-alpha@sourceware.org \
--cc=sibid@uvic.ca \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).