public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: libc-alpha@sourceware.org
Cc: Joseph Myers <josmyers@redhat.com>,
	Paul Zimmermann <Paul.Zimmermann@inria.fr>,
	Alexei Sibidanov <sibid@uvic.ca>, DJ Delorie <dj@redhat.com>
Subject: [PATCH 1/3] math: Consolidate acosf and asinf internal tables
Date: Mon,  3 Feb 2025 18:24:17 -0300	[thread overview]
Message-ID: <20250203212546.911043-2-adhemerval.zanella@linaro.org> (raw)
In-Reply-To: <20250203212546.911043-1-adhemerval.zanella@linaro.org>

The libm size improvement built with "--enable-stack-protector=strong
--enable-bind-now=yes --enable-fortify-source=2":

From:

   text	   data	    bss	    dec	    hex	filename
 587896     860      12  588768   8fbe0 aarch64-linux-gnu-master/math/libm.so
 963175    1068      12  964255   eb69f x86_64-linux-gnu-master/math/libm.so
1191542    5544     368 1197454  12458e powerpc64le-linux-gnu-master/math/libm.so

To:

   text	   data	    bss	    dec	    hex	filename
 587304     860      12  588176   8f990 aarch64-linux-gnu/math/libm.so
 962855    1068      12  963935   eb55f x86_64-linux-gnu/math/libm.so
1191222    5544     368 1197134  12444e powerpc64le-linux-gnu/math/libm.so

There are no code changes for x86_64 and powerpc64le, but on aarch64
with gcc-14 I see slightly better code generation due to the usage of
ldq for floating point constant loading.
---
 math/Makefile                            |  1 +
 sysdeps/ieee754/flt-32/e_acosf.c         | 36 ++++------------
 sysdeps/ieee754/flt-32/e_asincosf_data.c | 53 ++++++++++++++++++++++++
 sysdeps/ieee754/flt-32/e_asincosf_data.h | 37 +++++++++++++++++
 sysdeps/ieee754/flt-32/e_asinf.c         | 38 ++++-------------
 5 files changed, 106 insertions(+), 59 deletions(-)
 create mode 100644 sysdeps/ieee754/flt-32/e_asincosf_data.c
 create mode 100644 sysdeps/ieee754/flt-32/e_asincosf_data.h

diff --git a/math/Makefile b/math/Makefile
index f24cee5c39..a6da38a135 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -362,6 +362,7 @@ type-double-routines := \
 # float support
 type-float-suffix := f
 type-float-routines := \
+  e_asincosf_data \
   e_exp2f_data \
   e_log2f_data \
   e_logf_data \
diff --git a/sysdeps/ieee754/flt-32/e_acosf.c b/sysdeps/ieee754/flt-32/e_acosf.c
index a5a4de4fc2..90196ac61e 100644
--- a/sysdeps/ieee754/flt-32/e_acosf.c
+++ b/sysdeps/ieee754/flt-32/e_acosf.c
@@ -30,6 +30,7 @@ SOFTWARE.
 #include <libm-alias-finite.h>
 #include <math-barriers.h>
 #include "math_config.h"
+#include "e_asincosf_data.h"
 
 static __attribute__ ((noinline)) float
 as_special (float x)
@@ -77,15 +78,6 @@ __ieee754_acosf (float x)
     return as_special (x);
   if (__glibc_likely (ax < 0x7ec2a1dcu)) /* |x| < 0x1.c2a1dcp-1 */
     {
-      static const double b[] =
-	{
-	   0x1.fffffffd9ccb8p-1,  0x1.5555c94838007p-3,  0x1.32ded4b7c20fap-4,
-	   0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6,  0x1.56fbb04998344p-1,
-	  -0x1.403d8e4c49f52p+2,  0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6,
-	   0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9,  0x1.a558a59cc19d3p+9,
-	  -0x1.aca4b6a529ffp+9,   0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7,
-	   0x1.5c2018c0c0105p+5
-	};
       /* Avoid spurious underflow exception.  */
       if (__glibc_unlikely (ax <= 0x40000000u)) /* |x| < 2^-63 */
 	/* GCC <= 11 wrongly assumes the rounding is to nearest and
@@ -97,11 +89,11 @@ __ieee754_acosf (float x)
       double z4 = z2 * z2;
       double z8 = z4 * z4;
       double z16 = z8 * z8;
-      r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3]))
-		+ z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7])))
-	       + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11]))
+      r = z * ((((B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3]))
+		+ z8 * ((B[4] + z2 * B[5]) + z4 * (B[6] + z2 * B[7])))
+	       + z16 * (((B[8] + z2 * B[9]) + z4 * (B[10] + z2 * B[11]))
 			+ z8
-			* ((b[12] + z2 * b[13])+ z4 * (b[14] + z2 * b[15]))));
+			* ((B[12] + z2 * B[13])+ z4 * (B[14] + z2 * B[15]))));
       float ub = 0x1.921fb54574191p+0 - r;
       float lb = 0x1.921fb543118ap+0 - r;
       if (ub == lb)
@@ -110,33 +102,19 @@ __ieee754_acosf (float x)
   /* accurate path  */
   if (ax < (0x7eu << 24))
     {
-      static const double c[] =
-	{
-	  0x1.555555555529cp-3,  0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5,
-	  0x1.f1c72e13ac306p-6,  0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6,
-	  0x1.c6dbbcc7c6315p-7,  0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8,
-	  0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5
-	};
       if (t == 0x328885a3u)
 	return 0x1.921fb6p+0f + 0x1p-25;
       if (t == 0x39826222u)
 	return 0x1.920f6ap+0f + 0x1p-25;
       double x2 = xs * xs;
-      r = (pi2 - xs) - (xs * x2) * poly12 (x2, c);
+      r = (pi2 - xs) - (xs * x2) * poly12 (x2, C0);
     }
   else
     {
-      static const double c[] =
-	{
-	  0x1.6a09e667f3bcbp+0,   0x1.e2b7dddff2db9p-4,  0x1.b27247ab42dbcp-6,
-	  0x1.02995cc4e0744p-7,   0x1.5ffb0276ec8eap-9,  0x1.033885a928decp-10,
-	  0x1.911f2be23f8c7p-12,  0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15,
-	  0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16
-	};
       double bx = fabs (xs);
       double z = 1.0 - bx;
       double s = copysign (sqrt (z), xs);
-      r = o[t >> 31] + s * poly12 (z, c);
+      r = o[t >> 31] + s * poly12 (z, C1);
     }
   return r;
 }
diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.c b/sysdeps/ieee754/flt-32/e_asincosf_data.c
new file mode 100644
index 0000000000..2ffc2c28f3
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/e_asincosf_data.c
@@ -0,0 +1,53 @@
+/* Common data for asinf/acosf implementations.
+
+Copyright (c) 2022-2025 Alexei Sibidanov.
+
+The original version of this file was copied from the CORE-MATH
+project (src/binary32/sinpi/sinpif.c, revision f786e13).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "e_asincosf_data.h"
+
+const double __asincosf_b[] =
+  {
+     0x1.fffffffd9ccb8p-1,  0x1.5555c94838007p-3,  0x1.32ded4b7c20fap-4,
+     0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6,  0x1.56fbb04998344p-1,
+    -0x1.403d8e4c49f52p+2,  0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6,
+     0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9,  0x1.a558a59cc19d3p+9,
+    -0x1.aca4b6a529ffp+9,   0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7,
+     0x1.5c2018c0c0105p+5
+  };
+
+const double __asincosf_c0[] =
+  {
+    0x1.555555555529cp-3,  0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5,
+    0x1.f1c72e13ac306p-6,  0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6,
+    0x1.c6dbbcc7c6315p-7,  0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8,
+    0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5
+  };
+
+const double __asincosf_c1[] =
+  {
+    0x1.6a09e667f3bcbp+0,   0x1.e2b7dddff2db9p-4,  0x1.b27247ab42dbcp-6,
+    0x1.02995cc4e0744p-7,   0x1.5ffb0276ec8eap-9,  0x1.033885a928decp-10,
+    0x1.911f2be23f8c7p-12,  0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15,
+    0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16
+  };
diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.h b/sysdeps/ieee754/flt-32/e_asincosf_data.h
new file mode 100644
index 0000000000..7dffb00dbe
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/e_asincosf_data.h
@@ -0,0 +1,37 @@
+/* Common data for asinf/acosf implementations.
+
+Copyright (c) 2022-2025 Alexei Sibidanov.
+
+The original version of this file was copied from the CORE-MATH
+project (src/binary32/sinpi/sinpif.c, revision f786e13).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#ifndef _ASINCOSF_DATAH
+#define _ASINCOSF_DATAH
+
+extern const double __asincosf_b[] attribute_hidden;
+extern const double __asincosf_c0[] attribute_hidden;
+extern const double __asincosf_c1[] attribute_hidden;
+#define B __asincosf_b
+#define C0 __asincosf_c0
+#define C1 __asincosf_c1
+
+#endif
diff --git a/sysdeps/ieee754/flt-32/e_asinf.c b/sysdeps/ieee754/flt-32/e_asinf.c
index 944bf6f5ce..854466361e 100644
--- a/sysdeps/ieee754/flt-32/e_asinf.c
+++ b/sysdeps/ieee754/flt-32/e_asinf.c
@@ -28,6 +28,7 @@ SOFTWARE.
 #include <errno.h>
 #include <libm-alias-finite.h>
 #include "math_config.h"
+#include "e_asincosf_data.h"
 
 static __attribute__ ((noinline)) float
 as_special (float x)
@@ -69,25 +70,16 @@ __ieee754_asinf (float x)
     {
       if (__glibc_unlikely (ax < 115 << 24))
 	return fmaf (x, 0x1p-25, x);
-      static const double b[] =
-	{
-	   0x1.0000000000005p+0,  0x1.55557aeca105dp-3,  0x1.3314ec3db7d12p-4,
-	   0x1.775738a5a6f92p-5,  0x1.5d5f7ce1c8538p-8,  0x1.605c6d58740fp-2,
-	  -0x1.5728b732d73c6p+1,  0x1.f152170f151ebp+3, -0x1.f962ea3ca992ep+5,
-	   0x1.71971e17375ap+7,  -0x1.860512b4ba23p+8,   0x1.26a3b8d4bdb14p+9,
-	  -0x1.36f2ea5698b51p+9,  0x1.b3d722aebfa2ep+8, -0x1.6cf89703b1289p+7,
-	   0x1.1518af6a65e2dp+5
-	};
       double z = xs;
       double z2 = z * z;
       double z4 = z2 * z2;
       double z8 = z4 * z4;
       double z16 = z8 * z8;
-      r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3]))
-		+ z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7])))
-	       + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11]))
-			+ z8 * ((b[12] + z2 * b[13])
-				+ z4 * (b[14] + z2 * b[15]))));
+      r = z * ((((B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3]))
+		+ z8 * ((B[4] + z2 * B[5]) + z4 * (B[6] + z2 * B[7])))
+	       + z16 * (((B[8] + z2 * B[9]) + z4 * (B[10] + z2 * B[11]))
+			+ z8 * ((B[12] + z2 * B[13])
+				+ z4 * (B[14] + z2 * B[15]))));
       float ub = r;
       float lb = r - z * 0x1.efa8ebp-31;
       if (ub == lb)
@@ -95,16 +87,9 @@ __ieee754_asinf (float x)
     }
   if (ax < (0x7eu << 24))
     {
-      static const double c[] =
-	{
-	  0x1.555555555529cp-3,  0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5,
-	  0x1.f1c72e13ac306p-6,  0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6,
-	  0x1.c6dbbcc7c6315p-7,  0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8,
-	  0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5
-	};
       double z = xs;
       double z2 = z * z;
-      double c0 = poly12 (z2, c);
+      double c0 = poly12 (z2, C0);
       r = z + (z * z2) * c0;
     }
   else
@@ -116,14 +101,7 @@ __ieee754_asinf (float x)
       double bx = fabs (xs);
       double z = 1.0 - bx;
       double s = sqrt (z);
-      static const double c[] =
-	{
-	  0x1.6a09e667f3bcbp+0,   0x1.e2b7dddff2db9p-4,  0x1.b27247ab42dbcp-6,
-	  0x1.02995cc4e0744p-7,   0x1.5ffb0276ec8eap-9,  0x1.033885a928decp-10,
-	  0x1.911f2be23f8c7p-12,  0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15,
-	  0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16
-	};
-      r = pi2 - s * poly12 (z, c);
+      r = pi2 - s * poly12 (z, C1);
       r = copysign (r, xs);
     }
   return r;
-- 
2.43.0


  reply	other threads:[~2025-02-03 21:25 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-03 21:24 [PATCH 0/3] Optimize CORE-MATH " Adhemerval Zanella
2025-02-03 21:24 ` Adhemerval Zanella [this message]
2025-02-10 20:55   ` [PATCH 1/3] math: Consolidate acosf and asinf " Andreas K. Huettel
2025-02-11 21:02     ` Adhemerval Zanella Netto
2025-02-03 21:24 ` [PATCH 2/3] math: Consolidate acoshf and asinhf " Adhemerval Zanella
2025-02-10 21:24   ` Andreas K. Huettel
2025-02-03 21:24 ` [PATCH 3/3] math: Consolidate coshf and sinhf " Adhemerval Zanella
2025-02-11 19:53   ` Andreas K. Huettel
2025-02-04  6:32 ` [PATCH 0/3] Optimize CORE-MATH " Paul Zimmermann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250203212546.911043-2-adhemerval.zanella@linaro.org \
    --to=adhemerval.zanella@linaro.org \
    --cc=Paul.Zimmermann@inria.fr \
    --cc=dj@redhat.com \
    --cc=josmyers@redhat.com \
    --cc=libc-alpha@sourceware.org \
    --cc=sibid@uvic.ca \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).