From: "H.J. Lu" <hjl.tools@gmail.com>
To: Fangrui Song <maskray@google.com>
Cc: Adhemerval Zanella <adhemerval.zanella@linaro.org>,
libc-alpha@sourceware.org
Subject: Re: [PATCH] x86: Remove .tfloat usage
Date: Tue, 27 Sep 2022 12:44:38 -0700 [thread overview]
Message-ID: <CAMe9rOqYtn4FTFPoAseP126rm19hHfXG-YubdPOu6g2tpswo=Q@mail.gmail.com> (raw)
In-Reply-To: <20220927081924.2vw22by62poneamo@google.com>
On Tue, Sep 27, 2022 at 1:19 AM Fangrui Song via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On 2022-09-27, Fangrui Song wrote:
> >On 2022-09-26, Adhemerval Zanella via Libc-alpha wrote:
> >>Some compilers do not support it (such as clang's integrated assembly),
> >>nor does gcc emit it.
> >
> >assembler
> >
> >>---
> >>sysdeps/i386/fpu/e_atanh.S | 3 ++-
> >>sysdeps/i386/fpu/e_atanhf.S | 3 ++-
> >>sysdeps/i386/fpu/e_atanhl.S | 3 ++-
> >>sysdeps/i386/fpu/s_asinhl.S | 3 ++-
> >>sysdeps/i386/fpu/s_cbrtl.S | 44 +++++++++++++++++++++--------------
> >>sysdeps/i386/fpu/s_expm1.S | 3 ++-
> >>sysdeps/i386/fpu/s_expm1f.S | 3 ++-
> >>sysdeps/i386/fpu/s_log1pl.S | 3 ++-
> >>sysdeps/x86_64/fpu/s_log1pl.S | 3 ++-
> >>9 files changed, 42 insertions(+), 26 deletions(-)
> >>
> >>diff --git a/sysdeps/i386/fpu/e_atanh.S b/sysdeps/i386/fpu/e_atanh.S
> >>index 6e4fef06b2..a7fd9a60fa 100644
> >>--- a/sysdeps/i386/fpu/e_atanh.S
> >>+++ b/sysdeps/i386/fpu/e_atanh.S
> >>@@ -33,7 +33,8 @@ one: .double 1.0
> >>limit: .double 0.29
> >> ASM_SIZE_DIRECTIVE(limit)
> >> .type ln2_2,@object
> >>-ln2_2: .tfloat 0.3465735902799726547086160
> >>+ln2_2: .quad 0xb17217f7d1cf79ac /* 0.3465735902799726547086160L */
> >>+ .short 0x3ffd
> >> ASM_SIZE_DIRECTIVE(ln2_2)
> >
> >There is one space before /* while the following uses two.
> >
> >.tfloat output is 10 bytes without padding. .quad output is 8 bytes.
> >Does this change semantics?
> >
>
> Sorry. I did not notice .short . This is fine.
>
> >>DEFINE_DBL_MIN
> >>diff --git a/sysdeps/i386/fpu/e_atanhf.S b/sysdeps/i386/fpu/e_atanhf.S
> >>index 146196eced..4ab1fa31fb 100644
> >>--- a/sysdeps/i386/fpu/e_atanhf.S
> >>+++ b/sysdeps/i386/fpu/e_atanhf.S
> >>@@ -34,7 +34,8 @@ limit: .double 0.29
> >> ASM_SIZE_DIRECTIVE(limit)
> >> .align ALIGNARG(4)
> >> .type ln2_2,@object
> >>-ln2_2: .tfloat 0.3465735902799726547086160
> >>+ln2_2: .quad 0xb17217f7d1cf79ac /* 0.3465735902799726547086160L */
> >>+ .short 0x3ffd
> >> ASM_SIZE_DIRECTIVE(ln2_2)
> >>
> >>DEFINE_FLT_MIN
> >>diff --git a/sysdeps/i386/fpu/e_atanhl.S b/sysdeps/i386/fpu/e_atanhl.S
> >>index 1f6eb7ce48..df3f1b8f84 100644
> >>--- a/sysdeps/i386/fpu/e_atanhl.S
> >>+++ b/sysdeps/i386/fpu/e_atanhl.S
> >>@@ -39,7 +39,8 @@ limit: .double 0.29
> >> ASM_SIZE_DIRECTIVE(limit)
> >> .align ALIGNARG(4)
> >> .type ln2_2,@object
> >>-ln2_2: .tfloat 0.3465735902799726547086160
> >>+ln2_2: .quad 0xb17217f7d1cf79ac /* 0.3465735902799726547086160 */
> >>+ .short 0x3ffd
> >> ASM_SIZE_DIRECTIVE(ln2_2)
> >>
> >>#ifdef PIC
> >>diff --git a/sysdeps/i386/fpu/s_asinhl.S b/sysdeps/i386/fpu/s_asinhl.S
> >>index bd442c6a09..f4f420d060 100644
> >>--- a/sysdeps/i386/fpu/s_asinhl.S
> >>+++ b/sysdeps/i386/fpu/s_asinhl.S
> >>@@ -23,7 +23,8 @@
> >>
> >> .align ALIGNARG(4)
> >> .type huge,@object
> >>-huge: .tfloat 1e+4930
> >>+huge: .quad 0x89b634e7456ffa1d /* 1e+4930 */
> >>+ .short 0x7ff8
> >> ASM_SIZE_DIRECTIVE(huge)
> >> .align ALIGNARG(4)
> >> /* Please note that we use double value for 1.0. This number
> >>diff --git a/sysdeps/i386/fpu/s_cbrtl.S b/sysdeps/i386/fpu/s_cbrtl.S
> >>index 8802164706..935ac20530 100644
> >>--- a/sysdeps/i386/fpu/s_cbrtl.S
> >>+++ b/sysdeps/i386/fpu/s_cbrtl.S
> >>@@ -23,55 +23,63 @@
> >>
> >> .align ALIGNARG(4)
> >> .type f8,@object
> >>-f8: .tfloat 0.161617097923756032
> >>+f8: .quad 0xa57ef3d83a542839 /* 0.161617097923756032 */
> >>+ .short 0x3ffc
> >> ASM_SIZE_DIRECTIVE(f8)
> >> .align ALIGNARG(4)
> >> .type f7,@object
> >>-f7: .tfloat -0.988553671195413709
> >>+f7: .quad 0xfd11da7820029014 /* -0.988553671195413709 */
> >>+ .short 0xbffe
> >> ASM_SIZE_DIRECTIVE(f7)
> >> .align ALIGNARG(4)
> >> .type f6,@object
> >>-f6: .tfloat 2.65298938441952296
> >>+f6: .quad 0xa9ca93fcade3b4ad /* 2.65298938441952296 */
> >>+ .short 0x4000
> >> ASM_SIZE_DIRECTIVE(f6)
> >> .align ALIGNARG(4)
> >> .type f5,@object
> >>-f5: .tfloat -4.11151425200350531
> >>+f5: .quad 0x839186562c931c34 /* -4.11151425200350531 */
> >>+ .short 0xc001
> >> ASM_SIZE_DIRECTIVE(f5)
> >> .align ALIGNARG(4)
> >> .type f4,@object
> >>-f4: .tfloat 4.09559907378707839
> >>+f4: .quad 0x830f25c9ee304594 /* 4.09559907378707839 */
> >>+ .short 0x4001
> >> ASM_SIZE_DIRECTIVE(f4)
> >> .align ALIGNARG(4)
> >> .type f3,@object
> >>-f3: .tfloat -2.82414939754975962
> >>+f3: .quad 0xb4bedd1d5fa2f0c6 /* -2.82414939754975962 */
> >>+ .short 0xc000
> >> ASM_SIZE_DIRECTIVE(f3)
> >> .align ALIGNARG(4)
> >> .type f2,@object
> >>-f2: .tfloat 1.67595307700780102
> >>+f2: .quad 0xd685a163b08586e3 /* 1.67595307700780102 */
> >>+ .short 0x3fff
> >> ASM_SIZE_DIRECTIVE(f2)
> >> .align ALIGNARG(4)
> >> .type f1,@object
> >>-f1: .tfloat 0.338058687610520237
> >>+f1: .quad 0xad16073ed4ec3b45 /* 0.338058687610520237 */
> >>+ .short 0x3ffd
> >> ASM_SIZE_DIRECTIVE(f1)
> >>
> >>-#define CBRT2 1.2599210498948731648
> >>-#define ONE_CBRT2 0.793700525984099737355196796584
> >>-#define SQR_CBRT2 1.5874010519681994748
> >>-#define ONE_SQR_CBRT2 0.629960524947436582364439673883
> >>-
> >> /* We make the entries in the following table all 16 bytes
> >> wide to avoid having to implement a multiplication by 10. */
> >> .type factor,@object
> >> .align ALIGNARG(4)
> >>-factor: .tfloat ONE_SQR_CBRT2
> >>+factor: .quad 0xa14517cc6b945711 /* 0.629960524947436582364439673883L */
> >>+ .short 0x3ffe
> >
> >Perhaps keep the macro name in the comment for readability.
> >
> >> .byte 0, 0, 0, 0, 0, 0
> >>- .tfloat ONE_CBRT2
> >>+ .quad 0xcb2ff529eb71e415 /* 0.793700525984099737355196796584L */
> >>+ .short 0x3ffe
> >> .byte 0, 0, 0, 0, 0, 0
> >>- .tfloat 1.0
> >>+ .quad 0x8000000000000000 /* 1.0L */
> >>+ .short 0x3fff
> >> .byte 0, 0, 0, 0, 0, 0
> >>- .tfloat CBRT2
> >>+ .quad 0xa14517cc6b945711 /* 1.2599210498948731648L */
> >>+ .short 0x3fff
> >> .byte 0, 0, 0, 0, 0, 0
> >>- .tfloat SQR_CBRT2
> >>+ .quad 0xcb2ff529eb71e416 /* 1.5874010519681994748L */
> >>+ .short 0x3fff
> >> ASM_SIZE_DIRECTIVE(factor)
> >>
> >> .type two64,@object
> >>diff --git a/sysdeps/i386/fpu/s_expm1.S b/sysdeps/i386/fpu/s_expm1.S
> >>index 7199d681ba..038ff72feb 100644
> >>--- a/sysdeps/i386/fpu/s_expm1.S
> >>+++ b/sysdeps/i386/fpu/s_expm1.S
> >>@@ -33,7 +33,8 @@ minus1: .double -1.0
> >>one: .double 1.0
> >> ASM_SIZE_DIRECTIVE(one)
> >> .type l2e,@object
> >>-l2e: .tfloat 1.442695040888963407359924681002
> >>+l2e: .quad 0xb8aa3b295c17f0bc /* 1.442695040888963407359924681002 */
> >>+ .short 0x3fff
> >> ASM_SIZE_DIRECTIVE(l2e)
> >>
> >>DEFINE_DBL_MIN
> >>diff --git a/sysdeps/i386/fpu/s_expm1f.S b/sysdeps/i386/fpu/s_expm1f.S
> >>index 04c37bda1b..b0406a45aa 100644
> >>--- a/sysdeps/i386/fpu/s_expm1f.S
> >>+++ b/sysdeps/i386/fpu/s_expm1f.S
> >>@@ -33,7 +33,8 @@ minus1: .double -1.0
> >>one: .double 1.0
> >> ASM_SIZE_DIRECTIVE(one)
> >> .type l2e,@object
> >>-l2e: .tfloat 1.442695040888963407359924681002
> >>+l2e: .quad 0xb8aa3b295c17f0bc /* 1.442695040888963407359924681002 */
> >>+ .short 0x3fff
> >> ASM_SIZE_DIRECTIVE(l2e)
> >>
> >>DEFINE_FLT_MIN
> >>diff --git a/sysdeps/i386/fpu/s_log1pl.S b/sysdeps/i386/fpu/s_log1pl.S
> >>index f28349f7d2..202995d3d6 100644
> >>--- a/sysdeps/i386/fpu/s_log1pl.S
> >>+++ b/sysdeps/i386/fpu/s_log1pl.S
> >>@@ -14,7 +14,8 @@ RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $")
> >> -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2
> >> 0.29 is a safe value.
> >> */
> >>-limit: .tfloat 0.29
> >>+limit: .quad 0x947ae147ae147ae1 /* 0.29 */
> >>+ .short 0x3ffd
> >
> >Inconsistent use of L suffixes.
We don't use suffixes for .quad in assembly code.
> >
> >> /* Please note: we use a double value here. Since 1.0 has
> >> an exact representation this does not effect the accuracy
> >> but it helps to optimize the code. */
> >>diff --git a/sysdeps/x86_64/fpu/s_log1pl.S b/sysdeps/x86_64/fpu/s_log1pl.S
> >>index 8219f6fbcc..b053579dc5 100644
> >>--- a/sysdeps/x86_64/fpu/s_log1pl.S
> >>+++ b/sysdeps/x86_64/fpu/s_log1pl.S
> >>@@ -14,7 +14,8 @@ RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $")
> >> -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2
> >> 0.29 is a safe value.
> >> */
> >>-limit: .tfloat 0.29
> >>+limit: .quad 0x947ae147ae147ae1 /* 0.29L */
> >>+ .short 0x3ffd
> >> /* Please note: we use a double value here. Since 1.0 has
> >> an exact representation this does not effect the accuracy
> >> but it helps to optimize the code. */
> >>--
> >>2.34.1
> >>
--
H.J.
next prev parent reply other threads:[~2022-09-27 19:45 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-26 16:53 Adhemerval Zanella
2022-09-26 19:00 ` Noah Goldstein
2022-09-27 19:46 ` H.J. Lu
2022-09-27 8:18 ` Fangrui Song
2022-09-27 8:19 ` Fangrui Song
2022-09-27 19:44 ` H.J. Lu [this message]
2022-10-03 13:39 ` Adhemerval Zanella Netto
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAMe9rOqYtn4FTFPoAseP126rm19hHfXG-YubdPOu6g2tpswo=Q@mail.gmail.com' \
--to=hjl.tools@gmail.com \
--cc=adhemerval.zanella@linaro.org \
--cc=libc-alpha@sourceware.org \
--cc=maskray@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).