[PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
@ 2020-03-10  9:55 Andrea Corallo
  2020-03-11 13:50 ` Szabolcs Nagy
  2020-03-17 10:51 ` Andrea Corallo
  0 siblings, 2 replies; 8+ messages in thread
From: Andrea Corallo @ 2020-03-10  9:55 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1009 bytes --]

Hi all,

second and last patch of the two reworking FPCR and FPSR builtins.

This rework __builtin_aarch64_set_fpcr (unsigned) and
__builtin_aarch64_set_fpsr (unsigned) to emit a read-modify-sequences
as:

         mrs     x1, fpsr
         bfi     x1, x0, 0, 32
         msr     fpsr, x1

This in order to preserve the original high 32 bits of the system
register.  Both FPSR and FPCR became 64bit regs with armv8.1.

Bootstrapped on aarch64-linux-gnu, does not introduce regressions.

Regards

  Andrea

gcc/ChangeLog:

2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>

	* config/aarch64/aarch64.md (insv_reg<mode>): Pattern renamed.
	(set_fpcr, set_fpsr): Pattern modified for read-modify-write
	sequence respecting high 32bit register content.

gcc/testsuite/ChangeLog:

2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>

	* gcc.target/aarch64/set_fpcr.c: New test.
	* gcc.target/aarch64/get_fpcr.c: New test.
	* gcc.target/aarch64/set_fpsr.c: New test.
	* gcc.target/aarch64/get_fpsr.c: New test.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: set-fpcr-fpsr.patch --]
[-- Type: text/x-diff, Size: 3926 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b6836710c9c2..b6ee2e1a946e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5613,7 +5613,7 @@
   operands[3] = force_reg (<MODE>mode, value);
 })
 
-(define_insn "*insv_reg<mode>"
+(define_insn "insv_reg<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
 			  (match_operand 1 "const_int_operand" "n")
 			  (match_operand 2 "const_int_operand" "n"))
@@ -7173,10 +7173,21 @@
    (set_attr "type" "multiple")])
 
 ;; Write Floating-point Control Register.
-(define_insn "set_fpcr"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
+
+(define_expand "set_fpcr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+                    UNSPECV_SET_FPCR)]
   ""
-  "msr\\tfpcr, %0"
+  {
+    rtx val32 = simplify_gen_subreg (DImode, operands[0], SImode, 0);
+    /* Read-modify-write sequence.  */
+    rtx scratch = gen_reg_rtx (DImode);
+    emit_insn (gen_get_fpcr64 (scratch));
+    emit_insn (gen_insv_regdi (scratch, GEN_INT (32), GEN_INT (0),
+			       val32));
+    emit_insn (gen_set_fpcr64 (scratch));
+    DONE;
+  }
   [(set_attr "type" "mrs")])
 
 ;; Read Floating-point Control Register.
@@ -7188,10 +7199,19 @@
   [(set_attr "type" "mrs")])
 
 ;; Write Floating-point Status Register.
-(define_insn "set_fpsr"
+(define_expand "set_fpsr"
   [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
   ""
-  "msr\\tfpsr, %0"
+  {
+    rtx val32 = simplify_gen_subreg (DImode, operands[0], SImode, 0);
+    /* Read-modify-write sequence.  */
+    rtx scratch = gen_reg_rtx (DImode);
+    emit_insn (gen_get_fpsr64 (scratch));
+    emit_insn (gen_insv_regdi (scratch, GEN_INT (32), GEN_INT (0),
+			       val32));
+    emit_insn (gen_set_fpsr64 (scratch));
+    DONE;
+  }
   [(set_attr "type" "mrs")])
 
 ;; Read Floating-point Status Register.
diff --git a/gcc/testsuite/gcc.target/aarch64/get_fpcr.c b/gcc/testsuite/gcc.target/aarch64/get_fpcr.c
new file mode 100644
index 000000000000..f33e70e34cd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/get_fpcr.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned int
+get_fpcr ()
+{
+  return __builtin_aarch64_get_fpcr ();
+}
+
+/* { dg-final { scan-assembler-times "mrs.*fpcr" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/get_fpsr.c b/gcc/testsuite/gcc.target/aarch64/get_fpsr.c
new file mode 100644
index 000000000000..2f7d75637d20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/get_fpsr.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned int
+get_fpsr ()
+{
+  return __builtin_aarch64_get_fpsr ();
+}
+
+/* { dg-final { scan-assembler-times "mrs.*fpsr" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/set_fpcr.c b/gcc/testsuite/gcc.target/aarch64/set_fpcr.c
new file mode 100644
index 000000000000..74525981f323
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/set_fpcr.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void
+set_fpcr (unsigned int x)
+{
+  return __builtin_aarch64_set_fpcr (x);
+}
+
+/* { dg-final { scan-assembler-times "bfi" 1 } } */
+/* { dg-final { scan-assembler-times "mrs.*fpcr" 1 } } */
+/* { dg-final { scan-assembler-times "msr.*fpcr" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/set_fpsr.c b/gcc/testsuite/gcc.target/aarch64/set_fpsr.c
new file mode 100644
index 000000000000..e3e2e631f70c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/set_fpsr.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void
+set_fpsr (unsigned int x)
+{
+  return __builtin_aarch64_set_fpsr (x);
+}
+
+/* { dg-final { scan-assembler-times "bfi" 1 } } */
+/* { dg-final { scan-assembler-times "mrs.*fpsr" 1 } } */
+/* { dg-final { scan-assembler-times "msr.*fpsr" 1 } } */

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
  2020-03-10  9:55 [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins Andrea Corallo
@ 2020-03-11 13:50 ` Szabolcs Nagy
  2020-03-11 13:53   ` Richard Earnshaw (lists)
  2020-03-17 10:51 ` Andrea Corallo
  1 sibling, 1 reply; 8+ messages in thread
From: Szabolcs Nagy @ 2020-03-11 13:50 UTC (permalink / raw)
  To: Andrea Corallo, gcc-patches; +Cc: nd

On 10/03/2020 09:55, Andrea Corallo wrote:
> Hi all,
> 
> second and last patch of the two reworking FPCR and FPSR builtins.
> 
> This rework __builtin_aarch64_set_fpcr (unsigned) and
> __builtin_aarch64_set_fpsr (unsigned) to emit a read-modify-sequences
> as:
> 
>          mrs     x1, fpsr
>          bfi     x1, x0, 0, 32
>          msr     fpsr, x1
> 
> This in order to preserve the original high 32 bits of the system
> register.  Both FPSR and FPCR became 64bit regs with armv8.1.

so the architectural fpsr and fpcr registers got
extended to 64bit from 32bit and currently the top
32 bits are reserved 0.

the floating-point environment has to fit in fenv_t
which is currently two unsigned ints on aarch64.

so glibc cannot support 64bit registers without breaking
abi which means in c programs the fpsr, fpcr top bits
must be held 0 at all times when extern calls are made
that involves dynamic linking or calls into the libc
(otherwise fegetenv and fesetenv are not able to
reliably restore the default fp environment).

i think trying to preserve the top bits is wrong
(it may be acceptable depending on what those bits
do, but it seems to me better to only allow 0 top bits).

if the user changes the top bits the behaviour is
undefined if an extern c call is made with such setting.

i think we have to change the builtins to ensure the
top bits of the used x reg is 0. we may or may not need
a new builtin to allow setting 64 bits of fpsr and fpcr:
currently non-zero top bits are unsupportable (but once
there is new meaning for the top 32bits then it may make
sense to break abi).

> 
> Bootstrapped on aarch64-linux-gnu, does not introduce regressions.
> 
> Regards
> 
>   Andrea
> 
> gcc/ChangeLog:
> 
> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
> 
> 	* config/aarch64/aarch64.md (insv_reg<mode>): Pattern renamed.
> 	(set_fpcr, set_fpsr): Pattern modified for read-modify-write
> 	sequence respecting high 32bit register content.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
> 
> 	* gcc.target/aarch64/set_fpcr.c: New test.
> 	* gcc.target/aarch64/get_fpcr.c: New test.
> 	* gcc.target/aarch64/set_fpsr.c: New test.
> 	* gcc.target/aarch64/get_fpsr.c: New test.
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
  2020-03-11 13:50 ` Szabolcs Nagy
@ 2020-03-11 13:53   ` Richard Earnshaw (lists)
  2020-03-11 16:22     ` Szabolcs Nagy
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Earnshaw (lists) @ 2020-03-11 13:53 UTC (permalink / raw)
  To: Szabolcs Nagy, Andrea Corallo, gcc-patches; +Cc: nd

On 11/03/2020 13:50, Szabolcs Nagy wrote:
> On 10/03/2020 09:55, Andrea Corallo wrote:
>> Hi all,
>>
>> second and last patch of the two reworking FPCR and FPSR builtins.
>>
>> This rework __builtin_aarch64_set_fpcr (unsigned) and
>> __builtin_aarch64_set_fpsr (unsigned) to emit a read-modify-sequences
>> as:
>>
>>           mrs     x1, fpsr
>>           bfi     x1, x0, 0, 32
>>           msr     fpsr, x1
>>
>> This in order to preserve the original high 32 bits of the system
>> register.  Both FPSR and FPCR became 64bit regs with armv8.1.
> 
> so the architectural fpsr and fpcr registers got
> extended to 64bit from 32bit and currently the top
> 32 bits are reserved 0.
> 
> the floating-point environment has to fit in fenv_t
> which is currently two unsigned ints on aarch64.
> 
> so glibc cannot support 64bit registers without breaking
> abi which means in c programs the fpsr, fpcr top bits
> must be held 0 at all times when extern calls are made
> that involves dynamic linking or calls into the libc
> (otherwise fegetenv and fesetenv are not able to
> reliably restore the default fp environment).

I disagree.  The top bits must be preserved, even if they are not zero. 
This does potentially limit what those top bits might be used for, since 
they are beyond the ability to represent state in C-like programming 
environments, but it doesn't mean they should be reset by a call to 
change the register.


R.

> 
> i think trying to preserve the top bits is wrong
> (it may be acceptable depending on what those bits
> do, but it seems to me better to only allow 0 top bits).
> 
> if the user changes the top bits the behaviour is
> undefined if an extern c call is made with such setting.
> 
> i think we have to change the builtins to ensure the
> top bits of the used x reg is 0. we may or may not need
> a new builtin to allow setting 64 bits of fpsr and fpcr:
> currently non-zero top bits are unsupportable (but once
> there is new meaning for the top 32bits then it may make
> sense to break abi).
> 
> 
>>
>> Bootstrapped on aarch64-linux-gnu, does not introduce regressions.
>>
>> Regards
>>
>>    Andrea
>>
>> gcc/ChangeLog:
>>
>> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
>>
>> 	* config/aarch64/aarch64.md (insv_reg<mode>): Pattern renamed.
>> 	(set_fpcr, set_fpsr): Pattern modified for read-modify-write
>> 	sequence respecting high 32bit register content.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
>>
>> 	* gcc.target/aarch64/set_fpcr.c: New test.
>> 	* gcc.target/aarch64/get_fpcr.c: New test.
>> 	* gcc.target/aarch64/set_fpsr.c: New test.
>> 	* gcc.target/aarch64/get_fpsr.c: New test.
>>
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
  2020-03-11 13:53   ` Richard Earnshaw (lists)
@ 2020-03-11 16:22     ` Szabolcs Nagy
  0 siblings, 0 replies; 8+ messages in thread
From: Szabolcs Nagy @ 2020-03-11 16:22 UTC (permalink / raw)
  To: Richard Earnshaw (lists), Andrea Corallo, gcc-patches; +Cc: nd

On 11/03/2020 13:53, Richard Earnshaw (lists) wrote:
> On 11/03/2020 13:50, Szabolcs Nagy wrote:
>> On 10/03/2020 09:55, Andrea Corallo wrote:
>>> Hi all,
>>>
>>> second and last patch of the two reworking FPCR and FPSR builtins.
>>>
>>> This rework __builtin_aarch64_set_fpcr (unsigned) and
>>> __builtin_aarch64_set_fpsr (unsigned) to emit a read-modify-sequences
>>> as:
>>>
>>>           mrs     x1, fpsr
>>>           bfi     x1, x0, 0, 32
>>>           msr     fpsr, x1
>>>
>>> This in order to preserve the original high 32 bits of the system
>>> register.  Both FPSR and FPCR became 64bit regs with armv8.1.
>>
>> so the architectural fpsr and fpcr registers got
>> extended to 64bit from 32bit and currently the top
>> 32 bits are reserved 0.
>>
>> the floating-point environment has to fit in fenv_t
>> which is currently two unsigned ints on aarch64.
>>
>> so glibc cannot support 64bit registers without breaking
>> abi which means in c programs the fpsr, fpcr top bits
>> must be held 0 at all times when extern calls are made
>> that involves dynamic linking or calls into the libc
>> (otherwise fegetenv and fesetenv are not able to
>> reliably restore the default fp environment).
> 
> I disagree.  The top bits must be preserved, even if they are not zero. This does potentially limit what those top bits might be used for, since
> they are beyond the ability to represent state in C-like programming environments, but it doesn't mean they should be reset by a call to change
> the register.


ok preserving works for fesetenv.
(which internally uses the builtin)

it means the top bits are not part of fenv.
(we will have to change asm in glibc to deal with this)

since the top and bottom bits cannot be set
independently there are implications about
atomic updates, but they are not relevant
on linux where signal handlers save/restore
the fenv (not required by the standard so
theoretically treating the top and bottom bits
separately may cause issues on some systems).

> 
> 
> R.
> 
>>
>> i think trying to preserve the top bits is wrong
>> (it may be acceptable depending on what those bits
>> do, but it seems to me better to only allow 0 top bits).
>>
>> if the user changes the top bits the behaviour is
>> undefined if an extern c call is made with such setting.
>>
>> i think we have to change the builtins to ensure the
>> top bits of the used x reg is 0. we may or may not need
>> a new builtin to allow setting 64 bits of fpsr and fpcr:
>> currently non-zero top bits are unsupportable (but once
>> there is new meaning for the top 32bits then it may make
>> sense to break abi).
>>
>>
>>>
>>> Bootstrapped on aarch64-linux-gnu, does not introduce regressions.
>>>
>>> Regards
>>>
>>>    Andrea
>>>
>>> gcc/ChangeLog:
>>>
>>> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
>>>
>>>     * config/aarch64/aarch64.md (insv_reg<mode>): Pattern renamed.
>>>     (set_fpcr, set_fpsr): Pattern modified for read-modify-write
>>>     sequence respecting high 32bit register content.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
>>>
>>>     * gcc.target/aarch64/set_fpcr.c: New test.
>>>     * gcc.target/aarch64/get_fpcr.c: New test.
>>>     * gcc.target/aarch64/set_fpsr.c: New test.
>>>     * gcc.target/aarch64/get_fpsr.c: New test.
>>>
>>
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
  2020-03-10  9:55 [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins Andrea Corallo
  2020-03-11 13:50 ` Szabolcs Nagy
@ 2020-03-17 10:51 ` Andrea Corallo
  1 sibling, 0 replies; 8+ messages in thread
From: Andrea Corallo @ 2020-03-17 10:51 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd

Andrea Corallo <andrea.corallo@arm.com> writes:

> Hi all,
>
> second and last patch of the two reworking FPCR and FPSR builtins.
>
> This rework __builtin_aarch64_set_fpcr (unsigned) and
> __builtin_aarch64_set_fpsr (unsigned) to emit a read-modify-sequences
> as:
>
>          mrs     x1, fpsr
>          bfi     x1, x0, 0, 32
>          msr     fpsr, x1
>
> This in order to preserve the original high 32 bits of the system
> register.  Both FPSR and FPCR became 64bit regs with armv8.1.
>
> Bootstrapped on aarch64-linux-gnu, does not introduce regressions.
>
> Regards
>
>   Andrea
>
> gcc/ChangeLog:
>
> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
>
> 	* config/aarch64/aarch64.md (insv_reg<mode>): Pattern renamed.
> 	(set_fpcr, set_fpsr): Pattern modified for read-modify-write
> 	sequence respecting high 32bit register content.
>
> gcc/testsuite/ChangeLog:
>
> 2020-??-??  Andrea Corallo  <andrea.corallo@arm.com>
>
> 	* gcc.target/aarch64/set_fpcr.c: New test.
> 	* gcc.target/aarch64/get_fpcr.c: New test.
> 	* gcc.target/aarch64/set_fpsr.c: New test.
> 	* gcc.target/aarch64/get_fpsr.c: New test.

Ping, please let me know if these are okay for trunk.

Thanks

  Andrea

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
  2020-03-17 14:52 ` Richard Earnshaw
@ 2020-03-31  8:25   ` Andrea Corallo
  0 siblings, 0 replies; 8+ messages in thread
From: Andrea Corallo @ 2020-03-31  8:25 UTC (permalink / raw)
  To: Richard Earnshaw; +Cc: Wilco Dijkstra, GCC Patches, Kyrill Tkachov

Richard Earnshaw <Richard.Earnshaw@arm.com> writes:

> On 17/03/2020 12:34, Wilco Dijkstra wrote:
>> Hi Andrea,
>> I think the first part is fine when approved, but the 2nd part is
>> problematic like Szabolcs
>> already pointed out. We can't just change the ABI or semantics, and these builtins are critical
>> for GLIBC performance. We would first need to change GLIBC back to using inline assembler
>> so it will still write zeroes in the top 32 bits (since that is the current ABI).
>> Cheers,
>> Wilco
>> 
>
> No, you shouldn't ever arbitrarily write zeros to the upper bits (or
> even worse, write garbage as we do now).  That's the whole point of 
> these patches.
>
> R.

Hi Richard,

FWIW I agree with you but, what should we do now?  Shall I push the
first patch at least?

Thanks

  Andrea

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
  2020-03-17 12:34 Wilco Dijkstra
@ 2020-03-17 14:52 ` Richard Earnshaw
  2020-03-31  8:25   ` Andrea Corallo
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Earnshaw @ 2020-03-17 14:52 UTC (permalink / raw)
  To: Wilco Dijkstra, Andrea Corallo; +Cc: GCC Patches

On 17/03/2020 12:34, Wilco Dijkstra wrote:
> Hi Andrea,
> 
> I think the first part is fine when approved, but the 2nd part is problematic like Szabolcs
> already pointed out. We can't just change the ABI or semantics, and these builtins are critical
> for GLIBC performance. We would first need to change GLIBC back to using inline assembler
> so it will still write zeroes in the top 32 bits (since that is the current ABI).
> 
> Cheers,
> Wilco
> 

No, you shouldn't ever arbitrarily write zeros to the upper bits (or 
even worse, write garbage as we do now).  That's the whole point of 
these patches.

R.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins
@ 2020-03-17 12:34 Wilco Dijkstra
  2020-03-17 14:52 ` Richard Earnshaw
  0 siblings, 1 reply; 8+ messages in thread
From: Wilco Dijkstra @ 2020-03-17 12:34 UTC (permalink / raw)
  To: Andrea Corallo; +Cc: GCC Patches

Hi Andrea,

I think the first part is fine when approved, but the 2nd part is problematic like Szabolcs
already pointed out. We can't just change the ABI or semantics, and these builtins are critical
for GLIBC performance. We would first need to change GLIBC back to using inline assembler
so it will still write zeroes in the top 32 bits (since that is the current ABI).

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-03-31  8:26 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-10  9:55 [PATCH 2/2] [aarch64] Rework fpcr fpsr getter/setter builtins Andrea Corallo
2020-03-11 13:50 ` Szabolcs Nagy
2020-03-11 13:53   ` Richard Earnshaw (lists)
2020-03-11 16:22     ` Szabolcs Nagy
2020-03-17 10:51 ` Andrea Corallo
2020-03-17 12:34 Wilco Dijkstra
2020-03-17 14:52 ` Richard Earnshaw
2020-03-31  8:25   ` Andrea Corallo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).