* [PATCH V3] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd
@ 2023-07-04 6:42 Jiufu Guo
0 siblings, 0 replies; only message in thread
From: Jiufu Guo @ 2023-07-04 6:42 UTC (permalink / raw)
To: gcc-patches; +Cc: segher, dje.gcc, linkw, bergner, guojiufu
Hi,
As mentioned in PR108338, on Power9 (p9) we could use mtvsrws to implement
the bitcast from SI#0 to SF (or from lowpart DI to SF).
For code:
*(long long*)buff = di;
float f = *(float*)(buff);
"sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" is generated.
But "mtvsrws 1,3 ; xscvspdpn 1,1" would be better.
In other words, the bitcast from lowpart DI (and also highpart DI) to SF could be improved.
Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611823.html
this patch does not define a new insn for mtvsrws, but uses existing insns.
Bootstrapped and regression-tested on ppc64{,le} with no regressions.
Is this ok for trunk?
BR,
Jeff (Jiufu)
PR target/108338
gcc/ChangeLog:
* config/rs6000/predicates.md (lowpart_subreg_operator): New
define_predicate.
* config/rs6000/rs6000.md (any_rshift): New code_iterator.
(movsf_from_si): Update to generate mtvsrws for P9.
(movsf_from_si2): Rename to...
(movsf_from_si2_<code>): ... this, and use lowpart_subreg_operator.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr108338.c: New test.
---
gcc/config/rs6000/predicates.md | 5 +++
gcc/config/rs6000/rs6000.md | 34 +++++++++++------
gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
3 files changed, 70 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index a16ee30f0c061965da07a5832097eeffa6ccf29c..94b948868881b1a96c5653cbc396b81ebb60c74c 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2101,3 +2101,8 @@ (define_predicate "macho_pic_address"
else
return false;
})
+
+(define_predicate "lowpart_subreg_operator"
+ (and (match_code "subreg")
+ (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
+ == SUBREG_BYTE (op)")))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b0db8ae508d8ee50e34f85b654988ae0919e152f..1a8f3ff362a3973ec6260a0702fa930679cf66d1 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8215,13 +8215,24 @@ (define_insn_and_split "movsf_from_si"
{
rtx op0 = operands[0];
rtx op1 = operands[1];
- rtx op2 = operands[2];
- rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
- /* Move SF value to upper 32-bits for xscvspdpn. */
- emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
- emit_insn (gen_p8_mtvsrd_sf (op0, op2));
- emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ if (TARGET_P9_VECTOR)
+ {
+ rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
+ emit_insn (gen_vsx_splat_v4si (op0_v, op1));
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ }
+ else
+ {
+ rtx op2 = operands[2];
+ rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+ /* Move SF value to upper 32-bits for xscvspdpn. */
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+ emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ }
+
DONE;
}
[(set_attr "length"
@@ -8234,18 +8245,19 @@ (define_insn_and_split "movsf_from_si"
"*, *, p9v, p8v, *, *,
p8v, p8v, p8v, *")])
+(define_code_iterator any_rshift [ashiftrt lshiftrt])
+
;; For extracting high part element from DImode register like:
;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
;; split it before reload with "and mask" to avoid generating shift right
;; 32 bit then shift left 32 bit.
-(define_insn_and_split "movsf_from_si2"
+(define_insn_and_split "movsf_from_si2_<code>"
[(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
(unspec:SF
- [(subreg:SI
- (ashiftrt:DI
+ [(match_operator:SI 3 "lowpart_subreg_operator"
+ [(any_rshift:DI
(match_operand:DI 1 "input_operand" "r")
- (const_int 32))
- 0)]
+ (const_int 32))])]
UNSPEC_SF_FROM_SI))
(clobber (match_scratch:DI 2 "=r"))]
"TARGET_NO_SF_SUBREG"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
new file mode 100644
index 0000000000000000000000000000000000000000..39da7cec535c59ec34d5f6fc4a63b44ef4316976
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
@@ -0,0 +1,42 @@
+// { dg-do run }
+// { dg-options "-O2 -save-temps" }
+
+float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
+{
+ char buff[16];
+ *(long long*)buff = l;
+ float f = *(float*)(buff);
+ return f;
+}
+
+float __attribute__ ((noipa)) sf_from_di_off4 (long long l)
+{
+ char buff[16];
+ *(long long*)buff = l;
+ float f = *(float*)(buff + 4);
+ return f;
+}
+
+/* Under lp64, parameter 'l' is in one DI reg, then bitcast sub DI to SF. */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+
+union di_sf_sf
+{
+ struct {float f1; float f2;};
+ long long l;
+};
+
+int main()
+{
+ union di_sf_sf v;
+ v.f1 = 1.0f;
+ v.f2 = 2.0f;
+ if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
+ __builtin_abort ();
+ return 0;
+}
--
2.39.3
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-07-04 6:42 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-04 6:42 [PATCH V3] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd Jiufu Guo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).