public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd
@ 2023-01-10 13:45 Jiufu Guo
  2023-01-10 14:22 ` Segher Boessenkool
  0 siblings, 1 reply; 4+ messages in thread
From: Jiufu Guo @ 2023-01-10 13:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, guojiufu

Hi,

As mentioned in PR108338, on p9, we could use mtvsrws to implement
the conversion from SI#0 to SF (or lowpart DI to SF).  And we find
we can also enhance the conversion from highpart DI to SF (as the
case in this patch).

This patch enhances these conversions accordingly.

Bootstrap and regtests pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu)

	PR target/108338

gcc/ChangeLog:

	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
	(movsf_from_si2): Rename to...
	(movsf_from_si2_<any_rshift:code>): ... this.
	(p9_mtvsrws): New define_insn.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr108338.c: New test.
---
 gcc/config/rs6000/rs6000.md                 | 32 +++++++++++++---
 gcc/testsuite/gcc.target/powerpc/pr108338.c | 41 +++++++++++++++++++++
 2 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 3cae64a264a..9025a912141 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -158,6 +158,7 @@ (define_c_enum "unspec"
    UNSPEC_HASHCHK
    UNSPEC_XXSPLTIDP_CONST
    UNSPEC_XXSPLTIW_CONST
+   UNSPEC_P9V_MTVSRWS
   ])
 
 ;;
@@ -8203,10 +8204,19 @@ (define_insn_and_split "movsf_from_si"
   rtx op2 = operands[2];
   rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
 
-  /* Move SF value to upper 32-bits for xscvspdpn.  */
-  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
-  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
-  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  if (TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
+    {
+       emit_insn (gen_p9_mtvsrws (op0, op1_di));
+       emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+    }
+  else
+    {
+      /* Move SF value to upper 32-bits for xscvspdpn.  */
+      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+    }
+
   DONE;
 }
   [(set_attr "length"
@@ -8219,15 +8229,17 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+(define_code_iterator any_rshift [ashiftrt lshiftrt])
+
 ;; For extracting high part element from DImode register like:
 ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
 ;; split it before reload with "and mask" to avoid generating shift right
 ;; 32 bit then shift left 32 bit.
-(define_insn_and_split "movsf_from_si2"
+(define_insn_and_split "movsf_from_si2_<any_rshift:code>"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
 	    (unspec:SF
 	     [(subreg:SI
-	       (ashiftrt:DI
+	       (any_rshift:DI
 		(match_operand:DI 1 "input_operand" "r")
 		(const_int 32))
 	       0)]
@@ -9475,6 +9487,14 @@ (define_insn "p8_mtvsrd_sf"
   "mtvsrd %x0,%1"
   [(set_attr "type" "mtvsr")])
 
+(define_insn "p9_mtvsrws"
+  [(set (match_operand:SF 0 "register_operand" "=wa")
+	(unspec:SF [(match_operand:DI 1 "register_operand" "r")]
+		   UNSPEC_P9V_MTVSRWS))]
+  "TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "mtvsrws %x0,%1"
+  [(set_attr "type" "mtvsr")])
+
 (define_insn_and_split "reload_vsx_from_gprsf"
   [(set (match_operand:SF 0 "register_operand" "=wa")
 	(unspec:SF [(match_operand:SF 1 "register_operand" "r")]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
new file mode 100644
index 00000000000..2afac79ea4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
@@ -0,0 +1,41 @@
+// { dg-do run }
+// { dg-options "-O2 -save-temps" }
+
+float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff);
+  return f;    
+}
+
+float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff + 4);
+  return f; 
+}
+
+/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { has_arch_ppc64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { has_arch_ppc64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { has_arch_ppc64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { has_arch_pwr8 && has_arch_ppc64 } } } } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { has_arch_pwr8 && { has_arch_ppc64 && { ! has_arch_pwr9 } } } } } } */
+
+union di_sf_sf
+{
+  struct {float f1; float f2;};
+  long long l;
+};
+
+int main()
+{
+  union di_sf_sf v;
+  v.f1 = 1.0f;
+  v.f2 = 2.0f;
+  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
+    __builtin_abort ();
+  return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 4+ messages in thread
* [PATCH] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd
@ 2023-02-17  1:34 Jiufu Guo
  0 siblings, 0 replies; 4+ messages in thread
From: Jiufu Guo @ 2023-02-17  1:34 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, guojiufu

Hi,

Compare with previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611823.html
This patch does not define new insn for mtvsrws, but use exit one.

As mentioned in PR108338, on p9, we could use mtvsrws to implement
the bitcast from SI#0 to SF (or lowpart DI to SF).

For code:
  *(long long*)buff = di;
  float f = *(float*)(buff);

We generate "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" instead of
"mtvsrws 1,3 ; xscvspdpn 1,1".

This patch update this, and also enhance the bitcast from highpart
DI to SF.

Bootstrap and regtests pass on ppc64{,le}.
Is this ok for trunk?

BR,
Jeff (Jiufu)


	PR target/108338

gcc/ChangeLog:

	* config/rs6000/predicates.md (lowpart_subreg_operator): New
	define_predicate.
	* config/rs6000/rs6000.md (any_rshift): New code_iterator.
	(movsf_from_si): Update to generate mtvsrws.
	(movsf_from_si2): Rename to...
	(movsf_from_si2_<code>): ... this.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/pr108338.c: New test.

---
 gcc/config/rs6000/predicates.md             |  5 +++
 gcc/config/rs6000/rs6000.md                 | 34 +++++++++++------
 gcc/testsuite/gcc.target/powerpc/pr108338.c | 42 +++++++++++++++++++++
 3 files changed, 70 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108338.c

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..e57c9d99c6b 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -2064,3 +2064,8 @@ (define_predicate "macho_pic_address"
   else
     return false;
 })
+
+(define_predicate "lowpart_subreg_operator"
+  (and (match_code "subreg")
+       (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
+		    == SUBREG_BYTE (op)")))
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4a7812fa592..74b1c9cee6a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8200,13 +8200,24 @@ (define_insn_and_split "movsf_from_si"
 {
   rtx op0 = operands[0];
   rtx op1 = operands[1];
-  rtx op2 = operands[2];
-  rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
 
-  /* Move SF value to upper 32-bits for xscvspdpn.  */
-  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
-  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
-  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+  if (TARGET_P9_VECTOR)
+    {
+      rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
+      emit_insn (gen_vsx_splat_v4si (op0_v, op1));
+      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+    }
+  else
+    {
+      rtx op2 = operands[2];
+      rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+      /* Move SF value to upper 32-bits for xscvspdpn.  */
+      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+    }
+
   DONE;
 }
   [(set_attr "length"
@@ -8219,18 +8230,19 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+(define_code_iterator any_rshift [ashiftrt lshiftrt])
+
 ;; For extracting high part element from DImode register like:
 ;;     {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
 ;; split it before reload with "and mask" to avoid generating shift right
 ;; 32 bit then shift left 32 bit.
-(define_insn_and_split "movsf_from_si2"
+(define_insn_and_split "movsf_from_si2_<code>"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
 	    (unspec:SF
-	     [(subreg:SI
-	       (ashiftrt:DI
+	     [(match_operator:SI 3 "lowpart_subreg_operator"
+	       [(any_rshift:DI
 		(match_operand:DI 1 "input_operand" "r")
-		(const_int 32))
-	       0)]
+		(const_int 32))])]
 	     UNSPEC_SF_FROM_SI))
   (clobber (match_scratch:DI 2 "=r"))]
   "TARGET_NO_SF_SUBREG"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
new file mode 100644
index 00000000000..2438dc13f41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
@@ -0,0 +1,42 @@
+// { dg-do run }
+// { dg-options "-O2 -save-temps" }
+
+float __attribute__ ((noipa)) sf_from_di_off0 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff);
+  return f;    
+}
+
+float  __attribute__ ((noipa)) sf_from_di_off4 (long long l)
+{
+  char buff[16];
+  *(long long*)buff = l;
+  float f = *(float*)(buff + 4);
+  return f; 
+}
+
+/* Under lp64, 'l' is in one DI reg, then check sub DI to SF. */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
+
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+
+union di_sf_sf
+{
+  struct {float f1; float f2;};
+  long long l;
+};
+
+int main()
+{
+  union di_sf_sf v;
+  v.f1 = 1.0f;
+  v.f2 = 2.0f;
+  if (sf_from_di_off0 (v.l) != 1.0f || sf_from_di_off4 (v.l) != 2.0f )
+    __builtin_abort ();
+  return 0;
+}
-- 
2.31.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-02-17  1:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-10 13:45 [PATCH] rs6000: Enhance lowpart/highpart DI->SF by mtvsrws/mtvsrd Jiufu Guo
2023-01-10 14:22 ` Segher Boessenkool
2023-01-11  3:48   ` Jiufu Guo
2023-02-17  1:34 Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).