public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] use subreg for movsf_from_si and remove UNSPEC_SF_FROM_SI
@ 2023-02-24  8:30 Jiufu Guo
  0 siblings, 0 replies; only message in thread
From: Jiufu Guo @ 2023-02-24  8:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, dje.gcc, linkw, guojiufu, meissner

Hi,

In patch https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html,
we improved the bitcast from lowpart/highpart of DI to SF by using mtvsrws
or mtvsrd.

While investigating this functionality, we found that we can improve the
related code by using a bitcast subreg from SI to SF, and avoid generating
UNSPEC_SF_FROM_SI.

We can also improve cases like "subreg:SI(reg:SF)=reg:SI", which casts
SI to SF (e.g. pr48335-1.c).

This patch also reduces clobber usage, adding a clobber only for p8, where an
additional register is required.

This patch passes bootstrap and regtest on ppc64 (p7, p8 and p9) and ppc64le (p10, p9).

Is this patch OK for trunk (or maybe stage1)? Thanks for comments and suggestions!


BR,
Jeff (Jiufu)

gcc/ChangeLog:

	* config/rs6000/predicates.md: Rename TARGET_NO_SF_SUBREG to
	BITCAST_SI_SF_IN_REGS, and rename TARGET_ALLOW_SF_SUBREG to
	BITCAST_SI_SF_IN_MEM.
	* config/rs6000/rs6000.cc (valid_sf_si_move): Likewise.
	(is_lfs_stfs_insn): Split to is_stfs_insn and is_lfs_insn.
	(is_stfs_insn): Split from is_lfs_stfs_insn.
	(is_lfs_insn): Split from is_lfs_stfs_insn.
	(prefixed_load_p): Call is_lfs_insn.
	(prefixed_store_p): Call is_stfs_insn.
	* config/rs6000/rs6000.h (TARGET_NO_SF_SUBREG): Rename to ...
	(BITCAST_SI_SF_IN_REGS): ... this.
	(TARGET_ALLOW_SF_SUBREG): Rename to ...
	(BITCAST_SI_SF_IN_MEM): ... this.
	* config/rs6000/rs6000.md (movsf_from_si_p8): New define_insn_and_split.

---
 gcc/config/rs6000/predicates.md | 16 +++---
 gcc/config/rs6000/rs6000.cc     | 36 ++++++++----
 gcc/config/rs6000/rs6000.h      |  4 +-
 gcc/config/rs6000/rs6000.md     | 98 +++++++++++++++++++++------------
 4 files changed, 97 insertions(+), 57 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index e57c9d99c6b..4a7d5893126 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -47,7 +47,7 @@ (define_predicate "sf_subreg_operand"
   rtx inner_reg = SUBREG_REG (op);
   machine_mode inner_mode = GET_MODE (inner_reg);
 
-  if (TARGET_ALLOW_SF_SUBREG || !REG_P (inner_reg))
+  if (BITCAST_SI_SF_IN_MEM || !REG_P (inner_reg))
     return 0;
 
   if ((mode == SFmode && GET_MODE_CLASS (inner_mode) == MODE_INT)
@@ -67,7 +67,7 @@ (define_predicate "altivec_register_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -88,7 +88,7 @@ (define_predicate "vsx_register_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -126,7 +126,7 @@ (define_predicate "vfloat_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -148,7 +148,7 @@ (define_predicate "vint_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -170,7 +170,7 @@ (define_predicate "vlogical_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -346,7 +346,7 @@ (define_predicate "gpc_reg_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -375,7 +375,7 @@ (define_predicate "int_reg_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 16ca3a31757..b8a9f01cbfa 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10565,7 +10565,7 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
 bool
 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
 {
-  if (TARGET_ALLOW_SF_SUBREG)
+  if (BITCAST_SI_SF_IN_MEM)
     return true;
 
   if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
@@ -26425,13 +26425,10 @@ pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
    - stfs:
     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
     - CLOBBER is a V4SF
-   - lfs:
-    - SET is from UNSPEC_SF_FROM_SI to REG:SF
-    - CLOBBER is a DI
  */
 
 static bool
-is_lfs_stfs_insn (rtx_insn *insn)
+is_stfs_insn (rtx_insn *insn)
 {
   rtx pattern = PATTERN (insn);
   if (GET_CODE (pattern) != PARALLEL)
@@ -26466,10 +26463,27 @@ is_lfs_stfs_insn (rtx_insn *insn)
       && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
     return true;
 
-  /* lfs case.  */
-  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
-      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
-      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
+  return false;
+}
+
+
+static bool
+is_lfs_insn (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  if (GET_CODE (set) != SET)
+    return false;
+
+  rtx dest = SET_DEST (set);
+  rtx src = SET_SRC (set);
+
+  if (!SUBREG_P (src))
+    return false;
+
+   /* lfs case.  */
+  if (GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
+      && GET_MODE (SUBREG_REG (src)) == SImode
+      && GET_CODE (SUBREG_REG (src)) == MEM)
     return true;
 
   return false;
@@ -26585,7 +26599,7 @@ prefixed_load_p (rtx_insn *insn)
   else
     non_prefixed = reg_to_non_prefixed (reg, mem_mode);
 
-  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+  if (non_prefixed == NON_PREFIXED_X && is_lfs_insn (insn))
     return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
   else
     return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
@@ -26623,7 +26637,7 @@ prefixed_store_p (rtx_insn *insn)
   /* Need to make sure we aren't looking at a stfs which doesn't look
      like the other things reg_to_non_prefixed/address_is_prefixed
      looks for.  */
-  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+  if (non_prefixed == NON_PREFIXED_X && is_stfs_insn (insn))
     return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
   else
     return address_is_prefixed (addr, mem_mode, non_prefixed);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..03b20fb8d66 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -480,8 +480,8 @@ extern int rs6000_vector_align[];
 				 && TARGET_POWERPC64)
 
 /* Whether we should avoid (SUBREG:SI (REG:SF) and (SUBREG:SF (REG:SI).  */
-#define TARGET_NO_SF_SUBREG	TARGET_DIRECT_MOVE_64BIT
-#define TARGET_ALLOW_SF_SUBREG	(!TARGET_DIRECT_MOVE_64BIT)
+#define BITCAST_SI_SF_IN_REGS	TARGET_DIRECT_MOVE_64BIT
+#define BITCAST_SI_SF_IN_MEM	(!TARGET_DIRECT_MOVE_64BIT)
 
 /* This wants to be set for p8 and newer.  On p7, overlapping unaligned
    loads are slow. */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 74b1c9cee6a..90ee0d566ab 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -145,7 +145,6 @@ (define_c_enum "unspec"
    UNSPEC_SQRT_ROUND_TO_ODD
    UNSPEC_TRUNC_ROUND_TO_ODD
    UNSPEC_SIGNBIT
-   UNSPEC_SF_FROM_SI
    UNSPEC_SI_FROM_SF
    UNSPEC_PLTSEQ
    UNSPEC_PLT16_HA
@@ -7655,7 +7654,7 @@ (define_insn_and_split "movsi_from_sf"
 		"=X,         X,           X,           X,        X,
 		 X,          X,           X,           wa,       X,
 		 X"))]
-  "TARGET_NO_SF_SUBREG
+  "BITCAST_SI_SF_IN_REGS
    && (register_operand (operands[0], SImode)
        || register_operand (operands[1], SFmode))"
   "@
@@ -7761,7 +7760,7 @@ (define_insn "*movsi_from_df"
 	(unspec:SI [(float_truncate:SF
 		     (match_operand:DF 1 "gpc_reg_operand" "wa"))]
 		    UNSPEC_SI_FROM_SF))]
-  "TARGET_NO_SF_SUBREG"
+  "BITCAST_SI_SF_IN_REGS"
   "xscvdpsp %x0,%x1"
   [(set_attr "type" "fp")])
 
@@ -8053,7 +8052,7 @@ (define_insn "movsf_hardfloat"
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
-   && (TARGET_ALLOW_SF_SUBREG
+   && (BITCAST_SI_SF_IN_MEM
        || valid_sf_si_move (operands[0], operands[1], SFmode))"
   "@
    lwz%U1%X1 %0,%1
@@ -8171,14 +8170,10 @@ (define_insn_and_split "movsf_from_si"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	    "=!r,       f,         v,         wa,        m,         Z,
 	     Z,         wa,        ?r,        !r")
-	(unspec:SF [(match_operand:SI 1 "input_operand" 
+	(subreg:SF (match_operand:SI 1 "input_operand"
 	    "m,         m,         wY,        Z,         r,         f,
-	     wa,        r,         wa,        r")]
-		   UNSPEC_SF_FROM_SI))
-   (clobber (match_scratch:DI 2
-	    "=X,        X,         X,         X,         X,         X,
-             X,         r,         X,         X"))]
-  "TARGET_NO_SF_SUBREG
+	     wa,        r,         wa,        r") 0))]
+  "BITCAST_SI_SF_IN_REGS
    && (register_operand (operands[0], SFmode)
        || register_operand (operands[1], SImode))"
   "@
@@ -8192,31 +8187,25 @@ (define_insn_and_split "movsf_from_si"
    #
    mfvsrwz %0,%x1
    mr %0,%1"
-
-  "&& reload_completed
-   && vsx_reg_sfsubreg_ok (operands[0], SFmode)
-   && int_reg_operand_not_pseudo (operands[1], SImode)"
+  "&& ((!reload_completed && !TARGET_P9_VECTOR
+        && gpc_reg_operand (operands[0], SFmode)
+        && gpc_reg_operand (operands[1], SImode))
+       || (reload_completed && TARGET_P9_VECTOR
+	   && vsx_reg_sfsubreg_ok (operands[0], SFmode)
+	   && int_reg_operand_not_pseudo (operands[1], SImode)))"
   [(const_int 0)]
 {
-  rtx op0 = operands[0];
-  rtx op1 = operands[1];
-
-  if (TARGET_P9_VECTOR)
+  if (reload_completed)
     {
+      rtx op0 = operands[0];
+      rtx op1 = operands[1];
+
       rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
       emit_insn (gen_vsx_splat_v4si (op0_v, op1));
       emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
     }
   else
-    {
-      rtx op2 = operands[2];
-      rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
-
-      /* Move SF value to upper 32-bits for xscvspdpn.  */
-      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
-      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
-      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
-    }
+    emit_insn (gen_movsf_from_si_p8 (operands[0], operands[1]));
 
   DONE;
 }
@@ -8230,6 +8219,46 @@ (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+(define_insn_and_split "movsf_from_si_p8"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	(subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))
+   (clobber (match_scratch:DI 2 "=r"))]
+  "BITCAST_SI_SF_IN_REGS"
+  "#"
+  "&& reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)
+   && int_reg_operand_not_pseudo (operands[1], SImode)"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+  /* Move SF value to upper 32-bits for xscvspdpn.  */
+  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
+
+(define_split
+  [(set (subreg:SI (match_operand:SF 0 "gpc_reg_operand") 0)
+	(match_operand:SI 1 "gpc_reg_operand"))]
+  "BITCAST_SI_SF_IN_REGS"
+  [(const_int 0)]
+{
+  if (TARGET_P9_VECTOR)
+    emit_insn (gen_movsf_from_si (operands[0], operands[1]));
+  else
+    emit_insn (gen_movsf_from_si_p8 (operands[0], operands[1]));
+  DONE;
+})
+
 (define_code_iterator any_rshift [ashiftrt lshiftrt])
 
 ;; For extracting high part element from DImode register like:
@@ -8237,15 +8266,12 @@ (define_code_iterator any_rshift [ashiftrt lshiftrt])
 ;; split it before reload with "and mask" to avoid generating shift right
 ;; 32 bit then shift left 32 bit.
 (define_insn_and_split "movsf_from_si2_<code>"
-  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
-	    (unspec:SF
-	     [(match_operator:SI 3 "lowpart_subreg_operator"
-	       [(any_rshift:DI
-		(match_operand:DI 1 "input_operand" "r")
-		(const_int 32))])]
-	     UNSPEC_SF_FROM_SI))
+  [(set (subreg:SI (match_operand:SF 0 "gpc_reg_operand" "=wa") 0)
+	(match_operator:SI 3 "lowpart_subreg_operator"
+	  [(any_rshift:DI (match_operand:DI 1 "input_operand" "r")
+			  (const_int 32))]))
   (clobber (match_scratch:DI 2 "=r"))]
-  "TARGET_NO_SF_SUBREG"
+  "BITCAST_SI_SF_IN_REGS"
   "#"
   "&& 1"
   [(const_int 0)]
-- 
2.31.1


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-02-24  8:30 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-24  8:30 [PATCH] use subreg for movsf_from_si and remove UNSPEC_SF_FROM_SI Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).