[gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register.
@ 2022-03-12  6:36 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-03-12  6:36 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d4ff248ef1e0788ca0e9ccfceb0d094ac584e2c8

commit d4ff248ef1e0788ca0e9ccfceb0d094ac584e2c8
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Sat Mar 12 01:35:42 2022 -0500

    Optimize extendditi2 GPR to VSX register.
    
    2022-03-12   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/vsx.md (extendditi2): Optimize extendditi2 GPR to
            VSX register.

Diff:
---
 gcc/config/rs6000/vsx.md | 82 +++++++++++++++++++++++++-----------------------
 1 file changed, 43 insertions(+), 39 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 25ef1702f49..c9d94032580 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5035,11 +5035,12 @@
 ;; We also need the GPR code for power9 so that we can optimize to use the
 ;; multiply-add instructions.
 (define_insn_and_split "extendditi2"
-  [(set (match_operand:TI 0 "register_operand"              "=r,r,wa,v,v")
-	(sign_extend:TI (match_operand:DI 1 "input_operand"  "r,m,b, v,Z")))
-   (clobber (match_scratch:DI 2                            "=&X,X,r, X,X"))
+  [(set (match_operand:TI 0 "register_operand" "=r, wa, v, r, v")
+	(sign_extend:TI
+	 (match_operand:DI 1 "input_operand"    "r, b,  v, m, Z")))
+   (clobber (match_scratch:DI 2               "=&X, r,  X, X, X"))
    (clobber (reg:DI CA_REGNO))]
-  "TARGET_POWERPC64 && TARGET_MADDLD"
+  "TARGET_POWERPC64 && TARGET_P9_VECTOR"
   "#"
   "&& reload_completed"
   [(pc)]
@@ -5047,27 +5048,10 @@
   rtx dest = operands[0];
   rtx src = operands[1];
   int dest_regno = reg_or_subregno (dest);
-  int src_regno = ((REG_P (src) || SUBREG_P (src))
-		   ? reg_or_subregno (src)
-		   : -1);
-
-  /* If we are converting from a GPR to a vector register, do the sign
-     extension in a scratch GPR register, and then do the mtvsrdd.  */
-  if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
-    {
-      rtx tmp = operands[2];
-      rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
-      emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
-      if (BYTES_BIG_ENDIAN)
-	emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
-      else
-	emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
-      DONE;
-    }
 
   /* Handle conversion to GPR registers.  Load up the low part and then do
      a sign extension to the upper part.  */
-  else if (INT_REGNO_P (dest_regno))
+  if (INT_REGNO_P (dest_regno))
     {
       rtx dest_hi = gen_highpart (DImode, dest);
       rtx dest_lo = gen_lowpart (DImode, dest);
@@ -5075,36 +5059,56 @@
       emit_move_insn (dest_lo, src);
       /* In case src is a MEM, we have to use the destination, which is a
          register, instead of re-using the source.  */
-      rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
+      rtx src2 = int_reg_operand (src, DImode) ? src : dest_lo;
       emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
       DONE;
     }
 
-  /* For conversion to an Altivec register, generate either a splat operation
-     or a load rightmost double word instruction.  Both instructions gets the
-     DImode value into the lower 64 bits, and then do the vextsd2q
-     instruction.  */
-
-  else if (ALTIVEC_REGNO_P (dest_regno))
+  /* For memory, use lxvrdx to load the value into the bottom of the
+     register and do the sign extension.  */
+  else if (ALTIVEC_REGNO_P (dest_regno) && MEM_P (src))
     {
-      if (MEM_P (src))
-	emit_insn (gen_vsx_lxvrdx (dest, src));
-      else
+      emit_insn (gen_vsx_lxvrdx (dest, src));
+      emit_insn (gen_extendditi2_vector (dest, dest));
+      DONE;
+    }
+
+  else
+    {  
+      int src_regno = reg_or_subregno (src);
+
+      /* If we are converting from a GPR to a vector register, do the
+         sign extension in a scratch GPR register, and then do the
+         mtvsrdd.  */
+      if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
+	{
+	  rtx tmp = operands[2];
+	  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
+	  emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
+	  if (BYTES_BIG_ENDIAN)
+	    emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
+	  else
+	    emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
+	  DONE;
+	}
+
+      /* For conversion to an Altivec register, generate a splat operation
+         to get the value in the bottom 64-bits.  */
+      else if (ALTIVEC_REGNO_P (dest_regno) && ALTIVEC_REGNO_P (src_regno))
 	{
 	  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
 	  emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
+	  emit_insn (gen_extendditi2_vector (dest, dest));
+	  DONE;
 	}
 
-      emit_insn (gen_extendditi2_vector (dest, dest));
-      DONE;
+      else
+	gcc_unreachable ();
     }
-
-  else
-    gcc_unreachable ();
 }
   [(set_attr "length" "8")
-   (set_attr "type" "shift,load,mtvsr,vecperm,load")
-   (set_attr "isa" "p9,p9,p9,p10,p10")])
+   (set_attr "type" "shift,mtvsr,vecperm,load,vecload")
+   (set_attr "isa"  "*,    *,    p10,    *,   p10")])
 
 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
 (define_insn "extendditi2_vector"


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register.
@ 2022-03-15 21:15 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-03-15 21:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fad5716a6579578052024626445b6a645a1c8a76

commit fad5716a6579578052024626445b6a645a1c8a76
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 15 17:14:42 2022 -0400

    Optimize extendditi2 GPR to VSX register.
    
    2022-03-15   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/vsx.md (extendditi2): Optimize extendditi2 GPR to
            VSX register.
    
    gcc/testsuite/
            * gcc.target/powerpc/pr104698-2.c: Update instruction counts.

Diff:
---
 gcc/config/rs6000/vsx.md                      | 84 ++++++++++++++-------------
 gcc/testsuite/gcc.target/powerpc/pr104698-2.c | 34 +++++++----
 2 files changed, 68 insertions(+), 50 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 25ef1702f49..2fab6843c5f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5035,11 +5035,12 @@
 ;; We also need the GPR code for power9 so that we can optimize to use the
 ;; multiply-add instructions.
 (define_insn_and_split "extendditi2"
-  [(set (match_operand:TI 0 "register_operand"              "=r,r,wa,v,v")
-	(sign_extend:TI (match_operand:DI 1 "input_operand"  "r,m,b, v,Z")))
-   (clobber (match_scratch:DI 2                            "=&X,X,r, X,X"))
+  [(set (match_operand:TI 0 "register_operand" "=r, wa, v, r, v")
+	(sign_extend:TI
+	 (match_operand:DI 1 "input_operand"    "r, b,  v, m, Z")))
+   (clobber (match_scratch:DI 2                "=X, &b, X, X, X"))
    (clobber (reg:DI CA_REGNO))]
-  "TARGET_POWERPC64 && TARGET_MADDLD"
+  "TARGET_POWERPC64 && TARGET_P9_VECTOR"
   "#"
   "&& reload_completed"
   [(pc)]
@@ -5047,27 +5048,10 @@
   rtx dest = operands[0];
   rtx src = operands[1];
   int dest_regno = reg_or_subregno (dest);
-  int src_regno = ((REG_P (src) || SUBREG_P (src))
-		   ? reg_or_subregno (src)
-		   : -1);
-
-  /* If we are converting from a GPR to a vector register, do the sign
-     extension in a scratch GPR register, and then do the mtvsrdd.  */
-  if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
-    {
-      rtx tmp = operands[2];
-      rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
-      emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
-      if (BYTES_BIG_ENDIAN)
-	emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
-      else
-	emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
-      DONE;
-    }
 
   /* Handle conversion to GPR registers.  Load up the low part and then do
      a sign extension to the upper part.  */
-  else if (INT_REGNO_P (dest_regno))
+  if (INT_REGNO_P (dest_regno))
     {
       rtx dest_hi = gen_highpart (DImode, dest);
       rtx dest_lo = gen_lowpart (DImode, dest);
@@ -5075,36 +5059,58 @@
       emit_move_insn (dest_lo, src);
       /* In case src is a MEM, we have to use the destination, which is a
          register, instead of re-using the source.  */
-      rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
+      rtx src2 = int_reg_operand (src, DImode) ? src : dest_lo;
       emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
       DONE;
     }
 
-  /* For conversion to an Altivec register, generate either a splat operation
-     or a load rightmost double word instruction.  Both instructions gets the
-     DImode value into the lower 64 bits, and then do the vextsd2q
-     instruction.  */
-
-  else if (ALTIVEC_REGNO_P (dest_regno))
+  /* For memory, use lxvrdx to load the value into the bottom of the
+     register and do the sign extension.  */
+  else if (ALTIVEC_REGNO_P (dest_regno) && MEM_P (src))
     {
-      if (MEM_P (src))
-	emit_insn (gen_vsx_lxvrdx (dest, src));
-      else
+      emit_insn (gen_vsx_lxvrdx (dest, src));
+      emit_insn (gen_extendditi2_vector (dest, dest));
+      DONE;
+    }
+
+  else
+    {  
+      int src_regno = reg_or_subregno (src);
+
+      /* If we are converting from a GPR to a vector register, do the
+         sign extension in a scratch GPR register, and then do the
+         mtvsrdd.  Note, the vsx_concat_v2di function will reverse
+         the arguments on little endian systems, so we need to build
+         the insn so the appropriate registers are generated.  */
+      if (VSX_REGNO_P (dest_regno) && INT_REGNO_P (src_regno))
+	{
+	  rtx tmp = operands[2];
+	  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
+	  emit_insn (gen_ashrdi3 (tmp, src, GEN_INT (63)));
+	  if (BYTES_BIG_ENDIAN)
+	    emit_insn (gen_vsx_concat_v2di (dest_v2di, tmp, src));
+	  else
+	    emit_insn (gen_vsx_concat_v2di (dest_v2di, src, tmp));
+	  DONE;
+	}
+
+      /* For conversion to an Altivec register, generate a splat operation
+         to get the value in the bottom 64-bits.  */
+      else if (ALTIVEC_REGNO_P (dest_regno) && ALTIVEC_REGNO_P (src_regno))
 	{
 	  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
 	  emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
+	  emit_insn (gen_extendditi2_vector (dest, dest));
+	  DONE;
 	}
 
-      emit_insn (gen_extendditi2_vector (dest, dest));
-      DONE;
+      else
+	gcc_unreachable ();
     }
-
-  else
-    gcc_unreachable ();
 }
   [(set_attr "length" "8")
-   (set_attr "type" "shift,load,mtvsr,vecperm,load")
-   (set_attr "isa" "p9,p9,p9,p10,p10")])
+   (set_attr "type" "shift,mtvsr,vecperm,load,vecload")
+   (set_attr "isa"  "*,    *,    p10,    *,   p10")])
 
 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
 (define_insn "extendditi2_vector"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr104698-2.c b/gcc/testsuite/gcc.target/powerpc/pr104698-2.c
index 6966fce2ba9..10132d10e8e 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr104698-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr104698-2.c
@@ -3,31 +3,43 @@
 /* { dg-options "-mdejagnu-cpu=power10 -O2" } */
 
 /* PR target/104694 involved GCC generating vextsd2q to convent long long to
-   __int128_t when the long long value was in the GPR register.  This test
-   verifies that if the result is in the Altivec registers, we still want to
-   generate vextsd2q.  We use __int128_t to indicate that we want the result of
-   the conversion to be in an Altivec register. */
+   __int128_t when the long long value was in the GPR register.
+
+   This test verifies that when converting a GPR to a vector register, we
+   generate the sign extension in the GPR register, and then do a mtvsrdd
+   to get it into the vector register.  */
 
 void do_div_1 (__int128_t *p, __int128_t *q, long long r)
 {
-  *p = *q / r;		/* mtvsrdd, vextsd2q, vdivsq.  */
+  *p = *q / r;		/* sradi, mtvsrdd, vdivsq.  */
 }
 
-/* Test the optimization in vsx.md to use lxvrdx instead of ld and mtvsrdd if
-   the value is coming from memory.  */
+/* Test the optimization in vsx.md to use lxvrdx instead of ld and then do
+   vextsd2q to sign extend it.  */
 
 void do_div_2 (__int128_t *p, __int128_t *q, long long *r)
 {
   *p = *q / r[2];	/* lxvrdx, vextsd2q, vdivsq.  */
 }
 
+/* If the 64-bit integer is in a vector register (after conversion from
+   double), don't move it back to a GPR register to do the sign extension.
+   Instead use vextsd2q.  */
+
+void do_div_3 (__int128_t *p, __int128_t *q, double r)
+{
+  *p = *q / (long long)r;	/* xscvdpsxds, xxpermdi, vextsd2q, vdivsq.  */
+}
+
+
 /* { dg-final { scan-assembler-not   {\mld\M}         } } */
 /* { dg-final { scan-assembler-not   {\mmfvsrd\M}     } } */
 /* { dg-final { scan-assembler-not   {\mmfvsrld\M}    } } */
-/* { dg-final { scan-assembler-not   {\msradi\M}      } } */
-/* { dg-final { scan-assembler-times {\mlxv\M}      2 } } */
+/* { dg-final { scan-assembler-times {\msradi\M}    1 } } */
+/* { dg-final { scan-assembler-times {\mlxv\M}      3 } } */
 /* { dg-final { scan-assembler-times {\mlxvrdx\M}   1 } } */
 /* { dg-final { scan-assembler-times {\mmtvsrdd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mstxv\M}     2 } } */
-/* { dg-final { scan-assembler-times {\mvdivsq\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M}     3 } } */
+/* { dg-final { scan-assembler-times {\mvdivsq\M}   3 } } */
 /* { dg-final { scan-assembler-times {\mvextsd2q\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-03-15 21:15 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-12  6:36 [gcc(refs/users/meissner/heads/work081)] Optimize extendditi2 GPR to VSX register Michael Meissner
2022-03-15 21:15 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).