public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work087)] Add options to control load/store vector pair generation.
@ 2022-04-27 15:45 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-04-27 15:45 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2efc7b6bee0b21960d902fc13f989d6ee320f867

commit 2efc7b6bee0b21960d902fc13f989d6ee320f867
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Apr 27 11:38:26 2022 -0400

    Add options to control load/store vector pair generation.
    
    This patch adds options to allow disabling generating either the load
    vector pair instructions (lxvp, lxvpx, plxvp) or the store vector pair
    instructions (stxvp, stxvpx, pstxvp).
    
    2022-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/mma.md (movoo): Add support to suppress load/store
            vector pair instructions.
            (movxo): Likewise.
            * config/rs6000/rs6000.cc (rs6000_setup_reg_addr_masks): Disable
            indexed loads for vector pair if either lxvp/stxvp are disabled.
            (rs6000_split_multireg_move): Do not split vector quad to vector
            pair if lxvp/stxvp is disabled.
            * config/rs6000/rs6000.md (isa attribute): Add lxvp and stxvp
            attributes.
            (enabled attribute): Add lxvp/stxvp support.
            * config/rs6000/rs6000.opt (-mload-vector-pair): New option.
            (-mstore-vector-pair): New option.
    
    gcc/testsuite/
            * gcc.target/powerpc/p10-load-vector-pair-1.c: New test.
            * gcc.target/powerpc/p10-load-vector-pair-2.c: New test.
            * gcc.target/powerpc/p10-store-vector-pair-1.c: New test.
            * gcc.target/powerpc/p10-store-vector-pair-2.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md                           | 39 ++++++----
 gcc/config/rs6000/rs6000.cc                        | 10 ++-
 gcc/config/rs6000/rs6000.md                        | 13 +++-
 gcc/config/rs6000/rs6000.opt                       |  8 +++
 .../gcc.target/powerpc/p10-load-vector-pair-1.c    | 82 ++++++++++++++++++++++
 .../gcc.target/powerpc/p10-load-vector-pair-2.c    | 81 +++++++++++++++++++++
 .../gcc.target/powerpc/p10-store-vector-pair-1.c   | 82 ++++++++++++++++++++++
 .../gcc.target/powerpc/p10-store-vector-pair-2.c   | 81 +++++++++++++++++++++
 8 files changed, 378 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 907c9d6d516..a9f3b736fdd 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -274,26 +274,35 @@
   DONE;
 })
 
+;; With the -mno-load-vector-pair and -mno-store-vector-pair options, we might
+;; have to split lxvp into 2 lxv instructions, and/or stxvp into 2 stxv
+;; instructions.
 (define_insn_and_split "*movoo"
-  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,wa,m, o, wa")
+	(match_operand:OO 1 "input_operand"         "m, o, wa,wa,wa"))]
   "TARGET_MMA
    && (gpc_reg_operand (operands[0], OOmode)
        || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
+   #
    stxvp%X0 %x1,%0
+   #
    #"
   "&& reload_completed
-   && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
+   && ((MEM_P (operands[1]) && !TARGET_LOAD_VECTOR_PAIR)
+       || (MEM_P (operands[0]) && !TARGET_STORE_VECTOR_PAIR)
+       || (!MEM_P (operands[0]) && !MEM_P (operands[1])))"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
   DONE;
 }
-  [(set_attr "type" "vecload,vecstore,veclogical")
+  [(set_attr "type" "vecload,vecload,vecstore,vecstore,veclogical")
    (set_attr "size" "256")
-   (set_attr "length" "*,*,8")])
+   (set_attr "length" "*,*,*,*,8")
+   (set_attr "max_prefixed_insns" "*,2,*,2,*")
+   (set_attr "isa" "lxvp,*,stxvp,*,*")])
 
 \f
 ;; Vector quad support.  XOmode can only live in FPRs.
@@ -306,25 +315,27 @@
   DONE;
 })
 
+;; With the -mno-load-vector-pair and -mno-store-vector-pair options, we might
+;; have to split the loads into 4 lxv instructions instead of 2 lxvp
+;; instructions, and/or the stores into 4 stxv instructions instead of 2 stxvp
+;; instructions.
 (define_insn_and_split "*movxo"
-  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d")
-	(match_operand:XO 1 "input_operand" "m,d,d"))]
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,d,m,o,d")
+	(match_operand:XO 1 "input_operand"         "m,o,d,d,d"))]
   "TARGET_MMA
    && (gpc_reg_operand (operands[0], XOmode)
        || gpc_reg_operand (operands[1], XOmode))"
-  "@
-   #
-   #
-   #"
+  "#"
   "&& reload_completed"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
   DONE;
 }
-  [(set_attr "type" "vecload,vecstore,veclogical")
-   (set_attr "length" "*,*,16")
-   (set_attr "max_prefixed_insns" "2,2,*")])
+  [(set_attr "type" "vecload,vecload,vecstore,vecstore,veclogical")
+   (set_attr "length" "*,*,*,*,16")
+   (set_attr "max_prefixed_insns" "2,4,2,4,*")
+   (set_attr "isa" "lxvp,*,stxvp,*,*")])
 
 (define_expand "vsx_assemble_pair"
   [(match_operand:OO 0 "vsx_register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f07e57cafb2..3a689e0fdfa 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2716,7 +2716,8 @@ rs6000_setup_reg_addr_masks (void)
 	  /* Vector pairs can do both indexed and offset loads if the
 	     instructions are enabled, otherwise they can only do offset loads
 	     since it will be broken into two vector moves.  Vector quads can
-	     only do offset loads.  */
+	     only do offset loads.  If either stxvp or lxvp is disabled, we
+	     can't do indexed arithmetic.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
 		   && (m2 == OOmode || m2 == XOmode))
 	    {
@@ -2724,7 +2725,9 @@ rs6000_setup_reg_addr_masks (void)
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == OOmode)
+		  if (m2 == OOmode
+		      && TARGET_LOAD_VECTOR_PAIR
+		      && TARGET_STORE_VECTOR_PAIR)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -26968,7 +26971,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
   if (mode == XOmode && TARGET_MMA
-      && (MEM_P (dst) || MEM_P (src)))
+      && ((MEM_P (dst) && TARGET_STORE_VECTOR_PAIR)
+	  || (MEM_P (src) && TARGET_LOAD_VECTOR_PAIR)))
     {
       reg_mode = OOmode;
       nregs /= 2;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 64049a6e521..90a11366266 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -354,7 +354,7 @@
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,lxvp,stxvp"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -402,6 +402,17 @@
      (and (eq_attr "isa" "p10")
 	  (match_test "TARGET_POWER10"))
      (const_int 1)
+
+     (and (eq_attr "isa" "lxvp")
+	  (match_test "TARGET_POWER10")
+	  (match_test "TARGET_LOAD_VECTOR_PAIR"))
+     (const_int 1)
+
+     (and (eq_attr "isa" "stxvp")
+	  (match_test "TARGET_POWER10")
+	  (match_test "TARGET_STORE_VECTOR_PAIR"))
+     (const_int 1)
+
     ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6c4caf4c9ee..766f8f1591c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -624,6 +624,14 @@ mieee128-constant
 Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
 Generate (do not generate) code that uses the LXVKQ instruction.
 
+; Generate (do not generate) code that uses the load vector pair instructions.
+mload-vector-pair
+Target Undocumented Var(TARGET_LOAD_VECTOR_PAIR) Init(1) Save
+
+; Generate (do not generate) code that uses the store vector pair instructions.
+mstore-vector-pair
+Target Undocumented Var(TARGET_STORE_VECTOR_PAIR) Init(1) Save
+
 -param=rs6000-density-pct-threshold=
 Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
 When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c
new file mode 100644
index 00000000000..d1f5790d238
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c
@@ -0,0 +1,82 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mload-vector-pair -mmma" } */
+
+/* Test if we generate load vector pair instructions if the user uses the
+   -mload-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* plxvp, pstxvp.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* lxvpx, stxvpx.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* plxvp, stxvp.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* lxvp, pstxvp.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxvp, 2x pstxvp.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 2x plxvp, 2x stxvp.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 2x lxvp, 2x stxvp.  */
+}
+
+/* { dg-final { scan-assembler {\mp?lxvpx?\M}  } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c
new file mode 100644
index 00000000000..54f2e16314f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c
@@ -0,0 +1,81 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-load-vector-pair -mmma" } */
+
+/* Test if we do not generate load vector pair instructions if the user uses
+   the -mno-load-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* 2x lxv, stxvp.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* 2x lxv, stxvp.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxv, pstxvp.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxv, stxvp.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* 2x plxv, stxvp.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* 2x lxv, pstxvp.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 4x lxv, 2x stxvp.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 4x lxv, 2x stxvp.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 4x plxv, 2x pstxvp.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 4x lxv, 2x stxvp.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 4x plxv, 2x stxvp.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 4x lxv, 2x pstxvp.  */
+}
+
+/* { dg-final { scan-assembler-not {\mp?lxvpx?\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
new file mode 100644
index 00000000000..c1a36bf5fff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
@@ -0,0 +1,82 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mstore-vector-pair -mmma" } */
+
+/* Test if we generate store vector pair instructions if the user uses the
+   -mstore-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* plxvp, pstxvp.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* lxvpx, stxvpx.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* plxvp, stxvp.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* lxvp, pstxvp.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxvp, 2x pstxvp.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 2x plxvp, 2x stxvp.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 2x lxvp, 2x stxvp.  */
+}
+
+/* { dg-final { scan-assembler {\mp?stxvpx?\M}  } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c
new file mode 100644
index 00000000000..b8c3bdbfd89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c
@@ -0,0 +1,81 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-store-vector-pair -mmma" } */
+
+/* Test if we do not generate store vector pair instructions if the user uses
+   the -mno-store-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* lxvp, 2x stxv.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* lxvp, 2x stxv.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* plxvp, 2x pstxv.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* lxvpx, 2x stxv.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* plxvp, 2x stxv.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* lxvp, 2x pstxv.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 2x lxvp, 4x stxv.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 2x lxvp, 4x stxv.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxvp, 4x pstxv.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxvp, 4x stxv.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 2x plxvp, 4x stxv.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 2x lxvp, 4x pstxv.  */
+}
+
+/* { dg-final { scan-assembler-not {\mp?stxvpx?\M} } } */


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/users/meissner/heads/work087)] Add options to control load/store vector pair generation.
@ 2022-04-27 15:38 Michael Meissner
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Meissner @ 2022-04-27 15:38 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:95b3ecf37f892237f21cc610cc499344ca3ac4d4

commit 95b3ecf37f892237f21cc610cc499344ca3ac4d4
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Apr 27 11:38:26 2022 -0400

    Add options to control load/store vector pair generation.
    
    This patch adds options to allow disabling generating either the load
    vector pair instructions (lxvp, lxvpx, plxvp) or the store vector pair
    instructions (stxvp, stxvpx, pstxvp).
    
    2022-04-27   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/mma.md (movoo): Add support to suppress load/store
            vector pair instructions.
            (movxo): Likewise.
            * config/rs6000/rs6000.cc (rs6000_setup_reg_addr_masks): Disable
            indexed loads for vector pair if either lxvp/stxvp are disabled.
            (rs6000_split_multireg_move): Do not split vector quad to vector
            pair if lxvp/stxvp is disabled.
            * config/rs6000/rs6000.md (isa attribute): Add lxvp and stxvp
            attributes.
            (enabled attribute): Add lxvp/stxvp support.
            * config/rs6000/rs6000.opt (-mload-vector-pair): New option.
            (-mstore-vector-pair): New option.
    
    gcc/testsuite/
            * gcc.target/powerpc/p10-load-vector-pair-1.c: New test.
            * gcc.target/powerpc/p10-load-vector-pair-2.c: New test.
            * gcc.target/powerpc/p10-store-vector-pair-1.c: New test.
            * gcc.target/powerpc/p10-store-vector-pair-2.c: New test.

Diff:
---
 gcc/config/rs6000/mma.md                           | 39 ++++++----
 gcc/config/rs6000/rs6000.cc                        | 10 ++-
 gcc/config/rs6000/rs6000.md                        | 13 +++-
 gcc/config/rs6000/rs6000.opt                       |  8 +++
 .../gcc.target/powerpc/p10-load-vector-pair-1.c    | 82 ++++++++++++++++++++++
 .../gcc.target/powerpc/p10-load-vector-pair-2.c    | 81 +++++++++++++++++++++
 .../gcc.target/powerpc/p10-store-vector-pair-1.c   | 82 ++++++++++++++++++++++
 .../gcc.target/powerpc/p10-store-vector-pair-2.c   | 81 +++++++++++++++++++++
 8 files changed, 378 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 907c9d6d516..a9f3b736fdd 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -274,26 +274,35 @@
   DONE;
 })
 
+;; With the -mno-load-vector-pair and -mno-store-vector-pair options, we might
+;; have to split lxvp into 2 lxv instructions, and/or stxvp into 2 stxv
+;; instructions.
 (define_insn_and_split "*movoo"
-  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,wa,m, o, wa")
+	(match_operand:OO 1 "input_operand"         "m, o, wa,wa,wa"))]
   "TARGET_MMA
    && (gpc_reg_operand (operands[0], OOmode)
        || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
+   #
    stxvp%X0 %x1,%0
+   #
    #"
   "&& reload_completed
-   && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
+   && ((MEM_P (operands[1]) && !TARGET_LOAD_VECTOR_PAIR)
+       || (MEM_P (operands[0]) && !TARGET_STORE_VECTOR_PAIR)
+       || (!MEM_P (operands[0]) && !MEM_P (operands[1])))"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
   DONE;
 }
-  [(set_attr "type" "vecload,vecstore,veclogical")
+  [(set_attr "type" "vecload,vecload,vecstore,vecstore,veclogical")
    (set_attr "size" "256")
-   (set_attr "length" "*,*,8")])
+   (set_attr "length" "*,*,*,*,8")
+   (set_attr "max_prefixed_insns" "*,2,*,2,*")
+   (set_attr "isa" "lxvp,*,stxvp,*,*")])
 
 \f
 ;; Vector quad support.  XOmode can only live in FPRs.
@@ -306,25 +315,27 @@
   DONE;
 })
 
+;; With the -mno-load-vector-pair and -mno-store-vector-pair options, we might
+;; have to split the loads into 4 lxv instructions instead of 2 lxvp
+;; instructions, and/or the stores into 4 stxv instructions instead of 2 stxvp
+;; instructions.
 (define_insn_and_split "*movxo"
-  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d")
-	(match_operand:XO 1 "input_operand" "m,d,d"))]
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,d,m,o,d")
+	(match_operand:XO 1 "input_operand"         "m,o,d,d,d"))]
   "TARGET_MMA
    && (gpc_reg_operand (operands[0], XOmode)
        || gpc_reg_operand (operands[1], XOmode))"
-  "@
-   #
-   #
-   #"
+  "#"
   "&& reload_completed"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
   DONE;
 }
-  [(set_attr "type" "vecload,vecstore,veclogical")
-   (set_attr "length" "*,*,16")
-   (set_attr "max_prefixed_insns" "2,2,*")])
+  [(set_attr "type" "vecload,vecload,vecstore,vecstore,veclogical")
+   (set_attr "length" "*,*,*,*,16")
+   (set_attr "max_prefixed_insns" "2,4,2,4,*")
+   (set_attr "isa" "lxvp,*,stxvp,*,*")])
 
 (define_expand "vsx_assemble_pair"
   [(match_operand:OO 0 "vsx_register_operand")
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f07e57cafb2..3a689e0fdfa 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -2716,7 +2716,8 @@ rs6000_setup_reg_addr_masks (void)
 	  /* Vector pairs can do both indexed and offset loads if the
 	     instructions are enabled, otherwise they can only do offset loads
 	     since it will be broken into two vector moves.  Vector quads can
-	     only do offset loads.  */
+	     only do offset loads.  If either stxvp or lxvp is disabled, we
+	     can't do indexed arithmetic.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
 		   && (m2 == OOmode || m2 == XOmode))
 	    {
@@ -2724,7 +2725,9 @@ rs6000_setup_reg_addr_masks (void)
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == OOmode)
+		  if (m2 == OOmode
+		      && TARGET_LOAD_VECTOR_PAIR
+		      && TARGET_STORE_VECTOR_PAIR)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -26968,7 +26971,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
   if (mode == XOmode && TARGET_MMA
-      && (MEM_P (dst) || MEM_P (src)))
+      && ((MEM_P (dst) && TARGET_STORE_VECTOR_PAIR)
+	  || (MEM_P (src) && TARGET_LOAD_VECTOR_PAIR)))
     {
       reg_mode = OOmode;
       nregs /= 2;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 64049a6e521..90a11366266 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -354,7 +354,7 @@
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,lxvp,stxvp"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -402,6 +402,17 @@
      (and (eq_attr "isa" "p10")
 	  (match_test "TARGET_POWER10"))
      (const_int 1)
+
+     (and (eq_attr "isa" "lxvp")
+	  (match_test "TARGET_POWER10")
+	  (match_test "TARGET_LOAD_VECTOR_PAIR"))
+     (const_int 1)
+
+     (and (eq_attr "isa" "stxvp")
+	  (match_test "TARGET_POWER10")
+	  (match_test "TARGET_STORE_VECTOR_PAIR"))
+     (const_int 1)
+
     ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 6c4caf4c9ee..766f8f1591c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -624,6 +624,14 @@ mieee128-constant
 Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
 Generate (do not generate) code that uses the LXVKQ instruction.
 
+; Generate (do not generate) code that uses the load vector pair instructions.
+mload-vector-pair
+Target Undocumented Var(TARGET_LOAD_VECTOR_PAIR) Init(1) Save
+
+; Generate (do not generate) code that uses the store vector pair instructions.
+mstore-vector-pair
+Target Undocumented Var(TARGET_STORE_VECTOR_PAIR) Init(1) Save
+
 -param=rs6000-density-pct-threshold=
 Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
 When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c
new file mode 100644
index 00000000000..d1f5790d238
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-1.c
@@ -0,0 +1,82 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mload-vector-pair -mmma" } */
+
+/* Test if we generate load vector pair instructions if the user uses the
+   -mload-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* plxvp, pstxvp.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* lxvpx, stxvpx.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* plxvp, stxvp.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* lxvp, pstxvp.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxvp, 2x pstxvp.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 2x plxvp, 2x stxvp.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 2x lxvp, 2x stxvp.  */
+}
+
+/* { dg-final { scan-assembler {\mp?lxvpx?\M}  } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c
new file mode 100644
index 00000000000..54f2e16314f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-load-vector-pair-2.c
@@ -0,0 +1,81 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-load-vector-pair -mmma" } */
+
+/* Test if we do not generate load vector pair instructions if the user uses
+   the -mno-load-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* 2x lxv, stxvp.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* 2x lxv, stxvp.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxv, pstxvp.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxv, stxvp.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* 2x plxv, stxvp.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* 2x lxv, pstxvp.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 4x lxv, 2x stxvp.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 4x lxv, 2x stxvp.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 4x plxv, 2x pstxvp.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 4x lxv, 2x stxvp.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 4x plxv, 2x stxvp.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 4x lxv, 2x pstxvp.  */
+}
+
+/* { dg-final { scan-assembler-not {\mp?lxvpx?\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
new file mode 100644
index 00000000000..c1a36bf5fff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c
@@ -0,0 +1,82 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mstore-vector-pair -mmma" } */
+
+/* Test if we generate store vector pair instructions if the user uses the
+   -mstore-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* lxvp, stxvp.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* plxvp, pstxvp.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* lxvpx, stxvpx.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* plxvp, stxvp.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* lxvp, pstxvp.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxvp, 2x pstxvp.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxvp, 2x stxvp.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 2x plxvp, 2x stxvp.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 2x lxvp, 2x stxvp.  */
+}
+
+/* { dg-final { scan-assembler {\mp?stxvpx?\M}  } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c
new file mode 100644
index 00000000000..b8c3bdbfd89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c
@@ -0,0 +1,81 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-store-vector-pair -mmma" } */
+
+/* Test if we do not generate store vector pair instructions if the user uses
+   the -mno-store-vector-pair option.  */
+static __vector_quad sq;
+static __vector_pair sp;
+
+void
+load_store_pair (__vector_pair *p, __vector_pair *q)
+{
+  *p = *q;			/* lxvp, 2x stxv.  */
+}
+
+void
+load_store_pair_1 (__vector_pair *p, __vector_pair *q)
+{
+  p[1] = q[1];			/* lxvp, 2x stxv.  */
+}
+
+void
+load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q)
+{
+  p[0x10000] = q[0x10000];	/* plxvp, 2x pstxv.  */
+}
+
+void
+load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n)
+{
+  p[n] = q[n];			/* lxvpx, 2x stxv.  */
+}
+
+void
+load_pair_static (__vector_pair *p)
+{
+  *p = sp;			/* plxvp, 2x stxv.  */
+}
+
+void
+store_pair_static (__vector_pair *p)
+{
+  sp = *p;			/* lxvp, 2x pstxv.  */
+}
+
+void
+load_store_quad (__vector_quad *p, __vector_quad *q)
+{
+  *p = *q;			/* 2x lxvp, 4x stxv.  */
+}
+
+void
+load_store_quad_1 (__vector_quad *p, __vector_quad *q)
+{
+  p[1] = q[1];			/* 2x lxvp, 4x stxv.  */
+}
+
+void
+load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q)
+{
+  p[0x10000] = q[0x10000];	/* 2x plxvp, 4x pstxv.  */
+}
+
+void
+load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n)
+{
+  p[n] = q[n];			/* 2x lxvp, 4x stxv.  */
+}
+
+void
+load_quad_static (__vector_quad *p)
+{
+  *p = sq;			/* 2x plxvp, 4x stxv.  */
+}
+
+void
+store_quad_static (__vector_quad *p)
+{
+  sq = *p;			/* 2x lxvp, 4x pstxv.  */
+}
+
+/* { dg-final { scan-assembler-not {\mp?stxvpx?\M} } } */


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-04-27 15:45 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-27 15:45 [gcc(refs/users/meissner/heads/work087)] Add options to control load/store vector pair generation Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2022-04-27 15:38 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).