public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-17 16:13 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-17 16:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:083e586fde6454f53dfc0b49200eb5a908130cbe

commit 083e586fde6454f53dfc0b49200eb5a908130cbe
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md                           | 385 ++++++++++++---------
 gcc/config/rs6000/predicates.md                    |  14 +-
 gcc/config/rs6000/rs6000-builtin.def               |  14 +-
 gcc/config/rs6000/rs6000-call.c                    | 172 ++++-----
 gcc/config/rs6000/rs6000-modes.def                 |  10 +-
 gcc/config/rs6000/rs6000-string.c                  |   6 +-
 gcc/config/rs6000/rs6000.c                         | 198 ++++++-----
 gcc/config/rs6000/rs6000.h                         |   3 +-
 gcc/config/rs6000/rs6000.md                        |   2 +-
 gcc/testsuite/gcc.target/powerpc/mma-double-test.c |   3 +
 gcc/testsuite/gcc.target/powerpc/mma-single-test.c |   3 +
 .../gcc.target/powerpc/{pr96506.c => pr96506-1.c}  |  24 --
 gcc/testsuite/gcc.target/powerpc/pr96506-2.c       |  38 ++
 13 files changed, 500 insertions(+), 372 deletions(-)
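
For context (not part of the commit itself), a minimal user-level sketch of the
accumulator flow whose implementation changes below.  It uses only built-ins
declared in rs6000-builtin.def, assumes a Power10 target (e.g. -mcpu=power10),
and the function name is illustrative:

  #include <altivec.h>

  /* Zero an accumulator, then copy its four 16-byte rows out to memory.  */
  void
  dump_acc (vector unsigned char rows[4])
  {
    __vector_quad acc;
    __builtin_mma_xxsetaccz (&acc);             /* accumulator lives in XOmode */
    __builtin_mma_disassemble_acc (rows, &acc); /* one 16-byte store per row */
  }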

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..0ecf57df334 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,245 +324,329 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && vsx_register_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
+  DONE;
+})
 
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+(define_insn_and_split "*mma_disassemble_pair"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:OO 1 "input_operand" "wa")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_expand "mma_disassemble_pair"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:OO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 1));
+  src = gen_rtx_UNSPEC (V16QImode,
+                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
+                        UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
+(define_insn_and_split "*mma_disassemble_acc"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:XO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_expand "mma_disassemble_acc"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:XO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  src = gen_rtx_UNSPEC (V16QImode,
+      	    		gen_rtvec (2, operands[1], GEN_INT (regoff)),
+			UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
 ;; MMA instructions that do not use their accumulators as an input, still
 ;; must not allow their vector operands to overlap the registers used by
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
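
The movoo pattern and the new assemble/disassemble insns above back the
__vector_pair built-ins; a small usage sketch under the same assumptions
(Power10 target, built-in names from rs6000-builtin.def, illustrative
function name):

  #include <altivec.h>

  /* Pack two vectors into an OOmode pair and unpack them again.  */
  void
  pair_round_trip (vector unsigned char a, vector unsigned char b,
                   vector unsigned char out[2])
  {
    __vector_pair p;
    __builtin_mma_assemble_pair (&p, a, b);   /* expands via mma_assemble_pair */
    __builtin_mma_disassemble_pair (out, &p); /* expands via mma_disassemble_pair */
  }
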
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..0c1acf186a6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
@@ -1144,6 +1145,15 @@
   (match_test "(mode == V16QImode
 		&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
 
+;; Return 1 if this operand is valid for an MMA disassemble insn.
+(define_predicate "mma_disassemble_output_operand"
+  (match_code "reg,subreg,mem")
+{
+  if (REG_P (op) && !vsx_register_operand (op, mode))
+    return false;
+  return true;
+})
+
 ;; Return true if operand is an operator used in rotate-and-mask instructions.
 (define_predicate "rotate_mask_operator"
   (match_code "rotate,ashift,lshiftrt"))
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a58102c3785..47b1f74e616 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -352,7 +352,7 @@
 		     | RS6000_BTC_UNARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
-#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE)				\
+#define BU_MMA_2(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
 		    "__builtin_mma_" NAME,		/* NAME */	\
 		    RS6000_BTM_MMA,			/* MASK */	\
@@ -360,7 +360,13 @@
 		     | RS6000_BTC_BINARY				\
 		     | RS6000_BTC_VOID					\
 		     | RS6000_BTC_GIMPLE),				\
-		    CODE_FOR_nothing)			/* ICODE */
+		    CODE_FOR_nothing)			/* ICODE */	\
+  RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL,	/* ENUM */	\
+		    "__builtin_mma_" NAME "_internal",	/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
 
 #define BU_MMA_3(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
@@ -3108,8 +3114,8 @@ BU_MMA_1 (XXMFACC,	    "xxmfacc",		QUAD, mma_xxmfacc)
 BU_MMA_1 (XXMTACC,	    "xxmtacc",		QUAD, mma_xxmtacc)
 BU_MMA_1 (XXSETACCZ,	    "xxsetaccz",	MISC, mma_xxsetaccz)
 
-BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc",  QUAD, nothing)
-BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
+BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc",	QUAD, mma_disassemble_acc)
+BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair",	PAIR, mma_disassemble_pair)
 
 BU_MMA_3 (ASSEMBLE_PAIR,    "assemble_pair",	MISC, mma_assemble_pair)
 BU_MMA_3 (XVBF16GER2,	    "xvbf16ger2",	MISC, mma_xvbf16ger2)
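
BU_MMA_2 now registers two entries per built-in: the user-visible one, which
is folded at gimple time, and an _internal one that carries the real icode.
For DISASSEMBLE_ACC the macro above yields:

  __builtin_mma_disassemble_acc           /* CODE_FOR_nothing, folded in
                                             rs6000_gimple_fold_mma_builtin */
  __builtin_mma_disassemble_acc_internal  /* CODE_FOR_mma_disassemble_acc */
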
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 3bd89a79bad..f23edceb82b 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,22 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types being used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (cfun != NULL
+      && !cfun->machine->mma_return_type_error
+      && TREE_TYPE (cfun->decl) == fntype
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6577,30 +6593,8 @@ machine_mode
 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
 			      machine_mode mode,
 			      int *punsignedp ATTRIBUTE_UNUSED,
-			      const_tree, int for_return)
+			      const_tree, int for_return ATTRIBUTE_UNUSED)
 {
-  /* Warning: this is a static local variable and not always NULL!
-     This function is called multiple times for the same function
-     and return value.  PREV_FUNC is used to keep track of the
-     first time we encounter a function's return value in order
-     to not report an error with that return value multiple times.  */
-  static struct function *prev_func = NULL;
-
-  /* We do not allow MMA types being used as return values.  Only report
-     the invalid return value usage the first time we encounter it.  */
-  if (for_return
-      && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
-    {
-      /* Record we have now handled function CFUN, so the next time we
-	 are called, we do not re-report the same error.  */
-      prev_func = cfun;
-      if (TYPE_CANONICAL (type) != NULL_TREE)
-	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA type %qs as a function return value",
-	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
-    }
-
   PROMOTE_MODE (mode, *punsignedp, type);
 
   return mode;
@@ -7552,7 +7546,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -10073,7 +10067,8 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
     }
 
   unsigned attr_args = attr & RS6000_BTC_OPND_MASK;
-  if (attr & RS6000_BTC_QUAD)
+  if (attr & RS6000_BTC_QUAD
+      || fcode == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
     attr_args++;
 
   gcc_assert (nopnds == attr_args);
@@ -11687,23 +11682,24 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
   gimple *new_call;
   tree new_decl;
 
-  if (rs6000_builtin_info[fncode + 1].icode == CODE_FOR_nothing)
+  if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+      || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR)
     {
       /* This is an MMA disassemble built-in function.  */
-      gcc_assert (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
-		  || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR);
-
       push_gimplify_context (true);
+      unsigned nvec = (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) ? 4 : 2;
       tree dst_ptr = gimple_call_arg (stmt, 0);
       tree src_ptr = gimple_call_arg (stmt, 1);
       tree src_type = TREE_TYPE (src_ptr);
       tree src = make_ssa_name (TREE_TYPE (src_type));
       gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
 
-      /* If we are not disassembling an accumulator or our destination is
-	 another accumulator, then just copy the entire thing as is.  */
-      if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC
-	  || TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+      /* If we are not disassembling an accumulator/pair or our destination is
+	 another accumulator/pair, then just copy the entire thing as is.  */
+      if ((fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+	   && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+	  || (fncode == MMA_BUILTIN_DISASSEMBLE_PAIR
+	      && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
 	{
 	  tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
 						   src_type, dst_ptr));
@@ -11713,29 +11709,33 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
 	  return true;
 	}
 
-      /* We're disassembling an accumulator into a different type, so we need
+      /* If we're disassembling an accumulator into a different type, we need
 	 to emit a xxmfacc instruction now, since we cannot do it later.  */
-      new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
-      new_call = gimple_build_call (new_decl, 1, src);
-      src = make_ssa_name (vector_quad_type_node);
-      gimple_call_set_lhs (new_call, src);
-      gimple_seq_add_stmt (&new_seq, new_call);
-
-      /* Copy the accumulator vector by vector.  */
+      if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC)
+	{
+	  new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
+	  new_call = gimple_build_call (new_decl, 1, src);
+	  src = make_ssa_name (vector_quad_type_node);
+	  gimple_call_set_lhs (new_call, src);
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	}
+      
+      /* Copy the accumulator/pair vector by vector.  */
+      new_decl = rs6000_builtin_decls[fncode + 1];
       tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
 						   ptr_mode, true);
       tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr);
-      tree array_type = build_array_type_nelts (unsigned_V16QI_type_node, 4);
-      tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src);
-      for (unsigned i = 0; i < 4; i++)
+      for (unsigned i = 0; i < nvec; i++)
 	{
-	  unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i;
-	  tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array,
-			     build_int_cst (size_type_node, i),
-			     NULL_TREE, NULL_TREE);
+	  unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
 	  tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
 			     build_int_cst (dst_type, index * 16));
-	  gimplify_assign (dst, ref, &new_seq);
+	  tree dstssa = make_ssa_name (unsigned_V16QI_type_node);
+	  new_call = gimple_build_call (new_decl, 2, src,
+					build_int_cstu (uint16_type_node, i));
+	  gimple_call_set_lhs (new_call, dstssa);
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	  gimplify_assign (dst, dstssa, &new_seq);
 	}
       pop_gimplify_context (NULL);
       gsi_replace_with_seq (gsi, new_seq, true);
@@ -13190,17 +13190,23 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
       SET_TYPE_ALIGN (vector_pair_type_node, 256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
-      layout_type (vector_pair_type_node);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_ALIGN (vector_quad_type_node, 512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
-      layout_type (vector_quad_type_node);
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      SET_TYPE_ALIGN (vector_quad_type_node, 512);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
     }
@@ -13236,8 +13242,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14048,36 +14054,40 @@ mma_init_builtins (void)
 	  icode = d[1].icode;
 	}
       else
-	{
-	  if ((attr & RS6000_BTC_QUAD) == 0)
-	    attr_args--;
-
-	  /* Ensure we have the correct number and type of operands.  */
-	  gcc_assert (attr_args == insn_data[icode].n_operands - 1);
-	}
-
-      if (icode == CODE_FOR_nothing)
-	{
-	  /* This is a disassemble MMA built-in function.  */
-	  gcc_assert (attr_args == RS6000_BTC_BINARY
-		      && (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
-			  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
+        {
+          if ( !( d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+                  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+               && ((attr & RS6000_BTC_QUAD) == 0))
+            attr_args--;
+
+          /* Ensure we have the correct number and type of operands.  */
+          gcc_assert (attr_args == insn_data[icode].n_operands - 1);
+        }
+
+      /* This is a disassemble pair/acc function. */
+      if ( d->code == MMA_BUILTIN_DISASSEMBLE_ACC
+           || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)
+        {
 	  op[nopnds++] = build_pointer_type (void_type_node);
-	  if (attr & RS6000_BTC_QUAD)
-	    op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	  else
-	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
-	}
+          if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC)
+            op[nopnds++] = build_pointer_type (vector_quad_type_node);
+          else
+            op[nopnds++] = build_pointer_type (vector_pair_type_node);
+        }
       else
-	{
+        {
 	  /* This is a normal MMA built-in function.  */
-	  unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
+	  unsigned j = 0;
+	  if (attr & RS6000_BTC_QUAD
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+	    j = 1;
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
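
The reworked gimple fold above copies an accumulator or pair one vector at a
time through the new _INTERNAL built-ins rather than through an array view of
the opaque value, reversing the destination slot on little-endian.  A rough
C-level sketch of that copy loop (modelling the opaque source as an array of
16-byte rows is purely for illustration, as are the names):

  #include <string.h>

  typedef unsigned char v16qi __attribute__ ((vector_size (16)));

  static void
  disassemble_sketch (unsigned char *dst, const v16qi src[4], int big_endian)
  {
    const unsigned nvec = 4;                    /* 2 for the pair variant */
    for (unsigned i = 0; i < nvec; i++)
      {
        unsigned index = big_endian ? i : nvec - 1 - i;
        memcpy (dst + index * 16, &src[i], 16); /* row i lands in slot index */
      }
  }
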
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32); /* instead of POI */
+OPAQUE_MODE (XO, 64); /* instead of PXI */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d7dcd93f088..e03355d50f4 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -8129,8 +8123,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10323,11 +10317,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12596,10 +12590,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16323,15 +16317,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16377,12 +16371,16 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       return;
     }
 
-  /* The __vector_pair and __vector_quad modes are multi-register modes,
-     so if have to load or store the registers, we have to be careful to
-     properly swap them if we're in little endian mode below.  This means
-     the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  /* The __vector_pair and __vector_quad modes are multi-register
+     modes, so if have to load or store the registers, we have to be
+     careful to properly swap them if we're in little endian mode
+     below.  This means the last register gets the first memory
+     location.  We also need to be careful of using the right register
+     numbers if we are splitting XO to OO.  */
+  if (mode == OOmode || mode == XOmode)
     {
+      nregs = hard_regno_nregs (reg, mode);
+      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
       if (MEM_P (dst))
 	{
 	  unsigned offset = 0;
@@ -16391,15 +16389,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16412,11 +16410,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  unsigned offset = 0;
 	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16425,7 +16423,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16434,8 +16432,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       if (GET_CODE (src) == UNSPEC)
 	{
 	  gcc_assert (REG_P (dst)
-		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && ((GET_MODE (src) == XOmode
+			   && FP_REGNO_P (REGNO (dst)))
+			  || (GET_MODE (src) == OOmode
+			      && VSX_REGNO_P (REGNO (dst))))
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16444,9 +16445,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if ( GET_MODE (src) == XOmode )
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16459,22 +16463,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XO/OO are opaque so cannot use subregs. */
+      if ( mode == OOmode || mode == XOmode )
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16611,7 +16628,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16626,16 +16643,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XO/OO are opaque so cannot use subregs. */
+	  if ( mode == OOmode || mode == XOmode )
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else 
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19865,7 +19890,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27186,14 +27212,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27202,19 +27228,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
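
The rs6000_invalid_conversion changes keep rejecting conversions between the
MMA types and anything else, now keyed off XOmode/OOmode.  A sketch of what is
rejected and what stays allowed (hypothetical code, not from the testsuite):

  void
  bad_cast (__vector_quad *q)
  {
    __vector_pair *p = (__vector_pair *) q; /* error: invalid conversion from type "* __vector_quad" */
    void *v = (void *) q;                    /* fine: conversions via void * remain allowed */
    (void) p;
    (void) v;
  }
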
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5a47aa14722..f35aaf4ffd1 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1041,7 +1041,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2556,6 +2556,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 5e5ad9f7c3d..b3f77ec665c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index 53843794a95..254af7f8f79 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -181,6 +181,9 @@ main (int argc, char *argv[])
     printf ("MMA double test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index ac4125ba329..ebbc5ae2e1b 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -189,6 +189,9 @@ main (int argc, char *argv[])
     printf ("MMA single test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506.c b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
similarity index 61%
rename from gcc/testsuite/gcc.target/powerpc/pr96506.c
rename to gcc/testsuite/gcc.target/powerpc/pr96506-1.c
index b1b40c5a5c8..91835cec30c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr96506.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
@@ -40,27 +40,3 @@ foo3 (void)
   vquad_t v;
   bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
 }
-
-__vector_pair
-foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-vpair_t
-foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-__vector_quad
-foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
-
-vquad_t
-foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506-2.c b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
new file mode 100644
index 00000000000..9cffd2576c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
@@ -0,0 +1,38 @@
+/* PR target/96506 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+extern void bar0();
+extern void bar1();
+extern void bar2();
+extern void bar3();
+
+typedef __vector_pair vpair_t;
+typedef __vector_quad vquad_t;
+
+/* Verify we flag errors on the following.  */
+
+__vector_pair
+foo4 (__vector_pair *src)
+{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+  return *src;
+}
+
+vpair_t
+foo5 (vpair_t *src)
+{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+  return *src;
+}
+
+__vector_quad
+foo6 (__vector_quad *src)
+{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+  return *src;
+}
+
+vquad_t
+foo7 (vquad_t *src)
+{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+  return *src;
+}
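
For anyone exercising the new diagnostics, here is a minimal usage sketch (not part of the patch; it uses only the documented MMA built-ins) contrasting the pass-by-reference style that is supported with the by-value returns that pr96506-1.c and pr96506-2.c expect to be rejected:

/* Illustrative only; compile with -mcpu=power10 (or -mdejagnu-cpu=power10
   in the testsuite).  */
typedef __vector unsigned char vec_t;

/* Supported: the __vector_quad lives behind a pointer.  */
void
fill_acc (__vector_quad *acc, vec_t a, vec_t b)
{
  __builtin_mma_xxsetaccz (acc);		/* Zero the accumulator.  */
  __builtin_mma_xvf32gerpp (acc, a, b);		/* Rank-1 update into it.  */
}

/* Supported: copy the accumulator out through a pointer destination.  */
void
store_acc (vec_t dst[4], __vector_quad *acc)
{
  __builtin_mma_disassemble_acc (dst, acc);
}

/* Rejected, as the tests above check:
     __vector_quad make_acc (void);
   "invalid use of MMA type '__vector_quad' as a function return value".  */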


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-17 16:25 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-17 16:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:02ce36f9cc17fba7dcc03d01e6b037bb575d6b05

commit 02ce36f9cc17fba7dcc03d01e6b037bb575d6b05
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md                           | 385 ++++++++++++---------
 gcc/config/rs6000/predicates.md                    |  14 +-
 gcc/config/rs6000/rs6000-builtin.def               |  14 +-
 gcc/config/rs6000/rs6000-call.c                    | 144 ++++----
 gcc/config/rs6000/rs6000-modes.def                 |  10 +-
 gcc/config/rs6000/rs6000-string.c                  |   6 +-
 gcc/config/rs6000/rs6000.c                         | 198 ++++++-----
 gcc/config/rs6000/rs6000.h                         |   3 +-
 gcc/config/rs6000/rs6000.md                        |   2 +-
 gcc/testsuite/gcc.target/powerpc/mma-double-test.c |   3 +
 gcc/testsuite/gcc.target/powerpc/mma-single-test.c |   3 +
 .../gcc.target/powerpc/{pr96506.c => pr96506-1.c}  |  24 --
 gcc/testsuite/gcc.target/powerpc/pr96506-2.c       |  38 ++
 13 files changed, 486 insertions(+), 358 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..7d520e19b0d 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,245 +324,329 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && vsx_register_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
+  DONE;
+})
 
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+(define_insn_and_split "*mma_disassemble_pair"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:OO 1 "input_operand" "wa")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_expand "mma_disassemble_pair"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:OO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 1));
+  src = gen_rtx_UNSPEC (V16QImode,
+                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
+                        UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
+(define_insn_and_split "*mma_disassemble_acc"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:XO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_expand "mma_disassemble_acc"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:XO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  src = gen_rtx_UNSPEC (V16QImode,
+			gen_rtvec (2, operands[1], GEN_INT (regoff)),
+			UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
 ;; MMA instructions that do not use their accumulators as an input, still
 ;; must not allow their vector operands to overlap the registers used by
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..0c1acf186a6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
@@ -1144,6 +1145,15 @@
   (match_test "(mode == V16QImode
 		&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
 
+;; Return 1 if this operand is valid for an MMA disassemble insn.
+(define_predicate "mma_disassemble_output_operand"
+  (match_code "reg,subreg,mem")
+{
+  if (REG_P (op) && !vsx_register_operand (op, mode))
+    return false;
+  return true;
+})
+
 ;; Return true if operand is an operator used in rotate-and-mask instructions.
 (define_predicate "rotate_mask_operator"
   (match_code "rotate,ashift,lshiftrt"))
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a58102c3785..47b1f74e616 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -352,7 +352,7 @@
 		     | RS6000_BTC_UNARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
-#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE)				\
+#define BU_MMA_2(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
 		    "__builtin_mma_" NAME,		/* NAME */	\
 		    RS6000_BTM_MMA,			/* MASK */	\
@@ -360,7 +360,13 @@
 		     | RS6000_BTC_BINARY				\
 		     | RS6000_BTC_VOID					\
 		     | RS6000_BTC_GIMPLE),				\
-		    CODE_FOR_nothing)			/* ICODE */
+		    CODE_FOR_nothing)			/* ICODE */	\
+  RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL,	/* ENUM */	\
+		    "__builtin_mma_" NAME "_internal",	/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
 
 #define BU_MMA_3(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
@@ -3108,8 +3114,8 @@ BU_MMA_1 (XXMFACC,	    "xxmfacc",		QUAD, mma_xxmfacc)
 BU_MMA_1 (XXMTACC,	    "xxmtacc",		QUAD, mma_xxmtacc)
 BU_MMA_1 (XXSETACCZ,	    "xxsetaccz",	MISC, mma_xxsetaccz)
 
-BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc",  QUAD, nothing)
-BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
+BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc",	QUAD, mma_disassemble_acc)
+BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair",	PAIR, mma_disassemble_pair)
 
 BU_MMA_3 (ASSEMBLE_PAIR,    "assemble_pair",	MISC, mma_assemble_pair)
 BU_MMA_3 (XVBF16GER2,	    "xvbf16ger2",	MISC, mma_xvbf16ger2)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 3bd89a79bad..67e58485270 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,22 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types being used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (cfun != NULL
+      && !cfun->machine->mma_return_type_error
+      && TREE_TYPE (cfun->decl) == fntype
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6577,30 +6593,8 @@ machine_mode
 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
 			      machine_mode mode,
 			      int *punsignedp ATTRIBUTE_UNUSED,
-			      const_tree, int for_return)
+			      const_tree, int for_return ATTRIBUTE_UNUSED)
 {
-  /* Warning: this is a static local variable and not always NULL!
-     This function is called multiple times for the same function
-     and return value.  PREV_FUNC is used to keep track of the
-     first time we encounter a function's return value in order
-     to not report an error with that return value multiple times.  */
-  static struct function *prev_func = NULL;
-
-  /* We do not allow MMA types being used as return values.  Only report
-     the invalid return value usage the first time we encounter it.  */
-  if (for_return
-      && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
-    {
-      /* Record we have now handled function CFUN, so the next time we
-	 are called, we do not re-report the same error.  */
-      prev_func = cfun;
-      if (TYPE_CANONICAL (type) != NULL_TREE)
-	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA type %qs as a function return value",
-	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
-    }
-
   PROMOTE_MODE (mode, *punsignedp, type);
 
   return mode;
@@ -7552,7 +7546,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -10073,7 +10067,8 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
     }
 
   unsigned attr_args = attr & RS6000_BTC_OPND_MASK;
-  if (attr & RS6000_BTC_QUAD)
+  if (attr & RS6000_BTC_QUAD
+      || fcode == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
     attr_args++;
 
   gcc_assert (nopnds == attr_args);
@@ -11687,23 +11682,24 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
   gimple *new_call;
   tree new_decl;
 
-  if (rs6000_builtin_info[fncode + 1].icode == CODE_FOR_nothing)
+  if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+      || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR)
     {
       /* This is an MMA disassemble built-in function.  */
-      gcc_assert (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
-		  || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR);
-
       push_gimplify_context (true);
+      unsigned nvec = (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) ? 4 : 2;
       tree dst_ptr = gimple_call_arg (stmt, 0);
       tree src_ptr = gimple_call_arg (stmt, 1);
       tree src_type = TREE_TYPE (src_ptr);
       tree src = make_ssa_name (TREE_TYPE (src_type));
       gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
 
-      /* If we are not disassembling an accumulator or our destination is
-	 another accumulator, then just copy the entire thing as is.  */
-      if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC
-	  || TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+      /* If we are not disassembling an accumulator/pair or our destination is
+	 another accumulator/pair, then just copy the entire thing as is.  */
+      if ((fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+	   && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+	  || (fncode == MMA_BUILTIN_DISASSEMBLE_PAIR
+	      && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
 	{
 	  tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
 						   src_type, dst_ptr));
@@ -11713,29 +11709,33 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
 	  return true;
 	}
 
-      /* We're disassembling an accumulator into a different type, so we need
+      /* If we're disassembling an accumulator into a different type, we need
 	 to emit a xxmfacc instruction now, since we cannot do it later.  */
-      new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
-      new_call = gimple_build_call (new_decl, 1, src);
-      src = make_ssa_name (vector_quad_type_node);
-      gimple_call_set_lhs (new_call, src);
-      gimple_seq_add_stmt (&new_seq, new_call);
+      if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC)
+	{
+	  new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
+	  new_call = gimple_build_call (new_decl, 1, src);
+	  src = make_ssa_name (vector_quad_type_node);
+	  gimple_call_set_lhs (new_call, src);
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	}
 
-      /* Copy the accumulator vector by vector.  */
+      /* Copy the accumulator/pair vector by vector.  */
+      new_decl = rs6000_builtin_decls[fncode + 1];
       tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
 						   ptr_mode, true);
       tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr);
-      tree array_type = build_array_type_nelts (unsigned_V16QI_type_node, 4);
-      tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src);
-      for (unsigned i = 0; i < 4; i++)
+      for (unsigned i = 0; i < nvec; i++)
 	{
-	  unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i;
-	  tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array,
-			     build_int_cst (size_type_node, i),
-			     NULL_TREE, NULL_TREE);
+	  unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
 	  tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
 			     build_int_cst (dst_type, index * 16));
-	  gimplify_assign (dst, ref, &new_seq);
+	  tree dstssa = make_ssa_name (unsigned_V16QI_type_node);
+	  new_call = gimple_build_call (new_decl, 2, src,
+					build_int_cstu (uint16_type_node, i));
+	  gimple_call_set_lhs (new_call, dstssa);
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	  gimplify_assign (dst, dstssa, &new_seq);
 	}
       pop_gimplify_context (NULL);
       gsi_replace_with_seq (gsi, new_seq, true);
@@ -13190,17 +13190,23 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
       SET_TYPE_ALIGN (vector_pair_type_node, 256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
-      layout_type (vector_pair_type_node);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_ALIGN (vector_quad_type_node, 512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
-      layout_type (vector_quad_type_node);
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      SET_TYPE_ALIGN (vector_quad_type_node, 512);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
     }
@@ -13236,8 +13242,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14049,21 +14055,21 @@ mma_init_builtins (void)
 	}
       else
 	{
-	  if ((attr & RS6000_BTC_QUAD) == 0)
+	  if ( !( d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+		  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+	       && ((attr & RS6000_BTC_QUAD) == 0))
 	    attr_args--;
 
 	  /* Ensure we have the correct number and type of operands.  */
 	  gcc_assert (attr_args == insn_data[icode].n_operands - 1);
 	}
 
-      if (icode == CODE_FOR_nothing)
+      /* This is a disassemble pair/acc function. */
+      if ( d->code == MMA_BUILTIN_DISASSEMBLE_ACC
+	   || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)
 	{
-	  /* This is a disassemble MMA built-in function.  */
-	  gcc_assert (attr_args == RS6000_BTC_BINARY
-		      && (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
-			  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
 	  op[nopnds++] = build_pointer_type (void_type_node);
-	  if (attr & RS6000_BTC_QUAD)
+	  if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC)
 	    op[nopnds++] = build_pointer_type (vector_quad_type_node);
 	  else
 	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
@@ -14071,13 +14077,17 @@ mma_init_builtins (void)
       else
 	{
 	  /* This is a normal MMA built-in function.  */
-	  unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
+	  unsigned j = 0;
+	  if (attr & RS6000_BTC_QUAD
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+	    j = 1;
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32); /* instead of POI */
+OPAQUE_MODE (XO, 64); /* instead of PXI */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
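
With the vector-pair block-op mask enabled (see the rs6000.c hunk below that turns it on when TARGET_EFFICIENT_UNALIGNED_VSX holds), this lets a fixed 32-byte copy go through the new movoo pattern as one lxvp/stxvp pair.  A minimal sketch, assuming -mcpu=power10:

#include <string.h>

/* Illustrative only: 32 bytes is exactly one __vector_pair, so
   expand_block_move can use an OOmode load/store pair here.  */
void
copy32 (void *dst, const void *src)
{
  memcpy (dst, src, 32);
}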
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d7dcd93f088..33418a44b22 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -8129,8 +8123,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10323,11 +10317,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12596,10 +12590,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16323,15 +16317,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16377,12 +16371,16 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       return;
     }
 
-  /* The __vector_pair and __vector_quad modes are multi-register modes,
-     so if have to load or store the registers, we have to be careful to
-     properly swap them if we're in little endian mode below.  This means
-     the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  /* The __vector_pair and __vector_quad modes are multi-register
+     modes, so if we have to load or store the registers, we have to be
+     careful to properly swap them if we're in little endian mode
+     below.  This means the last register gets the first memory
+     location.  We also need to be careful of using the right register
+     numbers if we are splitting XO to OO.  */
+  if (mode == OOmode || mode == XOmode)
     {
+      nregs = hard_regno_nregs (reg, mode);
+      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
       if (MEM_P (dst))
 	{
 	  unsigned offset = 0;
@@ -16391,15 +16389,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16412,11 +16410,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  unsigned offset = 0;
 	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16425,7 +16423,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16434,8 +16432,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       if (GET_CODE (src) == UNSPEC)
 	{
 	  gcc_assert (REG_P (dst)
-		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && ((GET_MODE (src) == XOmode
+			   && FP_REGNO_P (REGNO (dst)))
+			  || (GET_MODE (src) == OOmode
+			      && VSX_REGNO_P (REGNO (dst))))
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16444,9 +16445,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if ( GET_MODE (src) == XOmode )
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16459,22 +16463,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XO/OO are opaque so cannot use subregs. */
+      if ( mode == OOmode || mode == XOmode )
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16611,7 +16628,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16626,16 +16643,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XO/OO are opaque so cannot use subregs. */
+	  if ( mode == OOmode || mode == XOmode )
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19865,7 +19890,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27186,14 +27212,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27202,19 +27228,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5a47aa14722..f35aaf4ffd1 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1041,7 +1041,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2556,6 +2556,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 5e5ad9f7c3d..b3f77ec665c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index 53843794a95..254af7f8f79 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -181,6 +181,9 @@ main (int argc, char *argv[])
     printf ("MMA double test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index ac4125ba329..ebbc5ae2e1b 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -189,6 +189,9 @@ main (int argc, char *argv[])
     printf ("MMA single test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506.c b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
similarity index 61%
rename from gcc/testsuite/gcc.target/powerpc/pr96506.c
rename to gcc/testsuite/gcc.target/powerpc/pr96506-1.c
index b1b40c5a5c8..91835cec30c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr96506.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
@@ -40,27 +40,3 @@ foo3 (void)
   vquad_t v;
   bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
 }
-
-__vector_pair
-foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-vpair_t
-foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-__vector_quad
-foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
-
-vquad_t
-foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506-2.c b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
new file mode 100644
index 00000000000..9cffd2576c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
@@ -0,0 +1,38 @@
+/* PR target/96506 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+extern void bar0();
+extern void bar1();
+extern void bar2();
+extern void bar3();
+
+typedef __vector_pair vpair_t;
+typedef __vector_quad vquad_t;
+
+/* Verify we flag errors on the following.  */
+
+__vector_pair
+foo4 (__vector_pair *src)
+{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+  return *src;
+}
+
+vpair_t
+foo5 (vpair_t *src)
+{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+  return *src;
+}
+
+__vector_quad
+foo6 (__vector_quad *src)
+{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+  return *src;
+}
+
+vquad_t
+foo7 (vquad_t *src)
+{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+  return *src;
+}


^ permalink raw reply	[flat|nested] 9+ messages in thread
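[Editorial aside, not part of either archived patch: a minimal sketch of the user-level MMA built-ins whose backing modes these commits migrate to the opaque OOmode/XOmode, assuming a Power10-capable GCC built with this series; the helper name and the -mcpu=power10 flag are illustrative placeholders, while the __builtin_mma_* calls and vec_xst are the documented rs6000 built-ins.]

    /* Illustrative only: zero an accumulator, do one rank-1 update, and
       copy the four result rows out vector by vector.
       Compile with something like: gcc -O2 -mcpu=power10 demo.c  */
    #include <altivec.h>

    void
    ger_and_store (vector unsigned char a, vector unsigned char b, float *out)
    {
      __vector_quad acc;        /* lives in an XOmode accumulator */
      vector float rows[4];

      __builtin_mma_xxsetaccz (&acc);            /* all-zero accumulator */
      __builtin_mma_xvf32gerpp (&acc, a, b);     /* acc += a * b (rank-1) */
      __builtin_mma_disassemble_acc (rows, &acc); /* move acc into rows[]  */

      for (int i = 0; i < 4; i++)
        vec_xst (rows[i], i * 16, out);          /* store 16 bytes per row */
    }
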

* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-17  0:07 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-17  0:07 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2289a559c52efce0c45acd490c35e9b8d0dd4e0e

commit 2289a559c52efce0c45acd490c35e9b8d0dd4e0e
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md                           | 385 ++++++++++++---------
 gcc/config/rs6000/predicates.md                    |  14 +-
 gcc/config/rs6000/rs6000-builtin.def               |  14 +-
 gcc/config/rs6000/rs6000-call.c                    | 211 ++++++-----
 gcc/config/rs6000/rs6000-modes.def                 |  10 +-
 gcc/config/rs6000/rs6000-string.c                  |   6 +-
 gcc/config/rs6000/rs6000.c                         | 198 ++++++-----
 gcc/config/rs6000/rs6000.h                         |   3 +-
 gcc/config/rs6000/rs6000.md                        |   2 +-
 gcc/testsuite/gcc.target/powerpc/mma-double-test.c |   3 +
 gcc/testsuite/gcc.target/powerpc/mma-single-test.c |   3 +
 .../gcc.target/powerpc/{pr96506.c => pr96506-1.c}  |  24 --
 gcc/testsuite/gcc.target/powerpc/pr96506-2.c       |  38 ++
 13 files changed, 542 insertions(+), 369 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..9789dc37c1a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,245 +324,329 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && vsx_register_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_insn_and_split "*mma_disassemble_pair"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:OO 1 "input_operand" "wa")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
   DONE;
 })
 
+(define_expand "mma_disassemble_pair"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:OO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 1));
+  src = gen_rtx_UNSPEC (V16QImode,
+                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
+                        UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);                    
+  DONE;
+})
+  
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
+(define_insn_and_split "*mma_disassemble_acc"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:XO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_expand "mma_disassemble_acc"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:XO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  src = gen_rtx_UNSPEC (V16QImode,
+      	    		gen_rtvec (2, operands[1], GEN_INT (regoff)),
+			UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);			
+  DONE;
+})
+  
 ;; MMA instructions that do not use their accumulators as an input, still
 ;; must not allow their vector operands to overlap the registers used by
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..0c1acf186a6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
@@ -1144,6 +1145,15 @@
   (match_test "(mode == V16QImode
 		&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
 
+;; Return 1 if this operand is valid for an MMA disassemble insn.
+(define_predicate "mma_disassemble_output_operand"
+  (match_code "reg,subreg,mem")
+{
+  if (REG_P (op) && !vsx_register_operand (op, mode))
+    return false;
+  return true;
+})
+
 ;; Return true if operand is an operator used in rotate-and-mask instructions.
 (define_predicate "rotate_mask_operator"
   (match_code "rotate,ashift,lshiftrt"))
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a58102c3785..47b1f74e616 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -352,7 +352,7 @@
 		     | RS6000_BTC_UNARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
-#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE)				\
+#define BU_MMA_2(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
 		    "__builtin_mma_" NAME,		/* NAME */	\
 		    RS6000_BTM_MMA,			/* MASK */	\
@@ -360,7 +360,13 @@
 		     | RS6000_BTC_BINARY				\
 		     | RS6000_BTC_VOID					\
 		     | RS6000_BTC_GIMPLE),				\
-		    CODE_FOR_nothing)			/* ICODE */
+		    CODE_FOR_nothing)			/* ICODE */	\
+  RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL,	/* ENUM */	\
+		    "__builtin_mma_" NAME "_internal",	/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
 
 #define BU_MMA_3(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
@@ -3108,8 +3114,8 @@ BU_MMA_1 (XXMFACC,	    "xxmfacc",		QUAD, mma_xxmfacc)
 BU_MMA_1 (XXMTACC,	    "xxmtacc",		QUAD, mma_xxmtacc)
 BU_MMA_1 (XXSETACCZ,	    "xxsetaccz",	MISC, mma_xxsetaccz)
 
-BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc",  QUAD, nothing)
-BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
+BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc",	QUAD, mma_disassemble_acc)
+BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair",	PAIR, mma_disassemble_pair)
 
 BU_MMA_3 (ASSEMBLE_PAIR,    "assemble_pair",	MISC, mma_assemble_pair)
 BU_MMA_3 (XVBF16GER2,	    "xvbf16ger2",	MISC, mma_xvbf16ger2)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 92378e958a9..61e2eb6931c 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,22 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types being used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (cfun != NULL
+      && !cfun->machine->mma_return_type_error
+      && TREE_TYPE (cfun->decl) == fntype
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6577,30 +6593,8 @@ machine_mode
 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
 			      machine_mode mode,
 			      int *punsignedp ATTRIBUTE_UNUSED,
-			      const_tree, int for_return)
+			      const_tree, int for_return ATTRIBUTE_UNUSED)
 {
-  /* Warning: this is a static local variable and not always NULL!
-     This function is called multiple times for the same function
-     and return value.  PREV_FUNC is used to keep track of the
-     first time we encounter a function's return value in order
-     to not report an error with that return value multiple times.  */
-  static struct function *prev_func = NULL;
-
-  /* We do not allow MMA types being used as return values.  Only report
-     the invalid return value usage the first time we encounter it.  */
-  if (for_return
-      && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
-    {
-      /* Record we have now handled function CFUN, so the next time we
-	 are called, we do not re-report the same error.  */
-      prev_func = cfun;
-      if (TYPE_CANONICAL (type) != NULL_TREE)
-	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA type %qs as a function return value",
-	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
-    }
-
   PROMOTE_MODE (mode, *punsignedp, type);
 
   return mode;
@@ -7552,7 +7546,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -10073,7 +10067,8 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
     }
 
   unsigned attr_args = attr & RS6000_BTC_OPND_MASK;
-  if (attr & RS6000_BTC_QUAD)
+  if (attr & RS6000_BTC_QUAD
+      || fcode == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
     attr_args++;
 
   gcc_assert (nopnds == attr_args);
@@ -11687,23 +11682,24 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
   gimple *new_call;
   tree new_decl;
 
-  if (rs6000_builtin_info[fncode + 1].icode == CODE_FOR_nothing)
+  if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+      || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR)
     {
       /* This is an MMA disassemble built-in function.  */
-      gcc_assert (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
-		  || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR);
-
       push_gimplify_context (true);
+      unsigned nvec = (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) ? 4 : 2;
       tree dst_ptr = gimple_call_arg (stmt, 0);
       tree src_ptr = gimple_call_arg (stmt, 1);
       tree src_type = TREE_TYPE (src_ptr);
       tree src = make_ssa_name (TREE_TYPE (src_type));
       gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
 
-      /* If we are not disassembling an accumulator or our destination is
-	 another accumulator, then just copy the entire thing as is.  */
-      if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC
-	  || TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+      /* If we are not disassembling an accumulator/pair or our destination is
+	 another accumulator/pair, then just copy the entire thing as is.  */
+      if ((fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+	   && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+	  || (fncode == MMA_BUILTIN_DISASSEMBLE_PAIR
+	      && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node))
 	{
 	  tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
 						   src_type, dst_ptr));
@@ -11713,29 +11709,33 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
 	  return true;
 	}
 
-      /* We're disassembling an accumulator into a different type, so we need
+      /* If we're disassembling an accumulator into a different type, we need
 	 to emit a xxmfacc instruction now, since we cannot do it later.  */
-      new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
-      new_call = gimple_build_call (new_decl, 1, src);
-      src = make_ssa_name (vector_quad_type_node);
-      gimple_call_set_lhs (new_call, src);
-      gimple_seq_add_stmt (&new_seq, new_call);
-
-      /* Copy the accumulator vector by vector.  */
+      if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC)
+	{
+	  new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
+	  new_call = gimple_build_call (new_decl, 1, src);
+	  src = make_ssa_name (vector_quad_type_node);
+	  gimple_call_set_lhs (new_call, src);
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	}
+      
+      /* Copy the accumulator/pair vector by vector.  */
+      new_decl = rs6000_builtin_decls[fncode + 1];
       tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
 						   ptr_mode, true);
       tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr);
-      tree array_type = build_array_type_nelts (unsigned_V16QI_type_node, 4);
-      tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src);
-      for (unsigned i = 0; i < 4; i++)
+      for (unsigned i = 0; i < nvec; i++)
 	{
-	  unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i;
-	  tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array,
-			     build_int_cst (size_type_node, i),
-			     NULL_TREE, NULL_TREE);
+	  unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
 	  tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
 			     build_int_cst (dst_type, index * 16));
-	  gimplify_assign (dst, ref, &new_seq);
+	  tree dstssa = make_ssa_name (unsigned_V16QI_type_node);
+	  new_call = gimple_build_call (new_decl, 2, src,
+					build_int_cstu (uint16_type_node, i));
+	  gimple_call_set_lhs (new_call, dstssa);
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	  gimplify_assign (dst, dstssa, &new_seq);
 	}
       pop_gimplify_context (NULL);
       gsi_replace_with_seq (gsi, new_seq, true);
@@ -13190,17 +13190,68 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
+
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
+      unsigned mode_align = GET_MODE_ALIGNMENT (OOmode);
+      SET_TYPE_ALIGN (vector_pair_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_pair_type_node,
+					      "__vector_pair");
+
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      mode_align = GET_MODE_ALIGNMENT (XOmode);
+      SET_TYPE_ALIGN (vector_quad_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_quad_type_node,
+					      "__vector_quad");
+
+      /*
+      tree vp_inner = make_unsigned_type (256);
+      layout_type (vp_inner);
+      vp_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_pair_inner"), vp_inner);
+      vector_pair_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_pair_type_node) = vp_inner;
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
       layout_type (vector_pair_type_node);
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
+      tree vq_inner = make_unsigned_type (512);
+      layout_type (vq_inner);
+      vq_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_quad_inner"), vq_inner);
+      vector_quad_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_quad_type_node) = vq_inner;
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
       layout_type (vector_quad_type_node);
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
+      */
+
+      /*
+  t = build_index_type (size_int (size - 1));
+  t = build_array_type (char_type_node, t);
+  t = build_decl (DECL_SOURCE_LOCATION (info->context),
+		  FIELD_DECL, get_identifier ("__data"), t);
+  SET_DECL_ALIGN (t, align);
+  DECL_USER_ALIGN (t) = 1;
+
+  trampoline_type = make_node (RECORD_TYPE);
+  TYPE_NAME (trampoline_type) = get_identifier ("__builtin_trampoline");
+  TYPE_FIELDS (trampoline_type) = t;
+  layout_type (trampoline_type);
+  DECL_CONTEXT (t) = trampoline_type;
+      */
+
     }
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13234,8 +13285,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14046,36 +14097,40 @@ mma_init_builtins (void)
 	  icode = d[1].icode;
 	}
       else
-	{
-	  if ((attr & RS6000_BTC_QUAD) == 0)
-	    attr_args--;
-
-	  /* Ensure we have the correct number and type of operands.  */
-	  gcc_assert (attr_args == insn_data[icode].n_operands - 1);
-	}
-
-      if (icode == CODE_FOR_nothing)
-	{
-	  /* This is a disassemble MMA built-in function.  */
-	  gcc_assert (attr_args == RS6000_BTC_BINARY
-		      && (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
-			  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
+        {
+          if ( !( d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+                  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+               && ((attr & RS6000_BTC_QUAD) == 0))
+            attr_args--;
+
+          /* Ensure we have the correct number and type of operands.  */
+          gcc_assert (attr_args == insn_data[icode].n_operands - 1);
+        }
+
+      /* This is a disassemble pair/acc function. */
+      if ( d->code == MMA_BUILTIN_DISASSEMBLE_ACC
+           || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)
+        {
 	  op[nopnds++] = build_pointer_type (void_type_node);
-	  if (attr & RS6000_BTC_QUAD)
-	    op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	  else
-	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
-	}
+          if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC)
+            op[nopnds++] = build_pointer_type (vector_quad_type_node);
+          else
+            op[nopnds++] = build_pointer_type (vector_pair_type_node);
+        }
       else
-	{
+        {
 	  /* This is a normal MMA built-in function.  */
-	  unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
+	  unsigned j = 0;
+	  if (attr & RS6000_BTC_QUAD
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+	    j = 1;
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32); /* instead of POI */
+OPAQUE_MODE (XO, 64); /* instead of PXI */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1e506b83762..927370cb479 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -7954,8 +7948,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10148,11 +10142,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12421,10 +12415,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16148,15 +16142,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16202,12 +16196,16 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       return;
     }
 
-  /* The __vector_pair and __vector_quad modes are multi-register modes,
-     so if have to load or store the registers, we have to be careful to
-     properly swap them if we're in little endian mode below.  This means
-     the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  /* The __vector_pair and __vector_quad modes are multi-register
+     modes, so if have to load or store the registers, we have to be
+     careful to properly swap them if we're in little endian mode
+     below.  This means the last register gets the first memory
+     location.  We also need to be careful of using the right register
+     numbers if we are splitting XO to OO.  */
+  if (mode == OOmode || mode == XOmode)
     {
+      nregs = hard_regno_nregs (reg, mode);
+      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
       if (MEM_P (dst))
 	{
 	  unsigned offset = 0;
@@ -16216,15 +16214,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg
+		= (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16237,11 +16235,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  unsigned offset = 0;
 	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg
+		= (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16250,7 +16248,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16259,8 +16257,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       if (GET_CODE (src) == UNSPEC)
 	{
 	  gcc_assert (REG_P (dst)
-		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && ((GET_MODE (src) == XOmode
+			   && FP_REGNO_P (REGNO (dst)))
+			  || (GET_MODE (src) == OOmode
+			      && VSX_REGNO_P (REGNO (dst))))
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16269,9 +16270,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if (GET_MODE (src) == XOmode)
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16284,22 +16288,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XO/OO are opaque, so we cannot use subregs.  */
+      if (mode == OOmode || mode == XOmode)
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16436,7 +16453,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16451,16 +16468,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XO/OO are opaque, so we cannot use subregs.  */
+	  if (mode == OOmode || mode == XOmode)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19690,7 +19715,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27010,14 +27036,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27026,19 +27052,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..cdb495e6aa4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1039,7 +1039,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2554,6 +2554,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..7bc93ead972 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index 53843794a95..254af7f8f79 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -181,6 +181,9 @@ main (int argc, char *argv[])
     printf ("MMA double test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort ();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index ac4125ba329..ebbc5ae2e1b 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -189,6 +189,9 @@ main (int argc, char *argv[])
     printf ("MMA single test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort ();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506.c b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
similarity index 61%
rename from gcc/testsuite/gcc.target/powerpc/pr96506.c
rename to gcc/testsuite/gcc.target/powerpc/pr96506-1.c
index b1b40c5a5c8..91835cec30c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr96506.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
@@ -40,27 +40,3 @@ foo3 (void)
   vquad_t v;
   bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
 }
-
-__vector_pair
-foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-vpair_t
-foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-__vector_quad
-foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
-
-vquad_t
-foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506-2.c b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
new file mode 100644
index 00000000000..9cffd2576c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
@@ -0,0 +1,38 @@
+/* PR target/96506 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+extern void bar0();
+extern void bar1();
+extern void bar2();
+extern void bar3();
+
+typedef __vector_pair vpair_t;
+typedef __vector_quad vquad_t;
+
+/* Verify we flag errors on the following.  */
+
+__vector_pair
+foo4 (__vector_pair *src)
+{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+  return *src;
+}
+
+vpair_t
+foo5 (vpair_t *src)
+{ /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+  return *src;
+}
+
+__vector_quad
+foo6 (__vector_quad *src)
+{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+  return *src;
+}
+
+vquad_t
+foo7 (vquad_t *src)
+{ /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+  return *src;
+}
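
For context, a minimal user-level sketch of the built-ins whose gimple
folding and expanders are reworked above (illustrative only; it assumes a
toolchain built from this branch with -mcpu=power10 -mmma, and the function
name store_acc is made up for the example):

  #include <altivec.h>

  /* Accumulate two rank-4 outer products into a __vector_quad, then
     spill the four result vectors to memory; the store goes through
     the new __builtin_mma_disassemble_acc folding.  */
  void
  store_acc (vector unsigned char a, vector unsigned char b,
             vector unsigned char out[4])
  {
    __vector_quad acc;
    __builtin_mma_xvi8ger4 (&acc, a, b);        /* initial outer product */
    __builtin_mma_xvi8ger4pp (&acc, a, b);      /* accumulate a second one */
    __builtin_mma_disassemble_acc (out, &acc);
  }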


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-16 18:35 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-16 18:35 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ba77adcd5f03b5b82c2ee90bc5b743b379690c93

commit ba77adcd5f03b5b82c2ee90bc5b743b379690c93
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md                           | 385 ++++++++++++---------
 gcc/config/rs6000/predicates.md                    |  14 +-
 gcc/config/rs6000/rs6000-builtin.def               |  14 +-
 gcc/config/rs6000/rs6000-call.c                    | 212 +++++++++---
 gcc/config/rs6000/rs6000-modes.def                 |  10 +-
 gcc/config/rs6000/rs6000-string.c                  |   6 +-
 gcc/config/rs6000/rs6000.c                         | 193 ++++++-----
 gcc/config/rs6000/rs6000.h                         |   3 +-
 gcc/config/rs6000/rs6000.md                        |   2 +-
 gcc/testsuite/gcc.target/powerpc/mma-double-test.c |   3 +
 gcc/testsuite/gcc.target/powerpc/mma-single-test.c |   3 +
 .../gcc.target/powerpc/{pr96506.c => pr96506-1.c}  |  24 --
 gcc/testsuite/gcc.target/powerpc/pr96506-2.c       |  38 ++
 13 files changed, 564 insertions(+), 343 deletions(-)
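
Before the patch itself, a similar sketch for the __vector_pair side handled
by the mma_assemble_pair/mma_disassemble_pair patterns below (again
illustrative only, under the same -mcpu=power10 -mmma assumption; pack_unpack
is a made-up name):

  #include <altivec.h>

  /* Build a register pair from two vectors and split it back out; the
     assemble/disassemble pair built-ins map onto the OOmode patterns
     in mma.md.  */
  void
  pack_unpack (vector unsigned char a, vector unsigned char b,
               vector unsigned char out[2])
  {
    __vector_pair p;
    __builtin_mma_assemble_pair (&p, a, b);
    __builtin_mma_disassemble_pair (out, &p);
  }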

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..137e0c6add9 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,245 +324,329 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "fpr_reg_operand" "=d")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_insn_and_split "*mma_disassemble_pair"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:OO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
   DONE;
 })
 
+(define_expand "mma_disassemble_pair"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:OO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 1));
+  src = gen_rtx_UNSPEC (V16QImode,
+                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
+                        UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+  
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
+(define_insn_and_split "*mma_disassemble_acc"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:XO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_expand "mma_disassemble_acc"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:XO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  src = gen_rtx_UNSPEC (V16QImode,
+			gen_rtvec (2, operands[1], GEN_INT (regoff)),
+			UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+  
 ;; MMA instructions that do not use their accumulators as an input, still
 ;; must not allow their vector operands to overlap the registers used by
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..0c1acf186a6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
@@ -1144,6 +1145,15 @@
   (match_test "(mode == V16QImode
 		&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
 
+;; Return 1 if this operand is valid for an MMA disassemble insn.
+(define_predicate "mma_disassemble_output_operand"
+  (match_code "reg,subreg,mem")
+{
+  if (REG_P (op) && !vsx_register_operand (op, mode))
+    return false;
+  return true;
+})
+
 ;; Return true if operand is an operator used in rotate-and-mask instructions.
 (define_predicate "rotate_mask_operator"
   (match_code "rotate,ashift,lshiftrt"))
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a58102c3785..47b1f74e616 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -352,7 +352,7 @@
 		     | RS6000_BTC_UNARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
-#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE)				\
+#define BU_MMA_2(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
 		    "__builtin_mma_" NAME,		/* NAME */	\
 		    RS6000_BTM_MMA,			/* MASK */	\
@@ -360,7 +360,13 @@
 		     | RS6000_BTC_BINARY				\
 		     | RS6000_BTC_VOID					\
 		     | RS6000_BTC_GIMPLE),				\
-		    CODE_FOR_nothing)			/* ICODE */
+		    CODE_FOR_nothing)			/* ICODE */	\
+  RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL,	/* ENUM */	\
+		    "__builtin_mma_" NAME "_internal",	/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
 
 #define BU_MMA_3(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
@@ -3108,8 +3114,8 @@ BU_MMA_1 (XXMFACC,	    "xxmfacc",		QUAD, mma_xxmfacc)
 BU_MMA_1 (XXMTACC,	    "xxmtacc",		QUAD, mma_xxmtacc)
 BU_MMA_1 (XXSETACCZ,	    "xxsetaccz",	MISC, mma_xxsetaccz)
 
-BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc",  QUAD, nothing)
-BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
+BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc",	QUAD, mma_disassemble_acc)
+BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair",	PAIR, mma_disassemble_pair)
 
 BU_MMA_3 (ASSEMBLE_PAIR,    "assemble_pair",	MISC, mma_assemble_pair)
 BU_MMA_3 (XVBF16GER2,	    "xvbf16ger2",	MISC, mma_xvbf16ger2)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 92378e958a9..a7e3589de0f 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,22 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types being used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (cfun != NULL
+      && !cfun->machine->mma_return_type_error
+      && TREE_TYPE (cfun->decl) == fntype
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6577,30 +6593,8 @@ machine_mode
 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
 			      machine_mode mode,
 			      int *punsignedp ATTRIBUTE_UNUSED,
-			      const_tree, int for_return)
+			      const_tree, int for_return ATTRIBUTE_UNUSED)
 {
-  /* Warning: this is a static local variable and not always NULL!
-     This function is called multiple times for the same function
-     and return value.  PREV_FUNC is used to keep track of the
-     first time we encounter a function's return value in order
-     to not report an error with that return value multiple times.  */
-  static struct function *prev_func = NULL;
-
-  /* We do not allow MMA types being used as return values.  Only report
-     the invalid return value usage the first time we encounter it.  */
-  if (for_return
-      && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
-    {
-      /* Record we have now handled function CFUN, so the next time we
-	 are called, we do not re-report the same error.  */
-      prev_func = cfun;
-      if (TYPE_CANONICAL (type) != NULL_TREE)
-	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA type %qs as a function return value",
-	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
-    }
-
   PROMOTE_MODE (mode, *punsignedp, type);
 
   return mode;
@@ -7552,7 +7546,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -11687,6 +11681,62 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
   gimple *new_call;
   tree new_decl;
 
+  if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+      || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR)
+    {
+      /* This is an MMA disassemble built-in function.  */
+      push_gimplify_context (true);
+      unsigned nvec = (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) ? 4 : 2;
+      tree dst_ptr = gimple_call_arg (stmt, 0);
+      tree src_ptr = gimple_call_arg (stmt, 1);
+      tree src_type = TREE_TYPE (src_ptr);
+      tree src = make_ssa_name (TREE_TYPE (src_type));
+      gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
+
+      /* If we are not disassembling an accumulator or our destination is
+	 another accumulator, then just copy the entire thing as is.  */
+      /* FIXME: should this just explicitly handle pair->pair and quad->quad copies? */
+      if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC
+	  || TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+	{
+	  tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
+						   src_type, dst_ptr));
+	  gimplify_assign (dst, src, &new_seq);
+	  pop_gimplify_context (NULL);
+	  gsi_replace_with_seq (gsi, new_seq, true);
+	  return true;
+	}
+
+      /* We're disassembling an accumulator into a different type, so we need
+	 to emit a xxmfacc instruction now, since we cannot do it later.  */
+      new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
+      new_call = gimple_build_call (new_decl, 1, src);
+      src = make_ssa_name (vector_quad_type_node);
+      gimple_call_set_lhs (new_call, src);
+      gimple_seq_add_stmt (&new_seq, new_call);
+
+      /* Copy the accumulator vector by vector.  */
+      new_decl = rs6000_builtin_decls[fncode + 1];
+      tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
+						   ptr_mode, true);
+      tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr);
+      for (unsigned i = 0; i < nvec; i++)
+	{
+	  unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i;
+	  tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
+			     build_int_cst (dst_type, index * 16));
+	  tree dstssa = make_ssa_name (unsigned_V16QI_type_node);
+	  new_call = gimple_build_call (new_decl, 2, src,
+					build_int_cstu (uint16_type_node, i));
+	  gimple_call_set_lhs (new_call, dstssa);
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	  gimplify_assign (dst, dstssa, &new_seq);
+	}
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (gsi, new_seq, true);
+      return true;
+    }
+#if 0
   if (rs6000_builtin_info[fncode + 1].icode == CODE_FOR_nothing)
     {
       /* This is an MMA disassemble built-in function.  */
@@ -11741,6 +11791,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
       gsi_replace_with_seq (gsi, new_seq, true);
       return true;
     }
+#endif
 
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
@@ -13190,17 +13241,68 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
+
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
+      unsigned mode_align = GET_MODE_ALIGNMENT (OOmode);
+      SET_TYPE_ALIGN (vector_pair_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_pair_type_node,
+					      "__vector_pair");
+
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      mode_align = GET_MODE_ALIGNMENT (XOmode);
+      SET_TYPE_ALIGN (vector_quad_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_quad_type_node,
+					      "__vector_quad");
+
+      /*
+      tree vp_inner = make_unsigned_type (256);
+      layout_type (vp_inner);
+      vp_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_pair_inner"), vp_inner);
+      vector_pair_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_pair_type_node) = vp_inner;
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
       layout_type (vector_pair_type_node);
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
+      tree vq_inner = make_unsigned_type (512);
+      layout_type (vq_inner);
+      vq_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_quad_inner"), vq_inner);
+      vector_quad_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_quad_type_node) = vq_inner;
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
       layout_type (vector_quad_type_node);
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
+      */
+
+      /*
+  t = build_index_type (size_int (size - 1));
+  t = build_array_type (char_type_node, t);
+  t = build_decl (DECL_SOURCE_LOCATION (info->context),
+		  FIELD_DECL, get_identifier ("__data"), t);
+  SET_DECL_ALIGN (t, align);
+  DECL_USER_ALIGN (t) = 1;
+
+  trampoline_type = make_node (RECORD_TYPE);
+  TYPE_NAME (trampoline_type) = get_identifier ("__builtin_trampoline");
+  TYPE_FIELDS (trampoline_type) = t;
+  layout_type (trampoline_type);
+  DECL_CONTEXT (t) = trampoline_type;
+      */
+
     }
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13234,8 +13336,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14046,36 +14148,40 @@ mma_init_builtins (void)
 	  icode = d[1].icode;
 	}
       else
-	{
-	  if ((attr & RS6000_BTC_QUAD) == 0)
-	    attr_args--;
-
-	  /* Ensure we have the correct number and type of operands.  */
-	  gcc_assert (attr_args == insn_data[icode].n_operands - 1);
-	}
-
-      if (icode == CODE_FOR_nothing)
-	{
-	  /* This is a disassemble MMA built-in function.  */
-	  gcc_assert (attr_args == RS6000_BTC_BINARY
-		      && (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
-			  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
+        {
+          if (!(d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+                || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+              && (attr & RS6000_BTC_QUAD) == 0)
+            attr_args--;
+
+          /* Ensure we have the correct number and type of operands.  */
+          gcc_assert (attr_args == insn_data[icode].n_operands - 1);
+        }
+
+      /* This is a disassemble pair/acc function.  */
+      if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
+          || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)
+        {
 	  op[nopnds++] = build_pointer_type (void_type_node);
-	  if (attr & RS6000_BTC_QUAD)
-	    op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	  else
-	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
-	}
+          if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC)
+            op[nopnds++] = build_pointer_type (vector_quad_type_node);
+          else
+            op[nopnds++] = build_pointer_type (vector_pair_type_node);
+        }
       else
-	{
+        {
 	  /* This is a normal MMA built-in function.  */
-	  unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
+	  unsigned j = 0;
+	  if (attr & RS6000_BTC_QUAD
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+	      && d->code != MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+	    j = 1;
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32);	/* __vector_pair; replaces POI.  */
+OPAQUE_MODE (XO, 64);	/* __vector_quad; replaces PXI.  */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1e506b83762..4c3d76cc730 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -7954,8 +7948,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10148,11 +10142,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12421,10 +12415,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16148,15 +16142,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16202,12 +16196,16 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       return;
     }
 
-  /* The __vector_pair and __vector_quad modes are multi-register modes,
-     so if have to load or store the registers, we have to be careful to
-     properly swap them if we're in little endian mode below.  This means
-     the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  /* The __vector_pair and __vector_quad modes are multi-register
+     modes, so if we have to load or store the registers, we have to be
+     careful to properly swap them if we're in little-endian mode
+     below.  This means the last register gets the first memory
+     location.  We also need to be careful to use the right register
+     numbers if we are splitting XOmode into OOmode moves.  */
+  if (mode == OOmode || mode == XOmode)
     {
+      nregs = hard_regno_nregs (reg, mode);
+      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
       if (MEM_P (dst))
 	{
 	  unsigned offset = 0;
@@ -16216,15 +16214,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16237,11 +16235,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  unsigned offset = 0;
 	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16250,7 +16248,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16260,7 +16258,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	{
 	  gcc_assert (REG_P (dst)
 		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16269,9 +16267,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if (GET_MODE (src) == XOmode)
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
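
A minimal stand-alone sketch of the register ordering above, for an XOmode
access split into OOmode register moves on little endian.  This is
illustrative only and not part of the patch; it assumes GET_MODE_SIZE (OOmode)
is 32 bytes and that hard_regno_nregs gives 4 and 2 registers for XOmode and
OOmode respectively:

#include <stdio.h>

int
main (void)
{
  const int nregs = 4;          /* assumed: XOmode spans 4 VSX registers  */
  const int reg_mode_nregs = 2; /* assumed: OOmode spans 2 VSX registers  */
  const int size = 32;          /* assumed: GET_MODE_SIZE (OOmode) in bytes  */
  int offset = 0;

  for (int i = 0; i < nregs; i += reg_mode_nregs)
    {
      /* Little-endian case of the subreg expression used above.  */
      int subreg = nregs - reg_mode_nregs - i;
      printf ("memory offset %2d <-> register base + %d\n", offset, subreg);
      offset += size;
    }
  return 0;
}
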
@@ -16284,22 +16285,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XO/OO are opaque so cannot use subregs.  */
+      if (mode == OOmode || mode == XOmode)
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16436,7 +16450,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16451,16 +16465,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XO/OO are opaque so cannot use subregs.  */
+	  if (mode == OOmode || mode == XOmode)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19690,7 +19712,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27010,14 +27033,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27026,19 +27049,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
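
As a source-level illustration of the conversion checks above (not part of the
patch; the exact wording of each diagnostic is the corresponding string in
this hunk), a pointer conversion between the two MMA pointer types is
rejected, while conversions to and from void * remain allowed:

void
conversion_sketch (void)
{
  __vector_quad vq;
  __vector_pair *pp = (__vector_pair *) &vq;  /* rejected: conversion between
                                                 MMA pointer types  */
  void *vp = (void *) &vq;                    /* still allowed  */
}
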
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..cdb495e6aa4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1039,7 +1039,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2554,6 +2554,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..7bc93ead972 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index 53843794a95..254af7f8f79 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -181,6 +181,9 @@ main (int argc, char *argv[])
     printf ("MMA double test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index ac4125ba329..ebbc5ae2e1b 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -189,6 +189,9 @@ main (int argc, char *argv[])
     printf ("MMA single test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506.c b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
similarity index 61%
rename from gcc/testsuite/gcc.target/powerpc/pr96506.c
rename to gcc/testsuite/gcc.target/powerpc/pr96506-1.c
index b1b40c5a5c8..91835cec30c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr96506.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
@@ -40,27 +40,3 @@ foo3 (void)
   vquad_t v;
   bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
 }
-
-__vector_pair
-foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-vpair_t
-foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-__vector_quad
-foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
-
-vquad_t
-foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506-2.c b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
new file mode 100644
index 00000000000..b0b7a318448
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
@@ -0,0 +1,38 @@
+/* PR target/96506 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+extern void bar0();
+extern void bar1();
+extern void bar2();
+extern void bar3();
+
+typedef __vector_pair vpair_t;
+typedef __vector_quad vquad_t;
+
+/* Verify we flag errors on the following.  */
+
+__vector_pair
+foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+vpair_t
+foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+__vector_quad
+foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}
+
+vquad_t
+foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-13 23:37 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-13 23:37 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:adecfa0f91ed7486c9487ea8337c16a165d8d636

commit adecfa0f91ed7486c9487ea8337c16a165d8d636
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md                           | 385 ++++++++++++---------
 gcc/config/rs6000/predicates.md                    |  14 +-
 gcc/config/rs6000/rs6000-builtin.def               |  14 +-
 gcc/config/rs6000/rs6000-call.c                    | 215 +++++++++---
 gcc/config/rs6000/rs6000-modes.def                 |  10 +-
 gcc/config/rs6000/rs6000-string.c                  |   6 +-
 gcc/config/rs6000/rs6000.c                         | 193 ++++++-----
 gcc/config/rs6000/rs6000.h                         |   3 +-
 gcc/config/rs6000/rs6000.md                        |   2 +-
 gcc/testsuite/gcc.target/powerpc/mma-double-test.c |   3 +
 gcc/testsuite/gcc.target/powerpc/mma-single-test.c |   3 +
 .../gcc.target/powerpc/{pr96506.c => pr96506-1.c}  |  24 --
 gcc/testsuite/gcc.target/powerpc/pr96506-2.c       |  38 ++
 13 files changed, 567 insertions(+), 343 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..0d0303bdba0 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,245 +324,329 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "fpr_reg_operand" "=d")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_insn_and_split "*mma_disassemble_pair"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:OO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
   DONE;
 })
 
+(define_expand "mma_disassemble_pair"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:OO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 1));
+  src = gen_rtx_UNSPEC (V16QImode,
+                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
+                        UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
+(define_insn_and_split "*mma_disassemble_acc"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:XO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_expand "mma_disassemble_acc"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:XO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  src = gen_rtx_UNSPEC (V16QImode,
+			gen_rtvec (2, operands[1], GEN_INT (regoff)),
+			UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
 ;; MMA instructions that do not use their accumulators as an input, still
 ;; must not allow their vector operands to overlap the registers used by
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
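
For reference, a short sketch of the source-level built-ins that map onto the
patterns above.  Illustrative only, not part of the patch; it assumes the
-mdejagnu-cpu=power10 style options and built-in signatures exercised by the
existing MMA testcases:

#include <altivec.h>

void
mma_sketch (vector unsigned char v0, vector unsigned char v1,
	    vector unsigned char v2, vector unsigned char v3,
	    vector unsigned char *out)
{
  __vector_pair pair;
  __vector_quad acc;

  __builtin_mma_assemble_pair (&pair, v0, v1);        /* mma_assemble_pair / UNSPEC_MMA_ASSEMBLE  */
  __builtin_mma_assemble_acc (&acc, v0, v1, v2, v3);  /* mma_assemble_acc / UNSPEC_MMA_ASSEMBLE  */
  __builtin_mma_xvi8ger4pp (&acc, v0, v1);            /* one of the mma_<avv> patterns  */
  __builtin_mma_disassemble_acc (out, &acc);          /* mma_disassemble_acc / UNSPEC_MMA_EXTRACT  */
  __builtin_mma_disassemble_pair (out, &pair);        /* mma_disassemble_pair / UNSPEC_MMA_EXTRACT  */
}
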
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..0c1acf186a6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
@@ -1144,6 +1145,15 @@
   (match_test "(mode == V16QImode
 		&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
 
+;; Return 1 if this operand is valid for an MMA disassemble insn.
+(define_predicate "mma_disassemble_output_operand"
+  (match_code "reg,subreg,mem")
+{
+  if (REG_P (op) && !vsx_register_operand (op, mode))
+    return false;
+  return true;
+})
+
 ;; Return true if operand is an operator used in rotate-and-mask instructions.
 (define_predicate "rotate_mask_operator"
   (match_code "rotate,ashift,lshiftrt"))
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a58102c3785..48a170a7406 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -352,7 +352,7 @@
 		     | RS6000_BTC_UNARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
-#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE)				\
+#define BU_MMA_2(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
 		    "__builtin_mma_" NAME,		/* NAME */	\
 		    RS6000_BTM_MMA,			/* MASK */	\
@@ -360,7 +360,13 @@
 		     | RS6000_BTC_BINARY				\
 		     | RS6000_BTC_VOID					\
 		     | RS6000_BTC_GIMPLE),				\
-		    CODE_FOR_nothing)			/* ICODE */
+		    CODE_FOR_nothing)			/* ICODE */	\
+  RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL,	/* ENUM */	\
+		    "__builtin_mma_" NAME "_internal",	/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
 
 #define BU_MMA_3(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
@@ -3108,8 +3114,8 @@ BU_MMA_1 (XXMFACC,	    "xxmfacc",		QUAD, mma_xxmfacc)
 BU_MMA_1 (XXMTACC,	    "xxmtacc",		QUAD, mma_xxmtacc)
 BU_MMA_1 (XXSETACCZ,	    "xxsetaccz",	MISC, mma_xxsetaccz)
 
-BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc",  QUAD, nothing)
-BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
+BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc",  MISC, mma_disassemble_acc)
+BU_MMA_2 (DISASSEMBLE_PAIR,"disassemble_pair", MISC, mma_disassemble_pair)
 
 BU_MMA_3 (ASSEMBLE_PAIR,    "assemble_pair",	MISC, mma_assemble_pair)
 BU_MMA_3 (XVBF16GER2,	    "xvbf16ger2",	MISC, mma_xvbf16ger2)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 92378e958a9..92520bb2879 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,21 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types to be used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (!cfun->machine->mma_return_type_error
+      && TREE_TYPE (cfun->decl) == fntype
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6577,30 +6592,8 @@ machine_mode
 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
 			      machine_mode mode,
 			      int *punsignedp ATTRIBUTE_UNUSED,
-			      const_tree, int for_return)
+			      const_tree, int for_return ATTRIBUTE_UNUSED)
 {
-  /* Warning: this is a static local variable and not always NULL!
-     This function is called multiple times for the same function
-     and return value.  PREV_FUNC is used to keep track of the
-     first time we encounter a function's return value in order
-     to not report an error with that return value multiple times.  */
-  static struct function *prev_func = NULL;
-
-  /* We do not allow MMA types being used as return values.  Only report
-     the invalid return value usage the first time we encounter it.  */
-  if (for_return
-      && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
-    {
-      /* Record we have now handled function CFUN, so the next time we
-	 are called, we do not re-report the same error.  */
-      prev_func = cfun;
-      if (TYPE_CANONICAL (type) != NULL_TREE)
-	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA type %qs as a function return value",
-	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
-    }
-
   PROMOTE_MODE (mode, *punsignedp, type);
 
   return mode;
@@ -7552,7 +7545,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -11687,6 +11680,64 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
   gimple *new_call;
   tree new_decl;
 
+  if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC
+      || fncode == MMA_BUILTIN_DISASSEMBLE_PAIR)
+    {
+      /* This is an MMA disassemble built-in function.  */
+      push_gimplify_context (true);
+      unsigned nvec = (fncode == MMA_BUILTIN_DISASSEMBLE_ACC) ? 4 : 2;
+      tree dst_ptr = gimple_call_arg (stmt, 0);
+      tree src_ptr = gimple_call_arg (stmt, 1);
+      tree src_type = TREE_TYPE (src_ptr);
+      tree src = make_ssa_name (TREE_TYPE (src_type));
+      gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
+
+      /* If we are not disassembling an accumulator or our destination is
+	 another accumulator, then just copy the entire thing as is.  */
+      /* FIXME: should this just explicitly handle pair->pair and quad->quad copies? */
+      if (fncode != MMA_BUILTIN_DISASSEMBLE_ACC
+	  || TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node)
+	{
+	  tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR,
+						   src_type, dst_ptr));
+	  gimplify_assign (dst, src, &new_seq);
+	  pop_gimplify_context (NULL);
+	  gsi_replace_with_seq (gsi, new_seq, true);
+	  return true;
+	}
+
+      /* We're disassembling an accumulator into a different type, so we need
+	 to emit a xxmfacc instruction now, since we cannot do it later.  */
+      new_decl = rs6000_builtin_decls[MMA_BUILTIN_XXMFACC_INTERNAL];
+      new_call = gimple_build_call (new_decl, 1, src);
+      src = make_ssa_name (vector_quad_type_node);
+      gimple_call_set_lhs (new_call, src);
+      gimple_seq_add_stmt (&new_seq, new_call);
+
+      /* Copy the accumulator vector by vector.  */
+      if (fncode == MMA_BUILTIN_DISASSEMBLE_ACC)
+	new_decl = rs6000_builtin_decls[MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL];
+      else
+	new_decl = rs6000_builtin_decls[MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL];
+
+      tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node,
+						   ptr_mode, true);
+      tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr);
+      for (unsigned i = 0; i < nvec; i++)
+	{
+	  unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
+	  tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
+			     build_int_cst (dst_type, index * 16));
+
+	  new_call = gimple_build_call (new_decl, 3, dst, src,
+					build_int_cstu (uint16_type_node, i));
+	  gimple_seq_add_stmt (&new_seq, new_call);
+	}
+      pop_gimplify_context (NULL);
+      gsi_replace_with_seq (gsi, new_seq, true);
+      return true;
+    }
+#if 0
   if (rs6000_builtin_info[fncode + 1].icode == CODE_FOR_nothing)
     {
       /* This is an MMA disassemble built-in function.  */
@@ -11741,6 +11792,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
       gsi_replace_with_seq (gsi, new_seq, true);
       return true;
     }
+#endif
 
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
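
The little-endian lane reversal that the disassemble folding above relies on,
spelled out for both variants (nvec is 4 for __builtin_mma_disassemble_acc and
2 for __builtin_mma_disassemble_pair, so the reversed index has to be computed
relative to nvec).  Illustrative only, not part of the patch:

#include <stdio.h>

int
main (void)
{
  for (unsigned nvec = 2; nvec <= 4; nvec += 2)
    for (unsigned i = 0; i < nvec; i++)
      {
	/* Little-endian case of: WORDS_BIG_ENDIAN ? i : nvec - 1 - i  */
	unsigned index = nvec - 1 - i;
	printf ("nvec=%u: source vector %u -> destination offset %u bytes\n",
		nvec, i, index * 16);
      }
  return 0;
}
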
@@ -13190,17 +13242,68 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
+
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
+      unsigned mode_align = GET_MODE_ALIGNMENT (OOmode);
+      SET_TYPE_ALIGN (vector_pair_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_pair_type_node,
+					      "__vector_pair");
+
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      mode_align = GET_MODE_ALIGNMENT (XOmode);
+      SET_TYPE_ALIGN (vector_quad_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_quad_type_node,
+					      "__vector_quad");
+
+      /*
+      tree vp_inner = make_unsigned_type (256);
+      layout_type (vp_inner);
+      vp_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_pair_inner"), vp_inner);
+      vector_pair_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_pair_type_node) = vp_inner;
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
       layout_type (vector_pair_type_node);
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
+      tree vq_inner = make_unsigned_type (512);
+      layout_type (vq_inner);
+      vq_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_quad_inner"), vq_inner);
+      vector_quad_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_quad_type_node) = vq_inner;
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
       layout_type (vector_quad_type_node);
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
+      */
+
+      /*
+  t = build_index_type (size_int (size - 1));
+  t = build_array_type (char_type_node, t);
+  t = build_decl (DECL_SOURCE_LOCATION (info->context),
+		  FIELD_DECL, get_identifier ("__data"), t);
+  SET_DECL_ALIGN (t, align);
+  DECL_USER_ALIGN (t) = 1;
+
+  trampoline_type = make_node (RECORD_TYPE);
+  TYPE_NAME (trampoline_type) = get_identifier ("__builtin_trampoline");
+  TYPE_FIELDS (trampoline_type) = t;
+  layout_type (trampoline_type);
+  DECL_CONTEXT (t) = trampoline_type;
+      */
+
     }
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13234,8 +13337,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14046,36 +14149,42 @@ mma_init_builtins (void)
 	  icode = d[1].icode;
 	}
       else
-	{
-	  if ((attr & RS6000_BTC_QUAD) == 0)
-	    attr_args--;
-
-	  /* Ensure we have the correct number and type of operands.  */
-	  gcc_assert (attr_args == insn_data[icode].n_operands - 1);
-	}
-
-      if (icode == CODE_FOR_nothing)
-	{
-	  /* This is a disassemble MMA built-in function.  */
-	  gcc_assert (attr_args == RS6000_BTC_BINARY
-		      && (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
-			  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
-	  op[nopnds++] = build_pointer_type (void_type_node);
-	  if (attr & RS6000_BTC_QUAD)
-	    op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	  else
-	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
-	}
+	{
+	  if (!(d->code == MMA_BUILTIN_DISASSEMBLE_ACC_INTERNAL
+		|| d->code == MMA_BUILTIN_DISASSEMBLE_PAIR_INTERNAL)
+	      && ((attr & RS6000_BTC_QUAD) == 0))
+	    attr_args--;
+
+	  /* Ensure we have the correct number and type of operands.  */
+	  gcc_assert (attr_args == insn_data[icode].n_operands - 1);
+	}
+
+      /* This is a disassemble pair/acc function.  */
+      if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
+	  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR)
+	{
+	  /* These functions have two operands, the first is a pointer
+	     to an array of vectors and the second is a pointer to a
+	     vector pair or quad.  The expand pattern does not have
+	     the same args so we have to make this up from scratch
+	     here.  In rs6000_gimple_fold_mma_builtin we convert these
+	     so that they will match the expand pattern.  */
+	  op[nopnds++] = build_pointer_type (builtin_mode_to_type[V16QImode][1]);
+	  if (d->code == MMA_BUILTIN_DISASSEMBLE_ACC)
+	    op[nopnds++] = build_pointer_type (vector_quad_type_node);
+	  else
+	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
+	}
       else
-	{
+        {
 	  /* This is a normal MMA built-in function.  */
 	  unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32); /* instead of POI */
+OPAQUE_MODE (XO, 64); /* instead of PXI */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1e506b83762..4c3d76cc730 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -7954,8 +7948,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10148,11 +10142,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12421,10 +12415,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16148,15 +16142,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16202,12 +16196,16 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       return;
     }
 
-  /* The __vector_pair and __vector_quad modes are multi-register modes,
-     so if have to load or store the registers, we have to be careful to
-     properly swap them if we're in little endian mode below.  This means
-     the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  /* The __vector_pair and __vector_quad modes are multi-register
+     modes, so if we have to load or store the registers, we have to
+     be careful to swap them properly when we are in little-endian
+     mode below.  This means the last register gets the first memory
+     location.  We also need to be careful to use the right register
+     numbers if we are splitting XOmode into OOmode.  */
+  if (mode == OOmode || mode == XOmode)
     {
+      nregs = hard_regno_nregs (reg, mode);
+      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
       if (MEM_P (dst))
 	{
 	  unsigned offset = 0;
@@ -16216,15 +16214,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16237,11 +16235,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  unsigned offset = 0;
 	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16250,7 +16248,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16260,7 +16258,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	{
 	  gcc_assert (REG_P (dst)
 		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16269,9 +16267,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if (GET_MODE (src) == XOmode)
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16284,22 +16285,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+      if (mode == OOmode || mode == XOmode)
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16436,7 +16450,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16451,16 +16465,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+	  if (mode == OOmode || mode == XOmode)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19690,7 +19712,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27010,14 +27033,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27026,19 +27049,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..cdb495e6aa4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1039,7 +1039,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2554,6 +2554,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..7bc93ead972 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index 53843794a95..254af7f8f79 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -181,6 +181,9 @@ main (int argc, char *argv[])
     printf ("MMA double test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort ();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index ac4125ba329..ebbc5ae2e1b 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -189,6 +189,9 @@ main (int argc, char *argv[])
     printf ("MMA single test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort ();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506.c b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
similarity index 61%
rename from gcc/testsuite/gcc.target/powerpc/pr96506.c
rename to gcc/testsuite/gcc.target/powerpc/pr96506-1.c
index b1b40c5a5c8..91835cec30c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr96506.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
@@ -40,27 +40,3 @@ foo3 (void)
   vquad_t v;
   bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
 }
-
-__vector_pair
-foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-vpair_t
-foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-__vector_quad
-foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
-
-vquad_t
-foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506-2.c b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
new file mode 100644
index 00000000000..b0b7a318448
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
@@ -0,0 +1,38 @@
+/* PR target/96506 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+extern void bar0();
+extern void bar1();
+extern void bar2();
+extern void bar3();
+
+typedef __vector_pair vpair_t;
+typedef __vector_quad vquad_t;
+
+/* Verify we flag errors on the following.  */
+
+__vector_pair
+foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+vpair_t
+foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+__vector_quad
+foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}
+
+vquad_t
+foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}
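
For readers following along, here is a minimal usage sketch of the opaque
__vector_quad type and the MMA built-ins this commit reworks.  It is not part
of the patch: it assumes -mcpu=power10 (or -mmma); the __builtin_mma_* names
are the documented built-ins, while the function and variable names are purely
illustrative.

  #include <altivec.h>

  void
  accumulate (vector unsigned char a, vector unsigned char b,
              vector unsigned char vecs[4])
  {
    __vector_quad acc;

    __builtin_mma_xxsetaccz (&acc);              /* zero the accumulator  */
    __builtin_mma_xvi8ger4pp (&acc, a, b);       /* accumulate an outer product  */
    __builtin_mma_disassemble_acc (vecs, &acc);  /* copy the four vectors back out  */
  }

With this patch, acc carries the new opaque XOmode instead of PXImode, but the
source-level usage above is unchanged.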


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-13 20:59 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-13 20:59 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fd81fa16b2a20d2a493ab971dd5508eb2c42dea0

commit fd81fa16b2a20d2a493ab971dd5508eb2c42dea0
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md                           | 393 ++++++++++++---------
 gcc/config/rs6000/predicates.md                    |  14 +-
 gcc/config/rs6000/rs6000-builtin.def               |  14 +-
 gcc/config/rs6000/rs6000-call.c                    | 140 +++++---
 gcc/config/rs6000/rs6000-modes.def                 |  10 +-
 gcc/config/rs6000/rs6000-string.c                  |   6 +-
 gcc/config/rs6000/rs6000.c                         | 193 +++++-----
 gcc/config/rs6000/rs6000.h                         |   3 +-
 gcc/config/rs6000/rs6000.md                        |   2 +-
 gcc/testsuite/gcc.target/powerpc/mma-double-test.c |   3 +
 gcc/testsuite/gcc.target/powerpc/mma-single-test.c |   3 +
 .../gcc.target/powerpc/{pr96506.c => pr96506-1.c}  |  24 --
 gcc/testsuite/gcc.target/powerpc/pr96506-2.c       |  38 ++
 13 files changed, 498 insertions(+), 345 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..f4b8267b271 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,245 +324,337 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "fpr_reg_operand" "=d")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_insn_and_split "*mma_disassemble_pair"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:OO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  if (BYTES_BIG_ENDIAN)
+    regoff = 1 - regoff;
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
   DONE;
 })
 
+(define_expand "mma_disassemble_pair"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:OO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 1));
+  if (BYTES_BIG_ENDIAN)
+    regoff = 1 - regoff;
+  src = gen_rtx_UNSPEC (V16QImode,
+                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
+                        UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
+(define_insn_and_split "*mma_disassemble_acc"
+  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
+       (unspec:V16QI [(match_operand:XO 1 "input_operand" "d")
+                      (match_operand 2 "const_int_operand")]
+		      UNSPEC_MMA_EXTRACT))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[1], XOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  gcc_assert (REG_P (operands[1]));
+  int reg = REGNO (operands[1]);
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  if (BYTES_BIG_ENDIAN)
+    regoff = 3 - regoff;
+  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
+(define_expand "mma_disassemble_acc"
+  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
+   (match_operand:XO 1 "input_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_MMA"
+{
+  rtx src;
+  int regoff = INTVAL (operands[2]);
+  gcc_assert (IN_RANGE (regoff, 0, 3));
+  if (BYTES_BIG_ENDIAN)
+    regoff = 3 - regoff;
+  src = gen_rtx_UNSPEC (V16QImode,
+			gen_rtvec (2, operands[1], GEN_INT (regoff)),
+			UNSPEC_MMA_EXTRACT);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
+
 ;; MMA instructions that do not use their accumulators as an input, still
 ;; must not allow their vector operands to overlap the registers used by
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
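
A hedged sketch (not from the patch) of how the new mma_assemble_pair and
mma_disassemble_pair expanders above are reached from user code; only the
__builtin_mma_* names are the real built-ins, and the same -mcpu=power10 setup
as the earlier sketch is assumed:

  void
  split_pair (vector unsigned char hi, vector unsigned char lo,
              vector unsigned char out[2])
  {
    __vector_pair pair;

    __builtin_mma_assemble_pair (&pair, hi, lo);   /* build the two-register pair  */
    __builtin_mma_disassemble_pair (out, &pair);   /* extract both 16-byte halves  */
  }

The disassemble side now goes through UNSPEC_MMA_EXTRACT and plain register
moves rather than subregs of the old POImode.
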
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..0c1acf186a6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
@@ -1144,6 +1145,15 @@
   (match_test "(mode == V16QImode
 		&& (vsx_register_operand (op, mode) || MEM_P (op)))"))
 
+;; Return 1 if this operand is valid for an MMA disassemble insn.
+(define_predicate "mma_disassemble_output_operand"
+  (match_code "reg,subreg,mem")
+{
+  if (REG_P (op) && !vsx_register_operand (op, mode))
+    return false;
+  return true;
+})
+
 ;; Return true if operand is an operator used in rotate-and-mask instructions.
 (define_predicate "rotate_mask_operator"
   (match_code "rotate,ashift,lshiftrt"))
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a58102c3785..48a170a7406 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -352,7 +352,7 @@
 		     | RS6000_BTC_UNARY),				\
 		    CODE_FOR_ ## ICODE)			/* ICODE */
 
-#define BU_MMA_V2(ENUM, NAME, ATTR, ICODE)				\
+#define BU_MMA_2(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
 		    "__builtin_mma_" NAME,		/* NAME */	\
 		    RS6000_BTM_MMA,			/* MASK */	\
@@ -360,7 +360,13 @@
 		     | RS6000_BTC_BINARY				\
 		     | RS6000_BTC_VOID					\
 		     | RS6000_BTC_GIMPLE),				\
-		    CODE_FOR_nothing)			/* ICODE */
+		    CODE_FOR_nothing)			/* ICODE */	\
+  RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM ## _INTERNAL,	/* ENUM */	\
+		    "__builtin_mma_" NAME "_internal",	/* NAME */	\
+		    RS6000_BTM_MMA,			/* MASK */	\
+		    (RS6000_BTC_ ## ATTR		/* ATTR */	\
+		     | RS6000_BTC_BINARY),				\
+		    CODE_FOR_ ## ICODE)			/* ICODE */
 
 #define BU_MMA_3(ENUM, NAME, ATTR, ICODE)				\
   RS6000_BUILTIN_M (MMA_BUILTIN_ ## ENUM,		/* ENUM */	\
@@ -3108,8 +3114,8 @@ BU_MMA_1 (XXMFACC,	    "xxmfacc",		QUAD, mma_xxmfacc)
 BU_MMA_1 (XXMTACC,	    "xxmtacc",		QUAD, mma_xxmtacc)
 BU_MMA_1 (XXSETACCZ,	    "xxsetaccz",	MISC, mma_xxsetaccz)
 
-BU_MMA_V2 (DISASSEMBLE_ACC, "disassemble_acc",  QUAD, nothing)
-BU_MMA_V2 (DISASSEMBLE_PAIR,"disassemble_pair", PAIR, nothing)
+BU_MMA_2 (DISASSEMBLE_ACC,  "disassemble_acc",  MISC, mma_disassemble_acc)
+BU_MMA_2 (DISASSEMBLE_PAIR, "disassemble_pair", MISC, mma_disassemble_pair)
 
 BU_MMA_3 (ASSEMBLE_PAIR,    "assemble_pair",	MISC, mma_assemble_pair)
 BU_MMA_3 (XVBF16GER2,	    "xvbf16ger2",	MISC, mma_xvbf16ger2)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 92378e958a9..f2a88c13eb0 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,21 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types to be used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (!cfun->machine->mma_return_type_error
+      && TREE_TYPE (cfun->decl) == fntype
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6577,30 +6592,8 @@ machine_mode
 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
 			      machine_mode mode,
 			      int *punsignedp ATTRIBUTE_UNUSED,
-			      const_tree, int for_return)
+			      const_tree, int for_return ATTRIBUTE_UNUSED)
 {
-  /* Warning: this is a static local variable and not always NULL!
-     This function is called multiple times for the same function
-     and return value.  PREV_FUNC is used to keep track of the
-     first time we encounter a function's return value in order
-     to not report an error with that return value multiple times.  */
-  static struct function *prev_func = NULL;
-
-  /* We do not allow MMA types being used as return values.  Only report
-     the invalid return value usage the first time we encounter it.  */
-  if (for_return
-      && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
-    {
-      /* Record we have now handled function CFUN, so the next time we
-	 are called, we do not re-report the same error.  */
-      prev_func = cfun;
-      if (TYPE_CANONICAL (type) != NULL_TREE)
-	type = TYPE_CANONICAL (type);
-      error ("invalid use of MMA type %qs as a function return value",
-	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
-    }
-
   PROMOTE_MODE (mode, *punsignedp, type);
 
   return mode;
@@ -7552,7 +7545,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -13190,17 +13183,68 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
+
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
+      unsigned mode_align = GET_MODE_ALIGNMENT (OOmode);
+      SET_TYPE_ALIGN (vector_pair_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_pair_type_node,
+					      "__vector_pair");
+
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      mode_align = GET_MODE_ALIGNMENT (XOmode);
+      SET_TYPE_ALIGN (vector_quad_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_quad_type_node,
+					      "__vector_quad");
+
+      /*
+      tree vp_inner = make_unsigned_type (256);
+      layout_type (vp_inner);
+      vp_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_pair_inner"), vp_inner);
+      vector_pair_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_pair_type_node) = vp_inner;
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
       layout_type (vector_pair_type_node);
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
+      tree vq_inner = make_unsigned_type (512);
+      layout_type (vq_inner);
+      vq_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_quad_inner"), vq_inner);
+      vector_quad_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_quad_type_node) = vq_inner;
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
       layout_type (vector_quad_type_node);
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
+      */
+
+      /*
+  t = build_index_type (size_int (size - 1));
+  t = build_array_type (char_type_node, t);
+  t = build_decl (DECL_SOURCE_LOCATION (info->context),
+		  FIELD_DECL, get_identifier ("__data"), t);
+  SET_DECL_ALIGN (t, align);
+  DECL_USER_ALIGN (t) = 1;
+
+  trampoline_type = make_node (RECORD_TYPE);
+  TYPE_NAME (trampoline_type) = get_identifier ("__builtin_trampoline");
+  TYPE_FIELDS (trampoline_type) = t;
+  layout_type (trampoline_type);
+  DECL_CONTEXT (t) = trampoline_type;
+      */
+
     }
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13234,8 +13278,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14054,34 +14098,20 @@ mma_init_builtins (void)
 	  gcc_assert (attr_args == insn_data[icode].n_operands - 1);
 	}
 
-      if (icode == CODE_FOR_nothing)
+      /* FIXME: special stuff for disassemble pair/acc here */
+      /* This is a MMA built-in function.  */
+      unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
+      for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	{
-	  /* This is a disassemble MMA built-in function.  */
-	  gcc_assert (attr_args == RS6000_BTC_BINARY
-		      && (d->code == MMA_BUILTIN_DISASSEMBLE_ACC
-			  || d->code == MMA_BUILTIN_DISASSEMBLE_PAIR));
-	  op[nopnds++] = build_pointer_type (void_type_node);
-	  if (attr & RS6000_BTC_QUAD)
+	  machine_mode mode = insn_data[icode].operand[j].mode;
+	  if (gimple_func && mode == XOmode)
 	    op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	  else
+	  else if (gimple_func && mode == OOmode
+		   && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 	    op[nopnds++] = build_pointer_type (vector_pair_type_node);
-	}
-      else
-	{
-	  /* This is a normal MMA built-in function.  */
-	  unsigned j = (attr & RS6000_BTC_QUAD) ? 1 : 0;
-	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
-	    {
-	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
-		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
-		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
-		op[nopnds++] = build_pointer_type (vector_pair_type_node);
-	      else
-		/* MMA uses unsigned types.  */
-		op[nopnds++] = builtin_mode_to_type[mode][1];
-	    }
+	  else
+	    /* MMA uses unsigned types.  */
+	    op[nopnds++] = builtin_mode_to_type[mode][1];
 	}
 
       switch (nopnds)
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32);	/* __vector_pair; replaces POImode.  */
+OPAQUE_MODE (XO, 64);	/* __vector_quad; replaces PXImode.  */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1e506b83762..4c3d76cc730 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -7954,8 +7948,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10148,11 +10142,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12421,10 +12415,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16148,15 +16142,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16202,12 +16196,16 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       return;
     }
 
-  /* The __vector_pair and __vector_quad modes are multi-register modes,
-     so if have to load or store the registers, we have to be careful to
-     properly swap them if we're in little endian mode below.  This means
-     the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  /* The __vector_pair and __vector_quad modes are multi-register
+     modes, so if we have to load or store the registers, we have to be
+     careful to properly swap them if we're in little endian mode
+     below.  This means the last register gets the first memory
+     location.  We also need to be careful of using the right register
+     numbers if we are splitting XO to OO.  */
+  if (mode == OOmode || mode == XOmode)
     {
+      nregs = hard_regno_nregs (reg, mode);
+      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
       if (MEM_P (dst))
 	{
 	  unsigned offset = 0;
@@ -16216,15 +16214,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16237,11 +16235,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  unsigned offset = 0;
 	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-	  for (int i = 0; i < nregs; i++)
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16250,7 +16248,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16260,7 +16258,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	{
 	  gcc_assert (REG_P (dst)
 		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16269,9 +16267,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if (GET_MODE (src) == XOmode)
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16284,22 +16285,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+      if (mode == OOmode || mode == XOmode)
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16436,7 +16450,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16451,16 +16465,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+	  if ( mode == OOmode || mode == XOmode )
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19690,7 +19712,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27010,14 +27033,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27026,19 +27049,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..cdb495e6aa4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1039,7 +1039,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2554,6 +2554,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..7bc93ead972 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index 53843794a95..254af7f8f79 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -181,6 +181,9 @@ main (int argc, char *argv[])
     printf ("MMA double test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index ac4125ba329..ebbc5ae2e1b 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -189,6 +189,9 @@ main (int argc, char *argv[])
     printf ("MMA single test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506.c b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
similarity index 61%
rename from gcc/testsuite/gcc.target/powerpc/pr96506.c
rename to gcc/testsuite/gcc.target/powerpc/pr96506-1.c
index b1b40c5a5c8..91835cec30c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr96506.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
@@ -40,27 +40,3 @@ foo3 (void)
   vquad_t v;
   bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
 }
-
-__vector_pair
-foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-vpair_t
-foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-__vector_quad
-foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
-
-vquad_t
-foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506-2.c b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
new file mode 100644
index 00000000000..b0b7a318448
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
@@ -0,0 +1,38 @@
+/* PR target/96506 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+extern void bar0();
+extern void bar1();
+extern void bar2();
+extern void bar3();
+
+typedef __vector_pair vpair_t;
+typedef __vector_quad vquad_t;
+
+/* Verify we flag errors on the following.  */
+
+__vector_pair
+foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+vpair_t
+foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+__vector_quad
+foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}
+
+vquad_t
+foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}


^ permalink raw reply	[flat|nested] 9+ messages in thread
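[Editor's note: a minimal usage sketch of the opaque types this patch implements, not part of the commit itself.  It follows the shape of mma-double-test.c and assumes -mcpu=power10 plus the existing MMA built-ins (__builtin_mma_xxsetaccz, __builtin_mma_xvf32gerpp, __builtin_mma_disassemble_acc), whose insn patterns the mma.md changes above provide.

/* Sketch only: accumulate src0 * src1 into a 4x4 float tile using the
   opaque __vector_quad type, then store the result through the
   disassemble built-in.  Assumes -mcpu=power10.  */
#include <altivec.h>

void
f32_ger_update (float *dst, vector unsigned char src0,
		vector unsigned char src1)
{
  __vector_quad acc;

  __builtin_mma_xxsetaccz (&acc);		/* acc = 0.  */
  __builtin_mma_xvf32gerpp (&acc, src0, src1);	/* acc += src0 * src1.  */

  /* The type is opaque: no subscripting, no conversions, and no use as
     a function return value (see the pr96506-*.c tests).  The only way
     out is the disassemble built-in, which stores the accumulator's
     four vectors at dst.  */
  __builtin_mma_disassemble_acc (dst, &acc);
}

With XOmode/OOmode defined via OPAQUE_MODE, the accumulator above only ever moves as a whole register quad (or pair for OOmode), which is exactly the case rs6000_split_multireg_move handles in the hunks above.]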

* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-13 18:15 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-13 18:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:75e16c01ba28553d43a3c75a78e1be05dc145c68

commit 75e16c01ba28553d43a3c75a78e1be05dc145c68
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md                           | 316 ++++++++++-----------
 gcc/config/rs6000/predicates.md                    |   5 +-
 gcc/config/rs6000/rs6000-call.c                    |  85 +++++-
 gcc/config/rs6000/rs6000-modes.def                 |  10 +-
 gcc/config/rs6000/rs6000-string.c                  |   6 +-
 gcc/config/rs6000/rs6000.c                         | 175 ++++++------
 gcc/config/rs6000/rs6000.h                         |   3 +-
 gcc/config/rs6000/rs6000.md                        |   2 +-
 gcc/testsuite/gcc.target/powerpc/mma-double-test.c |   3 +
 gcc/testsuite/gcc.target/powerpc/mma-single-test.c |   3 +
 .../gcc.target/powerpc/{pr96506.c => pr96506-1.c}  |  24 --
 gcc/testsuite/gcc.target/powerpc/pr96506-2.c       |  38 +++
 12 files changed, 379 insertions(+), 291 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..c06f7093440 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,56 +324,69 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
-
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "fpr_reg_operand" "=d")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
@@ -406,186 +396,186 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..cef570c1e77 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 92378e958a9..909583c78fd 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,20 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types being used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (!cfun->machine->mma_return_type_error
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6590,7 +6604,7 @@ rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
      the invalid return value usage the first time we encounter it.  */
   if (for_return
       && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
+      && (mode == OOmode || mode == XOmode))
     {
       /* Record we have now handled function CFUN, so the next time we
 	 are called, we do not re-report the same error.  */
@@ -7552,7 +7566,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -13190,17 +13204,68 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
+
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
+      unsigned mode_align = GET_MODE_ALIGNMENT (OOmode);
+      SET_TYPE_ALIGN (vector_pair_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_pair_type_node,
+					      "__vector_pair");
+
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      mode_align = GET_MODE_ALIGNMENT (XOmode);
+      SET_TYPE_ALIGN (vector_quad_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_quad_type_node,
+					      "__vector_quad");
+
+      /*
+      tree vp_inner = make_unsigned_type (256);
+      layout_type (vp_inner);
+      vp_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_pair_inner"), vp_inner);
+      vector_pair_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_pair_type_node) = vp_inner;
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
       layout_type (vector_pair_type_node);
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
+      tree vq_inner = make_unsigned_type (512);
+      layout_type (vq_inner);
+      vq_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_quad_inner"), vq_inner);
+      vector_quad_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_quad_type_node) = vq_inner;
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
       layout_type (vector_quad_type_node);
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
+      */
+
+      /*
+  t = build_index_type (size_int (size - 1));
+  t = build_array_type (char_type_node, t);
+  t = build_decl (DECL_SOURCE_LOCATION (info->context),
+		  FIELD_DECL, get_identifier ("__data"), t);
+  SET_DECL_ALIGN (t, align);
+  DECL_USER_ALIGN (t) = 1;
+
+  trampoline_type = make_node (RECORD_TYPE);
+  TYPE_NAME (trampoline_type) = get_identifier ("__builtin_trampoline");
+  TYPE_FIELDS (trampoline_type) = t;
+  layout_type (trampoline_type);
+  DECL_CONTEXT (t) = trampoline_type;
+      */
+
     }
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13234,8 +13299,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14073,9 +14138,9 @@ mma_init_builtins (void)
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32); /* instead of POI */
+OPAQUE_MODE (XO, 64); /* instead of PXI */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1e506b83762..124d1939207 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -7954,8 +7948,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10148,11 +10142,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12421,10 +12415,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16148,15 +16142,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16206,7 +16200,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
      so if have to load or store the registers, we have to be careful to
      properly swap them if we're in little endian mode below.  This means
      the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (MEM_P (dst))
 	{
@@ -16216,15 +16210,14 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
 	  for (int i = 0; i < nregs; i++)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg = (WORDS_BIG_ENDIAN) ? i : (nregs - 1 - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16239,9 +16232,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  for (int i = 0; i < nregs; i++)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg = (WORDS_BIG_ENDIAN) ? i : (nregs - 1 - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16250,7 +16242,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16260,7 +16252,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	{
 	  gcc_assert (REG_P (dst)
 		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16269,9 +16261,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if (GET_MODE (src) == XOmode)
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16284,22 +16279,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+      if (mode == OOmode || mode == XOmode)
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16436,7 +16444,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16451,16 +16459,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+	  if (mode == OOmode || mode == XOmode)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else 
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19690,7 +19706,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27010,14 +27027,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27026,19 +27043,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
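
Editor's note, not part of the commit: the rs6000_invalid_conversion hunks above
only rename the modes; the user-visible rules are unchanged.  A minimal C sketch
of those rules (the variable names are hypothetical, the diagnostic text is the
one quoted in the hunks):

    __vector_quad acc;
    void *erased = &acc;             /* allowed: pointer conversion via void *  */
    __vector_quad *back = erased;    /* allowed: and back again                 */
    __vector_pair pair = acc;        /* rejected: "invalid conversion from type
                                        __vector_quad"                          */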
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..cdb495e6aa4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1039,7 +1039,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2554,6 +2554,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..7bc93ead972 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index 53843794a95..254af7f8f79 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -181,6 +181,9 @@ main (int argc, char *argv[])
     printf ("MMA double test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index ac4125ba329..ebbc5ae2e1b 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -189,6 +189,9 @@ main (int argc, char *argv[])
     printf ("MMA single test fail: %d errors\n",ret);
   else
     printf ("MMA single test success: 0 MMA errors\n");
+#else
+  if (ret)
+    abort();
 #endif
       
   return ret;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506.c b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
similarity index 61%
rename from gcc/testsuite/gcc.target/powerpc/pr96506.c
rename to gcc/testsuite/gcc.target/powerpc/pr96506-1.c
index b1b40c5a5c8..91835cec30c 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr96506.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-1.c
@@ -40,27 +40,3 @@ foo3 (void)
   vquad_t v;
   bar3 (v); /* { dg-error "invalid use of MMA operand of type .__vector_quad. as a function parameter" } */
 }
-
-__vector_pair
-foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-vpair_t
-foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
-{
-  return *src;
-}
-
-__vector_quad
-foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
-
-vquad_t
-foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
-{
-  return *src;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr96506-2.c b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
new file mode 100644
index 00000000000..b0b7a318448
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr96506-2.c
@@ -0,0 +1,38 @@
+/* PR target/96506 */
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+extern void bar0();
+extern void bar1();
+extern void bar2();
+extern void bar3();
+
+typedef __vector_pair vpair_t;
+typedef __vector_quad vquad_t;
+
+/* Verify we flag errors on the following.  */
+
+__vector_pair
+foo4 (__vector_pair *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+vpair_t
+foo5 (vpair_t *src) /* { dg-error "invalid use of MMA type .__vector_pair. as a function return value" } */
+{
+  return *src;
+}
+
+__vector_quad
+foo6 (__vector_quad *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}
+
+vquad_t
+foo7 (vquad_t *src) /* { dg-error "invalid use of MMA type .__vector_quad. as a function return value" } */
+{
+  return *src;
+}
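
Editor's note, not part of the commit: the tests split out into pr96506-2.c all
reject returning an MMA type by value.  A minimal sketch of the style that stays
within the rules being checked, returning the result through a pointer instead
(the function and parameter names are hypothetical):

    void
    copy_pair (__vector_pair *dst, __vector_pair *src)
    {
      *dst = *src;    /* a plain __vector_pair move; nothing crosses the return ABI  */
    }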


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-10 16:47 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-10 16:47 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3b3fbeb3b469625cdfd1fdebe897a3947f2d813b

commit 3b3fbeb3b469625cdfd1fdebe897a3947f2d813b
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md           | 316 ++++++++++++++++++-------------------
 gcc/config/rs6000/predicates.md    |   5 +-
 gcc/config/rs6000/rs6000-call.c    |  85 ++++++++--
 gcc/config/rs6000/rs6000-modes.def |  10 +-
 gcc/config/rs6000/rs6000-string.c  |   6 +-
 gcc/config/rs6000/rs6000.c         | 175 ++++++++++----------
 gcc/config/rs6000/rs6000.h         |   3 +-
 gcc/config/rs6000/rs6000.md        |   2 +-
 8 files changed, 335 insertions(+), 267 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..c06f7093440 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,24 +19,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
-;; To use these modes, we must define XImode and OImode move patterns
-;; so the independent parts of the compiler can use our large partial
-;; integer modes.  However, if we enable the XImode and OImode move
-;; patterns, then the compiler will attempt to use them and this can
-;; cause byte swapping issues on litte-endian systems.  We don't need
-;; the XImode and OImode move patterns for actual code generation,
-;; therefore, we define the XImode and OImode move patterns, but we
-;; disable their use with a "false" condition flag.
+;; We define these modes with the new OPAQUE_MODE mechanism to prevent
+;; anything from trying to open them up.
 
 (define_constants [(MAX_MMA_OPERANDS 7)])
 
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -265,31 +260,22 @@
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
 
-;; Define a disabled OImode move pattern, so we can use POImode.
-(define_expand "movoi"
-  [(set (match_operand:OI 0 "nonimmediate_operand")
-	(match_operand:OI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -305,38 +291,29 @@
    (set_attr "length" "*,*,8")])
 
 \f
-;; Define a disabled XImode move pattern, so we can use PXImode.
-(define_expand "movxi"
-  [(set (match_operand:XI 0 "nonimmediate_operand")
-	(match_operand:XI 1 "input_operand"))]
-  "0"
-{
-  gcc_unreachable ();
-})
-
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,56 +324,69 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
-
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "fpr_reg_operand" "=d")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
@@ -406,186 +396,186 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
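
Editor's note, not part of the commit: the new *mma_assemble_pair pattern builds
the pair through UNSPEC_MMA_ASSEMBLE and rs6000_split_multireg_move instead of
POImode subregs; the source-level entry point is unchanged.  A hedged sketch
(the built-in spelling and operand order follow the MMA built-ins of this era
and should be treated as an assumption):

    __vector unsigned char hi = { 0 };
    __vector unsigned char lo = { 0 };
    __vector_pair p;
    __builtin_mma_assemble_pair (&p, hi, lo);    /* fills an even/odd VSX register pair  */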
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..cef570c1e77 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
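
Editor's note, not part of the commit: the predicate change keeps (const_int 0)
acceptable as a source operand in the new opaque modes.  That is what lets the
mma_xxsetaccz expander earlier in this patch say, in effect,

    emit_insn (gen_movxo (operands[0], const0_rtx));    /* *movxo emits "xxsetaccz %A0"  */

without input_operand rejecting an integer constant in a non-integer mode.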
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 92378e958a9..909583c78fd 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6325,6 +6325,20 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
+  /* We do not allow MMA types being used as return values.  Only report
+     the invalid return value usage the first time we encounter it.  */
+  if (!cfun->machine->mma_return_type_error
+      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+    {
+      /* Record we have now handled function CFUN, so the next time we
+	 are called, we do not re-report the same error.  */
+      cfun->machine->mma_return_type_error = true;
+      if (TYPE_CANONICAL (type) != NULL_TREE)
+	type = TYPE_CANONICAL (type);
+      error ("invalid use of MMA type %qs as a function return value",
+	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
+    }
+
   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
   if (TARGET_MACHO
       && rs6000_darwin64_abi
@@ -6590,7 +6604,7 @@ rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
      the invalid return value usage the first time we encounter it.  */
   if (for_return
       && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
+      && (mode == OOmode || mode == XOmode))
     {
       /* Record we have now handled function CFUN, so the next time we
 	 are called, we do not re-report the same error.  */
@@ -7552,7 +7566,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -13190,17 +13204,68 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
+
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
+      unsigned mode_align = GET_MODE_ALIGNMENT (OOmode);
+      SET_TYPE_ALIGN (vector_pair_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_pair_type_node,
+					      "__vector_pair");
+
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      mode_align = GET_MODE_ALIGNMENT (XOmode);
+      SET_TYPE_ALIGN (vector_quad_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_quad_type_node,
+					      "__vector_quad");
+
+      /*
+      tree vp_inner = make_unsigned_type (256);
+      layout_type (vp_inner);
+      vp_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_pair_inner"), vp_inner);
+      vector_pair_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_pair_type_node) = vp_inner;
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
       layout_type (vector_pair_type_node);
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
+      tree vq_inner = make_unsigned_type (512);
+      layout_type (vq_inner);
+      vq_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_quad_inner"), vq_inner);
+      vector_quad_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_quad_type_node) = vq_inner;
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
       layout_type (vector_quad_type_node);
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
+      */
+
+      /*
+  t = build_index_type (size_int (size - 1));
+  t = build_array_type (char_type_node, t);
+  t = build_decl (DECL_SOURCE_LOCATION (info->context),
+		  FIELD_DECL, get_identifier ("__data"), t);
+  SET_DECL_ALIGN (t, align);
+  DECL_USER_ALIGN (t) = 1;
+
+  trampoline_type = make_node (RECORD_TYPE);
+  TYPE_NAME (trampoline_type) = get_identifier ("__builtin_trampoline");
+  TYPE_FIELDS (trampoline_type) = t;
+  layout_type (trampoline_type);
+  DECL_CONTEXT (t) = trampoline_type;
+      */
+
     }
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13234,8 +13299,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14073,9 +14138,9 @@ mma_init_builtins (void)
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..e47e355ab75 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -83,12 +83,6 @@ VECTOR_MODE (INT, SI, 2);     /*                 V2SI  */
    combination.  */
 PARTIAL_INT_MODE (TI, 128, PTI);
 
-/* Define, but don't use the larger integer modes.  We need an integer mode
-   defined that is the same size as the vector pair and vector quad modes.  */
-
-INT_MODE (OI, 32);
-INT_MODE (XI, 64);
-
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32); /* __vector_pair, replaces POI.  */
+OPAQUE_MODE (XO, 64); /* __vector_quad, replaces PXI.  */
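
Editor's note, not part of the commit: unlike the removed PARTIAL_INT_MODE lines,
whose 256/512 arguments are bit precisions, the OPAQUE_MODE argument is a size in
bytes, so the new modes keep the old widths.  A hypothetical sanity check, not in
the patch:

    gcc_checking_assert (GET_MODE_SIZE (OOmode) == 32);    /* 256-bit vector pair  */
    gcc_checking_assert (GET_MODE_SIZE (XOmode) == 64);    /* 512-bit vector quad  */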
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
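
Editor's note, not part of the commit: with OOmode and gen_movoo wired into
expand_block_move, a 32-byte block copy becomes a candidate for a single
lxvp/stxvp pair once the vector-pair block-ops flag from the option-override
change is in effect.  A hedged user-level sketch (assumes a power10 target with
that flag enabled):

    void
    copy32 (char *restrict dst, const char *restrict src)
    {
      __builtin_memcpy (dst, src, 32);    /* may expand to one lxvp/stxvp pair  */
    }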
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1e506b83762..124d1939207 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,12 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
-     even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+     registers.  */
+  if (mode == OOmode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
-  /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
-     the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  /* MMA accumulator modes need FPR registers divisible by 4.  */
+  if (mode == XOmode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1938,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1948,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2241,10 +2238,8 @@ rs6000_debug_reg_global (void)
     V2DFmode,
     V8SFmode,
     V4DFmode,
-    OImode,
-    XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2701,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2916,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3059,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4122,11 +4117,10 @@ rs6000_option_override_internal (bool global_init_p)
 
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
     {
-      /* When the POImode issues of PR96791 are resolved, then we can
-	 once again enable use of vector pair for memcpy/memmove on
-	 P10 if we have TARGET_MMA.  For now we make it disabled by
-	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -7954,8 +7948,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10148,11 +10142,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12421,10 +12415,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16148,15 +16142,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16206,7 +16200,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
      so if have to load or store the registers, we have to be careful to
      properly swap them if we're in little endian mode below.  This means
      the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (MEM_P (dst))
 	{
@@ -16216,15 +16210,14 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
 	  for (int i = 0; i < nregs; i++)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg = (WORDS_BIG_ENDIAN) ? i : (nregs - 1 - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16239,9 +16232,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  for (int i = 0; i < nregs; i++)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg = (WORDS_BIG_ENDIAN) ? i : (nregs - 1 - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16250,7 +16242,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16260,7 +16252,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	{
 	  gcc_assert (REG_P (dst)
 		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16269,9 +16261,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if (GET_MODE (src) == XOmode)
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16284,22 +16279,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+      if (mode == OOmode || mode == XOmode)
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16436,7 +16444,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16451,16 +16459,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XOmode and OOmode are opaque, so we cannot use subregs.  */
+	  if (mode == OOmode || mode == XOmode)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else 
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -19690,7 +19706,8 @@ rs6000_mangle_type (const_tree type)
   type = TYPE_MAIN_VARIANT (type);
 
   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
-      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
+      && TREE_CODE (type) != OPAQUE_TYPE)
     return NULL;
 
   if (type == bool_char_type_node) return "U6__boolc";
@@ -27010,14 +27027,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27026,19 +27043,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..cdb495e6aa4 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1039,7 +1039,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
@@ -2554,6 +2554,7 @@ typedef struct GTY(()) machine_function
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
   bool toc_is_wrapped_separately;
+  bool mma_return_type_error;
 } machine_function;
 #endif
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..7bc93ead972 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
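
For illustration only (not part of the patch): a minimal C sketch of how user
code drives the new opaque __vector_quad type through the documented MMA
built-ins once this lands.  It assumes -mcpu=power10 (which enables -mmma);
the function name ger_update is invented for the example.

#include <altivec.h>

typedef vector unsigned char vec_t;

void
ger_update (__vector_quad *acc, vec_t a, vec_t b, vec_t out[4])
{
  __builtin_mma_xxsetaccz (acc);              /* start from a zeroed accumulator */
  __builtin_mma_xvf32gerpp (acc, a, b);       /* accumulate a rank-1 (outer product) update */
  __builtin_mma_disassemble_acc (out, acc);   /* copy the four accumulator rows back to vectors */
}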



* [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes
@ 2020-11-06 21:26 Aaron Sawdey
  0 siblings, 0 replies; 9+ messages in thread
From: Aaron Sawdey @ 2020-11-06 21:26 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2f9f1622d7e99bd01e41d64131f914903ec8aaa0

commit 2f9f1622d7e99bd01e41d64131f914903ec8aaa0
Author: Aaron Sawdey <acsawdey@linux.ibm.com>
Date:   Fri Nov 6 15:25:54 2020 -0600

    Make MMA use opaque modes

Diff:
---
 gcc/config/rs6000/mma.md           | 288 +++++++++++++++++++------------------
 gcc/config/rs6000/predicates.md    |   5 +-
 gcc/config/rs6000/rs6000-call.c    |  71 +++++++--
 gcc/config/rs6000/rs6000-modes.def |   4 +-
 gcc/config/rs6000/rs6000-string.c  |   6 +-
 gcc/config/rs6000/rs6000.c         | 163 ++++++++++++---------
 gcc/config/rs6000/rs6000.h         |   2 +-
 gcc/config/rs6000/rs6000.md        |   2 +-
 8 files changed, 316 insertions(+), 225 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a3fd28bdd0a..37bee4d592d 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -19,7 +19,7 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-;; The MMA patterns use the multi-register PXImode and POImode partial
+;; The MMA patterns use the multi-register XOmode and OOmode partial
 ;; integer modes to implement the target specific __vector_quad and
 ;; __vector_pair types that the MMA built-in functions reference.
 ;; To use these modes, we must define XImode and OImode move patterns
@@ -36,7 +36,8 @@
 ;; Constants for creating unspecs
 
 (define_c_enum "unspec"
-  [UNSPEC_MMA_ASSEMBLE_ACC
+  [UNSPEC_MMA_ASSEMBLE
+   UNSPEC_MMA_EXTRACT
    UNSPEC_MMA_PMXVBF16GER2
    UNSPEC_MMA_PMXVBF16GER2NN
    UNSPEC_MMA_PMXVBF16GER2NP
@@ -274,22 +275,22 @@
   gcc_unreachable ();
 })
 
-;; Vector pair support.  POImode can only live in VSRs.
-(define_expand "movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand")
-	(match_operand:POI 1 "input_operand"))]
+;; Vector pair support.  OOmode can only live in VSRs.
+(define_expand "movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand")
+	(match_operand:OO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], POImode);
+  rs6000_emit_move (operands[0], operands[1], OOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpoi"
-  [(set (match_operand:POI 0 "nonimmediate_operand" "=wa,m,wa")
-	(match_operand:POI 1 "input_operand" "m,wa,wa"))]
+(define_insn_and_split "*movoo"
+  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa")
+	(match_operand:OO 1 "input_operand" "m,wa,wa"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], POImode)
-       || gpc_reg_operand (operands[1], POImode))"
+   && (gpc_reg_operand (operands[0], OOmode)
+       || gpc_reg_operand (operands[1], OOmode))"
   "@
    lxvp%X1 %x0,%1
    stxvp%X0 %x1,%0
@@ -314,29 +315,29 @@
   gcc_unreachable ();
 })
 
-;; Vector quad support.  PXImode can only live in FPRs.
-(define_expand "movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand")
-	(match_operand:PXI 1 "input_operand"))]
+;; Vector quad support.  XOmode can only live in FPRs.
+(define_expand "movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand")
+	(match_operand:XO 1 "input_operand"))]
   "TARGET_MMA"
 {
-  rs6000_emit_move (operands[0], operands[1], PXImode);
+  rs6000_emit_move (operands[0], operands[1], XOmode);
   DONE;
 })
 
-(define_insn_and_split "*movpxi"
-  [(set (match_operand:PXI 0 "nonimmediate_operand" "=d,m,d,d")
-	(match_operand:PXI 1 "input_operand" "m,d,d,O"))]
+(define_insn_and_split "*movxo"
+  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d,d")
+	(match_operand:XO 1 "input_operand" "m,d,d,O"))]
   "TARGET_MMA
-   && (gpc_reg_operand (operands[0], PXImode)
-       || gpc_reg_operand (operands[1], PXImode))"
+   && (gpc_reg_operand (operands[0], XOmode)
+       || gpc_reg_operand (operands[1], XOmode))"
   "@
    #
    #
    #
    xxsetaccz %A0"
   "&& reload_completed
-   && !(fpr_reg_operand (operands[0], PXImode) && operands[1] == const0_rtx)"
+   && !(fpr_reg_operand (operands[0], XOmode) && operands[1] == const0_rtx)"
   [(const_int 0)]
 {
   rs6000_split_multireg_move (operands[0], operands[1]);
@@ -347,56 +348,69 @@
    (set_attr "max_prefixed_insns" "2,2,*,*")])
 
 (define_expand "mma_assemble_pair"
-  [(match_operand:POI 0 "vsx_register_operand")
+  [(match_operand:OO 0 "vsx_register_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")]
   "TARGET_MMA"
 {
-  rtx dst;
-
-  /* Let the compiler know the code below fully defines our output value.  */
-  emit_clobber (operands[0]);
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  emit_move_insn (operands[0], src);
+  DONE;
+})
 
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 0);
-  emit_move_insn (dst, operands[1]);
-  dst = simplify_gen_subreg (V16QImode, operands[0], POImode, 16);
-  emit_move_insn (dst, operands[2]);
+(define_insn_and_split "*mma_assemble_pair"
+  [(set (match_operand:OO 0 "fpr_reg_operand" "=d")
+	(unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
+  "TARGET_MMA
+   && fpr_reg_operand (operands[0], OOmode)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx src = gen_rtx_UNSPEC (OOmode,
+			    gen_rtvec (2, operands[1], operands[2]),
+			    UNSPEC_MMA_ASSEMBLE);
+  rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
 
 (define_expand "mma_assemble_acc"
-  [(match_operand:PXI 0 "fpr_reg_operand")
+  [(match_operand:XO 0 "fpr_reg_operand")
    (match_operand:V16QI 1 "input_operand")
    (match_operand:V16QI 2 "input_operand")
    (match_operand:V16QI 3 "input_operand")
    (match_operand:V16QI 4 "input_operand")]
   "TARGET_MMA"
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   emit_move_insn (operands[0], src);
   DONE;
 })
 
 (define_insn_and_split "*mma_assemble_acc"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=d")
-	(unspec:PXI [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
-		     (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
-		     UNSPEC_MMA_ASSEMBLE_ACC))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=d")
+	(unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
+		    (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
+		    UNSPEC_MMA_ASSEMBLE))]
   "TARGET_MMA
-   && fpr_reg_operand (operands[0], PXImode)"
+   && fpr_reg_operand (operands[0], XOmode)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
 {
-  rtx src = gen_rtx_UNSPEC (PXImode,
+  rtx src = gen_rtx_UNSPEC (XOmode,
 			    gen_rtvec (4, operands[1], operands[2],
 				       operands[3], operands[4]),
-			    UNSPEC_MMA_ASSEMBLE_ACC);
+			    UNSPEC_MMA_ASSEMBLE);
   rs6000_split_multireg_move (operands[0], src);
   DONE;
 })
@@ -406,186 +420,186 @@
 ;; the accumulator.  We enforce this by marking the output as early clobber.
 
 (define_insn "mma_<acc>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
 		    MMA_ACC))]
   "TARGET_MMA"
   "<acc> %A0"
   [(set_attr "type" "mma")])
 
 (define_expand "mma_xxsetaccz"
-  [(set (match_operand:PXI 0 "fpr_reg_operand")
+  [(set (match_operand:XO 0 "fpr_reg_operand")
 	(const_int 0))]
   "TARGET_MMA"
 {
-  emit_insn (gen_movpxi (operands[0], const0_rtx));
+  emit_insn (gen_movxo (operands[0], const0_rtx));
   DONE;
 })
 
 (define_insn "mma_<vv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_VV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_VV))]
   "TARGET_MMA"
   "<vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<avv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_AVV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_AVV))]
   "TARGET_MMA"
   "<avv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<pv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")]
-		     MMA_PV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")]
+		    MMA_PV))]
   "TARGET_MMA"
   "<pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<apv>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")]
-		     MMA_APV))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")]
+		    MMA_APV))]
   "TARGET_MMA"
   "<apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")])
 
 (define_insn "mma_<vvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "u8bit_cint_operand" "n")]
-		     MMA_VVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "u8bit_cint_operand" "n")]
+		    MMA_VVI4I4I8))]
   "TARGET_MMA"
   "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i8>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "u8bit_cint_operand" "n")]
-		     MMA_AVVI4I4I8))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "u8bit_cint_operand" "n")]
+		    MMA_AVVI4I4I8))]
   "TARGET_MMA"
   "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_VVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_VVI4I4I2))]
   "TARGET_MMA"
   "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_3_operand" "n")]
-		     MMA_AVVI4I4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_3_operand" "n")]
+		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
   "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4))]
   "TARGET_MMA"
   "<vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4))]
   "TARGET_MMA"
   "<avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<pvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:POI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_3_operand" "n")]
-		     MMA_PVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_3_operand" "n")]
+		    MMA_PVI4I2))]
   "TARGET_MMA"
   "<pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<apvi4i2>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:POI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_3_operand" "n")]
-		     MMA_APVI4I2))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_3_operand" "n")]
+		    MMA_APVI4I2))]
   "TARGET_MMA"
   "<apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<vvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:SI 3 "const_0_to_15_operand" "n")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")]
-		     MMA_VVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:SI 3 "const_0_to_15_operand" "n")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")]
+		    MMA_VVI4I4I4))]
   "TARGET_MMA"
   "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "length" "8")])
 
 (define_insn "mma_<avvi4i4i4>"
-  [(set (match_operand:PXI 0 "fpr_reg_operand" "=&d")
-	(unspec:PXI [(match_operand:PXI 1 "fpr_reg_operand" "0")
-		     (match_operand:V16QI 2 "vsx_register_operand" "wa")
-		     (match_operand:V16QI 3 "vsx_register_operand" "wa")
-		     (match_operand:SI 4 "const_0_to_15_operand" "n")
-		     (match_operand:SI 5 "const_0_to_15_operand" "n")
-		     (match_operand:SI 6 "const_0_to_15_operand" "n")]
-		     MMA_AVVI4I4I4))]
+  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
+	(unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")
+		    (match_operand:V16QI 2 "vsx_register_operand" "wa")
+		    (match_operand:V16QI 3 "vsx_register_operand" "wa")
+		    (match_operand:SI 4 "const_0_to_15_operand" "n")
+		    (match_operand:SI 5 "const_0_to_15_operand" "n")
+		    (match_operand:SI 6 "const_0_to_15_operand" "n")]
+		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..cef570c1e77 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1072,8 +1072,9 @@
       && easy_fp_constant (op, mode))
     return 1;
 
-  /* Allow any integer constant.  */
-  if (SCALAR_INT_MODE_P (mode) && CONST_SCALAR_INT_P (op))
+  /* Allow any integer constant.  Also allow consts for OPAQUE_MODE.  */
+  if ((SCALAR_INT_MODE_P (mode) || OPAQUE_MODE_P (mode))
+      && CONST_SCALAR_INT_P (op))
     return 1;
 
   /* Allow easy vector constants.  */
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 92378e958a9..43c9b185251 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -6590,7 +6590,7 @@ rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
      the invalid return value usage the first time we encounter it.  */
   if (for_return
       && prev_func != cfun
-      && (mode == POImode || mode == PXImode))
+      && (mode == OOmode || mode == XOmode))
     {
       /* Record we have now handled function CFUN, so the next time we
 	 are called, we do not re-report the same error.  */
@@ -7552,7 +7552,7 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   int n_elts;
 
   /* We do not allow MMA types being used as function arguments.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
@@ -13190,17 +13190,68 @@ rs6000_init_builtins (void)
   /* Vector pair and vector quad support.  */
   if (TARGET_EXTRA_BUILTINS)
     {
-      vector_pair_type_node = make_unsigned_type (256);
-      SET_TYPE_MODE (vector_pair_type_node, POImode);
+
+      vector_pair_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
+      TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode));
+      TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode);
+      TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode));
+      unsigned mode_align = GET_MODE_ALIGNMENT (OOmode);
+      SET_TYPE_ALIGN (vector_pair_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_pair_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_pair_type_node,
+					      "__vector_pair");
+
+      vector_quad_type_node = make_node (OPAQUE_TYPE);
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
+      TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode));
+      TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode);
+      TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode));
+      mode_align = GET_MODE_ALIGNMENT (XOmode);
+      SET_TYPE_ALIGN (vector_quad_type_node, mode_align);
+      TYPE_USER_ALIGN (vector_quad_type_node) = 0;
+      lang_hooks.types.register_builtin_type (vector_quad_type_node,
+					      "__vector_quad");
+
+      /*
+      tree vp_inner = make_unsigned_type (256);
+      layout_type (vp_inner);
+      vp_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_pair_inner"), vp_inner);
+      vector_pair_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_pair_type_node) = vp_inner;
+      SET_TYPE_MODE (vector_pair_type_node, OOmode);
       layout_type (vector_pair_type_node);
       lang_hooks.types.register_builtin_type (vector_pair_type_node,
 					      "__vector_pair");
 
-      vector_quad_type_node = make_unsigned_type (512);
-      SET_TYPE_MODE (vector_quad_type_node, PXImode);
+      tree vq_inner = make_unsigned_type (512);
+      layout_type (vq_inner);
+      vq_inner = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+			     get_identifier ("__vector_quad_inner"), vq_inner);
+      vector_quad_type_node = make_node (RECORD_TYPE);
+      TYPE_FIELDS (vector_quad_type_node) = vq_inner;
+      SET_TYPE_MODE (vector_quad_type_node, XOmode);
       layout_type (vector_quad_type_node);
       lang_hooks.types.register_builtin_type (vector_quad_type_node,
 					      "__vector_quad");
+      */
+
+      /*
+  t = build_index_type (size_int (size - 1));
+  t = build_array_type (char_type_node, t);
+  t = build_decl (DECL_SOURCE_LOCATION (info->context),
+		  FIELD_DECL, get_identifier ("__data"), t);
+  SET_DECL_ALIGN (t, align);
+  DECL_USER_ALIGN (t) = 1;
+
+  trampoline_type = make_node (RECORD_TYPE);
+  TYPE_NAME (trampoline_type) = get_identifier ("__builtin_trampoline");
+  TYPE_FIELDS (trampoline_type) = t;
+  layout_type (trampoline_type);
+  DECL_CONTEXT (t) = trampoline_type;
+      */
+
     }
 
   /* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13234,8 +13285,8 @@ rs6000_init_builtins (void)
   builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
   builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
   builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
-  builtin_mode_to_type[POImode][1] = vector_pair_type_node;
-  builtin_mode_to_type[PXImode][1] = vector_quad_type_node;
+  builtin_mode_to_type[OOmode][1] = vector_pair_type_node;
+  builtin_mode_to_type[XOmode][1] = vector_quad_type_node;
 
   tdecl = add_builtin_type ("__bool char", bool_char_type_node);
   TYPE_NAME (bool_char_type_node) = tdecl;
@@ -14073,9 +14124,9 @@ mma_init_builtins (void)
 	  for (; j < (unsigned) insn_data[icode].n_operands; j++)
 	    {
 	      machine_mode mode = insn_data[icode].operand[j].mode;
-	      if (gimple_func && mode == PXImode)
+	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == POImode
+	      else if (gimple_func && mode == OOmode
 		       && d->code == MMA_BUILTIN_ASSEMBLE_PAIR)
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index ddb218b3fba..383ec4735f4 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -90,5 +90,5 @@ INT_MODE (OI, 32);
 INT_MODE (XI, 64);
 
 /* Modes used by __vector_pair and __vector_quad.  */
-PARTIAL_INT_MODE (OI, 256, POI);	/* __vector_pair.  */
-PARTIAL_INT_MODE (XI, 512, PXI);	/* __vector_quad.  */
+OPAQUE_MODE (OO, 32); /* instead of POI */
+OPAQUE_MODE (XO, 64); /* instead of PXI */
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 82cc24ecdda..a2e6821d353 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -2787,7 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap)
       rtx src, dest;
       bool move_with_length = false;
 
-      /* Use POImode for paired vsx load/store.  Use V2DI for single
+      /* Use OOmode for paired vsx load/store.  Use V2DI for single
 	 unaligned vsx load/store, for consistency with what other
 	 expansions (compare) already do, and so we can use lxvd2x on
 	 p8.  Order is VSX pair unaligned, VSX unaligned, Altivec, VSX
@@ -2799,8 +2799,8 @@ expand_block_move (rtx operands[], bool might_overlap)
 	  && (align >= 256 || !STRICT_ALIGNMENT))
 	{
 	  move_bytes = 32;
-	  mode = POImode;
-	  gen_func.mov = gen_movpoi;
+	  mode = OOmode;
+	  gen_func.mov = gen_movoo;
 	}
       else if (TARGET_POWERPC64 && TARGET_BLOCK_OPS_UNALIGNED_VSX
 	       && VECTOR_MEM_VSX_P (V2DImode)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1e506b83762..7d1a46ce74d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1826,15 +1826,15 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
     mode = GET_MODE_INNER (mode);
 
   /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
-     registers.  We need to allow OImode to have the same registers as POImode,
+     registers.  We need to allow OImode to have the same registers as OOmode,
      even though we do not enable the move pattern for OImode.  */
-  if (mode == POImode || mode == OImode)
+  if (mode == OOmode || mode == OImode)
     return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
 
   /* MMA accumulator modes need FPR registers divisible by 4.  We need to allow
-     XImode to have the same registers as PXImode, even though we do not enable
+     XImode to have the same registers as XOmode, even though we do not enable
      the move pattern for XImode.  */
-  if (mode == PXImode || mode == XImode)
+  if (mode == XOmode || mode == XImode)
     return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
 
   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
@@ -1941,8 +1941,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
    57744).
 
-   Similarly, don't allow POImode (vector pair, restricted to even VSX
-   registers) or PXImode (vector quad, restricted to FPR registers divisible
+   Similarly, don't allow OOmode (vector pair, restricted to even VSX
+   registers) or XOmode (vector quad, restricted to FPR registers divisible
    by 4) to tie with other modes.
 
    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
@@ -1951,8 +1951,8 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 static bool
 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
-  if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
-      || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
+  if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
+      || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2243,8 +2243,8 @@ rs6000_debug_reg_global (void)
     V4DFmode,
     OImode,
     XImode,
-    POImode,
-    PXImode,
+    OOmode,
+    XOmode,
     CCmode,
     CCUNSmode,
     CCEQmode,
@@ -2706,13 +2706,13 @@ rs6000_setup_reg_addr_masks (void)
 	     since it will be broken into two vector moves.  Vector quads can
 	     only do offset loads.  */
 	  else if ((addr_mask != 0) && TARGET_MMA
-		   && (m2 == POImode || m2 == PXImode))
+		   && (m2 == OOmode || m2 == XOmode))
 	    {
 	      addr_mask |= RELOAD_REG_OFFSET;
 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
 		{
 		  addr_mask |= RELOAD_REG_QUAD_OFFSET;
-		  if (m2 == POImode)
+		  if (m2 == OOmode)
 		    addr_mask |= RELOAD_REG_INDEXED;
 		}
 	    }
@@ -2921,13 +2921,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
-      rs6000_vector_unit[POImode] = VECTOR_NONE;
-      rs6000_vector_mem[POImode] = VECTOR_VSX;
-      rs6000_vector_align[POImode] = 256;
+      rs6000_vector_unit[OOmode] = VECTOR_NONE;
+      rs6000_vector_mem[OOmode] = VECTOR_VSX;
+      rs6000_vector_align[OOmode] = 256;
 
-      rs6000_vector_unit[PXImode] = VECTOR_NONE;
-      rs6000_vector_mem[PXImode] = VECTOR_VSX;
-      rs6000_vector_align[PXImode] = 512;
+      rs6000_vector_unit[XOmode] = VECTOR_NONE;
+      rs6000_vector_mem[XOmode] = VECTOR_VSX;
+      rs6000_vector_align[XOmode] = 512;
     }
 
   /* Register class constraints for the constraints that depend on compile
@@ -3064,10 +3064,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 
 	      if (TARGET_MMA)
 		{
-		  reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
-		  reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
-		  reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
-		  reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
+		  reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
+		  reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
+		  reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
+		  reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
 		}
 	    }
 	}
@@ -4126,7 +4126,10 @@ rs6000_option_override_internal (bool global_init_p)
 	 once again enable use of vector pair for memcpy/memmove on
 	 P10 if we have TARGET_MMA.  For now we make it disabled by
 	 default for all targets.  */
-      rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      if (TARGET_EFFICIENT_UNALIGNED_VSX)
+	rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
+      else
+	rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
     }
 
   /* Use long double size to select the appropriate long double.  We use
@@ -7954,8 +7957,8 @@ reg_offset_addressing_ok_p (machine_mode mode)
 
       /* The vector pair/quad types support offset addressing if the
 	 underlying vectors support offset addressing.  */
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       return TARGET_MMA;
 
     case E_SDmode:
@@ -10148,11 +10151,11 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 	operands[1] = force_const_mem (mode, operands[1]);
       break;
 
-    case E_POImode:
-    case E_PXImode:
+    case E_OOmode:
+    case E_XOmode:
       if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
 	error ("%qs is an opaque type, and you can't set it to other values.",
-	       (mode == POImode) ? "__vector_pair" : "__vector_quad");
+	       (mode == OOmode) ? "__vector_pair" : "__vector_quad");
       break;
 
     case E_SImode:
@@ -12421,10 +12424,10 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
      the GPR registers.  */
   if (rclass == GEN_OR_FLOAT_REGS)
     {
-      if (mode == POImode)
+      if (mode == OOmode)
 	return VSX_REGS;
 
-      if (mode == PXImode)
+      if (mode == XOmode)
 	return FLOAT_REGS;
 
       if (GET_MODE_CLASS (mode) == MODE_INT)
@@ -16148,15 +16151,15 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
   /* If we have a vector quad register for MMA, and this is a load or store,
      see if we can use vector paired load/stores.  */
-  if (mode == PXImode && TARGET_MMA
+  if (mode == XOmode && TARGET_MMA
       && (MEM_P (dst) || MEM_P (src)))
     {
-      reg_mode = POImode;
+      reg_mode = OOmode;
       nregs /= 2;
     }
   /* If we have a vector pair/quad mode, split it into two/four separate
      vectors.  */
-  else if (mode == POImode || mode == PXImode)
+  else if (mode == OOmode || mode == XOmode)
     reg_mode = V1TImode;
   else if (FP_REGNO_P (reg))
     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
@@ -16206,7 +16209,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
      so if have to load or store the registers, we have to be careful to
      properly swap them if we're in little endian mode below.  This means
      the last register gets the first memory location.  */
-  if (mode == POImode || mode == PXImode)
+  if (mode == OOmode || mode == XOmode)
     {
       if (MEM_P (dst))
 	{
@@ -16216,15 +16219,14 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are reading an accumulator register, we have to
 	     deprime it before we can access it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	    emit_insn (gen_mma_xxmfacc (src, src));
 
 	  for (int i = 0; i < nregs; i++)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
+	      unsigned subreg = (WORDS_BIG_ENDIAN) ? i : (nregs - 1 - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
 	    }
@@ -16239,9 +16241,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  for (int i = 0; i < nregs; i++)
 	    {
-	      unsigned subreg = (WORDS_BIG_ENDIAN)
-				  ? i * size : (nregs - 1 - i) * size;
-	      rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
+	      unsigned subreg = (WORDS_BIG_ENDIAN) ? i : (nregs - 1 - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
 	      emit_insn (gen_rtx_SET (dst2, src2));
@@ -16250,7 +16251,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  /* If we are writing an accumulator register, we have to
 	     prime it after we've written it.  */
 	  if (TARGET_MMA
-	      && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
 	  return;
@@ -16260,7 +16261,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	{
 	  gcc_assert (REG_P (dst)
 		      && FP_REGNO_P (REGNO (dst))
-		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
+		      && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  for (int i = 0; i < XVECLEN (src, 0); i++)
@@ -16269,9 +16270,12 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  emit_insn (gen_mma_xxmtacc (dst, dst));
+	  if (GET_MODE (src) == XOmode)
+	    {
+	      /* We are writing an accumulator register, so we have to
+		 prime it after we've written it.  */
+	      emit_insn (gen_mma_xxmtacc (dst, dst));
+	    }
 
 	  return;
 	}
@@ -16284,22 +16288,35 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       /* Move register range backwards, if we might have destructive
 	 overlap.  */
       int i;
-      for (i = nregs - 1; i >= 0; i--)
-	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						     i * reg_mode_size),
-				simplify_gen_subreg (reg_mode, src, mode,
-						     i * reg_mode_size)));
+      /* XO/OO are opaque so cannot use subregs.  */
+      if (mode == OOmode || mode == XOmode)
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	}
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
     }
   else
@@ -16436,7 +16453,7 @@ rs6000_split_multireg_move (rtx dst, rtx src)
       /* If we are reading an accumulator register, we have to
 	 deprime it before we can access it.  */
       if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
 	emit_insn (gen_mma_xxmfacc (src, src));
 
       for (i = 0; i < nregs; i++)
@@ -16451,16 +16468,24 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (j == 0 && used_update)
 	    continue;
 
-	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-						       j * reg_mode_size),
-				  simplify_gen_subreg (reg_mode, src, mode,
-						       j * reg_mode_size)));
+	  /* XO/OO are opaque so cannot use subregs.  */
+	  if (mode == OOmode || mode == XOmode)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
 	}
 
       /* If we are writing an accumulator register, we have to
 	 prime it after we've written it.  */
       if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
 	emit_insn (gen_mma_xxmtacc (dst, dst));
 
       if (restore_basereg != NULL_RTX)
@@ -27010,14 +27035,14 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
 
   if (frommode != tomode)
     {
-      /* Do not allow conversions to/from PXImode and POImode types.  */
-      if (frommode == PXImode)
+      /* Do not allow conversions to/from XOmode and OOmode types.  */
+      if (frommode == XOmode)
 	return N_("invalid conversion from type %<__vector_quad%>");
-      if (tomode == PXImode)
+      if (tomode == XOmode)
 	return N_("invalid conversion to type %<__vector_quad%>");
-      if (frommode == POImode)
+      if (frommode == OOmode)
 	return N_("invalid conversion from type %<__vector_pair%>");
-      if (tomode == POImode)
+      if (tomode == OOmode)
 	return N_("invalid conversion to type %<__vector_pair%>");
     }
   else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
@@ -27026,19 +27051,19 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
       frommode = TYPE_MODE (TREE_TYPE (fromtype));
       tomode = TYPE_MODE (TREE_TYPE (totype));
 
-      /* Do not allow conversions to/from PXImode and POImode pointer
+      /* Do not allow conversions to/from XOmode and OOmode pointer
 	 types, except to/from void pointers.  */
       if (frommode != tomode
 	  && frommode != VOIDmode
 	  && tomode != VOIDmode)
 	{
-	  if (frommode == PXImode)
+	  if (frommode == XOmode)
 	    return N_("invalid conversion from type %<* __vector_quad%>");
-	  if (tomode == PXImode)
+	  if (tomode == XOmode)
 	    return N_("invalid conversion to type %<* __vector_quad%>");
-	  if (frommode == POImode)
+	  if (frommode == OOmode)
 	    return N_("invalid conversion from type %<* __vector_pair%>");
-	  if (tomode == POImode)
+	  if (tomode == OOmode)
 	    return N_("invalid conversion to type %<* __vector_pair%>");
 	}
     }
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..a2e6a7cc2fa 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1039,7 +1039,7 @@ enum data_align { align_abi, align_opt, align_both };
 /* Modes that are not vectors, but require vector alignment.  Treat these like
    vectors in terms of loads and stores.  */
 #define VECTOR_ALIGNMENT_P(MODE)					\
-  (FLOAT128_VECTOR_P (MODE) || (MODE) == POImode || (MODE) == PXImode)
+  (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode)
 
 #define ALTIVEC_VECTOR_MODE(MODE)					\
   ((MODE) == V16QImode							\
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..7bc93ead972 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -778,7 +778,7 @@
 ;; supplement addressing modes.
 (define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
 			      SF SD SI DF DD DI TI PTI KF IF TF
-			      POI PXI])
+			      OO XO])
 
 ;; Iterate over smin, smax
 (define_code_iterator fp_minmax	[smin smax])
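
For illustration only (again not from the patch): the pointer conversions that
rs6000_invalid_conversion now accepts and rejects for the opaque types; the
diagnostic text is the same as in the hunks above.

__vector_quad vq;

void
conversions (void)
{
  void *ok = &vq;                 /* conversions through void * stay allowed */
  double *bad = (double *) &vq;   /* rejected: "invalid conversion from type * __vector_quad" */
}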




Thread overview: 9+ messages
2020-11-17 16:13 [gcc(refs/users/acsawdey/heads/mode-opaque)] Make MMA use opaque modes Aaron Sawdey
  -- strict thread matches above, loose matches on Subject: below --
2020-11-17 16:25 Aaron Sawdey
2020-11-17  0:07 Aaron Sawdey
2020-11-16 18:35 Aaron Sawdey
2020-11-13 23:37 Aaron Sawdey
2020-11-13 20:59 Aaron Sawdey
2020-11-13 18:15 Aaron Sawdey
2020-11-10 16:47 Aaron Sawdey
2020-11-06 21:26 Aaron Sawdey
