public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-8609] rs6000: Add new __builtin_vsx_build_pair and __builtin_mma_build_acc built-ins
@ 2021-06-17 22:04 Peter Bergner
  0 siblings, 0 replies; only message in thread
From: Peter Bergner @ 2021-06-17 22:04 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:de6f0c41e9da2195eebe6c1f7c459615c8f1ad58

commit r11-8609-gde6f0c41e9da2195eebe6c1f7c459615c8f1ad58
Author: Peter Bergner <bergner@linux.ibm.com>
Date:   Thu Jun 10 13:54:12 2021 -0500

    rs6000: Add new __builtin_vsx_build_pair and __builtin_mma_build_acc built-ins
    
    The __builtin_vsx_assemble_pair and __builtin_mma_assemble_acc built-ins
    currently assign their first source operand to the first VSX register
    in a pair/quad, their second operand to the second register in a pair/quad, etc.
    This is not endian friendly and forces the user to generate different calls
    depending on endianness.  In agreement with the POWER LLVM team, we've
    decided to lightly deprecate the assemble built-ins and replace them with
    "build" built-ins that automatically handle endianness so the same built-in
    call can be used for both little-endian and big-endian compiles.  We are not
    removing the assemble built-ins, since there is code in the wild that uses
    them, but we are removing their documentation to encourage the use of the
    new "build" variants.
    
    gcc/
            * config/rs6000/rs6000-builtin.def (build_pair): New built-in.
            (build_acc): Likewise.
            * config/rs6000/rs6000-call.c (mma_expand_builtin): Swap assemble
            source operands in little-endian mode.
            (rs6000_gimple_fold_mma_builtin): Handle VSX_BUILTIN_BUILD_PAIR.
            (mma_init_builtins): Likewise.
            * config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle endianness
            ordering for the MMA assemble and build source operands.
            * doc/extend.texi (__builtin_mma_build_acc, __builtin_vsx_build_pair):
            Document.
            (__builtin_mma_assemble_acc, __builtin_vsx_assemble_pair): Remove
            documentation.
    
    gcc/testsuite/
            * gcc.target/powerpc/mma-builtin-4.c (__builtin_vsx_build_pair): Add
            tests.  Update expected counts.
            * gcc.target/powerpc/mma-builtin-5.c (__builtin_mma_build_acc): Add
            tests.  Update expected counts.
    
    (cherry picked from commit 00d07ec6e12451acc7a290cd93be03bed50cb666)

Diff:
---
 gcc/config/rs6000/rs6000-builtin.def             |  2 ++
 gcc/config/rs6000/rs6000-call.c                  | 19 ++++++++++++++++---
 gcc/config/rs6000/rs6000.c                       |  6 ++++--
 gcc/doc/extend.texi                              |  4 ++--
 gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c | 16 ++++++++++++++--
 gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c | 24 ++++++++++++++++++++----
 6 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 609bebdfd74..4043e14ed3f 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -3207,6 +3207,7 @@ BU_MMA_2 (DISASSEMBLE_ACC, "disassemble_acc",	QUAD, mma_disassemble_acc)
 BU_MMA_V2 (DISASSEMBLE_PAIR, "disassemble_pair", PAIR, vsx_disassemble_pair)
 BU_COMPAT (VSX_BUILTIN_DISASSEMBLE_PAIR, "mma_disassemble_pair")
 
+BU_MMA_V3 (BUILD_PAIR,	    "build_pair",	MISC, vsx_assemble_pair)
 BU_MMA_V3 (ASSEMBLE_PAIR,   "assemble_pair",	MISC, vsx_assemble_pair)
 BU_COMPAT (VSX_BUILTIN_ASSEMBLE_PAIR, "mma_assemble_pair")
 BU_MMA_3 (XVBF16GER2,	    "xvbf16ger2",	MISC, mma_xvbf16ger2)
@@ -3239,6 +3240,7 @@ BU_MMA_3 (XVI8GER4SPP,	    "xvi8ger4spp",      QUAD, mma_xvi8ger4spp)
 BU_MMA_3 (XVI16GER2PP,	    "xvi16ger2pp",      QUAD, mma_xvi16ger2pp)
 BU_MMA_3 (XVI16GER2SPP,	    "xvi16ger2spp",     QUAD, mma_xvi16ger2spp)
 
+BU_MMA_5 (BUILD_ACC,	    "build_acc",	MISC, mma_assemble_acc)
 BU_MMA_5 (ASSEMBLE_ACC,     "assemble_acc",	MISC, mma_assemble_acc)
 BU_MMA_5 (PMXVF32GER,	    "pmxvf32ger",       MISC, mma_pmxvf32ger)
 BU_MMA_5 (PMXVF64GER,	    "pmxvf64ger",       PAIR, mma_pmxvf64ger)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index f5676255387..bbd935f0a1b 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -10143,12 +10143,23 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
       pat = GEN_FCN (icode) (op[0], op[1]);
       break;
     case 3:
+      /* The ASSEMBLE builtin source operands are reversed in little-endian
+	 mode, so reorder them.  */
+      if (fcode == VSX_BUILTIN_ASSEMBLE_PAIR_INTERNAL && !WORDS_BIG_ENDIAN)
+	std::swap (op[1], op[2]);
       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
       break;
     case 4:
       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
       break;
     case 5:
+      /* The ASSEMBLE builtin source operands are reversed in little-endian
+	 mode, so reorder them.  */
+      if (fcode == MMA_BUILTIN_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN)
+	{
+	  std::swap (op[1], op[4]);
+	  std::swap (op[2], op[3]);
+	}
       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
       break;
     case 6:
@@ -11860,7 +11871,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
       gcc_unreachable ();
     }
 
-  if (fncode == VSX_BUILTIN_ASSEMBLE_PAIR)
+  if (fncode == VSX_BUILTIN_BUILD_PAIR || fncode == VSX_BUILTIN_ASSEMBLE_PAIR)
     lhs = make_ssa_name (vector_pair_type_node);
   else
     lhs = make_ssa_name (vector_quad_type_node);
@@ -14175,8 +14186,10 @@ mma_init_builtins (void)
 	      machine_mode mode = insn_data[icode].operand[j].mode;
 	      if (gimple_func && mode == XOmode)
 		op[nopnds++] = build_pointer_type (vector_quad_type_node);
-	      else if (gimple_func && mode == OOmode
-		       && d->code == VSX_BUILTIN_ASSEMBLE_PAIR)
+	      else if (gimple_func
+		       && mode == OOmode
+		       && (d->code == VSX_BUILTIN_BUILD_PAIR
+			   || d->code == VSX_BUILTIN_ASSEMBLE_PAIR))
 		op[nopnds++] = build_pointer_type (vector_pair_type_node);
 	      else
 		/* MMA uses unsigned types.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index dd40215a73c..1103245a13b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16834,9 +16834,11 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
 
 	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
-	  for (int i = 0; i < XVECLEN (src, 0); i++)
+	  int nvecs = XVECLEN (src, 0);
+	  for (int i = 0; i < nvecs; i++)
 	    {
-	      rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
+	      int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i;
+	      rtx dst_i = gen_rtx_REG (reg_mode, reg + index);
 	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
 	    }
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 3260f0639d2..62cc63c7e61 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20493,10 +20493,10 @@ void __builtin_mma_xxmtacc (__vector_quad *);
 void __builtin_mma_xxmfacc (__vector_quad *);
 void __builtin_mma_xxsetaccz (__vector_quad *);
 
-void __builtin_mma_assemble_acc (__vector_quad *, vec_t, vec_t, vec_t, vec_t);
+void __builtin_mma_build_acc (__vector_quad *, vec_t, vec_t, vec_t, vec_t);
 void __builtin_mma_disassemble_acc (void *, __vector_quad *);
 
-void __builtin_vsx_assemble_pair (__vector_pair *, vec_t, vec_t);
+void __builtin_vsx_build_pair (__vector_pair *, vec_t, vec_t);
 void __builtin_vsx_disassemble_pair (void *, __vector_pair *);
 
 vec_t __builtin_vsx_xvcvspbf16 (vec_t);
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c
index 3bedf531de0..a9fb0107d12 100644
--- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-4.c
@@ -20,6 +20,14 @@ foo2 (__vector_pair *dst, vec_t *src)
   *dst = pair;
 }
 
+void
+foo3 (__vector_pair *dst, vec_t *src)
+{
+  __vector_pair pair;
+  __builtin_vsx_build_pair (&pair, src[4], src[0]);
+  *dst = pair;
+}
+
 void
 bar (vec_t *dst, __vector_pair *src)
 {
@@ -54,8 +62,12 @@ bar2 (vec_t *dst, __vector_pair *src)
 #  error "__has_builtin (__builtin_mma_disassemble_pair) failed"
 #endif
 
-/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+#if !__has_builtin (__builtin_vsx_build_pair)
+#  error "__has_builtin (__builtin_vsx_build_pair) failed"
+#endif
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 6 } } */
 /* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c
index 43b6d3ac91e..00503b7343d 100644
--- a/gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-5.c
@@ -12,6 +12,14 @@ foo (__vector_quad *dst, vec_t *src)
   *dst = acc;
 }
 
+void
+foo2 (__vector_quad *dst, vec_t *src)
+{
+  __vector_quad acc;
+  __builtin_mma_build_acc (&acc, src[12], src[8], src[4], src[0]);
+  *dst = acc;
+}
+
 void
 bar (vec_t *dst, __vector_quad *src)
 {
@@ -23,9 +31,17 @@ bar (vec_t *dst, __vector_quad *src)
   dst[12] = res[3];
 }
 
-/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+#if !__has_builtin (__builtin_mma_assemble_acc)
+#  error "__has_builtin (__builtin_mma_assemble_acc) failed"
+#endif
+
+#if !__has_builtin (__builtin_mma_build_acc)
+#  error "__has_builtin (__builtin_mma_build_acc) failed"
+#endif
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 8 } } */
 /* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
 /* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 3 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-17 22:04 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-17 22:04 [gcc r11-8609] rs6000: Add new __builtin_vsx_build_pair and __builtin_mma_build_acc built-ins Peter Bergner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).