public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-8995] rs6000: Generate an lxvp instead of two adjacent lxv instructions
@ 2021-09-15 17:18 Peter Bergner
  0 siblings, 0 replies; only message in thread
From: Peter Bergner @ 2021-09-15 17:18 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ed9006264f657bf0ec901af9e511c32b9b67926d

commit r11-8995-ged9006264f657bf0ec901af9e511c32b9b67926d
Author: Peter Bergner <bergner@linux.ibm.com>
Date:   Wed Jul 14 18:27:02 2021 -0500

    rs6000: Generate an lxvp instead of two adjacent lxv instructions
    
    The MMA build built-ins currently use individual lxv instructions to
    load up the registers of a __vector_pair or __vector_quad.  If the
    memory addresses of the built-in operands refer to adjacent locations,
    then we can use an lxvp in some cases to load up two registers at once.
    The patch below adds support for checking whether memory addresses are
    adjacent and emitting an lxvp instead of two lxv instructions.
    
    2021-07-14  Peter Bergner  <bergner@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000.c (adjacent_mem_locations): Return the lower
            addressed memory rtx, if any.
            (rs6000_split_multireg_move): Fix code formatting.
            Handle MMA build built-ins with operands in adjacent memory locations.
    
    gcc/testsuite/
            * gcc.target/powerpc/mma-builtin-9.c: New test.
    
    (cherry picked from commit 69feb7601e86274fa9abbfb420b00c8adf947e7b)

Diff:
---
 gcc/config/rs6000/rs6000.c                       | 82 ++++++++++++++++++------
 gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c | 28 ++++++++
 2 files changed, 92 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 8ad2ec4ec61..ca0fb85c44c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -18077,23 +18077,29 @@ get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
   return true;
 }
 
-/* The function returns true if the target storage location of
-   mem1 is adjacent to the target storage location of mem2 */
-/* Return 1 if memory locations are adjacent.  */
+/* If the target storage locations of arguments MEM1 and MEM2 are
+   adjacent, then return the argument that has the lower address.
+   Otherwise, return NULL_RTX.  */
 
-static bool
+static rtx
 adjacent_mem_locations (rtx mem1, rtx mem2)
 {
   rtx reg1, reg2;
   HOST_WIDE_INT off1, size1, off2, size2;
 
-  if (get_memref_parts (mem1, &reg1, &off1, &size1)
-      && get_memref_parts (mem2, &reg2, &off2, &size2))
-    return ((REGNO (reg1) == REGNO (reg2))
-	    && ((off1 + size1 == off2)
-		|| (off2 + size2 == off1)));
+  if (MEM_P (mem1)
+      && MEM_P (mem2)
+      && get_memref_parts (mem1, &reg1, &off1, &size1)
+      && get_memref_parts (mem2, &reg2, &off2, &size2)
+      && REGNO (reg1) == REGNO (reg2))
+    {
+      if (off1 + size1 == off2)
+	return mem1;
+      else if (off2 + size2 == off1)
+	return mem2;
+    }
 
-  return false;
+  return NULL_RTX;
 }
 
 /* This function returns true if it can be determined that the two MEM
@@ -26655,8 +26661,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg =
-		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      unsigned subreg
+		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
 	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
@@ -26673,8 +26679,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg =
-		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      unsigned subreg
+		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
@@ -26699,13 +26705,53 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (GET_MODE (src) == OOmode)
 	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
 
-	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  int nvecs = XVECLEN (src, 0);
 	  for (int i = 0; i < nvecs; i++)
 	    {
-	      int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i;
-	      rtx dst_i = gen_rtx_REG (reg_mode, reg + index);
-	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
+	      rtx op;
+	      int regno = reg + i;
+
+	      if (WORDS_BIG_ENDIAN)
+		{
+		  op = XVECEXP (src, 0, i);
+
+		  /* If we are loading an even VSX register and the memory location
+		     is adjacent to the next register's memory location (if any),
+		     then we can load them both with one LXVP instruction.  */
+		  if ((regno & 1) == 0)
+		    {
+		      rtx op2 = XVECEXP (src, 0, i + 1);
+		      if (adjacent_mem_locations (op, op2) == op)
+			{
+			  op = adjust_address (op, OOmode, 0);
+			  /* Skip the next register, since we're going to
+			     load it together with this register.  */
+			  i++;
+			}
+		    }
+		}
+	      else
+		{
+		  op = XVECEXP (src, 0, nvecs - i - 1);
+
+		  /* If we are loading an even VSX register and the memory location
+		     is adjacent to the next register's memory location (if any),
+		     then we can load them both with one LXVP instruction.  */
+		  if ((regno & 1) == 0)
+		    {
+			  rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
+			  if (adjacent_mem_locations (op2, op) == op2)
+			    {
+			      op = adjust_address (op2, OOmode, 0);
+			      /* Skip the next register, since we're going to
+				 load it together with this register.  */
+			      i++;
+			    }
+		    }
+		}
+
+	      rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
+	      emit_insn (gen_rtx_SET (dst_i, op));
 	    }
 
 	  /* We are writing an accumulator register, so we have to
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c
new file mode 100644
index 00000000000..397d0f1db35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+typedef unsigned char  vec_t __attribute__((vector_size(16)));
+
+void
+foo (__vector_pair *dst, vec_t *src)
+{
+  __vector_pair pair;
+  /* Adjacent loads should be combined into one lxvp instruction.  */
+  __builtin_vsx_build_pair (&pair, src[0], src[1]);
+  *dst = pair;
+}
+
+void
+bar (__vector_quad *dst, vec_t *src)
+{
+  __vector_quad quad;
+  /* Adjacent loads should be combined into two lxvp instructions.  */
+  __builtin_mma_build_acc (&quad, src[0], src[1], src[2], src[3]);
+  *dst = quad;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-15 17:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-15 17:18 [gcc r11-8995] rs6000: Generate an lxvp instead of two adjacent lxv instructions Peter Bergner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).