public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-8994] rs6000: Move rs6000_split_multireg_move to later in file
@ 2021-09-15 17:18 Peter Bergner
  0 siblings, 0 replies; only message in thread
From: Peter Bergner @ 2021-09-15 17:18 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:819e7784a0695b6fc9c2563540e593b3a9360de9

commit r11-8994-g819e7784a0695b6fc9c2563540e593b3a9360de9
Author: Peter Bergner <bergner@linux.ibm.com>
Date:   Wed Jul 14 18:23:31 2021 -0500

    rs6000: Move rs6000_split_multireg_move to later in file
    
    An upcoming change to rs6000_split_multireg_move requires it to be
    moved later in the file to fix a declaration issue.
    
    2021-07-14  Peter Bergner  <bergner@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000.c (rs6000_split_multireg_move): Move to later
            in the file.
    
    (cherry picked from commit 7d914777fc6c6151f430d798fc97bae927a430f7)

Diff:
---
 gcc/config/rs6000/rs6000.c | 1845 ++++++++++++++++++++++----------------------
 1 file changed, 922 insertions(+), 923 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 528a6f552bf..8ad2ec4ec61 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16716,533 +16716,157 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
     emit_move_insn (orig_after, after);
 }
 
-/* Emit instructions to move SRC to DST.  Called by splitters for
-   multi-register moves.  It will emit at most one instruction for
-   each register that is accessed; that is, it won't emit li/lis pairs
-   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
-   register.  */
+static GTY(()) alias_set_type TOC_alias_set = -1;
 
-void
-rs6000_split_multireg_move (rtx dst, rtx src)
+alias_set_type
+get_TOC_alias_set (void)
 {
-  /* The register number of the first register being moved.  */
-  int reg;
-  /* The mode that is to be moved.  */
-  machine_mode mode;
-  /* The mode that the move is being done in, and its size.  */
-  machine_mode reg_mode;
-  int reg_mode_size;
-  /* The number of registers that will be moved.  */
-  int nregs;
+  if (TOC_alias_set == -1)
+    TOC_alias_set = new_alias_set ();
+  return TOC_alias_set;
+}
 
-  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
-  mode = GET_MODE (dst);
-  nregs = hard_regno_nregs (reg, mode);
+/* The mode the ABI uses for a word.  This is not the same as word_mode
+   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */
 
-  /* If we have a vector quad register for MMA, and this is a load or store,
-     see if we can use vector paired load/stores.  */
-  if (mode == XOmode && TARGET_MMA
-      && (MEM_P (dst) || MEM_P (src)))
-    {
-      reg_mode = OOmode;
-      nregs /= 2;
-    }
-  /* If we have a vector pair/quad mode, split it into two/four separate
-     vectors.  */
-  else if (mode == OOmode || mode == XOmode)
-    reg_mode = V1TImode;
-  else if (FP_REGNO_P (reg))
-    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
-	(TARGET_HARD_FLOAT ? DFmode : SFmode);
-  else if (ALTIVEC_REGNO_P (reg))
-    reg_mode = V16QImode;
+static scalar_int_mode
+rs6000_abi_word_mode (void)
+{
+  return TARGET_32BIT ? SImode : DImode;
+}
+
+/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
+static char *
+rs6000_offload_options (void)
+{
+  if (TARGET_64BIT)
+    return xstrdup ("-foffload-abi=lp64");
   else
-    reg_mode = word_mode;
-  reg_mode_size = GET_MODE_SIZE (reg_mode);
+    return xstrdup ("-foffload-abi=ilp32");
+}
 
-  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
+\f
+/* A quick summary of the various types of 'constant-pool tables'
+   under PowerPC:
 
-  /* TDmode residing in FP registers is special, since the ISA requires that
-     the lower-numbered word of a register pair is always the most significant
-     word, even in little-endian mode.  This does not match the usual subreg
-     semantics, so we cannnot use simplify_gen_subreg in those cases.  Access
-     the appropriate constituent registers "by hand" in little-endian mode.
+   Target	Flags		Name		One table per
+   AIX		(none)		AIX TOC		object file
+   AIX		-mfull-toc	AIX TOC		object file
+   AIX		-mminimal-toc	AIX minimal TOC	translation unit
+   SVR4/EABI	(none)		SVR4 SDATA	object file
+   SVR4/EABI	-fpic		SVR4 pic	object file
+   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
+   SVR4/EABI	-mrelocatable	EABI TOC	function
+   SVR4/EABI	-maix		AIX TOC		object file
+   SVR4/EABI	-maix -mminimal-toc
+				AIX minimal TOC	translation unit
 
-     Note we do not need to check for destructive overlap here since TDmode
-     can only reside in even/odd register pairs.  */
-  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
-    {
-      rtx p_src, p_dst;
-      int i;
+   Name			Reg.	Set by	entries	      contains:
+					made by	 addrs?	fp?	sum?
 
-      for (i = 0; i < nregs; i++)
-	{
-	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
-	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
-	  else
-	    p_src = simplify_gen_subreg (reg_mode, src, mode,
-					 i * reg_mode_size);
+   AIX TOC		2	crt0	as	 Y	option	option
+   AIX minimal TOC	30	prolog	gcc	 Y	Y	option
+   SVR4 SDATA		13	crt0	gcc	 N	Y	N
+   SVR4 pic		30	prolog	ld	 Y	not yet	N
+   SVR4 PIC		30	prolog	gcc	 Y	option	option
+   EABI TOC		30	prolog	gcc	 Y	option	option
 
-	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
-	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
-	  else
-	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
-					 i * reg_mode_size);
+*/
 
-	  emit_insn (gen_rtx_SET (p_dst, p_src));
-	}
+/* Hash functions for the hash table.  */
 
-      return;
-    }
+static unsigned
+rs6000_hash_constant (rtx k)
+{
+  enum rtx_code code = GET_CODE (k);
+  machine_mode mode = GET_MODE (k);
+  unsigned result = (code << 3) ^ mode;
+  const char *format;
+  int flen, fidx;
 
-  /* The __vector_pair and __vector_quad modes are multi-register
-     modes, so if we have to load or store the registers, we have to be
-     careful to properly swap them if we're in little endian mode
-     below.  This means the last register gets the first memory
-     location.  We also need to be careful of using the right register
-     numbers if we are splitting XO to OO.  */
-  if (mode == OOmode || mode == XOmode)
+  format = GET_RTX_FORMAT (code);
+  flen = strlen (format);
+  fidx = 0;
+
+  switch (code)
     {
-      nregs = hard_regno_nregs (reg, mode);
-      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
-      if (MEM_P (dst))
-	{
-	  unsigned offset = 0;
-	  unsigned size = GET_MODE_SIZE (reg_mode);
+    case LABEL_REF:
+      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
 
-	  /* If we are reading an accumulator register, we have to
-	     deprime it before we can access it.  */
-	  if (TARGET_MMA
-	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
-	    emit_insn (gen_mma_xxmfacc (src, src));
+    case CONST_WIDE_INT:
+      {
+	int i;
+	flen = CONST_WIDE_INT_NUNITS (k);
+	for (i = 0; i < flen; i++)
+	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
+	return result;
+      }
 
-	  for (int i = 0; i < nregs; i += reg_mode_nregs)
-	    {
-	      unsigned subreg =
-		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
-	      rtx dst2 = adjust_address (dst, reg_mode, offset);
-	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
-	      offset += size;
-	      emit_insn (gen_rtx_SET (dst2, src2));
-	    }
+    case CONST_DOUBLE:
+      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
 
-	  return;
-	}
+    case CODE_LABEL:
+      fidx = 3;
+      break;
 
-      if (MEM_P (src))
+    default:
+      break;
+    }
+
+  for (; fidx < flen; fidx++)
+    switch (format[fidx])
+      {
+      case 's':
 	{
-	  unsigned offset = 0;
-	  unsigned size = GET_MODE_SIZE (reg_mode);
+	  unsigned i, len;
+	  const char *str = XSTR (k, fidx);
+	  len = strlen (str);
+	  result = result * 613 + len;
+	  for (i = 0; i < len; i++)
+	    result = result * 613 + (unsigned) str[i];
+	  break;
+	}
+      case 'u':
+      case 'e':
+	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
+	break;
+      case 'i':
+      case 'n':
+	result = result * 613 + (unsigned) XINT (k, fidx);
+	break;
+      case 'w':
+	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
+	  result = result * 613 + (unsigned) XWINT (k, fidx);
+	else
+	  {
+	    size_t i;
+	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
+	      result = result * 613 + (unsigned) (XWINT (k, fidx)
+						  >> CHAR_BIT * i);
+	  }
+	break;
+      case '0':
+	break;
+      default:
+	gcc_unreachable ();
+      }
 
-	  for (int i = 0; i < nregs; i += reg_mode_nregs)
-	    {
-	      unsigned subreg =
-		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
-	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
-	      rtx src2 = adjust_address (src, reg_mode, offset);
-	      offset += size;
-	      emit_insn (gen_rtx_SET (dst2, src2));
-	    }
+  return result;
+}
 
-	  /* If we are writing an accumulator register, we have to
-	     prime it after we've written it.  */
-	  if (TARGET_MMA
-	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
-	    emit_insn (gen_mma_xxmtacc (dst, dst));
+hashval_t
+toc_hasher::hash (toc_hash_struct *thc)
+{
+  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
+}
 
-	  return;
-	}
+/* Compare H1 and H2 for equivalence.  */
 
-      if (GET_CODE (src) == UNSPEC)
-	{
-	  gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
-	  gcc_assert (REG_P (dst));
-	  if (GET_MODE (src) == XOmode)
-	    gcc_assert (FP_REGNO_P (REGNO (dst)));
-	  if (GET_MODE (src) == OOmode)
-	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
-
-	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
-	  int nvecs = XVECLEN (src, 0);
-	  for (int i = 0; i < nvecs; i++)
-	    {
-	      int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i;
-	      rtx dst_i = gen_rtx_REG (reg_mode, reg + index);
-	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
-	    }
-
-	  /* We are writing an accumulator register, so we have to
-	     prime it after we've written it.  */
-	  if (GET_MODE (src) == XOmode)
-	    emit_insn (gen_mma_xxmtacc (dst, dst));
-
-	  return;
-	}
-
-      /* Register -> register moves can use common code.  */
-    }
-
-  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
-    {
-      /* If we are reading an accumulator register, we have to
-	 deprime it before we can access it.  */
-      if (TARGET_MMA
-	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
-	emit_insn (gen_mma_xxmfacc (src, src));
-
-      /* Move register range backwards, if we might have destructive
-	 overlap.  */
-      int i;
-      /* XO/OO are opaque so cannot use subregs. */
-      if (mode == OOmode || mode == XOmode )
-	{
-	  for (i = nregs - 1; i >= 0; i--)
-	    {
-	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
-	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
-	      emit_insn (gen_rtx_SET (dst_i, src_i));
-	    }
-	}
-      else
-	{
-	  for (i = nregs - 1; i >= 0; i--)
-	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-							 i * reg_mode_size),
-				    simplify_gen_subreg (reg_mode, src, mode,
-							 i * reg_mode_size)));
-	}
-
-      /* If we are writing an accumulator register, we have to
-	 prime it after we've written it.  */
-      if (TARGET_MMA
-	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
-	emit_insn (gen_mma_xxmtacc (dst, dst));
-    }
-  else
-    {
-      int i;
-      int j = -1;
-      bool used_update = false;
-      rtx restore_basereg = NULL_RTX;
-
-      if (MEM_P (src) && INT_REGNO_P (reg))
-	{
-	  rtx breg;
-
-	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
-	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
-	    {
-	      rtx delta_rtx;
-	      breg = XEXP (XEXP (src, 0), 0);
-	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
-			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
-			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
-	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
-	      src = replace_equiv_address (src, breg);
-	    }
-	  else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
-	    {
-	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
-		{
-		  rtx basereg = XEXP (XEXP (src, 0), 0);
-		  if (TARGET_UPDATE)
-		    {
-		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
-		      emit_insn (gen_rtx_SET (ndst,
-					      gen_rtx_MEM (reg_mode,
-							   XEXP (src, 0))));
-		      used_update = true;
-		    }
-		  else
-		    emit_insn (gen_rtx_SET (basereg,
-					    XEXP (XEXP (src, 0), 1)));
-		  src = replace_equiv_address (src, basereg);
-		}
-	      else
-		{
-		  rtx basereg = gen_rtx_REG (Pmode, reg);
-		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
-		  src = replace_equiv_address (src, basereg);
-		}
-	    }
-
-	  breg = XEXP (src, 0);
-	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
-	    breg = XEXP (breg, 0);
-
-	  /* If the base register we are using to address memory is
-	     also a destination reg, then change that register last.  */
-	  if (REG_P (breg)
-	      && REGNO (breg) >= REGNO (dst)
-	      && REGNO (breg) < REGNO (dst) + nregs)
-	    j = REGNO (breg) - REGNO (dst);
-	}
-      else if (MEM_P (dst) && INT_REGNO_P (reg))
-	{
-	  rtx breg;
-
-	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
-	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
-	    {
-	      rtx delta_rtx;
-	      breg = XEXP (XEXP (dst, 0), 0);
-	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
-			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
-			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
-
-	      /* We have to update the breg before doing the store.
-		 Use store with update, if available.  */
-
-	      if (TARGET_UPDATE)
-		{
-		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
-		  emit_insn (TARGET_32BIT
-			     ? (TARGET_POWERPC64
-				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
-				: gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
-			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
-		  used_update = true;
-		}
-	      else
-		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
-	      dst = replace_equiv_address (dst, breg);
-	    }
-	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
-		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
-	    {
-	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
-		{
-		  rtx basereg = XEXP (XEXP (dst, 0), 0);
-		  if (TARGET_UPDATE)
-		    {
-		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
-		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
-							   XEXP (dst, 0)),
-					      nsrc));
-		      used_update = true;
-		    }
-		  else
-		    emit_insn (gen_rtx_SET (basereg,
-					    XEXP (XEXP (dst, 0), 1)));
-		  dst = replace_equiv_address (dst, basereg);
-		}
-	      else
-		{
-		  rtx basereg = XEXP (XEXP (dst, 0), 0);
-		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
-		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
-			      && REG_P (basereg)
-			      && REG_P (offsetreg)
-			      && REGNO (basereg) != REGNO (offsetreg));
-		  if (REGNO (basereg) == 0)
-		    {
-		      rtx tmp = offsetreg;
-		      offsetreg = basereg;
-		      basereg = tmp;
-		    }
-		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
-		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
-		  dst = replace_equiv_address (dst, basereg);
-		}
-	    }
-	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
-	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
-	}
-
-      /* If we are reading an accumulator register, we have to
-	 deprime it before we can access it.  */
-      if (TARGET_MMA && REG_P (src)
-	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
-	emit_insn (gen_mma_xxmfacc (src, src));
-
-      for (i = 0; i < nregs; i++)
-	{
-	  /* Calculate index to next subword.  */
-	  ++j;
-	  if (j == nregs)
-	    j = 0;
-
-	  /* If compiler already emitted move of first word by
-	     store with update, no need to do anything.  */
-	  if (j == 0 && used_update)
-	    continue;
-
-	  /* XO/OO are opaque so cannot use subregs. */
-	  if (mode == OOmode || mode == XOmode )
-	    {
-	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
-	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
-	      emit_insn (gen_rtx_SET (dst_i, src_i));
-	    }
-	  else
-	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
-							 j * reg_mode_size),
-				    simplify_gen_subreg (reg_mode, src, mode,
-							 j * reg_mode_size)));
-	}
-
-      /* If we are writing an accumulator register, we have to
-	 prime it after we've written it.  */
-      if (TARGET_MMA && REG_P (dst)
-	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
-	emit_insn (gen_mma_xxmtacc (dst, dst));
-
-      if (restore_basereg != NULL_RTX)
-	emit_insn (restore_basereg);
-    }
-}
-
-static GTY(()) alias_set_type TOC_alias_set = -1;
-
-alias_set_type
-get_TOC_alias_set (void)
-{
-  if (TOC_alias_set == -1)
-    TOC_alias_set = new_alias_set ();
-  return TOC_alias_set;
-}
-
-/* The mode the ABI uses for a word.  This is not the same as word_mode
-   for -m32 -mpowerpc64.  This is used to implement various target hooks.  */
-
-static scalar_int_mode
-rs6000_abi_word_mode (void)
-{
-  return TARGET_32BIT ? SImode : DImode;
-}
-
-/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
-static char *
-rs6000_offload_options (void)
-{
-  if (TARGET_64BIT)
-    return xstrdup ("-foffload-abi=lp64");
-  else
-    return xstrdup ("-foffload-abi=ilp32");
-}
-
-\f
-/* A quick summary of the various types of 'constant-pool tables'
-   under PowerPC:
-
-   Target	Flags		Name		One table per
-   AIX		(none)		AIX TOC		object file
-   AIX		-mfull-toc	AIX TOC		object file
-   AIX		-mminimal-toc	AIX minimal TOC	translation unit
-   SVR4/EABI	(none)		SVR4 SDATA	object file
-   SVR4/EABI	-fpic		SVR4 pic	object file
-   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
-   SVR4/EABI	-mrelocatable	EABI TOC	function
-   SVR4/EABI	-maix		AIX TOC		object file
-   SVR4/EABI	-maix -mminimal-toc
-				AIX minimal TOC	translation unit
-
-   Name			Reg.	Set by	entries	      contains:
-					made by	 addrs?	fp?	sum?
-
-   AIX TOC		2	crt0	as	 Y	option	option
-   AIX minimal TOC	30	prolog	gcc	 Y	Y	option
-   SVR4 SDATA		13	crt0	gcc	 N	Y	N
-   SVR4 pic		30	prolog	ld	 Y	not yet	N
-   SVR4 PIC		30	prolog	gcc	 Y	option	option
-   EABI TOC		30	prolog	gcc	 Y	option	option
-
-*/
-
-/* Hash functions for the hash table.  */
-
-static unsigned
-rs6000_hash_constant (rtx k)
-{
-  enum rtx_code code = GET_CODE (k);
-  machine_mode mode = GET_MODE (k);
-  unsigned result = (code << 3) ^ mode;
-  const char *format;
-  int flen, fidx;
-
-  format = GET_RTX_FORMAT (code);
-  flen = strlen (format);
-  fidx = 0;
-
-  switch (code)
-    {
-    case LABEL_REF:
-      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
-
-    case CONST_WIDE_INT:
-      {
-	int i;
-	flen = CONST_WIDE_INT_NUNITS (k);
-	for (i = 0; i < flen; i++)
-	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
-	return result;
-      }
-
-    case CONST_DOUBLE:
-      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
-
-    case CODE_LABEL:
-      fidx = 3;
-      break;
-
-    default:
-      break;
-    }
-
-  for (; fidx < flen; fidx++)
-    switch (format[fidx])
-      {
-      case 's':
-	{
-	  unsigned i, len;
-	  const char *str = XSTR (k, fidx);
-	  len = strlen (str);
-	  result = result * 613 + len;
-	  for (i = 0; i < len; i++)
-	    result = result * 613 + (unsigned) str[i];
-	  break;
-	}
-      case 'u':
-      case 'e':
-	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
-	break;
-      case 'i':
-      case 'n':
-	result = result * 613 + (unsigned) XINT (k, fidx);
-	break;
-      case 'w':
-	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
-	  result = result * 613 + (unsigned) XWINT (k, fidx);
-	else
-	  {
-	    size_t i;
-	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
-	      result = result * 613 + (unsigned) (XWINT (k, fidx)
-						  >> CHAR_BIT * i);
-	  }
-	break;
-      case '0':
-	break;
-      default:
-	gcc_unreachable ();
-      }
-
-  return result;
-}
-
-hashval_t
-toc_hasher::hash (toc_hash_struct *thc)
-{
-  return rs6000_hash_constant (thc->key) ^ thc->key_mode;
-}
-
-/* Compare H1 and H2 for equivalence.  */
-
-bool
-toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
-{
-  rtx r1 = h1->key;
-  rtx r2 = h2->key;
+bool
+toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
+{
+  rtx r1 = h1->key;
+  rtx r2 = h2->key;
 
   if (h1->key_mode != h2->key_mode)
     return 0;
@@ -26397,538 +26021,913 @@ prefixed_load_p (rtx_insn *insn)
   if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
     return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
   else
-    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
+    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
+}
+
+/* Whether a store instruction is a prefixed instruction.  This is called from
+   the prefixed attribute processing.  */
+
+bool
+prefixed_store_p (rtx_insn *insn)
+{
+  /* Validate the insn to make sure it is a normal store insn.  */
+  extract_insn_cached (insn);
+  if (recog_data.n_operands < 2)
+    return false;
+
+  rtx mem = recog_data.operand[0];
+  rtx reg = recog_data.operand[1];
+
+  if (!REG_P (reg) && !SUBREG_P (reg))
+    return false;
+
+  if (!MEM_P (mem))
+    return false;
+
+  /* Prefixed store instructions do not support update or indexed forms.  */
+  if (get_attr_indexed (insn) == INDEXED_YES
+      || get_attr_update (insn) == UPDATE_YES)
+    return false;
+
+  machine_mode mem_mode = GET_MODE (mem);
+  rtx addr = XEXP (mem, 0);
+  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
+
+  /* Need to make sure we aren't looking at a stfs which doesn't look
+     like the other things reg_to_non_prefixed/address_is_prefixed
+     looks for.  */
+  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
+  else
+    return address_is_prefixed (addr, mem_mode, non_prefixed);
+}
+
+/* Whether a load immediate or add instruction is a prefixed instruction.  This
+   is called from the prefixed attribute processing.  */
+
+bool
+prefixed_paddi_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+  if (!set)
+    return false;
+
+  rtx dest = SET_DEST (set);
+  rtx src = SET_SRC (set);
+
+  if (!REG_P (dest) && !SUBREG_P (dest))
+    return false;
+
+  /* Is this a load immediate that can't be done with a simple ADDI or
+     ADDIS?  */
+  if (CONST_INT_P (src))
+    return (satisfies_constraint_eI (src)
+	    && !satisfies_constraint_I (src)
+	    && !satisfies_constraint_L (src));
+
+  /* Is this a PADDI instruction that can't be done with a simple ADDI or
+     ADDIS?  */
+  if (GET_CODE (src) == PLUS)
+    {
+      rtx op1 = XEXP (src, 1);
+
+      return (CONST_INT_P (op1)
+	      && satisfies_constraint_eI (op1)
+	      && !satisfies_constraint_I (op1)
+	      && !satisfies_constraint_L (op1));
+    }
+
+  /* If not, is it a load of a PC-relative address?  */
+  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
+    return false;
+
+  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
+    return false;
+
+  enum insn_form iform = address_to_insn_form (src, Pmode,
+					       NON_PREFIXED_DEFAULT);
+
+  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
+}
+
+/* Whether the next instruction needs a 'p' prefix issued before the
+   instruction is printed out.  */
+static bool prepend_p_to_next_insn;
+
+/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
+   outputting the assembler code.  On the PowerPC, we remember if the current
+   insn is a prefixed insn where we need to emit a 'p' before the insn.
+
+   In addition, if the insn is part of a PC-relative reference to an external
+   label optimization, this is recorded also.  */
+void
+rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
+{
+  prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
+			    == MAYBE_PREFIXED_YES
+			    && get_attr_prefixed (insn) == PREFIXED_YES);
+  return;
+}
+
+/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
+   We use it to emit a 'p' for prefixed insns that is set in
+   FINAL_PRESCAN_INSN.  */
+void
+rs6000_asm_output_opcode (FILE *stream)
+{
+  if (prepend_p_to_next_insn)
+    {
+      fprintf (stream, "p");
+
+      /* Reset the flag in the case where there are separate insn lines in the
+	 sequence, so the 'p' is only emitted for the first line.  This shows up
+	 when we are doing the PCREL_OPT optimization, in that the label created
+	 with %r<n> would have a leading 'p' printed.  */
+      prepend_p_to_next_insn = false;
+    }
+
+  return;
+}
+
+/* Emit the relocation to tie the next instruction to a previous instruction
+   that loads up an external address.  This is used to do the PCREL_OPT
+   optimization.  Note, the label is generated after the PLD of the got
+   pc-relative address to allow for the assembler to insert NOPs before the PLD
+   instruction.  The operand is a constant integer that is the label
+   number.  */
+
+void
+output_pcrel_opt_reloc (rtx label_num)
+{
+  rtx operands[1] = { label_num };
+  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
+		   operands);
+}
+
+/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
+   should be adjusted to reflect any required changes.  This macro is used when
+   there is some systematic length adjustment required that would be difficult
+   to express in the length attribute.
+
+   In the PowerPC, we use this to adjust the length of an instruction if one or
+   more prefixed instructions are generated, using the attribute
+   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
+   hardware requires that a prefied instruciton does not cross a 64-byte
+   boundary.  This means the compiler has to assume the length of the first
+   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
+   already set for the non-prefixed instruction, we just need to udpate for the
+   difference.  */
+
+int
+rs6000_adjust_insn_length (rtx_insn *insn, int length)
+{
+  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
+    {
+      rtx pattern = PATTERN (insn);
+      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
+	  && get_attr_prefixed (insn) == PREFIXED_YES)
+	{
+	  int num_prefixed = get_attr_max_prefixed_insns (insn);
+	  length += 4 * (num_prefixed + 1);
+	}
+    }
+
+  return length;
+}
+
+\f
+#ifdef HAVE_GAS_HIDDEN
+# define USE_HIDDEN_LINKONCE 1
+#else
+# define USE_HIDDEN_LINKONCE 0
+#endif
+
+/* Fills in the label name that should be used for a 476 link stack thunk.  */
+
+void
+get_ppc476_thunk_name (char name[32])
+{
+  gcc_assert (TARGET_LINK_STACK);
+
+  if (USE_HIDDEN_LINKONCE)
+    sprintf (name, "__ppc476.get_thunk");
+  else
+    ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
 }
 
-/* Whether a store instruction is a prefixed instruction.  This is called from
-   the prefixed attribute processing.  */
+/* This function emits the simple thunk routine that is used to preserve
+   the link stack on the 476 cpu.  */
 
-bool
-prefixed_store_p (rtx_insn *insn)
+static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
+static void
+rs6000_code_end (void)
 {
-  /* Validate the insn to make sure it is a normal store insn.  */
-  extract_insn_cached (insn);
-  if (recog_data.n_operands < 2)
-    return false;
+  char name[32];
+  tree decl;
 
-  rtx mem = recog_data.operand[0];
-  rtx reg = recog_data.operand[1];
+  if (!TARGET_LINK_STACK)
+    return;
 
-  if (!REG_P (reg) && !SUBREG_P (reg))
-    return false;
+  get_ppc476_thunk_name (name);
 
-  if (!MEM_P (mem))
-    return false;
+  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
+		     build_function_type_list (void_type_node, NULL_TREE));
+  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
+				   NULL_TREE, void_type_node);
+  TREE_PUBLIC (decl) = 1;
+  TREE_STATIC (decl) = 1;
 
-  /* Prefixed store instructions do not support update or indexed forms.  */
-  if (get_attr_indexed (insn) == INDEXED_YES
-      || get_attr_update (insn) == UPDATE_YES)
-    return false;
+#if RS6000_WEAK
+  if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
+    {
+      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
+      targetm.asm_out.unique_section (decl, 0);
+      switch_to_section (get_named_section (decl, NULL, 0));
+      DECL_WEAK (decl) = 1;
+      ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
+      targetm.asm_out.globalize_label (asm_out_file, name);
+      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
+      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
+    }
+  else
+#endif
+    {
+      switch_to_section (text_section);
+      ASM_OUTPUT_LABEL (asm_out_file, name);
+    }
 
-  machine_mode mem_mode = GET_MODE (mem);
-  rtx addr = XEXP (mem, 0);
-  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  current_function_decl = decl;
+  allocate_struct_function (decl, false);
+  init_function_start (decl);
+  first_function_block_is_cold = false;
+  /* Make sure unwind info is emitted for the thunk if needed.  */
+  final_start_function (emit_barrier (), asm_out_file, 1);
 
-  /* Need to make sure we aren't looking at a stfs which doesn't look
-     like the other things reg_to_non_prefixed/address_is_prefixed
-     looks for.  */
-  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
-    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
-  else
-    return address_is_prefixed (addr, mem_mode, non_prefixed);
+  fputs ("\tblr\n", asm_out_file);
+
+  final_end_function ();
+  init_insn_lengths ();
+  free_after_compilation (cfun);
+  set_cfun (NULL);
+  current_function_decl = NULL;
 }
 
-/* Whether a load immediate or add instruction is a prefixed instruction.  This
-   is called from the prefixed attribute processing.  */
+/* Add r30 to hard reg set if the prologue sets it up and it is not
+   pic_offset_table_rtx.  */
 
-bool
-prefixed_paddi_p (rtx_insn *insn)
+static void
+rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
 {
-  rtx set = single_set (insn);
-  if (!set)
-    return false;
+  if (!TARGET_SINGLE_PIC_BASE
+      && TARGET_TOC
+      && TARGET_MINIMAL_TOC
+      && !constant_pool_empty_p ())
+    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
+  if (cfun->machine->split_stack_argp_used)
+    add_to_hard_reg_set (&set->set, Pmode, 12);
 
-  rtx dest = SET_DEST (set);
-  rtx src = SET_SRC (set);
+  /* Make sure the hard reg set doesn't include r2, which was possibly added
+     via PIC_OFFSET_TABLE_REGNUM.  */
+  if (TARGET_TOC)
+    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
+}
 
-  if (!REG_P (dest) && !SUBREG_P (dest))
-    return false;
+\f
+/* Helper function for rs6000_split_logical to emit a logical instruction after
+   spliting the operation to single GPR registers.
 
-  /* Is this a load immediate that can't be done with a simple ADDI or
-     ADDIS?  */
-  if (CONST_INT_P (src))
-    return (satisfies_constraint_eI (src)
-	    && !satisfies_constraint_I (src)
-	    && !satisfies_constraint_L (src));
+   DEST is the destination register.
+   OP1 and OP2 are the input source registers.
+   CODE is the base operation (AND, IOR, XOR, NOT).
+   MODE is the machine mode.
+   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
 
-  /* Is this a PADDI instruction that can't be done with a simple ADDI or
-     ADDIS?  */
-  if (GET_CODE (src) == PLUS)
+static void
+rs6000_split_logical_inner (rtx dest,
+			    rtx op1,
+			    rtx op2,
+			    enum rtx_code code,
+			    machine_mode mode,
+			    bool complement_final_p,
+			    bool complement_op1_p,
+			    bool complement_op2_p)
+{
+  rtx bool_rtx;
+
+  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
+  if (op2 && CONST_INT_P (op2)
+      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
+      && !complement_final_p && !complement_op1_p && !complement_op2_p)
     {
-      rtx op1 = XEXP (src, 1);
+      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
+      HOST_WIDE_INT value = INTVAL (op2) & mask;
 
-      return (CONST_INT_P (op1)
-	      && satisfies_constraint_eI (op1)
-	      && !satisfies_constraint_I (op1)
-	      && !satisfies_constraint_L (op1));
+      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
+      if (code == AND)
+	{
+	  if (value == 0)
+	    {
+	      emit_insn (gen_rtx_SET (dest, const0_rtx));
+	      return;
+	    }
+
+	  else if (value == mask)
+	    {
+	      if (!rtx_equal_p (dest, op1))
+		emit_insn (gen_rtx_SET (dest, op1));
+	      return;
+	    }
+	}
+
+      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
+	 into separate ORI/ORIS or XORI/XORIS instructions.  */
+      else if (code == IOR || code == XOR)
+	{
+	  if (value == 0)
+	    {
+	      if (!rtx_equal_p (dest, op1))
+		emit_insn (gen_rtx_SET (dest, op1));
+	      return;
+	    }
+	}
     }
 
-  /* If not, is it a load of a PC-relative address?  */
-  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
-    return false;
+  if (code == AND && mode == SImode
+      && !complement_final_p && !complement_op1_p && !complement_op2_p)
+    {
+      emit_insn (gen_andsi3 (dest, op1, op2));
+      return;
+    }
 
-  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
-    return false;
+  if (complement_op1_p)
+    op1 = gen_rtx_NOT (mode, op1);
 
-  enum insn_form iform = address_to_insn_form (src, Pmode,
-					       NON_PREFIXED_DEFAULT);
+  if (complement_op2_p)
+    op2 = gen_rtx_NOT (mode, op2);
 
-  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
-}
+  /* For canonical RTL, if only one arm is inverted it is the first.  */
+  if (!complement_op1_p && complement_op2_p)
+    std::swap (op1, op2);
 
-/* Whether the next instruction needs a 'p' prefix issued before the
-   instruction is printed out.  */
-static bool prepend_p_to_next_insn;
+  bool_rtx = ((code == NOT)
+	      ? gen_rtx_NOT (mode, op1)
+	      : gen_rtx_fmt_ee (code, mode, op1, op2));
 
-/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
-   outputting the assembler code.  On the PowerPC, we remember if the current
-   insn is a prefixed insn where we need to emit a 'p' before the insn.
+  if (complement_final_p)
+    bool_rtx = gen_rtx_NOT (mode, bool_rtx);
 
-   In addition, if the insn is part of a PC-relative reference to an external
-   label optimization, this is recorded also.  */
-void
-rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
-{
-  prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
-			    == MAYBE_PREFIXED_YES
-			    && get_attr_prefixed (insn) == PREFIXED_YES);
-  return;
+  emit_insn (gen_rtx_SET (dest, bool_rtx));
 }
 
-/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
-   We use it to emit a 'p' for prefixed insns that is set in
-   FINAL_PRESCAN_INSN.  */
-void
-rs6000_asm_output_opcode (FILE *stream)
+/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
+   operations are split immediately during RTL generation to allow for more
+   optimizations of the AND/IOR/XOR.
+
+   OPERANDS is an array containing the destination and two input operands.
+   CODE is the base operation (AND, IOR, XOR, NOT).
+   MODE is the machine mode.
+   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
+   CLOBBER_REG is either NULL or a scratch register of type CC to allow
+   formation of the AND instructions.  */
+
+static void
+rs6000_split_logical_di (rtx operands[3],
+			 enum rtx_code code,
+			 bool complement_final_p,
+			 bool complement_op1_p,
+			 bool complement_op2_p)
 {
-  if (prepend_p_to_next_insn)
+  const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
+  const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
+  const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
+  enum hi_lo { hi = 0, lo = 1 };
+  rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
+  size_t i;
+
+  op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
+  op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
+  op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
+  op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
+
+  if (code == NOT)
+    op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
+  else
     {
-      fprintf (stream, "p");
+      if (!CONST_INT_P (operands[2]))
+	{
+	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
+	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
+	}
+      else
+	{
+	  HOST_WIDE_INT value = INTVAL (operands[2]);
+	  HOST_WIDE_INT value_hi_lo[2];
 
-      /* Reset the flag in the case where there are separate insn lines in the
-	 sequence, so the 'p' is only emitted for the first line.  This shows up
-	 when we are doing the PCREL_OPT optimization, in that the label created
-	 with %r<n> would have a leading 'p' printed.  */
-      prepend_p_to_next_insn = false;
-    }
+	  gcc_assert (!complement_final_p);
+	  gcc_assert (!complement_op1_p);
+	  gcc_assert (!complement_op2_p);
 
-  return;
-}
+	  value_hi_lo[hi] = value >> 32;
+	  value_hi_lo[lo] = value & lower_32bits;
 
-/* Emit the relocation to tie the next instruction to a previous instruction
-   that loads up an external address.  This is used to do the PCREL_OPT
-   optimization.  Note, the label is generated after the PLD of the got
-   pc-relative address to allow for the assembler to insert NOPs before the PLD
-   instruction.  The operand is a constant integer that is the label
-   number.  */
+	  for (i = 0; i < 2; i++)
+	    {
+	      HOST_WIDE_INT sub_value = value_hi_lo[i];
 
-void
-output_pcrel_opt_reloc (rtx label_num)
-{
-  rtx operands[1] = { label_num };
-  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
-		   operands);
-}
+	      if (sub_value & sign_bit)
+		sub_value |= upper_32bits;
 
-/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
-   should be adjusted to reflect any required changes.  This macro is used when
-   there is some systematic length adjustment required that would be difficult
-   to express in the length attribute.
+	      op2_hi_lo[i] = GEN_INT (sub_value);
 
-   In the PowerPC, we use this to adjust the length of an instruction if one or
-   more prefixed instructions are generated, using the attribute
-   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
-   hardware requires that a prefied instruciton does not cross a 64-byte
-   boundary.  This means the compiler has to assume the length of the first
-   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
-   already set for the non-prefixed instruction, we just need to udpate for the
-   difference.  */
+	      /* If this is an AND instruction, check to see if we need to load
+		 the value in a register.  */
+	      if (code == AND && sub_value != -1 && sub_value != 0
+		  && !and_operand (op2_hi_lo[i], SImode))
+		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
+	    }
+	}
+    }
 
-int
-rs6000_adjust_insn_length (rtx_insn *insn, int length)
-{
-  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
+  for (i = 0; i < 2; i++)
     {
-      rtx pattern = PATTERN (insn);
-      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
-	  && get_attr_prefixed (insn) == PREFIXED_YES)
+      /* Split large IOR/XOR operations.  */
+      if ((code == IOR || code == XOR)
+	  && CONST_INT_P (op2_hi_lo[i])
+	  && !complement_final_p
+	  && !complement_op1_p
+	  && !complement_op2_p
+	  && !logical_const_operand (op2_hi_lo[i], SImode))
 	{
-	  int num_prefixed = get_attr_max_prefixed_insns (insn);
-	  length += 4 * (num_prefixed + 1);
+	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
+	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
+	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
+	  rtx tmp = gen_reg_rtx (SImode);
+
+	  /* Make sure the constant is sign extended.  */
+	  if ((hi_16bits & sign_bit) != 0)
+	    hi_16bits |= upper_32bits;
+
+	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
+				      code, SImode, false, false, false);
+
+	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
+				      code, SImode, false, false, false);
 	}
+      else
+	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
+				    code, SImode, complement_final_p,
+				    complement_op1_p, complement_op2_p);
     }
 
-  return length;
+  return;
 }
 
-\f
-#ifdef HAVE_GAS_HIDDEN
-# define USE_HIDDEN_LINKONCE 1
-#else
-# define USE_HIDDEN_LINKONCE 0
-#endif
+/* Split the insns that make up boolean operations operating on multiple GPR
+   registers.  The boolean MD patterns ensure that the inputs either are
+   exactly the same as the output registers, or there is no overlap.
 
-/* Fills in the label name that should be used for a 476 link stack thunk.  */
+   OPERANDS is an array containing the destination and two input operands.
+   CODE is the base operation (AND, IOR, XOR, NOT).
+   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
 
 void
-get_ppc476_thunk_name (char name[32])
+rs6000_split_logical (rtx operands[3],
+		      enum rtx_code code,
+		      bool complement_final_p,
+		      bool complement_op1_p,
+		      bool complement_op2_p)
 {
-  gcc_assert (TARGET_LINK_STACK);
-
-  if (USE_HIDDEN_LINKONCE)
-    sprintf (name, "__ppc476.get_thunk");
-  else
-    ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
-}
+  machine_mode mode = GET_MODE (operands[0]);
+  machine_mode sub_mode;
+  rtx op0, op1, op2;
+  int sub_size, regno0, regno1, nregs, i;
 
-/* This function emits the simple thunk routine that is used to preserve
-   the link stack on the 476 cpu.  */
+  /* If this is DImode, use the specialized version that can run before
+     register allocation.  */
+  if (mode == DImode && !TARGET_POWERPC64)
+    {
+      rs6000_split_logical_di (operands, code, complement_final_p,
+			       complement_op1_p, complement_op2_p);
+      return;
+    }
 
-static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
-static void
-rs6000_code_end (void)
-{
-  char name[32];
-  tree decl;
+  op0 = operands[0];
+  op1 = operands[1];
+  op2 = (code == NOT) ? NULL_RTX : operands[2];
+  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
+  sub_size = GET_MODE_SIZE (sub_mode);
+  regno0 = REGNO (op0);
+  regno1 = REGNO (op1);
 
-  if (!TARGET_LINK_STACK)
-    return;
+  gcc_assert (reload_completed);
+  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
+  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
 
-  get_ppc476_thunk_name (name);
+  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
+  gcc_assert (nregs > 1);
 
-  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
-		     build_function_type_list (void_type_node, NULL_TREE));
-  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
-				   NULL_TREE, void_type_node);
-  TREE_PUBLIC (decl) = 1;
-  TREE_STATIC (decl) = 1;
+  if (op2 && REG_P (op2))
+    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
 
-#if RS6000_WEAK
-  if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
-    {
-      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
-      targetm.asm_out.unique_section (decl, 0);
-      switch_to_section (get_named_section (decl, NULL, 0));
-      DECL_WEAK (decl) = 1;
-      ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
-      targetm.asm_out.globalize_label (asm_out_file, name);
-      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
-      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
-    }
-  else
-#endif
+  for (i = 0; i < nregs; i++)
     {
-      switch_to_section (text_section);
-      ASM_OUTPUT_LABEL (asm_out_file, name);
-    }
-
-  DECL_INITIAL (decl) = make_node (BLOCK);
-  current_function_decl = decl;
-  allocate_struct_function (decl, false);
-  init_function_start (decl);
-  first_function_block_is_cold = false;
-  /* Make sure unwind info is emitted for the thunk if needed.  */
-  final_start_function (emit_barrier (), asm_out_file, 1);
+      int offset = i * sub_size;
+      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
+      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
+      rtx sub_op2 = ((code == NOT)
+		     ? NULL_RTX
+		     : simplify_subreg (sub_mode, op2, mode, offset));
 
-  fputs ("\tblr\n", asm_out_file);
+      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
+				  complement_final_p, complement_op1_p,
+				  complement_op2_p);
+    }
 
-  final_end_function ();
-  init_insn_lengths ();
-  free_after_compilation (cfun);
-  set_cfun (NULL);
-  current_function_decl = NULL;
+  return;
 }
 
-/* Add r30 to hard reg set if the prologue sets it up and it is not
-   pic_offset_table_rtx.  */
+/* Emit instructions to move SRC to DST.  Called by splitters for
+   multi-register moves.  It will emit at most one instruction for
+   each register that is accessed; that is, it won't emit li/lis pairs
+   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
+   register.  */
 
-static void
-rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
+void
+rs6000_split_multireg_move (rtx dst, rtx src)
 {
-  if (!TARGET_SINGLE_PIC_BASE
-      && TARGET_TOC
-      && TARGET_MINIMAL_TOC
-      && !constant_pool_empty_p ())
-    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
-  if (cfun->machine->split_stack_argp_used)
-    add_to_hard_reg_set (&set->set, Pmode, 12);
+  /* The register number of the first register being moved.  */
+  int reg;
+  /* The mode that is to be moved.  */
+  machine_mode mode;
+  /* The mode that the move is being done in, and its size.  */
+  machine_mode reg_mode;
+  int reg_mode_size;
+  /* The number of registers that will be moved.  */
+  int nregs;
 
-  /* Make sure the hard reg set doesn't include r2, which was possibly added
-     via PIC_OFFSET_TABLE_REGNUM.  */
-  if (TARGET_TOC)
-    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
-}
+  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
+  mode = GET_MODE (dst);
+  nregs = hard_regno_nregs (reg, mode);
 
-\f
-/* Helper function for rs6000_split_logical to emit a logical instruction after
-   spliting the operation to single GPR registers.
+  /* If we have a vector quad register for MMA, and this is a load or store,
+     see if we can use vector paired load/stores.  */
+  if (mode == XOmode && TARGET_MMA
+      && (MEM_P (dst) || MEM_P (src)))
+    {
+      reg_mode = OOmode;
+      nregs /= 2;
+    }
+  /* If we have a vector pair/quad mode, split it into two/four separate
+     vectors.  */
+  else if (mode == OOmode || mode == XOmode)
+    reg_mode = V1TImode;
+  else if (FP_REGNO_P (reg))
+    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
+	(TARGET_HARD_FLOAT ? DFmode : SFmode);
+  else if (ALTIVEC_REGNO_P (reg))
+    reg_mode = V16QImode;
+  else
+    reg_mode = word_mode;
+  reg_mode_size = GET_MODE_SIZE (reg_mode);
 
-   DEST is the destination register.
-   OP1 and OP2 are the input source registers.
-   CODE is the base operation (AND, IOR, XOR, NOT).
-   MODE is the machine mode.
-   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
-   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
-   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
+  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
 
-static void
-rs6000_split_logical_inner (rtx dest,
-			    rtx op1,
-			    rtx op2,
-			    enum rtx_code code,
-			    machine_mode mode,
-			    bool complement_final_p,
-			    bool complement_op1_p,
-			    bool complement_op2_p)
-{
-  rtx bool_rtx;
+  /* TDmode residing in FP registers is special, since the ISA requires that
+     the lower-numbered word of a register pair is always the most significant
+     word, even in little-endian mode.  This does not match the usual subreg
+     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
+     the appropriate constituent registers "by hand" in little-endian mode.
 
-  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
-  if (op2 && CONST_INT_P (op2)
-      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
-      && !complement_final_p && !complement_op1_p && !complement_op2_p)
+     Note we do not need to check for destructive overlap here since TDmode
+     can only reside in even/odd register pairs.  */
+  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
     {
-      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
-      HOST_WIDE_INT value = INTVAL (op2) & mask;
+      rtx p_src, p_dst;
+      int i;
 
-      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
-      if (code == AND)
+      for (i = 0; i < nregs; i++)
 	{
-	  if (value == 0)
-	    {
-	      emit_insn (gen_rtx_SET (dest, const0_rtx));
-	      return;
-	    }
+	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
+	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
+	  else
+	    p_src = simplify_gen_subreg (reg_mode, src, mode,
+					 i * reg_mode_size);
 
-	  else if (value == mask)
-	    {
-	      if (!rtx_equal_p (dest, op1))
-		emit_insn (gen_rtx_SET (dest, op1));
-	      return;
-	    }
-	}
+	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
+	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
+	  else
+	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
+					 i * reg_mode_size);
 
-      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
-	 into separate ORI/ORIS or XORI/XORIS instrucitons.  */
-      else if (code == IOR || code == XOR)
-	{
-	  if (value == 0)
-	    {
-	      if (!rtx_equal_p (dest, op1))
-		emit_insn (gen_rtx_SET (dest, op1));
-	      return;
-	    }
+	  emit_insn (gen_rtx_SET (p_dst, p_src));
 	}
-    }
 
-  if (code == AND && mode == SImode
-      && !complement_final_p && !complement_op1_p && !complement_op2_p)
-    {
-      emit_insn (gen_andsi3 (dest, op1, op2));
       return;
     }
 
-  if (complement_op1_p)
-    op1 = gen_rtx_NOT (mode, op1);
-
-  if (complement_op2_p)
-    op2 = gen_rtx_NOT (mode, op2);
-
-  /* For canonical RTL, if only one arm is inverted it is the first.  */
-  if (!complement_op1_p && complement_op2_p)
-    std::swap (op1, op2);
-
-  bool_rtx = ((code == NOT)
-	      ? gen_rtx_NOT (mode, op1)
-	      : gen_rtx_fmt_ee (code, mode, op1, op2));
+  /* The __vector_pair and __vector_quad modes are multi-register
+     modes, so if we have to load or store the registers, we have to be
+     careful to properly swap them if we're in little endian mode
+     below.  This means the last register gets the first memory
+     location.  We also need to be careful of using the right register
+     numbers if we are splitting XO to OO.  */
+  if (mode == OOmode || mode == XOmode)
+    {
+      nregs = hard_regno_nregs (reg, mode);
+      int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
+      if (MEM_P (dst))
+	{
+	  unsigned offset = 0;
+	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-  if (complement_final_p)
-    bool_rtx = gen_rtx_NOT (mode, bool_rtx);
+	  /* If we are reading an accumulator register, we have to
+	     deprime it before we can access it.  */
+	  if (TARGET_MMA
+	      && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
+	    emit_insn (gen_mma_xxmfacc (src, src));
 
-  emit_insn (gen_rtx_SET (dest, bool_rtx));
-}
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
+	    {
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = adjust_address (dst, reg_mode, offset);
+	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
+	      offset += size;
+	      emit_insn (gen_rtx_SET (dst2, src2));
+	    }
 
-/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
-   operations are split immediately during RTL generation to allow for more
-   optimizations of the AND/IOR/XOR.
+	  return;
+	}
 
-   OPERANDS is an array containing the destination and two input operands.
-   CODE is the base operation (AND, IOR, XOR, NOT).
-   MODE is the machine mode.
-   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
-   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
-   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
-   CLOBBER_REG is either NULL or a scratch register of type CC to allow
-   formation of the AND instructions.  */
+      if (MEM_P (src))
+	{
+	  unsigned offset = 0;
+	  unsigned size = GET_MODE_SIZE (reg_mode);
 
-static void
-rs6000_split_logical_di (rtx operands[3],
-			 enum rtx_code code,
-			 bool complement_final_p,
-			 bool complement_op1_p,
-			 bool complement_op2_p)
-{
-  const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
-  const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
-  const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
-  enum hi_lo { hi = 0, lo = 1 };
-  rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
-  size_t i;
+	  for (int i = 0; i < nregs; i += reg_mode_nregs)
+	    {
+	      unsigned subreg =
+		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
+	      rtx src2 = adjust_address (src, reg_mode, offset);
+	      offset += size;
+	      emit_insn (gen_rtx_SET (dst2, src2));
+	    }
 
-  op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
-  op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
-  op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
-  op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
+	  /* If we are writing an accumulator register, we have to
+	     prime it after we've written it.  */
+	  if (TARGET_MMA
+	      && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
+	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
-  if (code == NOT)
-    op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
-  else
-    {
-      if (!CONST_INT_P (operands[2]))
-	{
-	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
-	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
+	  return;
 	}
-      else
+
+      if (GET_CODE (src) == UNSPEC)
 	{
-	  HOST_WIDE_INT value = INTVAL (operands[2]);
-	  HOST_WIDE_INT value_hi_lo[2];
+	  gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
+	  gcc_assert (REG_P (dst));
+	  if (GET_MODE (src) == XOmode)
+	    gcc_assert (FP_REGNO_P (REGNO (dst)));
+	  if (GET_MODE (src) == OOmode)
+	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
 
-	  gcc_assert (!complement_final_p);
-	  gcc_assert (!complement_op1_p);
-	  gcc_assert (!complement_op2_p);
+	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
+	  int nvecs = XVECLEN (src, 0);
+	  for (int i = 0; i < nvecs; i++)
+	    {
+	      int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i;
+	      rtx dst_i = gen_rtx_REG (reg_mode, reg + index);
+	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
+	    }
 
-	  value_hi_lo[hi] = value >> 32;
-	  value_hi_lo[lo] = value & lower_32bits;
+	  /* We are writing an accumulator register, so we have to
+	     prime it after we've written it.  */
+	  if (GET_MODE (src) == XOmode)
+	    emit_insn (gen_mma_xxmtacc (dst, dst));
 
-	  for (i = 0; i < 2; i++)
-	    {
-	      HOST_WIDE_INT sub_value = value_hi_lo[i];
+	  return;
+	}
 
-	      if (sub_value & sign_bit)
-		sub_value |= upper_32bits;
+      /* Register -> register moves can use common code.  */
+    }
 
-	      op2_hi_lo[i] = GEN_INT (sub_value);
+  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
+    {
+      /* If we are reading an accumulator register, we have to
+	 deprime it before we can access it.  */
+      if (TARGET_MMA
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
+	emit_insn (gen_mma_xxmfacc (src, src));
 
-	      /* If this is an AND instruction, check to see if we need to load
-		 the value in a register.  */
-	      if (code == AND && sub_value != -1 && sub_value != 0
-		  && !and_operand (op2_hi_lo[i], SImode))
-		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
+      /* Move register range backwards, if we might have destructive
+	 overlap.  */
+      int i;
+      /* XO/OO are opaque so cannot use subregs. */
+      if (mode == OOmode || mode == XOmode )
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
 	    }
 	}
-    }
+      else
+	{
+	  for (i = nregs - 1; i >= 0; i--)
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 i * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 i * reg_mode_size)));
+	}
 
-  for (i = 0; i < 2; i++)
+      /* If we are writing an accumulator register, we have to
+	 prime it after we've written it.  */
+      if (TARGET_MMA
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
+	emit_insn (gen_mma_xxmtacc (dst, dst));
+    }
+  else
     {
-      /* Split large IOR/XOR operations.  */
-      if ((code == IOR || code == XOR)
-	  && CONST_INT_P (op2_hi_lo[i])
-	  && !complement_final_p
-	  && !complement_op1_p
-	  && !complement_op2_p
-	  && !logical_const_operand (op2_hi_lo[i], SImode))
+      int i;
+      int j = -1;
+      bool used_update = false;
+      rtx restore_basereg = NULL_RTX;
+
+      if (MEM_P (src) && INT_REGNO_P (reg))
 	{
-	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
-	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
-	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
-	  rtx tmp = gen_reg_rtx (SImode);
+	  rtx breg;
 
-	  /* Make sure the constant is sign extended.  */
-	  if ((hi_16bits & sign_bit) != 0)
-	    hi_16bits |= upper_32bits;
+	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
+	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
+	    {
+	      rtx delta_rtx;
+	      breg = XEXP (XEXP (src, 0), 0);
+	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
+			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
+			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
+	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
+	      src = replace_equiv_address (src, breg);
+	    }
+	  else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
+	    {
+	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
+		{
+		  rtx basereg = XEXP (XEXP (src, 0), 0);
+		  if (TARGET_UPDATE)
+		    {
+		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
+		      emit_insn (gen_rtx_SET (ndst,
+					      gen_rtx_MEM (reg_mode,
+							   XEXP (src, 0))));
+		      used_update = true;
+		    }
+		  else
+		    emit_insn (gen_rtx_SET (basereg,
+					    XEXP (XEXP (src, 0), 1)));
+		  src = replace_equiv_address (src, basereg);
+		}
+	      else
+		{
+		  rtx basereg = gen_rtx_REG (Pmode, reg);
+		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
+		  src = replace_equiv_address (src, basereg);
+		}
+	    }
 
-	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
-				      code, SImode, false, false, false);
+	  breg = XEXP (src, 0);
+	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
+	    breg = XEXP (breg, 0);
 
-	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
-				      code, SImode, false, false, false);
+	  /* If the base register we are using to address memory is
+	     also a destination reg, then change that register last.  */
+	  if (REG_P (breg)
+	      && REGNO (breg) >= REGNO (dst)
+	      && REGNO (breg) < REGNO (dst) + nregs)
+	    j = REGNO (breg) - REGNO (dst);
 	}
-      else
-	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
-				    code, SImode, complement_final_p,
-				    complement_op1_p, complement_op2_p);
-    }
-
-  return;
-}
-
-/* Split the insns that make up boolean operations operating on multiple GPR
-   registers.  The boolean MD patterns ensure that the inputs either are
-   exactly the same as the output registers, or there is no overlap.
+      else if (MEM_P (dst) && INT_REGNO_P (reg))
+	{
+	  rtx breg;
 
-   OPERANDS is an array containing the destination and two input operands.
-   CODE is the base operation (AND, IOR, XOR, NOT).
-   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
-   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
-   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
+	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
+	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
+	    {
+	      rtx delta_rtx;
+	      breg = XEXP (XEXP (dst, 0), 0);
+	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
+			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
+			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
 
-void
-rs6000_split_logical (rtx operands[3],
-		      enum rtx_code code,
-		      bool complement_final_p,
-		      bool complement_op1_p,
-		      bool complement_op2_p)
-{
-  machine_mode mode = GET_MODE (operands[0]);
-  machine_mode sub_mode;
-  rtx op0, op1, op2;
-  int sub_size, regno0, regno1, nregs, i;
+	      /* We have to update the breg before doing the store.
+		 Use store with update, if available.  */
 
-  /* If this is DImode, use the specialized version that can run before
-     register allocation.  */
-  if (mode == DImode && !TARGET_POWERPC64)
-    {
-      rs6000_split_logical_di (operands, code, complement_final_p,
-			       complement_op1_p, complement_op2_p);
-      return;
-    }
+	      if (TARGET_UPDATE)
+		{
+		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
+		  emit_insn (TARGET_32BIT
+			     ? (TARGET_POWERPC64
+				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
+				: gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
+			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
+		  used_update = true;
+		}
+	      else
+		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
+	      dst = replace_equiv_address (dst, breg);
+	    }
+	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
+		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
+	    {
+	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
+		{
+		  rtx basereg = XEXP (XEXP (dst, 0), 0);
+		  if (TARGET_UPDATE)
+		    {
+		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
+		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
+							   XEXP (dst, 0)),
+					      nsrc));
+		      used_update = true;
+		    }
+		  else
+		    emit_insn (gen_rtx_SET (basereg,
+					    XEXP (XEXP (dst, 0), 1)));
+		  dst = replace_equiv_address (dst, basereg);
+		}
+	      else
+		{
+		  rtx basereg = XEXP (XEXP (dst, 0), 0);
+		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
+		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
+			      && REG_P (basereg)
+			      && REG_P (offsetreg)
+			      && REGNO (basereg) != REGNO (offsetreg));
+		  if (REGNO (basereg) == 0)
+		    {
+		      rtx tmp = offsetreg;
+		      offsetreg = basereg;
+		      basereg = tmp;
+		    }
+		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
+		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
+		  dst = replace_equiv_address (dst, basereg);
+		}
+	    }
+	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
+	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
+	}
 
-  op0 = operands[0];
-  op1 = operands[1];
-  op2 = (code == NOT) ? NULL_RTX : operands[2];
-  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
-  sub_size = GET_MODE_SIZE (sub_mode);
-  regno0 = REGNO (op0);
-  regno1 = REGNO (op1);
+      /* If we are reading an accumulator register, we have to
+	 deprime it before we can access it.  */
+      if (TARGET_MMA && REG_P (src)
+	  && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
+	emit_insn (gen_mma_xxmfacc (src, src));
 
-  gcc_assert (reload_completed);
-  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
-  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
+      for (i = 0; i < nregs; i++)
+	{
+	  /* Calculate index to next subword.  */
+	  ++j;
+	  if (j == nregs)
+	    j = 0;
 
-  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
-  gcc_assert (nregs > 1);
+	  /* If compiler already emitted move of first word by
+	     store with update, no need to do anything.  */
+	  if (j == 0 && used_update)
+	    continue;
 
-  if (op2 && REG_P (op2))
-    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
+	  /* XO/OO are opaque so cannot use subregs. */
+	  if (mode == OOmode || mode == XOmode )
+	    {
+	      rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
+	      rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
+	      emit_insn (gen_rtx_SET (dst_i, src_i));
+	    }
+	  else
+	    emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
+							 j * reg_mode_size),
+				    simplify_gen_subreg (reg_mode, src, mode,
+							 j * reg_mode_size)));
+	}
 
-  for (i = 0; i < nregs; i++)
-    {
-      int offset = i * sub_size;
-      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
-      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
-      rtx sub_op2 = ((code == NOT)
-		     ? NULL_RTX
-		     : simplify_subreg (sub_mode, op2, mode, offset));
+      /* If we are writing an accumulator register, we have to
+	 prime it after we've written it.  */
+      if (TARGET_MMA && REG_P (dst)
+	  && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
+	emit_insn (gen_mma_xxmtacc (dst, dst));
 
-      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
-				  complement_final_p, complement_op1_p,
-				  complement_op2_p);
+      if (restore_basereg != NULL_RTX)
+	emit_insn (restore_basereg);
     }
-
-  return;
 }
-
 \f
 /* Return true if the peephole2 can combine a load involving a combination of
    an addis instruction and a load with an offset that can be fused together on


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-15 17:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-15 17:18 [gcc r11-8994] rs6000: Move rs6000_split_multireg_move to later in file Peter Bergner

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).