public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs
@ 2015-04-06 18:19 Bill Schmidt
  2015-04-07 15:11 ` David Edelsohn
  0 siblings, 1 reply; 5+ messages in thread
From: Bill Schmidt @ 2015-04-06 18:19 UTC (permalink / raw)
  To: gcc-patches; +Cc: dje.gcc

Hi,

The prologue and epilogue code to save/restore Altivec registers uses
the generic emit_move_insn logic.  This means that when VSX is available
on a little-endian target, we will generate xxswapd/stxvd2x for prologue
saves, and lxvd2x/xxswapd for epilogue restores.  This happens too late
to be cleaned up by swap optimization.  Since the stack save slots are
aligned, we should always use lvx and stvx for this purpose instead.
This improves performance on LE targets, is performance-neutral for BE,
and is always safe.

This change causes the previously failing test
gcc.target/powerpc/swaps-p8-2.c to succeed again.  This test started
failing when an unrelated change raised register pressure on the vector
registers, causing us to generate the above save/restore sequences.  The
test was testing whether all xxswapd instructions had been removed.
With this change, we no longer generate them for the save/restores, and
the test succeeds.

Because we no longer generate the two-instruction sequences, we no
longer need the code in rs6000_frame_related to identify the true source
register for the two-instruction store.  I've removed that along with
the extra argument it required, and adjusted the callers.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions, and one fixed failure.  Is this ok for trunk after 5.1
branches?  I'd also like to backport this as far as 4.8 for the
performance improvement.

Thanks,
Bill


2015-04-06  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/altivec.md (*altivec_lvx_<mode>_internal): Remove
	asterisk from name so this can be generated directly.
	(*altivec_stvx_<mode>_internal): Likewise.
	* config/rs6000/rs6000.c (rs6000_frame_related): Remove split_reg
	argument and logic that references it.
	(emit_frame_save): Remove last parameter from call to
	rs6000_frame_related.
	(rs6000_emit_prologue): Remove last parameter from eight calls to
	rs6000_frame_related.  Force generation of stvx instruction for
	Altivec register saves.  Remove split_reg handling, which is no
	longer needed.
	(rs6000_emit_epilogue):  Force generation of lvx instruction for
	Altivec register restores.


Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 221860)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -2455,7 +2455,7 @@
     }
 })
 
-(define_insn "*altivec_lvx_<mode>_internal"
+(define_insn "altivec_lvx_<mode>_internal"
   [(parallel
     [(set (match_operand:VM2 0 "register_operand" "=v")
 	  (match_operand:VM2 1 "memory_operand" "Z"))
@@ -2478,7 +2478,7 @@
     }
 })
 
-(define_insn "*altivec_stvx_<mode>_internal"
+(define_insn "altivec_stvx_<mode>_internal"
   [(parallel
     [(set (match_operand:VM2 0 "memory_operand" "=Z")
 	  (match_operand:VM2 1 "register_operand" "v"))
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 221860)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -22738,7 +22738,7 @@ output_probe_stack_range (rtx reg1, rtx reg2)
 
 static rtx
 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
-		      rtx reg2, rtx rreg, rtx split_reg)
+		      rtx reg2, rtx rreg)
 {
   rtx real, temp;
 
@@ -22829,11 +22829,6 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE
 	  }
     }
 
-  /* If a store insn has been split into multiple insns, the
-     true source register is given by split_reg.  */
-  if (split_reg != NULL_RTX)
-    real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
-
   RTX_FRAME_RELATED_P (insn) = 1;
   add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
 
@@ -22941,7 +22936,7 @@ emit_frame_save (rtx frame_reg, machine_mode mode,
   reg = gen_rtx_REG (mode, regno);
   insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
   return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
-			       NULL_RTX, NULL_RTX, NULL_RTX);
+			       NULL_RTX, NULL_RTX);
 }
 
 /* Emit an offset memory reference suitable for a frame store, while
@@ -23521,7 +23516,7 @@ rs6000_emit_prologue (void)
 
       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-			    treg, GEN_INT (-info->total_size), NULL_RTX);
+			    treg, GEN_INT (-info->total_size));
       sp_off = frame_off = info->total_size;
     }
 
@@ -23606,7 +23601,7 @@ rs6000_emit_prologue (void)
 
 	  insn = emit_move_insn (mem, reg);
 	  rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-				NULL_RTX, NULL_RTX, NULL_RTX);
+				NULL_RTX, NULL_RTX);
 	  END_USE (0);
 	}
     }
@@ -23662,7 +23657,7 @@ rs6000_emit_prologue (void)
 				     info->lr_save_offset,
 				     DFmode, sel);
       rs6000_frame_related (insn, ptr_reg, sp_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (lr)
 	END_USE (0);
     }
@@ -23741,7 +23736,7 @@ rs6000_emit_prologue (void)
 					 SAVRES_SAVE | SAVRES_GPR);
 
 	  rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
-				NULL_RTX, NULL_RTX, NULL_RTX);
+				NULL_RTX, NULL_RTX);
 	}
 
       /* Move the static chain pointer back.  */
@@ -23791,7 +23786,7 @@ rs6000_emit_prologue (void)
 				     info->lr_save_offset + ptr_off,
 				     reg_mode, sel);
       rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (lr)
 	END_USE (0);
     }
@@ -23807,7 +23802,7 @@ rs6000_emit_prologue (void)
 			     info->gp_save_offset + frame_off + reg_size * i);
       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
     }
   else if (!WORLD_SAVE_P (info))
     {
@@ -24130,7 +24125,7 @@ rs6000_emit_prologue (void)
 				     info->altivec_save_offset + ptr_off,
 				     0, V4SImode, SAVRES_SAVE | SAVRES_VR);
       rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
 	{
 	  /* The oddity mentioned above clobbered our frame reg.  */
@@ -24146,7 +24141,7 @@ rs6000_emit_prologue (void)
       for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
 	if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
 	  {
-	    rtx areg, savereg, mem, split_reg;
+	    rtx areg, savereg, mem;
 	    int offset;
 
 	    offset = (info->altivec_save_offset + frame_off
@@ -24162,20 +24157,13 @@ rs6000_emit_prologue (void)
 	    mem = gen_frame_mem (V4SImode,
 				 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
 
-	    insn = emit_move_insn (mem, savereg);
+	    /* Rather than emitting a generic move, force use of the stvx
+	       instruction, which we always want.  In particular we don't
+	       want xxpermdi/stxvd2x for little endian.  */
+	    insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
 
-	    /* When we split a VSX store into two insns, we need to make
-	       sure the DWARF info knows which register we are storing.
-	       Pass it in to be used on the appropriate note.  */
-	    if (!BYTES_BIG_ENDIAN
-		&& GET_CODE (PATTERN (insn)) == SET
-		&& GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
-	      split_reg = savereg;
-	    else
-	      split_reg = NULL_RTX;
-
 	    rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-				  areg, GEN_INT (offset), split_reg);
+				  areg, GEN_INT (offset));
 	  }
     }
 
@@ -24817,7 +24805,10 @@ rs6000_emit_epilogue (int sibcall)
 		mem = gen_frame_mem (V4SImode, addr);
 
 		reg = gen_rtx_REG (V4SImode, i);
-		emit_move_insn (reg, mem);
+		/* Rather than emitting a generic move, force use of the
+		   lvx instruction, which we always want.  In particular
+		   we don't want lxvd2x/xxpermdi for little endian.  */
+		(void)emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
 	      }
 	}
 
@@ -25020,7 +25011,10 @@ rs6000_emit_epilogue (int sibcall)
 		mem = gen_frame_mem (V4SImode, addr);
 
 		reg = gen_rtx_REG (V4SImode, i);
-		emit_move_insn (reg, mem);
+		/* Rather than emitting a generic move, force use of the
+		   lvx instruction, which we always want.  In particular
+		   we don't want lxvd2x/xxpermdi for little endian.  */
+		(void)emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
 	      }
 	}
 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH, rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs
  2015-04-06 18:19 [PATCH, rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs Bill Schmidt
@ 2015-04-07 15:11 ` David Edelsohn
  2015-04-07 20:20   ` Segher Boessenkool
  2015-04-20 13:50   ` Bill Schmidt
  0 siblings, 2 replies; 5+ messages in thread
From: David Edelsohn @ 2015-04-07 15:11 UTC (permalink / raw)
  To: Bill Schmidt; +Cc: GCC Patches

On Mon, Apr 6, 2015 at 2:19 PM, Bill Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Hi,
>
> The prologue and epilogue code to save/restore Altivec registers uses
> the generic emit_move_insn logic.  This means that when VSX is available
> on a little-endian target, we will generate xxswapd/stxvd2x for prologue
> saves, and lxvd2x/xxswapd for epilogue restores.  This happens too late
> to be cleaned up by swap optimization.  Since the stack save slots are
> aligned, we should always use lvx and stvx for this purpose instead.
> This improves performance on LE targets, is performance-neutral for BE,
> and is always safe.
>
> This change causes the previously failing test
> gcc.target/powerpc/swaps-p8-2.c to succeed again.  This test started
> failing when an unrelated change raised register pressure on the vector
> registers, causing us to generate the above save/restore sequences.  The
> test was testing whether all xxswapd instructions had been removed.
> With this change, we no longer generate them for the save/restores, and
> the test succeeds.
>
> Because we no longer generate the two-instruction sequences, we no
> longer need the code in rs6000_frame_related to identify the true source
> register for the two-instruction store.  I've removed that along with
> the extra argument it required, and adjusted the callers.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> regressions, and one fixed failure.  Is this ok for trunk after 5.1
> branches?  I'd also like to backport this as far as 4.8 for the
> performance improvement.
>
> Thanks,
> Bill
>
>
> 2015-04-06  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>         * config/rs6000/altivec.md (*altivec_lvx_<mode>_internal): Remove
>         asterisk from name so this can be generated directly.
>         (*altivec_stvx_<mode>_internal): Likewise.
>         * config/rs6000/rs6000.c (rs6000_frame_related): Remove split_reg
>         argument and logic that references it.
>         (emit_frame_save): Remove last parameter from call to
>         rs6000_frame_related.
>         (rs6000_emit_prologue): Remove last parameter from eight calls to
>         rs6000_frame_related.  Force generation of stvx instruction for
>         Altivec register saves.  Remove split_reg handling, which is no
>         longer needed.
>         (rs6000_emit_epilogue):  Force generation of lvx instruction for
>         Altivec register restores.

This is okay after GCC 5 branches.  But please insert a space between
(void) and emit_insn when casting the result.

> @@ -24817,7 +24805,10 @@ rs6000_emit_epilogue (int sibcall)
>                 mem = gen_frame_mem (V4SImode, addr);
>
>                 reg = gen_rtx_REG (V4SImode, i);
> -               emit_move_insn (reg, mem);
> +               /* Rather than emitting a generic move, force use of the
> +                  lvx instruction, which we always want.  In particular
> +                  we don't want lxvd2x/xxpermdi for little endian.  */
> +               (void)emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
>               }
>         }
>
> @@ -25020,7 +25011,10 @@ rs6000_emit_epilogue (int sibcall)
>                 mem = gen_frame_mem (V4SImode, addr);
>
>                 reg = gen_rtx_REG (V4SImode, i);
> -               emit_move_insn (reg, mem);
> +               /* Rather than emitting a generic move, force use of the
> +                  lvx instruction, which we always want.  In particular
> +                  we don't want lxvd2x/xxpermdi for little endian.  */
> +               (void)emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
>               }
>         }

Thanks, David

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH, rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs
  2015-04-07 15:11 ` David Edelsohn
@ 2015-04-07 20:20   ` Segher Boessenkool
  2015-04-07 20:24     ` Bill Schmidt
  2015-04-20 13:50   ` Bill Schmidt
  1 sibling, 1 reply; 5+ messages in thread
From: Segher Boessenkool @ 2015-04-07 20:20 UTC (permalink / raw)
  To: David Edelsohn; +Cc: Bill Schmidt, GCC Patches

On Tue, Apr 07, 2015 at 11:11:37AM -0400, David Edelsohn wrote:
> This is okay after GCC 5 branches.  But please insert a space between
> (void) and emit_insn when casting the result.

Or get rid of the cast completely, it isn't needed.  We already have
more than 900 calls of emit_insn without cast :-)


Segher

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH, rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs
  2015-04-07 20:20   ` Segher Boessenkool
@ 2015-04-07 20:24     ` Bill Schmidt
  0 siblings, 0 replies; 5+ messages in thread
From: Bill Schmidt @ 2015-04-07 20:24 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: David Edelsohn, GCC Patches

On Tue, 2015-04-07 at 15:20 -0500, Segher Boessenkool wrote:
> On Tue, Apr 07, 2015 at 11:11:37AM -0400, David Edelsohn wrote:
> > This is okay after GCC 5 branches.  But please insert a space between
> > (void) and emit_insn when casting the result.
> 
> Or get rid of the cast completely, it isn't needed.  We already have
> more than 900 calls of emit_insn without cast :-)

Those are bad!  Fix them now! ;)

Bill

> 
> 
> Segher
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH, rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs
  2015-04-07 15:11 ` David Edelsohn
  2015-04-07 20:20   ` Segher Boessenkool
@ 2015-04-20 13:50   ` Bill Schmidt
  1 sibling, 0 replies; 5+ messages in thread
From: Bill Schmidt @ 2015-04-20 13:50 UTC (permalink / raw)
  To: David Edelsohn; +Cc: GCC Patches

Hi,

I've added an assert to this patch to detect a related condition.  Prior
to this patch, the prologue code was causing us to incorrectly call the
expand-time routine rs6000_emit_vsx_le_store().  This was harmless, but
we should have caught this sooner.  The patch removes this problem, but
we should still be on the lookout for such behavior in the future.  I'll
commit this version after 5.1 is released, if there are no objections.

Thanks,
Bill


2015-04-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/altivec.md (*altivec_lvx_<mode>_internal): Remove
	asterisk from name so this can be generated directly.
	(*altivec_stvx_<mode>_internal): Likewise.
	* config/rs6000/rs6000.c (rs6000_emit_le_vsx_store): Add assert
	that this is never called during or after reload/lra.
	(rs6000_frame_related): Remove split_reg
	argument and logic that references it.
	(emit_frame_save): Remove last parameter from call to
	rs6000_frame_related.
	(rs6000_emit_prologue): Remove last parameter from eight calls to
	rs6000_frame_related.  Force generation of stvx instruction for
	Altivec register saves.  Remove split_reg handling, which is no
	longer needed.
	(rs6000_emit_epilogue):  Force generation of lvx instruction for
	Altivec register restores.


Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 222230)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -2455,7 +2455,7 @@
     }
 })
 
-(define_insn "*altivec_lvx_<mode>_internal"
+(define_insn "altivec_lvx_<mode>_internal"
   [(parallel
     [(set (match_operand:VM2 0 "register_operand" "=v")
 	  (match_operand:VM2 1 "memory_operand" "Z"))
@@ -2478,7 +2478,7 @@
     }
 })
 
-(define_insn "*altivec_stvx_<mode>_internal"
+(define_insn "altivec_stvx_<mode>_internal"
   [(parallel
     [(set (match_operand:VM2 0 "memory_operand" "=Z")
 	  (match_operand:VM2 1 "register_operand" "v"))
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 222230)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -8371,6 +8371,11 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, ma
 {
   rtx tmp, permute_src, permute_tmp;
 
+  /* This should never be called during or after reload, because it does
+     not re-permute the source register.  It is intended only for use
+     during expand.  */
+  gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
+
   /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
@@ -22768,7 +22773,7 @@ output_probe_stack_range (rtx reg1, rtx reg2)
 
 static rtx
 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
-		      rtx reg2, rtx rreg, rtx split_reg)
+		      rtx reg2, rtx rreg)
 {
   rtx real, temp;
 
@@ -22859,11 +22864,6 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE
 	  }
     }
 
-  /* If a store insn has been split into multiple insns, the
-     true source register is given by split_reg.  */
-  if (split_reg != NULL_RTX)
-    real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
-
   RTX_FRAME_RELATED_P (insn) = 1;
   add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
 
@@ -22971,7 +22971,7 @@ emit_frame_save (rtx frame_reg, machine_mode mode,
   reg = gen_rtx_REG (mode, regno);
   insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
   return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
-			       NULL_RTX, NULL_RTX, NULL_RTX);
+			       NULL_RTX, NULL_RTX);
 }
 
 /* Emit an offset memory reference suitable for a frame store, while
@@ -23551,7 +23551,7 @@ rs6000_emit_prologue (void)
 
       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-			    treg, GEN_INT (-info->total_size), NULL_RTX);
+			    treg, GEN_INT (-info->total_size));
       sp_off = frame_off = info->total_size;
     }
 
@@ -23636,7 +23636,7 @@ rs6000_emit_prologue (void)
 
 	  insn = emit_move_insn (mem, reg);
 	  rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-				NULL_RTX, NULL_RTX, NULL_RTX);
+				NULL_RTX, NULL_RTX);
 	  END_USE (0);
 	}
     }
@@ -23692,7 +23692,7 @@ rs6000_emit_prologue (void)
 				     info->lr_save_offset,
 				     DFmode, sel);
       rs6000_frame_related (insn, ptr_reg, sp_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (lr)
 	END_USE (0);
     }
@@ -23771,7 +23771,7 @@ rs6000_emit_prologue (void)
 					 SAVRES_SAVE | SAVRES_GPR);
 
 	  rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
-				NULL_RTX, NULL_RTX, NULL_RTX);
+				NULL_RTX, NULL_RTX);
 	}
 
       /* Move the static chain pointer back.  */
@@ -23821,7 +23821,7 @@ rs6000_emit_prologue (void)
 				     info->lr_save_offset + ptr_off,
 				     reg_mode, sel);
       rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (lr)
 	END_USE (0);
     }
@@ -23837,7 +23837,7 @@ rs6000_emit_prologue (void)
 			     info->gp_save_offset + frame_off + reg_size * i);
       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
     }
   else if (!WORLD_SAVE_P (info))
     {
@@ -24160,7 +24160,7 @@ rs6000_emit_prologue (void)
 				     info->altivec_save_offset + ptr_off,
 				     0, V4SImode, SAVRES_SAVE | SAVRES_VR);
       rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
-			    NULL_RTX, NULL_RTX, NULL_RTX);
+			    NULL_RTX, NULL_RTX);
       if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
 	{
 	  /* The oddity mentioned above clobbered our frame reg.  */
@@ -24176,7 +24176,7 @@ rs6000_emit_prologue (void)
       for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
 	if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
 	  {
-	    rtx areg, savereg, mem, split_reg;
+	    rtx areg, savereg, mem;
 	    int offset;
 
 	    offset = (info->altivec_save_offset + frame_off
@@ -24192,20 +24192,13 @@ rs6000_emit_prologue (void)
 	    mem = gen_frame_mem (V4SImode,
 				 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
 
-	    insn = emit_move_insn (mem, savereg);
+	    /* Rather than emitting a generic move, force use of the stvx
+	       instruction, which we always want.  In particular we don't
+	       want xxpermdi/stxvd2x for little endian.  */
+	    insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
 
-	    /* When we split a VSX store into two insns, we need to make
-	       sure the DWARF info knows which register we are storing.
-	       Pass it in to be used on the appropriate note.  */
-	    if (!BYTES_BIG_ENDIAN
-		&& GET_CODE (PATTERN (insn)) == SET
-		&& GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
-	      split_reg = savereg;
-	    else
-	      split_reg = NULL_RTX;
-
 	    rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
-				  areg, GEN_INT (offset), split_reg);
+				  areg, GEN_INT (offset));
 	  }
     }
 
@@ -24847,7 +24840,10 @@ rs6000_emit_epilogue (int sibcall)
 		mem = gen_frame_mem (V4SImode, addr);
 
 		reg = gen_rtx_REG (V4SImode, i);
-		emit_move_insn (reg, mem);
+		/* Rather than emitting a generic move, force use of the
+		   lvx instruction, which we always want.  In particular
+		   we don't want lxvd2x/xxpermdi for little endian.  */
+		(void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
 	      }
 	}
 
@@ -25050,7 +25046,10 @@ rs6000_emit_epilogue (int sibcall)
 		mem = gen_frame_mem (V4SImode, addr);
 
 		reg = gen_rtx_REG (V4SImode, i);
-		emit_move_insn (reg, mem);
+		/* Rather than emitting a generic move, force use of the
+		   lvx instruction, which we always want.  In particular
+		   we don't want lxvd2x/xxpermdi for little endian.  */
+		(void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
 	      }
 	}
 



On Tue, 2015-04-07 at 11:11 -0400, David Edelsohn wrote:
> On Mon, Apr 6, 2015 at 2:19 PM, Bill Schmidt
> <wschmidt@linux.vnet.ibm.com> wrote:
> > Hi,
> >
> > The prologue and epilogue code to save/restore Altivec registers uses
> > the generic emit_move_insn logic.  This means that when VSX is available
> > on a little-endian target, we will generate xxswapd/stxvd2x for prologue
> > saves, and lxvd2x/xxswapd for epilogue restores.  This happens too late
> > to be cleaned up by swap optimization.  Since the stack save slots are
> > aligned, we should always use lvx and stvx for this purpose instead.
> > This improves performance on LE targets, is performance-neutral for BE,
> > and is always safe.
> >
> > This change causes the previously failing test
> > gcc.target/powerpc/swaps-p8-2.c to succeed again.  This test started
> > failing when an unrelated change raised register pressure on the vector
> > registers, causing us to generate the above save/restore sequences.  The
> > test was testing whether all xxswapd instructions had been removed.
> > With this change, we no longer generate them for the save/restores, and
> > the test succeeds.
> >
> > Because we no longer generate the two-instruction sequences, we no
> > longer need the code in rs6000_frame_related to identify the true source
> > register for the two-instruction store.  I've removed that along with
> > the extra argument it required, and adjusted the callers.
> >
> > Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
> > regressions, and one fixed failure.  Is this ok for trunk after 5.1
> > branches?  I'd also like to backport this as far as 4.8 for the
> > performance improvement.
> >
> > Thanks,
> > Bill
> >
> >

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-04-20 13:50 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-06 18:19 [PATCH, rs6000] Force lvx and stvx for prologue saves and epilogue restores of Altivec regs Bill Schmidt
2015-04-07 15:11 ` David Edelsohn
2015-04-07 20:20   ` Segher Boessenkool
2015-04-07 20:24     ` Bill Schmidt
2015-04-20 13:50   ` Bill Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).