public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* RE: [PATCH, Mips] Compact branch/delay slot optimization.
@ 2015-10-06 14:00 Simon Dardis
  2015-10-28 14:16 ` Moore, Catherine
  0 siblings, 1 reply; 4+ messages in thread
From: Simon Dardis @ 2015-10-06 14:00 UTC (permalink / raw)
  To: Moore, Catherine, Matthew Fortune; +Cc: gcc-patches

Hello,

I'd like to resubmit the previous patch as it failed to check if the branch inside the sequence had a compact form.

Thanks,
Simon

gcc/
        * config/mips/mips.c: (mips_breakable_sequence_p): New function.
          (mips_break_sequence): New function. 
          (mips_reorg_process_insns) Use them. Use compact branches in selected
          situations.
    
gcc/testsuite/
        * gcc.target/mips/split-ds-sequence.c: Test for the above.

Index: config/mips/mips.c
===================================================================
--- config/mips/mips.c	(revision 228282)
+++ config/mips/mips.c	(working copy)
@@ -16973,6 +16973,34 @@
       }
 }
 
+/* A SEQUENCE is breakable iff the branch inside it has a compact form
+   and the target has compact branches.  */
+
+static bool
+mips_breakable_sequence_p (rtx_insn * insn)
+{
+  return (insn && GET_CODE (PATTERN (insn)) == SEQUENCE
+	  && TARGET_CB_MAYBE
+	  && get_attr_compact_form (SEQ_BEGIN (insn)) != COMPACT_FORM_NEVER);
+}
+
+/* Remove a SEQUENCE and replace it with the delay slot instruction
+   followed by the branch and return the instruction in the delay slot.
+   Return the first of the two new instructions.
+   Subroutine of mips_reorg_process_insns.  */
+
+static rtx_insn *
+mips_break_sequence (rtx_insn * insn)
+{
+  rtx_insn * before = PREV_INSN (insn);
+  rtx_insn * branch = SEQ_BEGIN (insn);
+  rtx_insn * ds = SEQ_END (insn);
+  remove_insn (insn);
+  add_insn_after (ds, before, NULL);
+  add_insn_after (branch, ds, NULL);
+  return ds;
+}
+
 /* Go through the instruction stream and insert nops where necessary.
    Also delete any high-part relocations whose partnering low parts
    are now all dead.  See if the whole function can then be put into
@@ -17065,6 +17093,68 @@
 	{
 	  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
 	    {
+	      rtx_insn * next_active = next_active_insn (insn);
+	      /* Undo delay slots to avoid bubbles if the next instruction can
+		 be placed in a forbidden slot or the cost of adding an
+		 explicit NOP in a forbidden slot is OK and if the SEQUENCE is
+		 safely breakable.  */
+	      if (TARGET_CB_MAYBE
+		  && mips_breakable_sequence_p (insn)
+		  && INSN_P (SEQ_BEGIN (insn))
+		  && INSN_P (SEQ_END (insn))
+		  && ((next_active
+		       && INSN_P (next_active)
+		       && GET_CODE (PATTERN (next_active)) != SEQUENCE
+		       && get_attr_can_delay (next_active) == CAN_DELAY_YES)
+		      || !optimize_size))
+		{
+		  /* To hide a potential pipeline bubble, if we scan backwards
+		     from the current SEQUENCE and find that there is a load
+		     of a value that is used in the CTI and there are no
+		     dependencies between the CTI and instruction in the delay
+		     slot, break the sequence so the load delay is hidden.  */
+		  HARD_REG_SET uses;
+		  CLEAR_HARD_REG_SET (uses);
+		  note_uses (&PATTERN (SEQ_BEGIN (insn)), record_hard_reg_uses,
+			     &uses);
+		  HARD_REG_SET delay_sets;
+		  CLEAR_HARD_REG_SET (delay_sets);
+		  note_stores (PATTERN (SEQ_END (insn)), record_hard_reg_sets,
+			       &delay_sets);
+
+		  rtx prev = prev_active_insn (insn);
+		  if (prev
+		      && GET_CODE (PATTERN (prev)) == SET
+		      && MEM_P (SET_SRC (PATTERN (prev))))
+		    {
+		      HARD_REG_SET sets;
+		      CLEAR_HARD_REG_SET (sets);
+		      note_stores (PATTERN (prev), record_hard_reg_sets,
+				   &sets);
+
+		      /* Re-order if safe.  */
+		      if (!hard_reg_set_intersect_p (delay_sets, uses)
+			  && hard_reg_set_intersect_p (uses, sets))
+			{
+			  next_insn = mips_break_sequence (insn);
+			  /* Need to process the hazards of the newly
+			     introduced instructions.  */
+			  continue;
+			}
+		    }
+
+		  /* If we find an orphaned high-part relocation in a delay
+		     slot then we can convert to a compact branch and get
+		     the orphaned high part deleted.  */
+		  if (mips_orphaned_high_part_p (&htab, SEQ_END (insn)))
+		    {
+		      next_insn = mips_break_sequence (insn);
+		      /* Need to process the hazards of the newly
+			 introduced instructions.  */
+		      continue;
+		    }
+		}
+
 	      /* If we find an orphaned high-part relocation in a delay
 		 slot, it's easier to turn that instruction into a NOP than
 		 to delete it.  The delay slot will be a NOP either way.  */
@@ -17099,6 +17189,33 @@
 		{
 		  mips_avoid_hazard (last_insn, insn, &hilo_delay,
 				     &delayed_reg, lo_reg, &fs_delay);
+		  /* When a compact branch introduces a forbidden slot hazard
+		     and the next useful instruction is a SEQUENCE of a jump
+		     and a non-nop instruction in the delay slot, remove the
+		     sequence and replace it with the delay slot instruction
+		     then the jump to clear the forbidden slot hazard.  */
+
+		  if (fs_delay)
+		    {
+		      /* Search onwards from the current position looking for
+			 a SEQUENCE.  We are looking for pipeline hazards here
+			 and do not need to worry about labels or barriers as
+			 the optimization only undoes delay slot filling which
+			 only affects the order of the branch and its delay
+			 slot.  */
+		      rtx_insn * next = next_active_insn (insn);
+		      if (next
+			  && USEFUL_INSN_P (next)
+			  && GET_CODE (PATTERN (next)) == SEQUENCE
+			  && mips_breakable_sequence_p (next))
+			{
+			  last_insn = insn;
+			  next_insn = mips_break_sequence (next);
+			  /* Need to process the hazards of the newly
+			     introduced instructions.  */
+			  continue;
+			}
+		    }
 		  last_insn = insn;
 		}
 	    }
Index: testsuite/gcc.target/mips/split-ds-sequence.c
===================================================================
--- testsuite/gcc.target/mips/split-ds-sequence.c	(revision 0)
+++ testsuite/gcc.target/mips/split-ds-sequence.c	(working copy)
@@ -0,0 +1,19 @@
+/* { dg-options "isa_rev>=6" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-mcompact-branches=never" } { "" } } */
+/* { dg-final { scan-assembler-not "nop" } } */
+
+int
+testg2 (int a, int c)
+{
+
+  int j = 0;
+  do
+    {
+      j += a;
+    }
+  while (j < 56);
+
+  j += c;
+  return j;
+
+}

-----Original Message-----
From: Simon Dardis 
Sent: 25 September 2015 15:56
To: Moore, Catherine
Cc: gcc-patches@gcc.gnu.org
Subject: [PATCH, Mips] Compact branch/delay slot optimization.

Hello,

The following patch adds three small optimizations related to compact branches for MIPSR6: 

When the result of a load is used by a delay slot branch immediately afterwards, undo the delay slot branch scheduling to hide the pipeline bubble if safe and use a compact branch instead.

Undo delay slot scheduling if an orphaned high-part relocation is in a delay slot and use a compact branch is used instead.

Undo delay slot scheduling in the case where a forbidden slot hazard is immediately followed by a delay slot branch. This would cause a nop to be inserted otherwise.

No regressions. OK to apply?

Thanks,
Simon

gcc/
        * config/mips/mips.c: (mips_break_sequence): New function. 
          (mips_reorg_process_insns) Use it. Use compact branches in selected
          situations.
    
gcc/testsuite/
        * gcc.target/mips/split-ds-sequence.c: Test for the above.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH, Mips] Compact branch/delay slot optimization.
  2015-10-06 14:00 [PATCH, Mips] Compact branch/delay slot optimization Simon Dardis
@ 2015-10-28 14:16 ` Moore, Catherine
  2015-11-11 13:41   ` Simon Dardis
  0 siblings, 1 reply; 4+ messages in thread
From: Moore, Catherine @ 2015-10-28 14:16 UTC (permalink / raw)
  To: Simon Dardis, Matthew Fortune; +Cc: gcc-patches



> -----Original Message-----
> From: Simon Dardis [mailto:Simon.Dardis@imgtec.com]
> Sent: Tuesday, October 06, 2015 10:00 AM
> To: Moore, Catherine; Matthew Fortune
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [PATCH, Mips] Compact branch/delay slot optimization.
> 
> Hello,
> 
> I'd like to resubmit the previous patch as it failed to check if the branch inside
> the sequence had a compact form.
> 
> Thanks,
> Simon
> 
> gcc/
>         * config/mips/mips.c: (mips_breakable_sequence_p): New function.
>           (mips_break_sequence): New function.
>           (mips_reorg_process_insns) Use them. Use compact branches in
> selected
>           situations.
> 
> gcc/testsuite/
>         * gcc.target/mips/split-ds-sequence.c: Test for the above.

Hi Simon,
This patch looks okay with the exception of one stylistic change.
Please change all instances of :
+mips_breakable_sequence_p (rtx_insn * insn)
To:
+mips_breakable_sequence_p (rtx_insn *insn)
Okay, with those changes.
Thanks,
Catherine


> 
> Index: config/mips/mips.c
> ==========================================================
> =========
> --- config/mips/mips.c	(revision 228282)
> +++ config/mips/mips.c	(working copy)
> @@ -16973,6 +16973,34 @@
>        }
>  }
> 
> +/* A SEQUENCE is breakable iff the branch inside it has a compact form
> +   and the target has compact branches.  */
> +
> +static bool
> +mips_breakable_sequence_p (rtx_insn * insn)
> +{
> +  return (insn && GET_CODE (PATTERN (insn)) == SEQUENCE
> +	  && TARGET_CB_MAYBE
> +	  && get_attr_compact_form (SEQ_BEGIN (insn)) !=
> COMPACT_FORM_NEVER);
> +}
> +
> +/* Remove a SEQUENCE and replace it with the delay slot instruction
> +   followed by the branch and return the instruction in the delay slot.
> +   Return the first of the two new instructions.
> +   Subroutine of mips_reorg_process_insns.  */
> +
> +static rtx_insn *
> +mips_break_sequence (rtx_insn * insn)
> +{
> +  rtx_insn * before = PREV_INSN (insn);
> +  rtx_insn * branch = SEQ_BEGIN (insn);
> +  rtx_insn * ds = SEQ_END (insn);
> +  remove_insn (insn);
> +  add_insn_after (ds, before, NULL);
> +  add_insn_after (branch, ds, NULL);
> +  return ds;
> +}
> +
>  /* Go through the instruction stream and insert nops where necessary.
>     Also delete any high-part relocations whose partnering low parts
>     are now all dead.  See if the whole function can then be put into
> @@ -17065,6 +17093,68 @@
>  	{
>  	  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
>  	    {
> +	      rtx_insn * next_active = next_active_insn (insn);
> +	      /* Undo delay slots to avoid bubbles if the next instruction can
> +		 be placed in a forbidden slot or the cost of adding an
> +		 explicit NOP in a forbidden slot is OK and if the SEQUENCE is
> +		 safely breakable.  */
> +	      if (TARGET_CB_MAYBE
> +		  && mips_breakable_sequence_p (insn)
> +		  && INSN_P (SEQ_BEGIN (insn))
> +		  && INSN_P (SEQ_END (insn))
> +		  && ((next_active
> +		       && INSN_P (next_active)
> +		       && GET_CODE (PATTERN (next_active)) != SEQUENCE
> +		       && get_attr_can_delay (next_active) ==
> CAN_DELAY_YES)
> +		      || !optimize_size))
> +		{
> +		  /* To hide a potential pipeline bubble, if we scan backwards
> +		     from the current SEQUENCE and find that there is a load
> +		     of a value that is used in the CTI and there are no
> +		     dependencies between the CTI and instruction in the
> delay
> +		     slot, break the sequence so the load delay is hidden.  */
> +		  HARD_REG_SET uses;
> +		  CLEAR_HARD_REG_SET (uses);
> +		  note_uses (&PATTERN (SEQ_BEGIN (insn)),
> record_hard_reg_uses,
> +			     &uses);
> +		  HARD_REG_SET delay_sets;
> +		  CLEAR_HARD_REG_SET (delay_sets);
> +		  note_stores (PATTERN (SEQ_END (insn)),
> record_hard_reg_sets,
> +			       &delay_sets);
> +
> +		  rtx prev = prev_active_insn (insn);
> +		  if (prev
> +		      && GET_CODE (PATTERN (prev)) == SET
> +		      && MEM_P (SET_SRC (PATTERN (prev))))
> +		    {
> +		      HARD_REG_SET sets;
> +		      CLEAR_HARD_REG_SET (sets);
> +		      note_stores (PATTERN (prev), record_hard_reg_sets,
> +				   &sets);
> +
> +		      /* Re-order if safe.  */
> +		      if (!hard_reg_set_intersect_p (delay_sets, uses)
> +			  && hard_reg_set_intersect_p (uses, sets))
> +			{
> +			  next_insn = mips_break_sequence (insn);
> +			  /* Need to process the hazards of the newly
> +			     introduced instructions.  */
> +			  continue;
> +			}
> +		    }
> +
> +		  /* If we find an orphaned high-part relocation in a delay
> +		     slot then we can convert to a compact branch and get
> +		     the orphaned high part deleted.  */
> +		  if (mips_orphaned_high_part_p (&htab, SEQ_END (insn)))
> +		    {
> +		      next_insn = mips_break_sequence (insn);
> +		      /* Need to process the hazards of the newly
> +			 introduced instructions.  */
> +		      continue;
> +		    }
> +		}
> +
>  	      /* If we find an orphaned high-part relocation in a delay
>  		 slot, it's easier to turn that instruction into a NOP than
>  		 to delete it.  The delay slot will be a NOP either way.  */
> @@ -17099,6 +17189,33 @@
>  		{
>  		  mips_avoid_hazard (last_insn, insn, &hilo_delay,
>  				     &delayed_reg, lo_reg, &fs_delay);
> +		  /* When a compact branch introduces a forbidden slot
> hazard
> +		     and the next useful instruction is a SEQUENCE of a jump
> +		     and a non-nop instruction in the delay slot, remove the
> +		     sequence and replace it with the delay slot instruction
> +		     then the jump to clear the forbidden slot hazard.  */
> +
> +		  if (fs_delay)
> +		    {
> +		      /* Search onwards from the current position looking for
> +			 a SEQUENCE.  We are looking for pipeline hazards
> here
> +			 and do not need to worry about labels or barriers as
> +			 the optimization only undoes delay slot filling which
> +			 only affects the order of the branch and its delay
> +			 slot.  */
> +		      rtx_insn * next = next_active_insn (insn);
> +		      if (next
> +			  && USEFUL_INSN_P (next)
> +			  && GET_CODE (PATTERN (next)) == SEQUENCE
> +			  && mips_breakable_sequence_p (next))
> +			{
> +			  last_insn = insn;
> +			  next_insn = mips_break_sequence (next);
> +			  /* Need to process the hazards of the newly
> +			     introduced instructions.  */
> +			  continue;
> +			}
> +		    }
>  		  last_insn = insn;
>  		}
>  	    }
> Index: testsuite/gcc.target/mips/split-ds-sequence.c
> ==========================================================
> =========
> --- testsuite/gcc.target/mips/split-ds-sequence.c	(revision 0)
> +++ testsuite/gcc.target/mips/split-ds-sequence.c	(working copy)
> @@ -0,0 +1,19 @@
> +/* { dg-options "isa_rev>=6" } */
> +/* { dg-skip-if "code quality test" { *-*-* } { "-mcompact-branches=never" }
> { "" } } */
> +/* { dg-final { scan-assembler-not "nop" } } */
> +
> +int
> +testg2 (int a, int c)
> +{
> +
> +  int j = 0;
> +  do
> +    {
> +      j += a;
> +    }
> +  while (j < 56);
> +
> +  j += c;
> +  return j;
> +
> +}
> 
> -----Original Message-----
> From: Simon Dardis
> Sent: 25 September 2015 15:56
> To: Moore, Catherine
> Cc: gcc-patches@gcc.gnu.org
> Subject: [PATCH, Mips] Compact branch/delay slot optimization.
> 
> Hello,
> 
> The following patch adds three small optimizations related to compact
> branches for MIPSR6:
> 
> When the result of a load is used by a delay slot branch immediately
> afterwards, undo the delay slot branch scheduling to hide the pipeline
> bubble if safe and use a compact branch instead.
> 
> Undo delay slot scheduling if an orphaned high-part relocation is in a delay
> slot and use a compact branch is used instead.
> 
> Undo delay slot scheduling in the case where a forbidden slot hazard is
> immediately followed by a delay slot branch. This would cause a nop to be
> inserted otherwise.
> 
> No regressions. OK to apply?
> 
> Thanks,
> Simon
> 
> gcc/
>         * config/mips/mips.c: (mips_break_sequence): New function.
>           (mips_reorg_process_insns) Use it. Use compact branches in selected
>           situations.
> 
> gcc/testsuite/
>         * gcc.target/mips/split-ds-sequence.c: Test for the above.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH, Mips] Compact branch/delay slot optimization.
  2015-10-28 14:16 ` Moore, Catherine
@ 2015-11-11 13:41   ` Simon Dardis
  0 siblings, 0 replies; 4+ messages in thread
From: Simon Dardis @ 2015-11-11 13:41 UTC (permalink / raw)
  To: Moore, Catherine, Matthew Fortune; +Cc: gcc-patches

Committed as r230160.

Thanks,
Simon

> -----Original Message-----
> From: Moore, Catherine [mailto:Catherine_Moore@mentor.com]
> Sent: 28 October 2015 14:00
> To: Simon Dardis; Matthew Fortune
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [PATCH, Mips] Compact branch/delay slot optimization.
> 
> 
> 
> > -----Original Message-----
> > From: Simon Dardis [mailto:Simon.Dardis@imgtec.com]
> > Sent: Tuesday, October 06, 2015 10:00 AM
> > To: Moore, Catherine; Matthew Fortune
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: RE: [PATCH, Mips] Compact branch/delay slot optimization.
> >
> > Hello,
> >
> > I'd like to resubmit the previous patch as it failed to check if the
> > branch inside the sequence had a compact form.
> >
> > Thanks,
> > Simon
> >
> > gcc/
> >         * config/mips/mips.c: (mips_breakable_sequence_p): New function.
> >           (mips_break_sequence): New function.
> >           (mips_reorg_process_insns) Use them. Use compact branches in
> > selected
> >           situations.
> >
> > gcc/testsuite/
> >         * gcc.target/mips/split-ds-sequence.c: Test for the above.
> 
> Hi Simon,
> This patch looks okay with the exception of one stylistic change.
> Please change all instances of :
> +mips_breakable_sequence_p (rtx_insn * insn)
> To:
> +mips_breakable_sequence_p (rtx_insn *insn)
> Okay, with those changes.
> Thanks,
> Catherine
> 
> 
> >
> > Index: config/mips/mips.c
> >
> ==========================================================
> > =========
> > --- config/mips/mips.c	(revision 228282)
> > +++ config/mips/mips.c	(working copy)
> > @@ -16973,6 +16973,34 @@
> >        }
> >  }
> >
> > +/* A SEQUENCE is breakable iff the branch inside it has a compact form
> > +   and the target has compact branches.  */
> > +
> > +static bool
> > +mips_breakable_sequence_p (rtx_insn * insn) {
> > +  return (insn && GET_CODE (PATTERN (insn)) == SEQUENCE
> > +	  && TARGET_CB_MAYBE
> > +	  && get_attr_compact_form (SEQ_BEGIN (insn)) !=
> > COMPACT_FORM_NEVER);
> > +}
> > +
> > +/* Remove a SEQUENCE and replace it with the delay slot instruction
> > +   followed by the branch and return the instruction in the delay slot.
> > +   Return the first of the two new instructions.
> > +   Subroutine of mips_reorg_process_insns.  */
> > +
> > +static rtx_insn *
> > +mips_break_sequence (rtx_insn * insn) {
> > +  rtx_insn * before = PREV_INSN (insn);
> > +  rtx_insn * branch = SEQ_BEGIN (insn);
> > +  rtx_insn * ds = SEQ_END (insn);
> > +  remove_insn (insn);
> > +  add_insn_after (ds, before, NULL);
> > +  add_insn_after (branch, ds, NULL);
> > +  return ds;
> > +}
> > +
> >  /* Go through the instruction stream and insert nops where necessary.
> >     Also delete any high-part relocations whose partnering low parts
> >     are now all dead.  See if the whole function can then be put into
> > @@ -17065,6 +17093,68 @@
> >  	{
> >  	  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
> >  	    {
> > +	      rtx_insn * next_active = next_active_insn (insn);
> > +	      /* Undo delay slots to avoid bubbles if the next instruction can
> > +		 be placed in a forbidden slot or the cost of adding an
> > +		 explicit NOP in a forbidden slot is OK and if the SEQUENCE is
> > +		 safely breakable.  */
> > +	      if (TARGET_CB_MAYBE
> > +		  && mips_breakable_sequence_p (insn)
> > +		  && INSN_P (SEQ_BEGIN (insn))
> > +		  && INSN_P (SEQ_END (insn))
> > +		  && ((next_active
> > +		       && INSN_P (next_active)
> > +		       && GET_CODE (PATTERN (next_active)) != SEQUENCE
> > +		       && get_attr_can_delay (next_active) ==
> > CAN_DELAY_YES)
> > +		      || !optimize_size))
> > +		{
> > +		  /* To hide a potential pipeline bubble, if we scan backwards
> > +		     from the current SEQUENCE and find that there is a load
> > +		     of a value that is used in the CTI and there are no
> > +		     dependencies between the CTI and instruction in the
> > delay
> > +		     slot, break the sequence so the load delay is hidden.  */
> > +		  HARD_REG_SET uses;
> > +		  CLEAR_HARD_REG_SET (uses);
> > +		  note_uses (&PATTERN (SEQ_BEGIN (insn)),
> > record_hard_reg_uses,
> > +			     &uses);
> > +		  HARD_REG_SET delay_sets;
> > +		  CLEAR_HARD_REG_SET (delay_sets);
> > +		  note_stores (PATTERN (SEQ_END (insn)),
> > record_hard_reg_sets,
> > +			       &delay_sets);
> > +
> > +		  rtx prev = prev_active_insn (insn);
> > +		  if (prev
> > +		      && GET_CODE (PATTERN (prev)) == SET
> > +		      && MEM_P (SET_SRC (PATTERN (prev))))
> > +		    {
> > +		      HARD_REG_SET sets;
> > +		      CLEAR_HARD_REG_SET (sets);
> > +		      note_stores (PATTERN (prev), record_hard_reg_sets,
> > +				   &sets);
> > +
> > +		      /* Re-order if safe.  */
> > +		      if (!hard_reg_set_intersect_p (delay_sets, uses)
> > +			  && hard_reg_set_intersect_p (uses, sets))
> > +			{
> > +			  next_insn = mips_break_sequence (insn);
> > +			  /* Need to process the hazards of the newly
> > +			     introduced instructions.  */
> > +			  continue;
> > +			}
> > +		    }
> > +
> > +		  /* If we find an orphaned high-part relocation in a delay
> > +		     slot then we can convert to a compact branch and get
> > +		     the orphaned high part deleted.  */
> > +		  if (mips_orphaned_high_part_p (&htab, SEQ_END (insn)))
> > +		    {
> > +		      next_insn = mips_break_sequence (insn);
> > +		      /* Need to process the hazards of the newly
> > +			 introduced instructions.  */
> > +		      continue;
> > +		    }
> > +		}
> > +
> >  	      /* If we find an orphaned high-part relocation in a delay
> >  		 slot, it's easier to turn that instruction into a NOP than
> >  		 to delete it.  The delay slot will be a NOP either way.  */ @@
> > -17099,6 +17189,33 @@
> >  		{
> >  		  mips_avoid_hazard (last_insn, insn, &hilo_delay,
> >  				     &delayed_reg, lo_reg, &fs_delay);
> > +		  /* When a compact branch introduces a forbidden slot
> > hazard
> > +		     and the next useful instruction is a SEQUENCE of a jump
> > +		     and a non-nop instruction in the delay slot, remove the
> > +		     sequence and replace it with the delay slot instruction
> > +		     then the jump to clear the forbidden slot hazard.  */
> > +
> > +		  if (fs_delay)
> > +		    {
> > +		      /* Search onwards from the current position looking for
> > +			 a SEQUENCE.  We are looking for pipeline hazards
> > here
> > +			 and do not need to worry about labels or barriers as
> > +			 the optimization only undoes delay slot filling which
> > +			 only affects the order of the branch and its delay
> > +			 slot.  */
> > +		      rtx_insn * next = next_active_insn (insn);
> > +		      if (next
> > +			  && USEFUL_INSN_P (next)
> > +			  && GET_CODE (PATTERN (next)) == SEQUENCE
> > +			  && mips_breakable_sequence_p (next))
> > +			{
> > +			  last_insn = insn;
> > +			  next_insn = mips_break_sequence (next);
> > +			  /* Need to process the hazards of the newly
> > +			     introduced instructions.  */
> > +			  continue;
> > +			}
> > +		    }
> >  		  last_insn = insn;
> >  		}
> >  	    }
> > Index: testsuite/gcc.target/mips/split-ds-sequence.c
> >
> ==========================================================
> > =========
> > --- testsuite/gcc.target/mips/split-ds-sequence.c	(revision 0)
> > +++ testsuite/gcc.target/mips/split-ds-sequence.c	(working copy)
> > @@ -0,0 +1,19 @@
> > +/* { dg-options "isa_rev>=6" } */
> > +/* { dg-skip-if "code quality test" { *-*-* } {
> > +"-mcompact-branches=never" }
> > { "" } } */
> > +/* { dg-final { scan-assembler-not "nop" } } */
> > +
> > +int
> > +testg2 (int a, int c)
> > +{
> > +
> > +  int j = 0;
> > +  do
> > +    {
> > +      j += a;
> > +    }
> > +  while (j < 56);
> > +
> > +  j += c;
> > +  return j;
> > +
> > +}
> >
> > -----Original Message-----
> > From: Simon Dardis
> > Sent: 25 September 2015 15:56
> > To: Moore, Catherine
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: [PATCH, Mips] Compact branch/delay slot optimization.
> >
> > Hello,
> >
> > The following patch adds three small optimizations related to compact
> > branches for MIPSR6:
> >
> > When the result of a load is used by a delay slot branch immediately
> > afterwards, undo the delay slot branch scheduling to hide the pipeline
> > bubble if safe and use a compact branch instead.
> >
> > Undo delay slot scheduling if an orphaned high-part relocation is in a
> > delay slot and use a compact branch is used instead.
> >
> > Undo delay slot scheduling in the case where a forbidden slot hazard
> > is immediately followed by a delay slot branch. This would cause a nop
> > to be inserted otherwise.
> >
> > No regressions. OK to apply?
> >
> > Thanks,
> > Simon
> >
> > gcc/
> >         * config/mips/mips.c: (mips_break_sequence): New function.
> >           (mips_reorg_process_insns) Use it. Use compact branches in selected
> >           situations.
> >
> > gcc/testsuite/
> >         * gcc.target/mips/split-ds-sequence.c: Test for the above.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH, Mips] Compact branch/delay slot optimization.
@ 2015-09-25 15:05 Simon Dardis
  0 siblings, 0 replies; 4+ messages in thread
From: Simon Dardis @ 2015-09-25 15:05 UTC (permalink / raw)
  To: Moore, Catherine; +Cc: gcc-patches

Hello,

The following patch adds three small optimizations related to compact branches for MIPSR6: 

When the result of a load is used by a delay slot branch immediately afterwards, undo the
delay slot branch scheduling to hide the pipeline bubble if safe and use a compact branch
instead.

Undo delay slot scheduling if an orphaned high-part relocation is in a delay slot and use a
compact branch is used instead.

Undo delay slot scheduling in the case where a forbidden slot hazard is immediately followed
by a delay slot branch. This would cause a nop to be inserted otherwise.

No regressions. OK to apply?

Thanks,
Simon

gcc/
        * config/mips/mips.c: (mips_break_sequence): New function. 
          (mips_reorg_process_insns) Use it. Use compact branches in selected
          situations.
    
gcc/testsuite/
        * gcc.target/mips/split-ds-sequence.c: Test for the above.

Index: config/mips/mips.c
===================================================================
--- config/mips/mips.c	(revision 227676)
+++ config/mips/mips.c	(working copy)
@@ -16973,6 +16973,23 @@
       }
 }
 
+/* Remove a SEQUENCE and replace it with the delay slot instruction
+   followed by the branch and return the instruction in the delay slot.
+   Return the first of the two new instructions.
+   Subroutine of mips_reorg_process_insns.  */
+
+static rtx_insn *
+mips_break_sequence (rtx_insn * insn)
+{
+  rtx_insn * before = PREV_INSN (insn);
+  rtx_insn * branch = SEQ_BEGIN (insn);
+  rtx_insn * ds = SEQ_END (insn);
+  remove_insn (insn);
+  add_insn_after (ds, before, NULL);
+  add_insn_after (branch, ds, NULL);
+  return ds;
+}
+
 /* Go through the instruction stream and insert nops where necessary.
    Also delete any high-part relocations whose partnering low parts
    are now all dead.  See if the whole function can then be put into
@@ -17065,6 +17082,66 @@
 	{
 	  if (GET_CODE (PATTERN (insn)) == SEQUENCE)
 	    {
+	      rtx_insn * next_active = next_active_insn (insn);
+	      /* Undo delay slots to avoid bubbles if the next instruction can
+		 be placed in a forbidden slot or the cost of adding an
+		 explicit NOP in a forbidden slot is OK.  */
+	      if (TARGET_CB_MAYBE
+		  && INSN_P (SEQ_BEGIN (insn))
+		  && INSN_P (SEQ_END (insn))
+		  && ((next_active
+		       && INSN_P (next_active)
+		       && GET_CODE (PATTERN (next_active)) != SEQUENCE
+		       && get_attr_can_delay (next_active) == CAN_DELAY_YES)
+		      || !optimize_size))
+		{
+		  /* To hide a potential pipeline bubble, if we scan backwards
+		     from the current SEQUENCE and find that there is a load
+		     of a value that is used in the CTI and there are no
+		     dependencies between the CTI and instruction in the delay
+		     slot, break the sequence so the load delay is hidden.  */
+		  HARD_REG_SET uses;
+		  CLEAR_HARD_REG_SET (uses);
+		  note_uses (&PATTERN (SEQ_BEGIN (insn)), record_hard_reg_uses,
+			     &uses);
+		  HARD_REG_SET delay_sets;
+		  CLEAR_HARD_REG_SET (delay_sets);
+		  note_stores (PATTERN (SEQ_END (insn)), record_hard_reg_sets,
+			       &delay_sets);
+
+		  rtx prev = prev_active_insn (insn);
+		  if (prev
+		      && GET_CODE (PATTERN (prev)) == SET
+		      && MEM_P (SET_SRC (PATTERN (prev))))
+		    {
+		      HARD_REG_SET sets;
+		      CLEAR_HARD_REG_SET (sets);
+		      note_stores (PATTERN (prev), record_hard_reg_sets,
+				   &sets);
+
+		      /* Re-order if safe.  */
+		      if (!hard_reg_set_intersect_p (delay_sets, uses)
+			  && hard_reg_set_intersect_p (uses, sets))
+			{
+			  next_insn = mips_break_sequence (insn);
+			  /* Need to process the hazards of the newly
+			     introduced instructions.  */
+			  continue;
+			}
+		    }
+
+		  /* If we find an orphaned high-part relocation in a delay
+		     slot then we can convert to a compact branch and get
+		     the orphaned high part deleted.  */
+		  if (mips_orphaned_high_part_p (&htab, SEQ_END (insn)))
+		    {
+		      next_insn = mips_break_sequence (insn);
+		      /* Need to process the hazards of the newly
+			 introduced instructions.  */
+		      continue;
+		    }
+		}
+
 	      /* If we find an orphaned high-part relocation in a delay
 		 slot, it's easier to turn that instruction into a NOP than
 		 to delete it.  The delay slot will be a NOP either way.  */
@@ -17099,6 +17176,32 @@
 		{
 		  mips_avoid_hazard (last_insn, insn, &hilo_delay,
 				     &delayed_reg, lo_reg, &fs_delay);
+		  /* When a compact branch introduces a forbidden slot hazard
+		     and the next useful instruction is a SEQUENCE of a jump
+		     and a non-nop instruction in the delay slot, remove the
+		     sequence and replace it with the delay slot instruction
+		     then the jump to clear the forbidden slot hazard.  */
+
+		  if (fs_delay)
+		    {
+		      /* Search onwards from the current position looking for
+			 a SEQUENCE.  We are looking for pipeline hazards here
+			 and do not need to worry about labels or barriers as
+			 the optimization only undoes delay slot filling which
+			 only affects the order of the branch and its delay
+			 slot.  */
+		      rtx_insn * next = next_active_insn (insn);
+		      if (next
+			  && USEFUL_INSN_P (next)
+			  && GET_CODE (PATTERN (next)) == SEQUENCE)
+			{
+			  last_insn = insn;
+			  next_insn = mips_break_sequence (next);
+			  /* Need to process the hazards of the newly
+			     introduced instructions.  */
+			  continue;
+			}
+		    }
 		  last_insn = insn;
 		}
 	    }

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-11-11 13:41 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-06 14:00 [PATCH, Mips] Compact branch/delay slot optimization Simon Dardis
2015-10-28 14:16 ` Moore, Catherine
2015-11-11 13:41   ` Simon Dardis
  -- strict thread matches above, loose matches on Subject: below --
2015-09-25 15:05 Simon Dardis

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).