public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* ifcvt/crossjump patch: Fix PR 42496, 21803
@ 2010-03-31 22:08 Bernd Schmidt
  2010-04-01 18:00 ` Steven Bosscher
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-03-31 22:08 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1428 bytes --]

The two PRs 42496 and 21803 show a problem with the RTL ifcvt pass:
sometimes, we end up with two identical insns, predicated with opposite
conditions.

As suggested in the PR, I've fixed this by reusing the crossjumping
code, adding a variant that looks for matching sequences at the start of
a block.  If we find matches between the then and else blocks, we delete
the tail sequence from the then block and the head sequence from the
else block, and end up with all instructions in their proper order.

Here's an example of the code finding both a head and a tail match:
        cmp     r3, r2                          cmp     r3, r2
        ldrhi   r3, [r5, #112]    |             ldr     r3, [r5, #112]
        addhi   r3, r3, r3, ls                  addhi   r3, r3, r3, ls
        movhi   r3, r3, asr #1                  movhi   r3, r3, asr #1
        strhi   r3, [r5, #112]    <
        ldrls   r3, [r5, #112]    <
        subls   r3, r3, #1                      subls   r3, r3, #1
        strls   r3, [r5, #112]    |             str     r3, [r5, #112]
        b       .L213                           b       .L213

Note that I've added the various EH sanity checks in the cfgcleanup.c
code after successful testing, just to make sure, and hoping I haven't
missed any other corner cases.

A previous arm-eabi{,mthumb} test run exposed one bug which I've fixed;
new test run now in progress overnight.  Ok for 4.6?


Bernd

[-- Attachment #2: ifcvt-pr42496.diff --]
[-- Type: text/plain, Size: 9702 bytes --]

	PR target/21803
	* ifcvt.c (cond_exec_process_if_block): Look for identical sequences
	at the start and end of the then/else blocks, and omit them from the
	conversion.
	* cfgcleanup.c (flow_find_cross_jump): No longer static.
	(flow_find_head_matching_sequence): New function.
	(old_insns_match_p): Check REG_EH_REGION notes for calls.
	* basic-block.h (flow_find_cross_jump,
	flow_find_head_matching_sequence): Declare functions.

	PR target/21803
	* gcc.target/arm/pr42496.c: New test.

Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 157454)
+++ ifcvt.c	(working copy)
@@ -385,6 +385,10 @@ cond_exec_process_if_block (ce_if_block_
   rtx false_expr;		/* test for then block insns */
   rtx true_prob_val;		/* probability of else block */
   rtx false_prob_val;		/* probability of then block */
+  rtx then_last_head = NULL_RTX;	/* Last match at the head of THEN */
+  rtx else_last_head = NULL_RTX;	/* Last match at the head of ELSE */
+  rtx then_first_tail = NULL_RTX;	/* First match at the tail of THEN */
+  rtx else_first_tail = NULL_RTX;	/* First match at the tail of ELSE */
   int n_insns;
   enum rtx_code false_code;
 
@@ -423,10 +427,71 @@ cond_exec_process_if_block (ce_if_block_
 
   if (else_bb)
     {
+      int n_matching;
+
       max *= 2;
       else_start = first_active_insn (else_bb);
       else_end = last_active_insn (else_bb, TRUE);
       n_insns += ce_info->num_else_insns = count_bb_insns (else_bb);
+
+      /* Look for matching sequences at the head and tail of the two blocks,
+	 and limit the range of insns to be converted if possible.  */
+      n_matching = flow_find_cross_jump (0, then_bb, else_bb,
+					 &then_first_tail, &else_first_tail);
+      if (then_first_tail == BB_HEAD (then_bb))
+	then_start = then_end = NULL_RTX;
+      if (else_first_tail == BB_HEAD (else_bb))
+	else_start = else_end = NULL_RTX;
+
+      if (n_matching > 0)
+	{
+	  if (then_end)
+	    then_end = prev_active_insn (then_first_tail);
+	  if (else_end)
+	    else_end = prev_active_insn (else_first_tail);
+	  n_insns -= 2 * n_matching;
+	}
+
+      if (then_start && else_start)
+	{
+	  n_matching
+	    = flow_find_head_matching_sequence (0, then_bb, else_bb,
+						&then_last_head,
+						&else_last_head);
+
+	  if (then_last_head == then_end)
+	    then_start = then_end = NULL_RTX;
+	  if (else_last_head == else_end)
+	    else_start = else_end = NULL_RTX;
+
+	  if (n_matching > 0)
+	    {
+	      rtx insn;
+
+	      if (then_start)
+		then_start = next_active_insn (then_last_head);
+	      if (else_start)
+		else_start = next_active_insn (else_last_head);
+	      n_insns -= 2 * n_matching;
+
+	      /* We won't pass the insns in the head sequence to
+		 cond_exec_process_insns, so we need to test them here
+		 to make sure that they don't clobber the condition.  */
+	      insn = BB_HEAD (then_bb);
+	      for (;;)
+		{
+		  if (!LABEL_P (insn) && !NOTE_P (insn)
+		      && !DEBUG_INSN_P (insn))
+		    {
+		      if (modified_in_p (test_expr, insn))
+			return FALSE;
+		    }
+		  if (insn == then_last_head)
+		    break;
+		  insn = NEXT_INSN (insn);
+		}
+	    }
+	}
     }
 
   if (n_insns > max)
@@ -570,7 +635,18 @@ cond_exec_process_if_block (ce_if_block_
     fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
 	     n_insns, (n_insns == 1) ? " was" : "s were");
 
-  /* Merge the blocks!  */
+  /* Merge the blocks!  If we had matching sequences, make sure to delete one
+     copy at the appropriate location first.  */
+  if (then_first_tail)
+    {
+      rtx from = then_first_tail;
+      if (!INSN_P (from))
+	from = next_active_insn (from);
+      delete_insn_chain (from, BB_END (then_bb), false);
+    }
+  if (else_last_head)
+    delete_insn_chain (first_active_insn (else_bb), else_last_head, false);
+
   merge_if_block (ce_info);
   cond_exec_changed_p = TRUE;
   return TRUE;
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 157454)
+++ cfgcleanup.c	(working copy)
@@ -68,7 +68,6 @@ static bool crossjumps_occured;
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
-static int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
 static bool old_insns_match_p (int, rtx, rtx);
 
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
@@ -972,13 +971,27 @@ old_insns_match_p (int mode ATTRIBUTE_UN
      be filled that clobbers a parameter expected by the subroutine.
 
      ??? We take the simple route for now and assume that if they're
-     equal, they were constructed identically.  */
+     equal, they were constructed identically.
 
-  if (CALL_P (i1)
-      && (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
+     Also check for identical exception regions.  */
+
+  if (CALL_P (i1))
+    {
+      /* Ensure the same EH region.  */
+      rtx n1 = find_reg_note (i1, REG_EH_REGION, 0);
+      rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
+
+      if (!n1 && n2)
+	return false;
+
+      if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
+	return false;
+
+      if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
 			CALL_INSN_FUNCTION_USAGE (i2))
-	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2)))
-    return false;
+	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
+	return false;
+    }
 
 #ifdef STACK_REGS
   /* If cross_jump_death_matters is not 0, the insn's mode
@@ -1024,7 +1037,7 @@ old_insns_match_p (int mode ATTRIBUTE_UN
    To simplify callers of this function, if the blocks match exactly,
    store the head of the blocks in *F1 and *F2.  */
 
-static int
+int
 flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
 		      basic_block bb2, rtx *f1, rtx *f2)
 {
@@ -1130,6 +1143,103 @@ flow_find_cross_jump (int mode ATTRIBUTE
   return ninsns;
 }
 
+/* Like flow_find_cross_jump, except start looking for a matching sequence from
+   the head of the two blocks.  Do not include jumps at the end.  */
+
+int
+flow_find_head_matching_sequence (int mode ATTRIBUTE_UNUSED, basic_block bb1,
+				  basic_block bb2, rtx *f1, rtx *f2)
+{
+  rtx i1, i2, last1, last2, beforelast1, beforelast2;
+  int ninsns = 0;
+  edge e;
+  edge_iterator ei;
+  int nehedges1 = 0, nehedges2 = 0;
+
+  FOR_EACH_EDGE (e, ei, bb1->succs)
+    if (e->flags & EDGE_EH)
+      nehedges1++;
+  FOR_EACH_EDGE (e, ei, bb2->succs)
+    if (e->flags & EDGE_EH)
+      nehedges2++;
+
+  i1 = BB_HEAD (bb1);
+  i2 = BB_HEAD (bb2);
+  last1 = beforelast1 = last2 = beforelast2 = NULL_RTX;
+
+  while (true)
+    {
+
+      /* Skip notes, labels and debug insns.  */
+      while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
+	i1 = NEXT_INSN (i1);
+
+      while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
+	i2 = NEXT_INSN (i2);
+
+      if (JUMP_P (i1) || JUMP_P (i2))
+	break;
+
+      if ((i1 == BB_END (bb1) && i2 != BB_END (bb2)
+	   && nehedges1 > 0)
+	  || (i2 == BB_END (bb2) && i1 != BB_END (bb1)
+	      && nehedges2 > 0)
+	  || (i1 == BB_END (bb1) && i2 == BB_END (bb2)
+	      && nehedges1 != nehedges2))
+	break;
+
+      if (!old_insns_match_p (mode, i1, i2))
+	break;
+
+      merge_memattrs (i1, i2);
+
+      /* Only real insns count towards the matching head sequence.  */
+      if (INSN_P (i1))
+	{
+	  /* If the merged insns have different REG_EQUAL notes, then
+	     remove them.  */
+	  rtx equiv1 = find_reg_equal_equiv_note (i1);
+	  rtx equiv2 = find_reg_equal_equiv_note (i2);
+
+	  if (equiv1 && !equiv2)
+	    remove_note (i1, equiv1);
+	  else if (!equiv1 && equiv2)
+	    remove_note (i2, equiv2);
+	  else if (equiv1 && equiv2
+		   && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
+	    {
+	      remove_note (i1, equiv1);
+	      remove_note (i2, equiv2);
+	    }
+
+	  beforelast1 = last1, beforelast2 = last2;
+	  last1 = i1, last2 = i2;
+	  ninsns++;
+	}
+
+      if (i1 == BB_END (bb1) || i2 == BB_END (bb2))
+	break;
+
+      i1 = NEXT_INSN (i1);
+      i2 = NEXT_INSN (i2);
+    }
+
+#ifdef HAVE_cc0
+  /* Don't allow a compare to be shared by cross-jumping unless the insn
+     after the compare is also shared.  */
+  if (ninsns && reg_mentioned_p (cc0_rtx, last1) && sets_cc0_p (last1))
+    last1 = beforelast1, last2 = beforelast2, ninsns--;
+#endif
+
+  if (ninsns)
+    {
+      *f1 = last1;
+      *f2 = last2;
+    }
+
+  return ninsns;
+}
+
 /* Return true iff outgoing edges of BB1 and BB2 match, together with
    the branch instruction.  This means that if we commonize the control
    flow before end of the basic block, the semantic remains unchanged.
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 157454)
+++ basic-block.h	(working copy)
@@ -894,6 +899,10 @@ extern void rtl_make_eh_edge (sbitmap, b
 
 /* In cfgcleanup.c.  */
 extern bool cleanup_cfg (int);
+extern int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
+extern int flow_find_head_matching_sequence (int, basic_block, basic_block,
+					     rtx *, rtx *);
+
 extern bool delete_unreachable_blocks (void);
 
 extern bool mark_dfs_back_edges (void);
Index: testsuite/gcc.target/arm/pr42496.c
===================================================================
--- testsuite/gcc.target/arm/pr42496.c	(revision 0)
+++ testsuite/gcc.target/arm/pr42496.c	(revision 0)
@@ -0,0 +1,16 @@
+/* { dg-options "-O2" }  */
+
+void foo(int i)
+{
+    extern int j;
+
+    if (i) {
+         j = 10;
+    }
+    else {
+          j = 20;
+    }
+}
+
+/* { dg-final { scan-assembler-not "strne" } } */
+/* { dg-final { scan-assembler-not "streq" } } */

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-03-31 22:08 ifcvt/crossjump patch: Fix PR 42496, 21803 Bernd Schmidt
@ 2010-04-01 18:00 ` Steven Bosscher
  2010-04-01 18:01   ` Bernd Schmidt
                     ` (2 more replies)
  0 siblings, 3 replies; 95+ messages in thread
From: Steven Bosscher @ 2010-04-01 18:00 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: GCC Patches

On Thu, Apr 1, 2010 at 12:29 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> The two PRs 42496 and 21803 show a problem with the RTL ifcvt pass:
> sometimes, we end up with two identical insns, predicated with opposite
> conditions.

Cool. But unfortunately I get an ICE on ia64 with this patch applied.

Do you still have access to ia64? Otherwise I'll see if I can debug this.

Ciao!
Steven

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-01 18:00 ` Steven Bosscher
@ 2010-04-01 18:01   ` Bernd Schmidt
  2010-04-02  9:45   ` Bernd Schmidt
  2010-04-06  9:21   ` Bernd Schmidt
  2 siblings, 0 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-01 18:01 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: GCC Patches

On 04/01/2010 07:00 PM, Steven Bosscher wrote:

> Do you still have access to ia64? Otherwise I'll see if I can debug this.

Compile farm only.  If you have a build already lying around it would be
helpful if you could dig into it a little.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-01 18:00 ` Steven Bosscher
  2010-04-01 18:01   ` Bernd Schmidt
@ 2010-04-02  9:45   ` Bernd Schmidt
  2010-04-06  9:21   ` Bernd Schmidt
  2 siblings, 0 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-02  9:45 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: GCC Patches

On 04/01/2010 07:00 PM, Steven Bosscher wrote:
> On Thu, Apr 1, 2010 at 12:29 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>> The two PRs 42496 and 21803 show a problem with the RTL ifcvt pass:
>> sometimes, we end up with two identical insns, predicated with opposite
>> conditions.
> 
> Cool. But unfortunately I get an ICE on ia64 with this patch applied.
> 
> Do you still have access to ia64? Otherwise I'll see if I can debug this.

I found it.  The problem is that we match one insn from the else block
twice - the head sequence matches for three insns, the tail sequence for
two, but the else block only has 5 insns in total.  Should be fixable.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-01 18:00 ` Steven Bosscher
  2010-04-01 18:01   ` Bernd Schmidt
  2010-04-02  9:45   ` Bernd Schmidt
@ 2010-04-06  9:21   ` Bernd Schmidt
  2010-04-10 10:37     ` Eric Botcazou
  2010-04-12 20:43     ` Jim Wilson
  2 siblings, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-06  9:21 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: GCC Patches, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 1501 bytes --]

On 04/01/2010 07:00 PM, Steven Bosscher wrote:
> On Thu, Apr 1, 2010 at 12:29 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>> The two PRs 42496 and 21803 show a problem with the RTL ifcvt pass:
>> sometimes, we end up with two identical insns, predicated with opposite
>> conditions.
> 
> Cool. But unfortunately I get an ICE on ia64 with this patch applied.

Here's a new version of the patch, with the ICE fixed.  Also, I've added
another optimization in the same spirit to ifcvt: if we have identical
sequences at the head of the then and else blocks, try to move them
backwards across the if statement.  This helps even on targets that
don't support conditional execution.

A slightly earlier version of this patch was bootstrapped and tested on
i686-linux and ia64-linux.  On ia64, there were the following problems,
none of which I believe to show a problem with the patch:

  * Examine the testcase gcc.c-torture/compile/920625-1.c: it has
    workarounds for an ia64 assembler bug related to predication.  The
    same issue now also occurs in a libstdc++ testcase and two
    libgfortran ones.  Jim - any ideas what to do about this?
  * One libmudflap test fails: pass54-frag.c, which seems to be a
    problem with the testcase - it passes when I add a return 0 at the
    end of main.
  * libmudflap.cth/pass40-frag is sometimes timing out, but when run
    manually it seems to work fine and I've also had successful runs
    from make check-target-libmudflap.

Ok for 4.6?


Bernd

[-- Attachment #2: ifcvt-v5.diff --]
[-- Type: text/plain, Size: 15392 bytes --]

	PR target/21803
	* ifcvt.c (cond_exec_process_if_block): Look for identical sequences
	at the start and end of the then/else blocks, and omit them from the
	conversion.
	(move_across_if): New function.
	(find_if_header): Call it.
	* cfgcleanup.c (flow_find_cross_jump): No longer static.
	(flow_find_head_matching_sequence): New function.
	(old_insns_match_p): Check REG_EH_REGION notes for calls.
	* basic-block.h (flow_find_cross_jump,
	flow_find_head_matching_sequence): Declare functions.

	PR target/21803
	* gcc.target/arm/pr42496.c: New test.

Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 157454)
+++ ifcvt.c	(working copy)
@@ -385,7 +385,11 @@ cond_exec_process_if_block (ce_if_block_
   rtx false_expr;		/* test for then block insns */
   rtx true_prob_val;		/* probability of else block */
   rtx false_prob_val;		/* probability of then block */
-  int n_insns;
+  rtx then_last_head = NULL_RTX;	/* Last match at the head of THEN */
+  rtx else_last_head = NULL_RTX;	/* Last match at the head of ELSE */
+  rtx then_first_tail = NULL_RTX;	/* First match at the tail of THEN */
+  rtx else_first_tail = NULL_RTX;	/* First match at the tail of ELSE */
+  int then_n_insns, else_n_insns, n_insns;
   enum rtx_code false_code;
 
   /* If test is comprised of && or || elements, and we've failed at handling
@@ -418,15 +422,81 @@ cond_exec_process_if_block (ce_if_block_
      number of insns and see if it is small enough to convert.  */
   then_start = first_active_insn (then_bb);
   then_end = last_active_insn (then_bb, TRUE);
-  n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  then_n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  n_insns = then_n_insns;
   max = MAX_CONDITIONAL_EXECUTE;
 
   if (else_bb)
     {
+      int n_matching;
+
       max *= 2;
       else_start = first_active_insn (else_bb);
       else_end = last_active_insn (else_bb, TRUE);
-      n_insns += ce_info->num_else_insns = count_bb_insns (else_bb);
+      else_n_insns = ce_info->num_else_insns = count_bb_insns (else_bb);
+      n_insns += else_n_insns;
+
+      /* Look for matching sequences at the head and tail of the two blocks,
+	 and limit the range of insns to be converted if possible.  */
+      n_matching = flow_find_cross_jump (0, then_bb, else_bb,
+					 &then_first_tail, &else_first_tail);
+      if (then_first_tail == BB_HEAD (then_bb))
+	then_start = then_end = NULL_RTX;
+      if (else_first_tail == BB_HEAD (else_bb))
+	else_start = else_end = NULL_RTX;
+
+      if (n_matching > 0)
+	{
+	  if (then_end)
+	    then_end = prev_active_insn (then_first_tail);
+	  if (else_end)
+	    else_end = prev_active_insn (else_first_tail);
+	  n_insns -= 2 * n_matching;
+	}
+
+      if (then_start && else_start)
+	{
+	  int longest_match = MIN (then_n_insns - n_matching,
+				   else_n_insns - n_matching);
+	  n_matching
+	    = flow_find_head_matching_sequence (0, then_bb, else_bb,
+						&then_last_head,
+						&else_last_head,
+						longest_match);
+      
+	  if (then_last_head == then_end)
+	    then_start = then_end = NULL_RTX;
+	  if (else_last_head == else_end)
+	    else_start = else_end = NULL_RTX;
+
+	  if (n_matching > 0)
+	    {
+	      rtx insn;
+
+	      if (then_start)
+		then_start = next_active_insn (then_last_head);
+	      if (else_start)
+		else_start = next_active_insn (else_last_head);
+	      n_insns -= 2 * n_matching;
+
+	      /* We won't pass the insns in the head sequence to
+		 cond_exec_process_insns, so we need to test them here
+		 to make sure that they don't clobber the condition.  */
+	      insn = BB_HEAD (then_bb);
+	      for (;;)
+		{
+		  if (!LABEL_P (insn) && !NOTE_P (insn)
+		      && !DEBUG_INSN_P (insn))
+		    {
+		      if (modified_in_p (test_expr, insn))
+			return FALSE;
+		    }
+		  if (insn == then_last_head)
+		    break;
+		  insn = NEXT_INSN (insn);
+		}
+	    }
+	}
     }
 
   if (n_insns > max)
@@ -570,7 +640,18 @@ cond_exec_process_if_block (ce_if_block_
     fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
 	     n_insns, (n_insns == 1) ? " was" : "s were");
 
-  /* Merge the blocks!  */
+  /* Merge the blocks!  If we had matching sequences, make sure to delete one
+     copy at the appropriate location first.  */
+  if (then_first_tail)
+    {
+      rtx from = then_first_tail;
+      if (!INSN_P (from))
+	from = next_active_insn (from);
+      delete_insn_chain (from, BB_END (then_bb), false);
+    }
+  if (else_last_head)
+    delete_insn_chain (first_active_insn (else_bb), else_last_head, false);
+
   merge_if_block (ce_info);
   cond_exec_changed_p = TRUE;
   return TRUE;
@@ -2895,6 +2976,151 @@ noce_find_if_block (basic_block test_bb,
   return FALSE;
 }
 \f
+/* Try to move identical code from the THEN and ELSE blocks backwards across a
+   conditional jump into the IF block.  CE_INFO describes the structure we
+   found.  */
+
+static bool
+move_across_if (struct ce_if_block * ce_info)
+{
+  basic_block test_bb = ce_info->test_bb;
+  basic_block then_bb = ce_info->then_bb;
+  basic_block else_bb = ce_info->else_bb;
+  edge cur_edge;
+  edge_iterator ei;
+  rtx test_expr;
+  rtx then_start, else_start;
+  rtx then_last_head, else_last_head;
+  int n_matching;
+
+  if (!else_bb)
+    return false;
+
+  ce_info->last_test_bb = test_bb;
+
+  /* We only ever should get here after reload.  */
+  gcc_assert (reload_completed);
+
+  /* The THEN and ELSE blocks must have exactly one predecessor.  */
+  if (EDGE_COUNT (then_bb->preds) != 1 || EDGE_COUNT (else_bb->preds) != 1)
+    return FALSE;
+
+  if (else_bb == EXIT_BLOCK_PTR || then_bb == EXIT_BLOCK_PTR)
+    return FALSE;
+
+  /* The incoming edges of the THEN and ELSE blocks cannot be complex.  */
+  FOR_EACH_EDGE (cur_edge, ei, then_bb->preds)
+    {
+      if (cur_edge->flags & EDGE_COMPLEX)
+	return FALSE;
+    }
+
+  FOR_EACH_EDGE (cur_edge, ei, else_bb->preds)
+    {
+      if (cur_edge->flags & EDGE_COMPLEX)
+	return FALSE;
+    }
+
+  num_possible_if_blocks++;
+
+  if (dump_file)
+    {
+      fprintf (dump_file,
+	       "\nIF-THEN%s block found, pass %d, start block %d "
+	       "[insn %d], then %d [%d]",
+	       (else_bb) ? "-ELSE" : "",
+	       ce_info->pass,
+	       test_bb->index,
+	       BB_HEAD (test_bb) ? (int)INSN_UID (BB_HEAD (test_bb)) : -1,
+	       then_bb->index,
+	       BB_HEAD (then_bb) ? (int)INSN_UID (BB_HEAD (then_bb)) : -1);
+
+      fprintf (dump_file, ", else %d [%d]",
+	       else_bb->index,
+	       BB_HEAD (else_bb) ? (int)INSN_UID (BB_HEAD (else_bb)) : -1);
+
+      fputc ('\n', dump_file);
+    }
+
+  /* Do the real work.  */
+
+  ce_info->else_bb = else_bb;
+  /* Find the conditional jump to the ELSE or JOIN part, and isolate
+     the test.  */
+  test_expr = cond_exec_get_condition (BB_END (test_bb));
+  if (! test_expr)
+    return FALSE;
+
+  /* We only have to avoid clobbering the expression; try to reduce it
+     to a single reg.  */
+  if ((GET_RTX_CLASS (GET_CODE (test_expr)) == RTX_COMM_COMPARE
+       || GET_RTX_CLASS (GET_CODE (test_expr)) == RTX_COMPARE)
+      && REG_P (XEXP (test_expr, 0))
+      && CONSTANT_P (XEXP (test_expr, 1)))
+    test_expr = XEXP (test_expr, 0);
+
+  /* If the conditional jump is more than just a conditional jump,
+     then we cannot move insns from the THEN and ELSE blocks above it.  */
+  if (! onlyjump_p (BB_END (test_bb)))
+    return FALSE;
+
+  /* Find the first active insn of the THEN and ELSE blocks, skipping any
+     labels and notes at the beginning of each block.  If either block has
+     no active insn, there is nothing to move.  */
+  then_start = first_active_insn (then_bb);
+  else_start = first_active_insn (else_bb);
+
+  if (then_start == NULL || else_start == NULL)
+    return FALSE;
+  
+  n_matching
+    = flow_find_head_matching_sequence (0, then_bb, else_bb,
+					&then_last_head,
+					&else_last_head, 0);
+      
+  if (n_matching > 0)
+    {
+      rtx then_insn, else_insn;
+      rtx last_then = NULL_RTX, last_else = NULL_RTX;
+
+      then_insn = then_start;
+      else_insn = else_start;
+      for (;;)
+	{
+	  if (modified_in_p (test_expr, then_insn))
+	    break;
+
+	  if (then_insn == BB_END (then_bb)
+	      && (find_reg_note (then_insn, REG_EH_REGION, 0)
+		  || control_flow_insn_p (then_insn)))
+	    break;
+	  
+	  last_then = then_insn;
+	  last_else = else_insn;
+
+	  if (then_insn == then_last_head)
+	    break;
+	  then_insn = next_active_insn (then_insn);
+	  else_insn = next_active_insn (else_insn);
+	}
+      if (last_then == NULL_RTX)
+	return FALSE;
+
+      df_set_bb_dirty (test_bb);
+      df_set_bb_dirty (then_bb);
+      df_set_bb_dirty (else_bb);
+      reorder_insns (then_start, last_then,
+		     PREV_INSN (BB_END (test_bb)));
+      delete_insn_chain (else_start, last_else, false);
+
+      num_true_changes++;
+      num_updated_if_blocks++;
+      return TRUE;
+    }
+
+  return FALSE;
+}
+\f
 
 /* Merge the blocks and mark for local life update.  */
 
@@ -3072,6 +3298,9 @@ find_if_header (basic_block test_bb, int
       && cond_exec_find_if_block (&ce_info))
     goto success;
 
+  if (reload_completed && move_across_if (&ce_info))
+    goto success;
+
   if (HAVE_trap
       && optab_handler (ctrap_optab, word_mode)->insn_code != CODE_FOR_nothing
       && find_cond_trap (test_bb, then_edge, else_edge))
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 157454)
+++ basic-block.h	(working copy)
@@ -894,6 +894,10 @@ extern void rtl_make_eh_edge (sbitmap, b
 
 /* In cfgcleanup.c.  */
 extern bool cleanup_cfg (int);
+extern int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
+extern int flow_find_head_matching_sequence (int, basic_block, basic_block,
+					     rtx *, rtx *, int);
+
 extern bool delete_unreachable_blocks (void);
 
 extern bool mark_dfs_back_edges (void);
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 157454)
+++ cfgcleanup.c	(working copy)
@@ -68,7 +68,6 @@ static bool crossjumps_occured;
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
-static int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
 static bool old_insns_match_p (int, rtx, rtx);
 
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
@@ -972,13 +971,27 @@ old_insns_match_p (int mode ATTRIBUTE_UN
      be filled that clobbers a parameter expected by the subroutine.
 
      ??? We take the simple route for now and assume that if they're
-     equal, they were constructed identically.  */
+     equal, they were constructed identically.
 
-  if (CALL_P (i1)
-      && (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
+     Also check for identical exception regions.  */
+
+  if (CALL_P (i1))
+    {
+      /* Ensure the same EH region.  */
+      rtx n1 = find_reg_note (i1, REG_EH_REGION, 0);
+      rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
+
+      if (!n1 && n2)
+	return false;
+
+      if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
+	return false;
+
+      if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
 			CALL_INSN_FUNCTION_USAGE (i2))
-	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2)))
-    return false;
+	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
+	return false;
+    }
 
 #ifdef STACK_REGS
   /* If cross_jump_death_matters is not 0, the insn's mode
@@ -1024,7 +1037,7 @@ old_insns_match_p (int mode ATTRIBUTE_UN
    To simplify callers of this function, if the blocks match exactly,
    store the head of the blocks in *F1 and *F2.  */
 
-static int
+int
 flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
 		      basic_block bb2, rtx *f1, rtx *f2)
 {
@@ -1130,6 +1143,108 @@ flow_find_cross_jump (int mode ATTRIBUTE
   return ninsns;
 }
 
+/* Like flow_find_cross_jump, except start looking for a matching sequence from
+   the head of the two blocks.  Do not include jumps at the end.
+   If STOP_AFTER is nonzero, stop after finding that many matching
+   instructions.  */
+
+int
+flow_find_head_matching_sequence (int mode ATTRIBUTE_UNUSED, basic_block bb1,
+				  basic_block bb2, rtx *f1, rtx *f2,
+				  int stop_after)
+{
+  rtx i1, i2, last1, last2, beforelast1, beforelast2;
+  int ninsns = 0;
+  edge e;
+  edge_iterator ei;
+  int nehedges1 = 0, nehedges2 = 0;
+
+  FOR_EACH_EDGE (e, ei, bb1->succs)
+    if (e->flags & EDGE_EH)
+      nehedges1++;
+  FOR_EACH_EDGE (e, ei, bb2->succs)
+    if (e->flags & EDGE_EH)
+      nehedges2++;
+
+  i1 = BB_HEAD (bb1);
+  i2 = BB_HEAD (bb2);
+  last1 = beforelast1 = last2 = beforelast2 = NULL_RTX;
+
+  while (true)
+    {
+
+      /* Skip notes, labels and debug insns.  */
+      while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
+	i1 = NEXT_INSN (i1);
+
+      while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
+	i2 = NEXT_INSN (i2);
+
+      if (NOTE_P (i1) || NOTE_P (i2)
+	  || JUMP_P (i1) || JUMP_P (i2))
+	break;
+
+      if ((i1 == BB_END (bb1) && i2 != BB_END (bb2)
+	   && nehedges1 > 0)
+	  || (i2 == BB_END (bb2) && i1 != BB_END (bb1)
+	      && nehedges2 > 0)
+	  || (i1 == BB_END (bb1) && i2 == BB_END (bb2)
+	      && nehedges1 != nehedges2))
+	break;
+
+      if (!old_insns_match_p (mode, i1, i2))
+	break;
+
+      merge_memattrs (i1, i2);
+
+      /* Only real insns count towards the matching head sequence.  */
+      if (INSN_P (i1))
+	{
+	  /* If the merged insns have different REG_EQUAL notes, then
+	     remove them.  */
+	  rtx equiv1 = find_reg_equal_equiv_note (i1);
+	  rtx equiv2 = find_reg_equal_equiv_note (i2);
+
+	  if (equiv1 && !equiv2)
+	    remove_note (i1, equiv1);
+	  else if (!equiv1 && equiv2)
+	    remove_note (i2, equiv2);
+	  else if (equiv1 && equiv2
+		   && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
+	    {
+	      remove_note (i1, equiv1);
+	      remove_note (i2, equiv2);
+	    }
+
+	  beforelast1 = last1, beforelast2 = last2;
+	  last1 = i1, last2 = i2;
+	  ninsns++;
+	}
+
+      if (i1 == BB_END (bb1) || i2 == BB_END (bb2)
+	  || (stop_after > 0 && ninsns == stop_after))
+	break;
+
+      i1 = NEXT_INSN (i1);
+      i2 = NEXT_INSN (i2);
+    }
+
+#ifdef HAVE_cc0
+  /* Don't allow a compare to be shared by cross-jumping unless the insn
+     after the compare is also shared.  */
+  if (ninsns && reg_mentioned_p (cc0_rtx, last1) && sets_cc0_p (last1))
+    last1 = beforelast1, last2 = beforelast2, ninsns--;
+#endif
+
+  if (ninsns)
+    {
+      *f1 = last1;
+      *f2 = last2;
+    }
+
+  return ninsns;
+}
+
 /* Return true iff outgoing edges of BB1 and BB2 match, together with
    the branch instruction.  This means that if we commonize the control
    flow before end of the basic block, the semantic remains unchanged.
Index: testsuite/gcc.target/arm/pr42496.c
===================================================================
--- testsuite/gcc.target/arm/pr42496.c	(revision 0)
+++ testsuite/gcc.target/arm/pr42496.c	(revision 0)
@@ -0,0 +1,16 @@
+/* { dg-options "-O2" }  */
+
+void foo(int i)
+{
+    extern int j;
+
+    if (i) {
+         j = 10;
+    }
+    else {
+          j = 20;
+    }
+}
+
+/* { dg-final { scan-assembler-not "strne" } } */
+/* { dg-final { scan-assembler-not "streq" } } */

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-06  9:21   ` Bernd Schmidt
@ 2010-04-10 10:37     ` Eric Botcazou
  2010-04-12 23:34       ` Bernd Schmidt
  2010-04-14 21:09       ` Bernd Schmidt
  2010-04-12 20:43     ` Jim Wilson
  1 sibling, 2 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-04-10 10:37 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> Here's a new version of the patch, with the ICE fixed.  Also, I've added
> another optimization in the same spirit to ifcvt: if we have identical
> sequences at the head of the then and else blocks, try to move them
> backwards across the if statement.  This helps even on targets that
> don't support conditional execution.

Could you split it up though, i.e. have a first patch to fix PR 21803/42496 
and a second patch for the additional optimization?  This helps reghunting.

Adding EH checking code for calls to old_insns_match_p is a step forward but 
isn't sufficient when -fnon-call-exceptions is enabled (Java, Ada).

The ATTRIBUTE_UNUSED on 'mode' in flow_find_cross_jump is already bogus, so 
the new one in flow_find_head_matching_sequence is as well.

There is no comment in flow_find_head_matching_sequence about the purpose of 
the EH edges checking code.

The "Don't begin a cross-jump with a NOTE insn" block is almost identical to 
that of flow_find_cross_jump, please factor them out.

+	      /* We won't pass the insns in the head sequence to
+		 cond_exec_process_insns, so we need to test them here
+		 to make sure that they don't clobber the condition.  */
+	      insn = BB_HEAD (then_bb);
+	      for (;;)
+		{
+		  if (!LABEL_P (insn) && !NOTE_P (insn)
+		      && !DEBUG_INSN_P (insn))
+		    {
+		      if (modified_in_p (test_expr, insn))
+			return FALSE;
+		    }
+		  if (insn == then_last_head)
+		    break;
+		  insn = NEXT_INSN (insn);
+		}

for (insn = BB_HEAD (then_bb);
     insn != NEXT_INSN (then_last_head);
     insn = NEXT_INSN (insn))
  if (!LABEL_P (insn) && !NOTE_P (insn) && !DEBUG_INSN_P (insn)
      && modified_in_p (test_expr, insn))
    return FALSE;

Why return FALSE rather than only cancelling the sequence merging?

+  /* Merge the blocks!  If we had matching sequences, make sure to delete one
+     copy at the appropriate location first.  */

I'd expand on the last sentence: "at the appropriate location first: delete 
the copy in the THEN branch for a tail sequence so that the remaining one is
executed last for both branches, and delete the copy in the ELSE branch for
a head sequence so that the remaining one is executed first for both branches"

There are trailing spaces and tabs on some lines in the ifcvt.c hunk.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-06  9:21   ` Bernd Schmidt
  2010-04-10 10:37     ` Eric Botcazou
@ 2010-04-12 20:43     ` Jim Wilson
  1 sibling, 0 replies; 95+ messages in thread
From: Jim Wilson @ 2010-04-12 20:43 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Steven Bosscher, GCC Patches

On Tue, 2010-04-06 at 11:22 +0100, Bernd Schmidt wrote:
>   * Examine the testcase gcc.c-torture/compile/920625-1.c: it has
>     workarounds for an ia64 assembler bug related to predication.  The
>     same issue now also occurs in a libstdc++ testcase and two
>     libgfortran ones.  Jim - any ideas what to do about this?

Sorry, I didn't notice that there was a question for me in here.

There is a known problem that the assembler can't handle some complex
uses of predicate registers correctly.  However, fixing the assembler to
handle all known broken cases will require a complete rewrite of the
dependency violation code which is probably about a man-month of work,
and hence unlikely to happen.  This issue looks a bit simpler, and may
not require a complete rewrite, but this would take some research.

Mostly this has been OK because ifcvt wasn't smart enough to emit the
cases that the assembler couldn't handle.  If ifcvt is getting smarter,
and triggering the assembler bug more often, then we will have to do
something about this.

Meanwhile, there is a way to work around this in the compiler.  The
compiler can emit directives to tell the assembler about predicate
register relationships.  There is already some code for this in the
emit_predicate_relation_info function in ia64.c which is called during
md reorg.  This handles a couple of cases at the moment.

It looks like emit_predicate_relation_info needs to handle one more
case.  If we change
       (p17) cmp.geu p6, p7 = r42, r51
       (p16) cmp.gtu p6, p7 = r42, r51
to
       (p17) cmp.geu p6, p7 = r42, r51
       (p16) cmp.gtu p6, p7 = r42, r51
       .pred.rel.mutex p6, p7
Then the assembler will stop warning about the code.

It looks like the assembler is failing to handle this after a call insn,
because p6/p7 are call-clobbered.  p16/p17 are call saved, which in
theory should be enough, but the assembler apparently only gets this
right if it knows that p6/p7 are mutex beforehand.  In which case the
assembler can tell that each instruction individually does not destroy
the pre-existing mutex relationship and hence we still have a p6/p7
mutex at the end.  But if we don't know that p6/p7 are mutex beforehand,
then we have to keep track of a matrix of mutex relationships in order
to deduce that we end up with a p6/p7 mutex at the end, and the
assembler currently doesn't do that.  This part requires a major
rewrite.

I think all we need here is a bugzilla report against the compiler to
document that we don't handle this case correctly, and that it is
generating testsuite failures, and then when I or someone else has time
we can try to fix this problem in emit_predicate_relation_info.  We
should also remove the hack in gcc.c-torture/compile/920625-1.c when we
do this.

Jim


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-10 10:37     ` Eric Botcazou
@ 2010-04-12 23:34       ` Bernd Schmidt
  2010-04-13 21:14         ` Eric Botcazou
  2010-04-14 21:09       ` Bernd Schmidt
  1 sibling, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-12 23:34 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 1972 bytes --]

On 04/10/2010 12:35 PM, Eric Botcazou wrote:
> Could you split it up though, i.e. have a first patch to fix PR 21803/42496 
> and a second patch for the additional optimization?  This helps reghunting.

Ok.  Here's the first part.  Thanks for the review.

> Adding EH checking code for calls to old_insns_match_p is a step forward but 
> isn't sufficient when -fnon-call-exceptions is enabled (Java, Ada).

The move_across_if changes (motivated by failures) use
control_flow_insn_p which I believe takes care of the issue for that
part.  Do you feel anything else is necessary in the context of this patch?

> The ATTRIBUTE_UNUSED on 'mode' in flow_find_cross_jump is already bogus, so 
> the new one in flow_find_head_matching_sequence is as well.

Eliminated the new one.

> There is no comment in flow_find_head_matching_sequence about the purpose of 
> the EH edges checking code.

It's basically a sanity check; I'm not even sure it can trigger.  I
looked at outgoing_edges_match to find things I may need to test for;
this was one of the pieces I added.  I've added a small comment now; let me
know if you feel something else is necessary.

> The "Don't begin a cross-jump with a NOTE insn" block is almost identical to 
> that of flow_find_cross_jump, please factor them out.

Done.

[convert while into for]

Done.

> Why return FALSE rather than only cancelling the sequence merging?

Doesn't really matter as cond_exec_process_insns will then fail for the
same reason later on.

> +  /* Merge the blocks!  If we had matching sequences, make sure to delete one
> +     copy at the appropriate location first.  */
> 
> I'd expand on the last sentence: [...]

Done.

> There are trailing spaces and tabs on some lines in the ifcvt.c hunk.

Hopefully fixed.

Retested this version with arm-linux-gnueabi
(qemu-system-armv7{arch=armv7-a/thumb,thumb,}).  No new failures (one
set of tests now times out on a different multilib, compared to a clean
baseline).  Ok?


Bernd

[-- Attachment #2: ifcvt-v8.diff --]
[-- Type: text/plain, Size: 12101 bytes --]

	PR target/21803
	* ifcvt.c (cond_exec_process_if_block): Look for identical sequences
	at the start and end of the then/else blocks, and omit them from the
	conversion.
	* cfgcleanup.c (flow_find_cross_jump): No longer static.
	(flow_find_head_matching_sequence): New function.
	(old_insns_match_p): Check REG_EH_REGION notes for calls.
	* basic-block.h (flow_find_cross_jump,
	flow_find_head_matching_sequence): Declare functions.

	PR target/21803
	* gcc.target/arm/pr42496.c: New test.

Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 158199)
+++ ifcvt.c	(working copy)
@@ -385,7 +385,11 @@ cond_exec_process_if_block (ce_if_block_
   rtx false_expr;		/* test for then block insns */
   rtx true_prob_val;		/* probability of else block */
   rtx false_prob_val;		/* probability of then block */
-  int n_insns;
+  rtx then_last_head = NULL_RTX;	/* Last match at the head of THEN */
+  rtx else_last_head = NULL_RTX;	/* Last match at the head of ELSE */
+  rtx then_first_tail = NULL_RTX;	/* First match at the tail of THEN */
+  rtx else_first_tail = NULL_RTX;	/* First match at the tail of ELSE */
+  int then_n_insns, else_n_insns, n_insns;
   enum rtx_code false_code;
 
   /* If test is comprised of && or || elements, and we've failed at handling
@@ -418,15 +422,78 @@ cond_exec_process_if_block (ce_if_block_
      number of insns and see if it is small enough to convert.  */
   then_start = first_active_insn (then_bb);
   then_end = last_active_insn (then_bb, TRUE);
-  n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  then_n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  n_insns = then_n_insns;
   max = MAX_CONDITIONAL_EXECUTE;
 
   if (else_bb)
     {
+      int n_matching;
+
       max *= 2;
       else_start = first_active_insn (else_bb);
       else_end = last_active_insn (else_bb, TRUE);
-      n_insns += ce_info->num_else_insns = count_bb_insns (else_bb);
+      else_n_insns = ce_info->num_else_insns = count_bb_insns (else_bb);
+      n_insns += else_n_insns;
+
+      /* Look for matching sequences at the head and tail of the two blocks,
+	 and limit the range of insns to be converted if possible.  */
+      n_matching = flow_find_cross_jump (0, then_bb, else_bb,
+					 &then_first_tail, &else_first_tail);
+      if (then_first_tail == BB_HEAD (then_bb))
+	then_start = then_end = NULL_RTX;
+      if (else_first_tail == BB_HEAD (else_bb))
+	else_start = else_end = NULL_RTX;
+
+      if (n_matching > 0)
+	{
+	  if (then_end)
+	    then_end = prev_active_insn (then_first_tail);
+	  if (else_end)
+	    else_end = prev_active_insn (else_first_tail);
+	  n_insns -= 2 * n_matching;
+	}
+
+      if (then_start && else_start)
+	{
+	  int longest_match = MIN (then_n_insns - n_matching,
+				   else_n_insns - n_matching);
+	  n_matching
+	    = flow_find_head_matching_sequence (then_bb, else_bb,
+						&then_last_head,
+						&else_last_head,
+						longest_match);
+
+	  if (n_matching > 0)
+	    {
+	      rtx insn;
+
+	      /* We won't pass the insns in the head sequence to
+		 cond_exec_process_insns, so we need to test them here
+		 to make sure that they don't clobber the condition.  */
+	      for (insn = BB_HEAD (then_bb);
+		   insn != NEXT_INSN (then_last_head);
+		   insn = NEXT_INSN (insn))
+		if (!LABEL_P (insn) && !NOTE_P (insn)
+		    && !DEBUG_INSN_P (insn)
+		    && modified_in_p (test_expr, insn))
+		  return FALSE;
+	    }
+
+	  if (then_last_head == then_end)
+	    then_start = then_end = NULL_RTX;
+	  if (else_last_head == else_end)
+	    else_start = else_end = NULL_RTX;
+
+	  if (n_matching > 0)
+	    {
+	      if (then_start)
+		then_start = next_active_insn (then_last_head);
+	      if (else_start)
+		else_start = next_active_insn (else_last_head);
+	      n_insns -= 2 * n_matching;
+	    }
+	}
     }
 
   if (n_insns > max)
@@ -570,7 +637,21 @@ cond_exec_process_if_block (ce_if_block_
     fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
 	     n_insns, (n_insns == 1) ? " was" : "s were");
 
-  /* Merge the blocks!  */
+  /* Merge the blocks!  If we had matching sequences, make sure to delete one
+     copy at the appropriate location first: delete the copy in the THEN branch
+     for a tail sequence so that the remaining one is executed last for both
+     branches, and delete the copy in the ELSE branch for a head sequence so
+     that the remaining one is executed first for both branches.  */
+  if (then_first_tail)
+    {
+      rtx from = then_first_tail;
+      if (!INSN_P (from))
+	from = next_active_insn (from);
+      delete_insn_chain (from, BB_END (then_bb), false);
+    }
+  if (else_last_head)
+    delete_insn_chain (first_active_insn (else_bb), else_last_head, false);
+
   merge_if_block (ce_info);
   cond_exec_changed_p = TRUE;
   return TRUE;
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 158199)
+++ cfgcleanup.c	(working copy)
@@ -68,7 +68,6 @@ static bool crossjumps_occured;
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
-static int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
 static bool old_insns_match_p (int, rtx, rtx);
 
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
@@ -972,13 +971,27 @@ old_insns_match_p (int mode ATTRIBUTE_UN
      be filled that clobbers a parameter expected by the subroutine.
 
      ??? We take the simple route for now and assume that if they're
-     equal, they were constructed identically.  */
+     equal, they were constructed identically.
 
-  if (CALL_P (i1)
-      && (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
+     Also check for identical exception regions.  */
+
+  if (CALL_P (i1))
+    {
+      /* Ensure the same EH region.  */
+      rtx n1 = find_reg_note (i1, REG_EH_REGION, 0);
+      rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
+
+      if (!n1 && n2)
+	return false;
+
+      if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
+	return false;
+
+      if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
 			CALL_INSN_FUNCTION_USAGE (i2))
-	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2)))
-    return false;
+	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
+	return false;
+    }
 
 #ifdef STACK_REGS
   /* If cross_jump_death_matters is not 0, the insn's mode
@@ -1017,6 +1030,28 @@ old_insns_match_p (int mode ATTRIBUTE_UN
   return false;
 }
 \f
+/* When comparing two insns in flow_find_cross_jump or
+   flow_find_head_matching_sequence, esure the notes match.  */
+static void
+merge_notes (rtx i1, rtx i2)
+{
+  /* If the merged insns have different REG_EQUAL notes, then
+     remove them.  */
+  rtx equiv1 = find_reg_equal_equiv_note (i1);
+  rtx equiv2 = find_reg_equal_equiv_note (i2);
+
+  if (equiv1 && !equiv2)
+    remove_note (i1, equiv1);
+  else if (!equiv1 && equiv2)
+    remove_note (i2, equiv2);
+  else if (equiv1 && equiv2
+	   && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
+    {
+      remove_note (i1, equiv1);
+      remove_note (i2, equiv2);
+    }
+}
+
 /* Look through the insns at the end of BB1 and BB2 and find the longest
    sequence that are equivalent.  Store the first insns for that sequence
    in *F1 and *F2 and return the sequence length.
@@ -1024,7 +1059,7 @@ old_insns_match_p (int mode ATTRIBUTE_UN
    To simplify callers of this function, if the blocks match exactly,
    store the head of the blocks in *F1 and *F2.  */
 
-static int
+int
 flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
 		      basic_block bb2, rtx *f1, rtx *f2)
 {
@@ -1074,21 +1109,7 @@ flow_find_cross_jump (int mode ATTRIBUTE
       /* Don't begin a cross-jump with a NOTE insn.  */
       if (INSN_P (i1))
 	{
-	  /* If the merged insns have different REG_EQUAL notes, then
-	     remove them.  */
-	  rtx equiv1 = find_reg_equal_equiv_note (i1);
-	  rtx equiv2 = find_reg_equal_equiv_note (i2);
-
-	  if (equiv1 && !equiv2)
-	    remove_note (i1, equiv1);
-	  else if (!equiv1 && equiv2)
-	    remove_note (i2, equiv2);
-	  else if (equiv1 && equiv2
-		   && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
-	    {
-	      remove_note (i1, equiv1);
-	      remove_note (i2, equiv2);
-	    }
+	  merge_notes (i1, i2);
 
 	  afterlast1 = last1, afterlast2 = last2;
 	  last1 = i1, last2 = i2;
@@ -1130,6 +1151,95 @@ flow_find_cross_jump (int mode ATTRIBUTE
   return ninsns;
 }
 
+/* Like flow_find_cross_jump, except start looking for a matching sequence from
+   the head of the two blocks.  Do not include jumps at the end.
+   If STOP_AFTER is nonzero, stop after finding that many matching
+   instructions.  */
+
+int
+flow_find_head_matching_sequence (basic_block bb1, basic_block bb2, rtx *f1,
+				  rtx *f2, int stop_after)
+{
+  rtx i1, i2, last1, last2, beforelast1, beforelast2;
+  int ninsns = 0;
+  edge e;
+  edge_iterator ei;
+  int nehedges1 = 0, nehedges2 = 0;
+
+  FOR_EACH_EDGE (e, ei, bb1->succs)
+    if (e->flags & EDGE_EH)
+      nehedges1++;
+  FOR_EACH_EDGE (e, ei, bb2->succs)
+    if (e->flags & EDGE_EH)
+      nehedges2++;
+
+  i1 = BB_HEAD (bb1);
+  i2 = BB_HEAD (bb2);
+  last1 = beforelast1 = last2 = beforelast2 = NULL_RTX;
+
+  while (true)
+    {
+
+      /* Ignore notes.  */
+      while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
+	i1 = NEXT_INSN (i1);
+
+      while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
+	i2 = NEXT_INSN (i2);
+
+      if (NOTE_P (i1) || NOTE_P (i2)
+	  || JUMP_P (i1) || JUMP_P (i2))
+	break;
+
+      /* A sanity check to make sure we're not merging insns with different
+	 effects on EH.  */
+      if ((i1 == BB_END (bb1) && i2 != BB_END (bb2)
+	   && nehedges1 > 0)
+	  || (i2 == BB_END (bb2) && i1 != BB_END (bb1)
+	      && nehedges2 > 0)
+	  || (i1 == BB_END (bb1) && i2 == BB_END (bb2)
+	      && nehedges1 != nehedges2))
+	break;
+
+      if (!old_insns_match_p (0, i1, i2))
+	break;
+
+      merge_memattrs (i1, i2);
+
+      /* Don't begin a cross-jump with a NOTE insn.  */
+      if (INSN_P (i1))
+	{
+	  merge_notes (i1, i2);
+
+	  beforelast1 = last1, beforelast2 = last2;
+	  last1 = i1, last2 = i2;
+	  ninsns++;
+	}
+
+      if (i1 == BB_END (bb1) || i2 == BB_END (bb2)
+	  || (stop_after > 0 && ninsns == stop_after))
+	break;
+
+      i1 = NEXT_INSN (i1);
+      i2 = NEXT_INSN (i2);
+    }
+
+#ifdef HAVE_cc0
+  /* Don't allow a compare to be shared by cross-jumping unless the insn
+     after the compare is also shared.  */
+  if (ninsns && reg_mentioned_p (cc0_rtx, last1) && sets_cc0_p (last1))
+    last1 = beforelast1, last2 = beforelast2, ninsns--;
+#endif
+
+  if (ninsns)
+    {
+      *f1 = last1;
+      *f2 = last2;
+    }
+
+  return ninsns;
+}
+
 /* Return true iff outgoing edges of BB1 and BB2 match, together with
    the branch instruction.  This means that if we commonize the control
    flow before end of the basic block, the semantic remains unchanged.
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 158199)
+++ basic-block.h	(working copy)
@@ -894,6 +894,10 @@ extern void rtl_make_eh_edge (sbitmap, b
 
 /* In cfgcleanup.c.  */
 extern bool cleanup_cfg (int);
+extern int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
+extern int flow_find_head_matching_sequence (basic_block, basic_block,
+					     rtx *, rtx *, int);
+
 extern bool delete_unreachable_blocks (void);
 
 extern bool mark_dfs_back_edges (void);
Index: testsuite/gcc.target/arm/pr42496.c
===================================================================
--- testsuite/gcc.target/arm/pr42496.c	(revision 0)
+++ testsuite/gcc.target/arm/pr42496.c	(revision 0)
@@ -0,0 +1,16 @@
+/* { dg-options "-O2" }  */
+
+void foo(int i)
+{
+    extern int j;
+
+    if (i) {
+         j = 10;
+    }
+    else {
+          j = 20;
+    }
+}
+
+/* { dg-final { scan-assembler-not "strne" } } */
+/* { dg-final { scan-assembler-not "streq" } } */

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-12 23:34       ` Bernd Schmidt
@ 2010-04-13 21:14         ` Eric Botcazou
  2010-04-13 21:36           ` Bernd Schmidt
  2010-04-14 20:51           ` Bernd Schmidt
  0 siblings, 2 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-04-13 21:14 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> The move_across_if changes (motivated by failures) use
> control_flow_insn_p which I believe takes care of the issue for that
> part.  Do you feel anything else is necessary in the context of this patch?

No, probably not.

> > The ATTRIBUTE_UNUSED on 'mode' in flow_find_cross_jump is already bogus,
> > so the new one in flow_find_head_matching_sequence is as well.
>
> Eliminated the new one.

Please remove the old one as well.

> > There is no comment in flow_find_head_matching_sequence about the purpose
> > of the EH edges checking code.
>
> It's basically a sanity check; I'm not even sure it can trigger.  I
> looked at outgoing_edges_match to find things I may need to test for;
> this was one of the pieces I added.  I've added a small comment now; let me
> know if you feel something else is necessary.

OK, but add one more sentence explaining what the tests are testing.

> Retested this version with arm-linux-gnueabi
> (qemu-system-armv7{arch=armv7-a/thumb,thumb,}).  No new failures (one
> set of tests now times out on a different multilib, compared to a clean
> baseline).  Ok?

OK modulo the nits above and

+/* When comparing two insns in flow_find_cross_jump or
+   flow_find_head_matching_sequence, esure the notes match.  */
+static void
+merge_notes (rtx i1, rtx i2)

"When comparing insns I1 and I2 in..., ensure the notes match" + missing blank 
line after the comment.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-13 21:14         ` Eric Botcazou
@ 2010-04-13 21:36           ` Bernd Schmidt
  2010-04-13 21:51             ` Eric Botcazou
  2010-04-14 20:51           ` Bernd Schmidt
  1 sibling, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-13 21:36 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 474 bytes --]

On 04/13/2010 11:03 PM, Eric Botcazou wrote:
>>> The ATTRIBUTE_UNUSED on 'mode' in flow_find_cross_jump is already bogus,
>>> so the new one in flow_find_head_matching_sequence is as well.
>>
>> Eliminated the new one.
> 
> Please remove the old one as well.

Hmm, ok - from everything in this file?  Looks like reg-stack runs
really late nowadays; I'm guessing if CLEANUP_POST_REGSTACK was still
necessary we'd have noticed by now?

IOW, how about the patch below?


Bernd

[-- Attachment #2: cc.diff --]
[-- Type: text/plain, Size: 7712 bytes --]

	* cfgcleanup.c (try_crossjump_to_edge): Remove useless declaration.  Remove
	argument MODE.  All callers changed.
	(try_crossjump_bb): Likewise.
	(outgoing_edges_match): Likewise.
	(flow_find_cross_jump): Likewise.
	(old_insns_match_p): Likewise.  Remove unused STACK_REGS block.
	* basic-block.h (CLEANUP_POST_REGSTACK): Remove.
	(CLEANUP_THREADING, CLEANUP_NO_INSN_DEL, CLEANUP_CFGLAYOUT): Renumber.

Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 158134)
+++ cfgcleanup.c	(working copy)
@@ -65,12 +65,6 @@ static bool first_pass;
 /* Set to true if crossjumps occured in the latest run of try_optimize_cfg.  */
 static bool crossjumps_occured;
 
-static bool try_crossjump_to_edge (int, edge, edge);
-static bool try_crossjump_bb (int, basic_block);
-static bool outgoing_edges_match (int, basic_block, basic_block);
-static int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
-static bool old_insns_match_p (int, rtx, rtx);
-
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
 static void merge_blocks_move_successor_nojumps (basic_block, basic_block);
 static bool try_optimize_cfg (int);
@@ -945,7 +939,7 @@ merge_memattrs (rtx x, rtx y)
 /* Return true if I1 and I2 are equivalent and thus can be crossjumped.  */
 
 static bool
-old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx i1, rtx i2)
+old_insns_match_p (rtx i1, rtx i2)
 {
   rtx p1, p2;
 
@@ -980,36 +974,6 @@ old_insns_match_p (int mode ATTRIBUTE_UN
 	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2)))
     return false;
 
-#ifdef STACK_REGS
-  /* If cross_jump_death_matters is not 0, the insn's mode
-     indicates whether or not the insn contains any stack-like
-     regs.  */
-
-  if ((mode & CLEANUP_POST_REGSTACK) && stack_regs_mentioned (i1))
-    {
-      /* If register stack conversion has already been done, then
-	 death notes must also be compared before it is certain that
-	 the two instruction streams match.  */
-
-      rtx note;
-      HARD_REG_SET i1_regset, i2_regset;
-
-      CLEAR_HARD_REG_SET (i1_regset);
-      CLEAR_HARD_REG_SET (i2_regset);
-
-      for (note = REG_NOTES (i1); note; note = XEXP (note, 1))
-	if (REG_NOTE_KIND (note) == REG_DEAD && STACK_REG_P (XEXP (note, 0)))
-	  SET_HARD_REG_BIT (i1_regset, REGNO (XEXP (note, 0)));
-
-      for (note = REG_NOTES (i2); note; note = XEXP (note, 1))
-	if (REG_NOTE_KIND (note) == REG_DEAD && STACK_REG_P (XEXP (note, 0)))
-	  SET_HARD_REG_BIT (i2_regset, REGNO (XEXP (note, 0)));
-
-      if (!hard_reg_set_equal_p (i1_regset, i2_regset))
-	return false;
-    }
-#endif
-
   if (reload_completed
       ? rtx_renumbered_equal_p (p1, p2) : rtx_equal_p (p1, p2))
     return true;
@@ -1025,8 +989,7 @@ old_insns_match_p (int mode ATTRIBUTE_UN
    store the head of the blocks in *F1 and *F2.  */
 
 static int
-flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
-		      basic_block bb2, rtx *f1, rtx *f2)
+flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2)
 {
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
@@ -1066,7 +1029,7 @@ flow_find_cross_jump (int mode ATTRIBUTE
       if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
 	break;
 
-      if (!old_insns_match_p (mode, i1, i2))
+      if (!old_insns_match_p (i1, i2))
 	break;
 
       merge_memattrs (i1, i2);
@@ -1137,7 +1100,7 @@ flow_find_cross_jump (int mode ATTRIBUTE
    We may assume that there exists one edge with a common destination.  */
 
 static bool
-outgoing_edges_match (int mode, basic_block bb1, basic_block bb2)
+outgoing_edges_match (basic_block bb1, basic_block bb2)
 {
   int nehedges1 = 0, nehedges2 = 0;
   edge fallthru1 = 0, fallthru2 = 0;
@@ -1320,7 +1283,7 @@ outgoing_edges_match (int mode, basic_bl
 		  rr.update_label_nuses = false;
 		  for_each_rtx (&BB_END (bb1), replace_label, &rr);
 
-		  match = old_insns_match_p (mode, BB_END (bb1), BB_END (bb2));
+		  match = old_insns_match_p (BB_END (bb1), BB_END (bb2));
 		  if (dump_file && match)
 		    fprintf (dump_file,
 			     "Tablejumps in bb %i and %i match.\n",
@@ -1342,7 +1305,7 @@ outgoing_edges_match (int mode, basic_bl
 
   /* First ensure that the instructions match.  There may be many outgoing
      edges so this test is generally cheaper.  */
-  if (!old_insns_match_p (mode, BB_END (bb1), BB_END (bb2)))
+  if (!old_insns_match_p (BB_END (bb1), BB_END (bb2)))
     return false;
 
   /* Search the outgoing edges, ensure that the counts do match, find possible
@@ -1438,7 +1401,7 @@ block_has_preserve_label (basic_block bb
    (maybe the middle of) E1->SRC to (maybe the middle of) E2->SRC.  */
 
 static bool
-try_crossjump_to_edge (int mode, edge e1, edge e2)
+try_crossjump_to_edge (edge e1, edge e2)
 {
   int nmatch;
   basic_block src1 = e1->src, src2 = e2->src;
@@ -1494,11 +1457,11 @@ try_crossjump_to_edge (int mode, edge e1
     return false;
 
   /* Look for the common insn sequence, part the first ...  */
-  if (!outgoing_edges_match (mode, src1, src2))
+  if (!outgoing_edges_match (src1, src2))
     return false;
 
   /* ... and part the second.  */
-  nmatch = flow_find_cross_jump (mode, src1, src2, &newpos1, &newpos2);
+  nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2);
 
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
@@ -1675,7 +1638,7 @@ try_crossjump_to_edge (int mode, edge e1
    any changes made.  */
 
 static bool
-try_crossjump_bb (int mode, basic_block bb)
+try_crossjump_bb (basic_block bb)
 {
   edge e, e2, fallthru;
   bool changed;
@@ -1748,7 +1711,7 @@ try_crossjump_bb (int mode, basic_block 
 		  && !(df_get_bb_dirty (fallthru->src))))
 	    continue;
 
-	  if (try_crossjump_to_edge (mode, e, fallthru))
+	  if (try_crossjump_to_edge (e, fallthru))
 	    {
 	      changed = true;
 	      ix = 0;
@@ -1798,7 +1761,7 @@ try_crossjump_bb (int mode, basic_block 
 		  && !(df_get_bb_dirty (e2->src))))
 	    continue;
 
-	  if (try_crossjump_to_edge (mode, e, e2))
+	  if (try_crossjump_to_edge (e, e2))
 	    {
 	      changed = true;
 	      ev2 = bb;
@@ -2019,7 +1982,7 @@ try_optimize_cfg (int mode)
 
 	      /* Look for shared code between blocks.  */
 	      if ((mode & CLEANUP_CROSSJUMP)
-		  && try_crossjump_bb (mode, b))
+		  && try_crossjump_bb (b))
 		changed_here = true;
 
 	      /* Don't get confused by the index shift caused by
@@ -2031,7 +1994,7 @@ try_optimize_cfg (int mode)
 	    }
 
 	  if ((mode & CLEANUP_CROSSJUMP)
-	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
+	      && try_crossjump_bb (EXIT_BLOCK_PTR))
 	    changed = true;
 
 #ifdef ENABLE_CHECKING
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 158134)
+++ basic-block.h	(working copy)
@@ -812,12 +812,10 @@ edge find_edge (basic_block, basic_block
 #define CLEANUP_EXPENSIVE	1	/* Do relatively expensive optimizations
 					   except for edge forwarding */
 #define CLEANUP_CROSSJUMP	2	/* Do crossjumping.  */
-#define CLEANUP_POST_REGSTACK	4	/* We run after reg-stack and need
-					   to care REG_DEAD notes.  */
-#define CLEANUP_THREADING	8	/* Do jump threading.  */
-#define CLEANUP_NO_INSN_DEL	16	/* Do not try to delete trivially dead
+#define CLEANUP_THREADING	4	/* Do jump threading.  */
+#define CLEANUP_NO_INSN_DEL	8	/* Do not try to delete trivially dead
 					   insns.  */
-#define CLEANUP_CFGLAYOUT	32	/* Do cleanup in cfglayout mode.  */
+#define CLEANUP_CFGLAYOUT	16	/* Do cleanup in cfglayout mode.  */
 
 /* In lcm.c */
 extern struct edge_list *pre_edge_lcm (int, sbitmap *, sbitmap *,

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-13 21:36           ` Bernd Schmidt
@ 2010-04-13 21:51             ` Eric Botcazou
  0 siblings, 0 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-04-13 21:51 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> Hmm, ok - from everything in this file?

No, just that of flow_find_cross_jump.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-13 21:14         ` Eric Botcazou
  2010-04-13 21:36           ` Bernd Schmidt
@ 2010-04-14 20:51           ` Bernd Schmidt
  1 sibling, 0 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-14 20:51 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 454 bytes --]

On 04/13/2010 11:03 PM, Eric Botcazou wrote:
> OK modulo the nits above and
> 
> +/* When comparing two insns in flow_find_cross_jump or
> +   flow_find_head_matching_sequence, esure the notes match.  */
> +static void
> +merge_notes (rtx i1, rtx i2)
> 
> "When comparing insns I1 and I2 in..., ensure the notes match" + missing blank 
> line after the comment.

Thanks.  Here's what I committed after another bootstrap/test cycle on
i686-linux.


Bernd

[-- Attachment #2: ifcvt-v9.diff --]
[-- Type: text/plain, Size: 13864 bytes --]

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 158344)
+++ ChangeLog	(working copy)
@@ -1,3 +1,16 @@
+2010-04-14  Bernd Schmidt  <bernd.schmidt@codesourcery.com>
+	
+	PR target/21803
+	* ifcvt.c (cond_exec_process_if_block): Look for identical sequences
+	at the start and end of the then/else blocks, and omit them from the
+	conversion.
+	* cfgcleanup.c (flow_find_cross_jump): No longer static.  Remove MODE
+	argument; all callers changed.  Pass zero to old_insns_match_p instead.
+	(flow_find_head_matching_sequence): New function.
+	(old_insns_match_p): Check REG_EH_REGION notes for calls.
+	* basic-block.h (flow_find_cross_jump,
+	flow_find_head_matching_sequence): Declare functions.
+
 2010-04-14  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.md (*popcountsi2_cmp_zext): Remove mode attribute
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 158344)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2010-04-14  Bernd Schmidt  <bernd.schmidt@codesourcery.com>
+
+	PR target/21803
+	* gcc.target/arm/pr42496.c: New test.
+
 2010-04-14  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* gnat.dg/class_wide.adb: Rename into...
Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 158344)
+++ ifcvt.c	(working copy)
@@ -385,7 +385,11 @@ cond_exec_process_if_block (ce_if_block_
   rtx false_expr;		/* test for then block insns */
   rtx true_prob_val;		/* probability of else block */
   rtx false_prob_val;		/* probability of then block */
-  int n_insns;
+  rtx then_last_head = NULL_RTX;	/* Last match at the head of THEN */
+  rtx else_last_head = NULL_RTX;	/* Last match at the head of ELSE */
+  rtx then_first_tail = NULL_RTX;	/* First match at the tail of THEN */
+  rtx else_first_tail = NULL_RTX;	/* First match at the tail of ELSE */
+  int then_n_insns, else_n_insns, n_insns;
   enum rtx_code false_code;
 
   /* If test is comprised of && or || elements, and we've failed at handling
@@ -418,15 +422,78 @@ cond_exec_process_if_block (ce_if_block_
      number of insns and see if it is small enough to convert.  */
   then_start = first_active_insn (then_bb);
   then_end = last_active_insn (then_bb, TRUE);
-  n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  then_n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+  n_insns = then_n_insns;
   max = MAX_CONDITIONAL_EXECUTE;
 
   if (else_bb)
     {
+      int n_matching;
+
       max *= 2;
       else_start = first_active_insn (else_bb);
       else_end = last_active_insn (else_bb, TRUE);
-      n_insns += ce_info->num_else_insns = count_bb_insns (else_bb);
+      else_n_insns = ce_info->num_else_insns = count_bb_insns (else_bb);
+      n_insns += else_n_insns;
+
+      /* Look for matching sequences at the head and tail of the two blocks,
+	 and limit the range of insns to be converted if possible.  */
+      n_matching = flow_find_cross_jump (then_bb, else_bb,
+					 &then_first_tail, &else_first_tail);
+      if (then_first_tail == BB_HEAD (then_bb))
+	then_start = then_end = NULL_RTX;
+      if (else_first_tail == BB_HEAD (else_bb))
+	else_start = else_end = NULL_RTX;
+
+      if (n_matching > 0)
+	{
+	  if (then_end)
+	    then_end = prev_active_insn (then_first_tail);
+	  if (else_end)
+	    else_end = prev_active_insn (else_first_tail);
+	  n_insns -= 2 * n_matching;
+	}
+
+      if (then_start && else_start)
+	{
+	  int longest_match = MIN (then_n_insns - n_matching,
+				   else_n_insns - n_matching);
+	  n_matching
+	    = flow_find_head_matching_sequence (then_bb, else_bb,
+						&then_last_head,
+						&else_last_head,
+						longest_match);
+
+	  if (n_matching > 0)
+	    {
+	      rtx insn;
+
+	      /* We won't pass the insns in the head sequence to
+		 cond_exec_process_insns, so we need to test them here
+		 to make sure that they don't clobber the condition.  */
+	      for (insn = BB_HEAD (then_bb);
+		   insn != NEXT_INSN (then_last_head);
+		   insn = NEXT_INSN (insn))
+		if (!LABEL_P (insn) && !NOTE_P (insn)
+		    && !DEBUG_INSN_P (insn)
+		    && modified_in_p (test_expr, insn))
+		  return FALSE;
+	    }
+
+	  if (then_last_head == then_end)
+	    then_start = then_end = NULL_RTX;
+	  if (else_last_head == else_end)
+	    else_start = else_end = NULL_RTX;
+
+	  if (n_matching > 0)
+	    {
+	      if (then_start)
+		then_start = next_active_insn (then_last_head);
+	      if (else_start)
+		else_start = next_active_insn (else_last_head);
+	      n_insns -= 2 * n_matching;
+	    }
+	}
     }
 
   if (n_insns > max)
@@ -570,7 +637,21 @@ cond_exec_process_if_block (ce_if_block_
     fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
 	     n_insns, (n_insns == 1) ? " was" : "s were");
 
-  /* Merge the blocks!  */
+  /* Merge the blocks!  If we had matching sequences, make sure to delete one
+     copy at the appropriate location first: delete the copy in the THEN branch
+     for a tail sequence so that the remaining one is executed last for both
+     branches, and delete the copy in the ELSE branch for a head sequence so
+     that the remaining one is executed first for both branches.  */
+  if (then_first_tail)
+    {
+      rtx from = then_first_tail;
+      if (!INSN_P (from))
+	from = next_active_insn (from);
+      delete_insn_chain (from, BB_END (then_bb), false);
+    }
+  if (else_last_head)
+    delete_insn_chain (first_active_insn (else_bb), else_last_head, false);
+
   merge_if_block (ce_info);
   cond_exec_changed_p = TRUE;
   return TRUE;
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 158344)
+++ basic-block.h	(working copy)
@@ -894,6 +894,10 @@ extern void rtl_make_eh_edge (sbitmap, b
 
 /* In cfgcleanup.c.  */
 extern bool cleanup_cfg (int);
+extern int flow_find_cross_jump (basic_block, basic_block, rtx *, rtx *);
+extern int flow_find_head_matching_sequence (basic_block, basic_block,
+					     rtx *, rtx *, int);
+
 extern bool delete_unreachable_blocks (void);
 
 extern bool mark_dfs_back_edges (void);
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 158344)
+++ cfgcleanup.c	(working copy)
@@ -68,7 +68,6 @@ static bool crossjumps_occured;
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
-static int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *);
 static bool old_insns_match_p (int, rtx, rtx);
 
 static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block);
@@ -972,13 +971,27 @@ old_insns_match_p (int mode ATTRIBUTE_UN
      be filled that clobbers a parameter expected by the subroutine.
 
      ??? We take the simple route for now and assume that if they're
-     equal, they were constructed identically.  */
+     equal, they were constructed identically.
 
-  if (CALL_P (i1)
-      && (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
+     Also check for identical exception regions.  */
+
+  if (CALL_P (i1))
+    {
+      /* Ensure the same EH region.  */
+      rtx n1 = find_reg_note (i1, REG_EH_REGION, 0);
+      rtx n2 = find_reg_note (i2, REG_EH_REGION, 0);
+
+      if (!n1 && n2)
+	return false;
+
+      if (n1 && (!n2 || XEXP (n1, 0) != XEXP (n2, 0)))
+	return false;
+
+      if (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
 			CALL_INSN_FUNCTION_USAGE (i2))
-	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2)))
-    return false;
+	  || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))
+	return false;
+    }
 
 #ifdef STACK_REGS
   /* If cross_jump_death_matters is not 0, the insn's mode
@@ -1017,6 +1030,29 @@ old_insns_match_p (int mode ATTRIBUTE_UN
   return false;
 }
 \f
+/* When comparing insns I1 and I2 in flow_find_cross_jump or
+   flow_find_head_matching_sequence, ensure the notes match.  */
+
+static void
+merge_notes (rtx i1, rtx i2)
+{
+  /* If the merged insns have different REG_EQUAL notes, then
+     remove them.  */
+  rtx equiv1 = find_reg_equal_equiv_note (i1);
+  rtx equiv2 = find_reg_equal_equiv_note (i2);
+
+  if (equiv1 && !equiv2)
+    remove_note (i1, equiv1);
+  else if (!equiv1 && equiv2)
+    remove_note (i2, equiv2);
+  else if (equiv1 && equiv2
+	   && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
+    {
+      remove_note (i1, equiv1);
+      remove_note (i2, equiv2);
+    }
+}
+
 /* Look through the insns at the end of BB1 and BB2 and find the longest
    sequence that are equivalent.  Store the first insns for that sequence
    in *F1 and *F2 and return the sequence length.
@@ -1024,9 +1060,8 @@ old_insns_match_p (int mode ATTRIBUTE_UN
    To simplify callers of this function, if the blocks match exactly,
    store the head of the blocks in *F1 and *F2.  */
 
-static int
-flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
-		      basic_block bb2, rtx *f1, rtx *f2)
+int
+flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx *f1, rtx *f2)
 {
   rtx i1, i2, last1, last2, afterlast1, afterlast2;
   int ninsns = 0;
@@ -1066,7 +1101,7 @@ flow_find_cross_jump (int mode ATTRIBUTE
       if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
 	break;
 
-      if (!old_insns_match_p (mode, i1, i2))
+      if (!old_insns_match_p (0, i1, i2))
 	break;
 
       merge_memattrs (i1, i2);
@@ -1074,21 +1109,7 @@ flow_find_cross_jump (int mode ATTRIBUTE
       /* Don't begin a cross-jump with a NOTE insn.  */
       if (INSN_P (i1))
 	{
-	  /* If the merged insns have different REG_EQUAL notes, then
-	     remove them.  */
-	  rtx equiv1 = find_reg_equal_equiv_note (i1);
-	  rtx equiv2 = find_reg_equal_equiv_note (i2);
-
-	  if (equiv1 && !equiv2)
-	    remove_note (i1, equiv1);
-	  else if (!equiv1 && equiv2)
-	    remove_note (i2, equiv2);
-	  else if (equiv1 && equiv2
-		   && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
-	    {
-	      remove_note (i1, equiv1);
-	      remove_note (i2, equiv2);
-	    }
+	  merge_notes (i1, i2);
 
 	  afterlast1 = last1, afterlast2 = last2;
 	  last1 = i1, last2 = i2;
@@ -1130,6 +1151,97 @@ flow_find_cross_jump (int mode ATTRIBUTE
   return ninsns;
 }
 
+/* Like flow_find_cross_jump, except start looking for a matching sequence from
+   the head of the two blocks.  Do not include jumps at the end.
+   If STOP_AFTER is nonzero, stop after finding that many matching
+   instructions.  */
+
+int
+flow_find_head_matching_sequence (basic_block bb1, basic_block bb2, rtx *f1,
+				  rtx *f2, int stop_after)
+{
+  rtx i1, i2, last1, last2, beforelast1, beforelast2;
+  int ninsns = 0;
+  edge e;
+  edge_iterator ei;
+  int nehedges1 = 0, nehedges2 = 0;
+
+  FOR_EACH_EDGE (e, ei, bb1->succs)
+    if (e->flags & EDGE_EH)
+      nehedges1++;
+  FOR_EACH_EDGE (e, ei, bb2->succs)
+    if (e->flags & EDGE_EH)
+      nehedges2++;
+
+  i1 = BB_HEAD (bb1);
+  i2 = BB_HEAD (bb2);
+  last1 = beforelast1 = last2 = beforelast2 = NULL_RTX;
+
+  while (true)
+    {
+
+      /* Ignore notes.  */
+      while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
+	i1 = NEXT_INSN (i1);
+
+      while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
+	i2 = NEXT_INSN (i2);
+
+      if (NOTE_P (i1) || NOTE_P (i2)
+	  || JUMP_P (i1) || JUMP_P (i2))
+	break;
+
+      /* A sanity check to make sure we're not merging insns with different
+	 effects on EH.  If only one of them ends a basic block, it shouldn't
+	 have an EH edge; if both end a basic block, there should be the same
+	 number of EH edges.  */
+      if ((i1 == BB_END (bb1) && i2 != BB_END (bb2)
+	   && nehedges1 > 0)
+	  || (i2 == BB_END (bb2) && i1 != BB_END (bb1)
+	      && nehedges2 > 0)
+	  || (i1 == BB_END (bb1) && i2 == BB_END (bb2)
+	      && nehedges1 != nehedges2))
+	break;
+
+      if (!old_insns_match_p (0, i1, i2))
+	break;
+
+      merge_memattrs (i1, i2);
+
+      /* Don't begin a cross-jump with a NOTE insn.  */
+      if (INSN_P (i1))
+	{
+	  merge_notes (i1, i2);
+
+	  beforelast1 = last1, beforelast2 = last2;
+	  last1 = i1, last2 = i2;
+	  ninsns++;
+	}
+
+      if (i1 == BB_END (bb1) || i2 == BB_END (bb2)
+	  || (stop_after > 0 && ninsns == stop_after))
+	break;
+
+      i1 = NEXT_INSN (i1);
+      i2 = NEXT_INSN (i2);
+    }
+
+#ifdef HAVE_cc0
+  /* Don't allow a compare to be shared by cross-jumping unless the insn
+     after the compare is also shared.  */
+  if (ninsns && reg_mentioned_p (cc0_rtx, last1) && sets_cc0_p (last1))
+    last1 = beforelast1, last2 = beforelast2, ninsns--;
+#endif
+
+  if (ninsns)
+    {
+      *f1 = last1;
+      *f2 = last2;
+    }
+
+  return ninsns;
+}
+
 /* Return true iff outgoing edges of BB1 and BB2 match, together with
    the branch instruction.  This means that if we commonize the control
    flow before end of the basic block, the semantic remains unchanged.
@@ -1498,7 +1610,7 @@ try_crossjump_to_edge (int mode, edge e1
     return false;
 
   /* ... and part the second.  */
-  nmatch = flow_find_cross_jump (mode, src1, src2, &newpos1, &newpos2);
+  nmatch = flow_find_cross_jump (src1, src2, &newpos1, &newpos2);
 
   /* Don't proceed with the crossjump unless we found a sufficient number
      of matching instructions or the 'from' block was totally matched
Index: testsuite/gcc.target/arm/pr42496.c
===================================================================
--- testsuite/gcc.target/arm/pr42496.c	(revision 0)
+++ testsuite/gcc.target/arm/pr42496.c	(revision 0)
@@ -0,0 +1,16 @@
+/* { dg-options "-O2" }  */
+
+void foo(int i)
+{
+    extern int j;
+
+    if (i) {
+         j = 10;
+    }
+    else {
+          j = 20;
+    }
+}
+
+/* { dg-final { scan-assembler-not "strne" } } */
+/* { dg-final { scan-assembler-not "streq" } } */

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-10 10:37     ` Eric Botcazou
  2010-04-12 23:34       ` Bernd Schmidt
@ 2010-04-14 21:09       ` Bernd Schmidt
  2010-04-19 22:05         ` Eric Botcazou
  1 sibling, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-14 21:09 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 1039 bytes --]

On 04/10/2010 12:35 PM, Eric Botcazou wrote:

> Could you split it up though, i.e. have a first patch to fix PR 21803/42496 
> and a second patch for the additional optimization?  This helps reghunting.

Here's the second part.  This one should help most architectures, not
just the ones with conditional execution.  I've observed it helps on
i686 and arm, with the following being a typical result:

 .L18:
        ldr     r1, [r2, #4]
        cmp     r1, #34
-       it      hi
-       ldrhi   r3, .L98+12
-       bhi     .L28
        ldr     r3, .L98+12
+       bhi     .L28
        ldrb    r2, [r3, #4]    @ zero_extendqisi2
        cbz     r2, .L29
        ldr     r3, [r3, #8]

Compared with the previous version, this one has one additional bugfix
(reject all jumps that have a PARALLEL pattern).  Successfully tested
earlier on arm-linux-gnueabi
(qemu-system-armv7{arch=armv7-a/thumb,thumb,}), also tested with a SPEC
run on Cortex-A9 within CodeSourcery's gcc-4.4; another re-bootstrap on
i686-linux now in progress.  Ok?


Bernd

[-- Attachment #2: ifcvt-across.diff --]
[-- Type: text/plain, Size: 4972 bytes --]

	* ifcvt.c (move_across_if): New function.
	(find_if_header): Call it.

Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 158357)
+++ ifcvt.c	(working copy)
@@ -2976,6 +2976,152 @@ noce_find_if_block (basic_block test_bb,
   return FALSE;
 }
 \f
+/* Try to move identical code from the THEN and ELSE blocks backwards across a
+   conditional jump into the IF block.  CE_INFO describes the structure we
+   found.  */
+
+static bool
+move_across_if (struct ce_if_block * ce_info)
+{
+  basic_block test_bb = ce_info->test_bb;
+  basic_block then_bb = ce_info->then_bb;
+  basic_block else_bb = ce_info->else_bb;
+  edge cur_edge;
+  edge_iterator ei;
+  rtx test_expr;
+  rtx then_start, else_start;
+  rtx then_last_head, else_last_head;
+  int n_matching;
+
+  if (!else_bb)
+    return false;
+
+  ce_info->last_test_bb = test_bb;
+
+  /* We only ever should get here after reload.  */
+  gcc_assert (reload_completed);
+
+  /* The THEN and ELSE blocks must have exactly one predecessor.  */
+  if (EDGE_COUNT (then_bb->preds) != 1 || EDGE_COUNT (else_bb->preds) != 1)
+    return FALSE;
+
+  if (else_bb == EXIT_BLOCK_PTR || then_bb == EXIT_BLOCK_PTR)
+    return FALSE;
+
+  /* The edges of the THEN and ELSE blocks cannot have complex edges.  */
+  FOR_EACH_EDGE (cur_edge, ei, then_bb->preds)
+    {
+      if (cur_edge->flags & EDGE_COMPLEX)
+	return FALSE;
+    }
+
+  FOR_EACH_EDGE (cur_edge, ei, else_bb->preds)
+    {
+      if (cur_edge->flags & EDGE_COMPLEX)
+	return FALSE;
+    }
+
+  num_possible_if_blocks++;
+
+  if (dump_file)
+    {
+      fprintf (dump_file,
+	       "\nIF-THEN%s block found, pass %d, start block %d "
+	       "[insn %d], then %d [%d]",
+	       (else_bb) ? "-ELSE" : "",
+	       ce_info->pass,
+	       test_bb->index,
+	       BB_HEAD (test_bb) ? (int)INSN_UID (BB_HEAD (test_bb)) : -1,
+	       then_bb->index,
+	       BB_HEAD (then_bb) ? (int)INSN_UID (BB_HEAD (then_bb)) : -1);
+
+      fprintf (dump_file, ", else %d [%d]",
+	       else_bb->index,
+	       BB_HEAD (else_bb) ? (int)INSN_UID (BB_HEAD (else_bb)) : -1);
+
+      fputc ('\n', dump_file);
+    }
+
+  /* Do the real work.  */
+
+  ce_info->else_bb = else_bb;
+  /* Find the conditional jump to the ELSE or JOIN part, and isolate
+     the test.  */
+  test_expr = cond_exec_get_condition (BB_END (test_bb));
+  if (! test_expr)
+    return FALSE;
+
+  /* We only have to avoid clobbering the expression; try to reduce it
+     to a single reg.  */
+  if ((GET_RTX_CLASS (GET_CODE (test_expr)) == RTX_COMM_COMPARE
+       || GET_RTX_CLASS (GET_CODE (test_expr)) == RTX_COMPARE)
+      && REG_P (XEXP (test_expr, 0))
+      && CONSTANT_P (XEXP (test_expr, 1)))
+    test_expr = XEXP (test_expr, 0);
+
+  /* If the conditional jump is more than just a conditional jump,
+     we cannot safely move insns across it.  */
+  if (! onlyjump_p (BB_END (test_bb))
+      || GET_CODE (PATTERN (BB_END (test_bb))) == PARALLEL)
+    return FALSE;
+
+  /* Collect the bounds of where we're to search, skipping any labels, jumps
+     and notes at the beginning and end of the block.  Then count the total
+     number of insns and see if it is small enough to convert.  */
+  then_start = first_active_insn (then_bb);
+  else_start = first_active_insn (else_bb);
+
+  if (then_start == NULL || else_start == NULL)
+    return FALSE;
+  
+  n_matching
+    = flow_find_head_matching_sequence (0, then_bb, else_bb,
+					&then_last_head,
+					&else_last_head, 0);
+      
+  if (n_matching > 0)
+    {
+      rtx then_insn, else_insn;
+      rtx last_then = NULL_RTX, last_else = NULL_RTX;
+
+      then_insn = then_start;
+      else_insn = else_start;
+      for (;;)
+	{
+	  if (modified_in_p (test_expr, then_insn))
+	    break;
+
+	  if (then_insn == BB_END (then_bb)
+	      && (find_reg_note (then_insn, REG_EH_REGION, 0)
+		  || control_flow_insn_p (then_insn)))
+	    break;
+	  
+	  last_then = then_insn;
+	  last_else = else_insn;
+
+	  if (then_insn == then_last_head)
+	    break;
+	  then_insn = next_active_insn (then_insn);
+	  else_insn = next_active_insn (else_insn);
+	}
+      if (last_then == NULL_RTX)
+	return FALSE;
+
+      df_set_bb_dirty (test_bb);
+      df_set_bb_dirty (then_bb);
+      df_set_bb_dirty (else_bb);
+      reorder_insns (then_start, last_then,
+		     PREV_INSN (BB_END (test_bb)));
+      delete_insn_chain (else_start, last_else, false);
+
+      num_true_changes++;
+      num_updated_if_blocks++;
+      return TRUE;
+    }
+
+  return FALSE;
+}
+\f
 
 /* Merge the blocks and mark for local life update.  */
 
@@ -3153,6 +3299,9 @@ find_if_header (basic_block test_bb, int
       && cond_exec_find_if_block (&ce_info))
     goto success;
 
+  if (reload_completed && move_across_if (&ce_info))
+    goto success;
+
   if (HAVE_trap
       && optab_handler (ctrap_optab, word_mode)->insn_code != CODE_FOR_nothing
       && find_cond_trap (test_bb, then_edge, else_edge))

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-14 21:09       ` Bernd Schmidt
@ 2010-04-19 22:05         ` Eric Botcazou
  2010-04-19 22:14           ` Steven Bosscher
                             ` (2 more replies)
  0 siblings, 3 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-04-19 22:05 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> Here's the second part.  This one should help most architectures, not
> just the ones with conditional execution.  I've observed it helps on
> i686 and arm, with the following being a typical result:
>
>  .L18:
>         ldr     r1, [r2, #4]
>         cmp     r1, #34
> -       it      hi
> -       ldrhi   r3, .L98+12
> -       bhi     .L28
>         ldr     r3, .L98+12
> +       bhi     .L28
>         ldrb    r2, [r3, #4]    @ zero_extendqisi2
>         cbz     r2, .L29
>         ldr     r3, [r3, #8]

I'm uncomfortable with this patch because I'm not sure it belongs in ifcvt.c.
Conceptually it's a reversed form of cross jumping so it could be implemented 
more generally in cfgcleanup.c.  And other transformations should already be 
able to apply this kind of optimization.  Do you have testcases?

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-19 22:05         ` Eric Botcazou
@ 2010-04-19 22:14           ` Steven Bosscher
  2010-04-19 22:18             ` Steven Bosscher
                               ` (2 more replies)
  2010-04-20 12:30           ` Bernd Schmidt
  2010-07-20 20:43           ` Bernd Schmidt
  2 siblings, 3 replies; 95+ messages in thread
From: Steven Bosscher @ 2010-04-19 22:14 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

On Tue, Apr 20, 2010 at 12:05 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> Here's the second part.  This one should help most architectures, not
>> just the ones with conditional execution.  I've observed it helps on
>> i686 and arm, with the following being a typical result:
>>
>>  .L18:
>>         ldr     r1, [r2, #4]
>>         cmp     r1, #34
>> -       it      hi
>> -       ldrhi   r3, .L98+12
>> -       bhi     .L28
>>         ldr     r3, .L98+12
>> +       bhi     .L28
>>         ldrb    r2, [r3, #4]    @ zero_extendqisi2
>>         cbz     r2, .L29
>>         ldr     r3, [r3, #8]
>
> I'm uncomfortable with this patch because I'm not sure it belongs in ifcvt.c.
> Conceptually it's a reversed form of cross jumping so it could be implemented
> more generally in cfgcleanup.c.

I have a patch somewhere for this "head merging" (opposite of tail
merging). It implements this in cfgcleanup.c. I intend to finish it in
this stage1 cycle. It needs a bit of TLC but it's conceptually quite
easy.

>  And other transformations should already be
> able to apply this kind of optimizations.  Do you have testcases?

I originally wrote my patch for geng

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-19 22:14           ` Steven Bosscher
@ 2010-04-19 22:18             ` Steven Bosscher
  2010-04-19 22:47               ` Steven Bosscher
  2010-04-20 10:34             ` Eric Botcazou
  2010-04-20 11:26             ` Bernd Schmidt
  2 siblings, 1 reply; 95+ messages in thread
From: Steven Bosscher @ 2010-04-19 22:18 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

 On Tue, Apr 20, 2010 at 12:05 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> Here's the second part.  This one should help most architectures, not
>> just the ones with conditional execution.  I've observed it helps on
>> i686 and arm, with the following being a typical result:
>>
>>  .L18:
>>         ldr     r1, [r2, #4]
>>         cmp     r1, #34
>> -       it      hi
>> -       ldrhi   r3, .L98+12
>> -       bhi     .L28
>>         ldr     r3, .L98+12
>> +       bhi     .L28
>>         ldrb    r2, [r3, #4]    @ zero_extendqisi2
>>         cbz     r2, .L29
>>         ldr     r3, [r3, #8]
>
> I'm uncomfortable with this patch because I'm not sure it belongs in ifcvt.c.
> Conceptually it's a reversed form of cross jumping so it could be implemented
> more generally in cfgcleanup.c.

I have a patch somewhere for this "head merging" (opposite of tail
merging). It implements this in cfgcleanup.c. I intend to finish it in
this stage1 cycle. It needs a bit of TLC but it's conceptually quite
easy.

>  And other transformations should already be
> able to apply this kind of optimizations.  Do you have testcases?

I originally wrote my patch for gtype-desc.c (in the build directory).
There are lots of cases for head-merging calls to the marker functions
there. See

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-19 22:18             ` Steven Bosscher
@ 2010-04-19 22:47               ` Steven Bosscher
  0 siblings, 0 replies; 95+ messages in thread
From: Steven Bosscher @ 2010-04-19 22:47 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

Crap, this is so not my day!

On Tue, Apr 20, 2010 at 12:14 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
>  On Tue, Apr 20, 2010 at 12:05 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>>> Here's the second part.  This one should help most architectures, not
>>> just the ones with conditional execution.  I've observed it helps on
>>> i686 and arm, with the following being a typical result:
>>>
>>>  .L18:
>>>         ldr     r1, [r2, #4]
>>>         cmp     r1, #34
>>> -       it      hi
>>> -       ldrhi   r3, .L98+12
>>> -       bhi     .L28
>>>         ldr     r3, .L98+12
>>> +       bhi     .L28
>>>         ldrb    r2, [r3, #4]    @ zero_extendqisi2
>>>         cbz     r2, .L29
>>>         ldr     r3, [r3, #8]
>>
>> I'm uncomfortable with this patch because I'm not sure it belongs in ifcvt.c.
>> Conceptually it's a reversed form of cross jumping so it could be implemented
>> more generally in cfgcleanup.c.
>
> I have a patch somewhere for this "head merging" (opposite of tail
> merging). It implements this in cfgcleanup.c. I intend to finish it in
> this stage1 cycle. It needs a bit of TLC but it's conceptually quite
> easy.
>
>>  And other transformations should already be
>> able to apply this kind of optimizations.  Do you have testcases?
>
> I originally wrote my patch for gtype-desc.c (in the build directory).
> There are lots of cases for head-merging calls to the marker functions
> there. See

As I was saying: see gt_ggc_mx_gimple_statement_d for example:

      switch (gimple_statement_structure (&((*x))))
        {
        case GSS_BASE:
          gt_ggc_m_15basic_block_def ((*x).gsbase.bb);
          gt_ggc_m_9tree_node ((*x).gsbase.block);
          break;
        case GSS_WITH_OPS:
          gt_ggc_m_15basic_block_def ((*x).gsops.opbase.gsbase.bb);
          gt_ggc_m_9tree_node ((*x).gsops.opbase.gsbase.block);
          {
            size_t i0;
            size_t l0 = (size_t)(((*x).gsops).opbase.gsbase.num_ops);
            for (i0 = 0; i0 != l0; i0++) {
              gt_ggc_m_9tree_node ((*x).gsops.op[i0]);
            }
          }
          break;
        case GSS_WITH_MEM_OPS_BASE:
          gt_ggc_m_15basic_block_def ((*x).gsmembase.opbase.gsbase.bb);
          gt_ggc_m_9tree_node ((*x).gsmembase.opbase.gsbase.block);
          break;
       etc.

In all case labels, the first two calls can be merged. My patch used
to handle cases like this back when I hacked it (early ~gcc 4.4). I
don't know if this happens in code written by humans, but IIRC there
are situations like this in Brad Lucier's Scheme interpreter, too.

Sorry for the many posts. I had my mouse pointer hovering over the
"send" button :-(

Ciao!
Steven

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-19 22:14           ` Steven Bosscher
  2010-04-19 22:18             ` Steven Bosscher
@ 2010-04-20 10:34             ` Eric Botcazou
  2010-04-20 11:26             ` Bernd Schmidt
  2 siblings, 0 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-04-20 10:34 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

> I have a patch somewhere for this "head merging" (opposite of tail
> merging). It implements this in cfgcleanup.c. I intend to finish it in
> this stage1 cycle. It needs a bit of TLC but it's conceptually quite
> easy.

OK, thanks for stepping in.  Let's wait for your patch then and decide what to 
do at that point.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-19 22:14           ` Steven Bosscher
  2010-04-19 22:18             ` Steven Bosscher
  2010-04-20 10:34             ` Eric Botcazou
@ 2010-04-20 11:26             ` Bernd Schmidt
  2010-04-23  9:25               ` Eric Botcazou
  2 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-20 11:26 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: Eric Botcazou, gcc-patches, Jim Wilson

On 04/20/2010 12:11 AM, Steven Bosscher wrote:
> I have a patch somewhere for this "head merging" (opposite of tail
> merging). It implements this in cfgcleanup.c. I intend to finish it in
> this stage1 cycle. It needs a bit of TLC but it's conceptually quite
> easy.

Would you mind posting this?


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-19 22:05         ` Eric Botcazou
  2010-04-19 22:14           ` Steven Bosscher
@ 2010-04-20 12:30           ` Bernd Schmidt
  2010-07-20 20:43           ` Bernd Schmidt
  2 siblings, 0 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-04-20 12:30 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

On 04/20/2010 12:05 AM, Eric Botcazou wrote:
>> Here's the second part.  This one should help most architectures, not
>> just the ones with conditional execution.  I've observed it helps on
>> i686 and arm, with the following being a typical result:
>>
>>  .L18:
>>         ldr     r1, [r2, #4]
>>         cmp     r1, #34
>> -       it      hi
>> -       ldrhi   r3, .L98+12
>> -       bhi     .L28
>>         ldr     r3, .L98+12
>> +       bhi     .L28
>>         ldrb    r2, [r3, #4]    @ zero_extendqisi2
>>         cbz     r2, .L29
>>         ldr     r3, [r3, #8]
> 
> I'm uncomfortable with this patch because I'm not sure it belongs in ifcvt.c.
> Conceptually it's a reversed form of cross jumping so it could be implemented 
> more generally in cfgcleanup.c.  And other transformations should already be 
> able to apply this kind of optimizations.  Do you have testcases?

When compiling a large collection of input files, it seems to trigger in
about 1/4 of them.  Here's a basic example:

int a, b, c, d;
int foo ()
{
  if (a)
    bar (b, c, d);
  else
    baz (b, c, d);
}

foo:				foo:
	pushl	%ebp			pushl	%ebp
	movl	%esp, %ebp		movl	%esp, %ebp
	subl	$24, %esp		subl	$24, %esp
	movl	a, %eax			movl	a, %eax
	testl	%eax, %eax		testl	%eax, %eax
	jne	.L6	      <
	movl	d, %eax			movl	d, %eax
	movl	%eax, 8(%esp)		movl	%eax, 8(%esp)
	movl	c, %eax			movl	c, %eax
	movl	%eax, 4(%esp)		movl	%eax, 4(%esp)
	movl	b, %eax			movl	b, %eax
	movl	%eax, (%esp)		movl	%eax, (%esp)
			      >		jne	.L6
	call	baz			call	baz
	leave				leave
	ret				ret
	.p2align 4,,7			.p2align 4,,7
	.p2align 3			.p2align 3
.L6:				.L6:
	movl	d, %eax	      |		.p2align 4,,8
	movl	%eax, 8(%esp) <
	movl	c, %eax	      <
	movl	%eax, 4(%esp) <
	movl	b, %eax	      <
	movl	%eax, (%esp)  <
	call	bar			call	bar
	leave				leave
			      >		.p2align 4,,7
	ret				ret


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-20 11:26             ` Bernd Schmidt
@ 2010-04-23  9:25               ` Eric Botcazou
  2010-04-23 11:15                 ` Steven Bosscher
  0 siblings, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-04-23  9:25 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> Would you mind posting this?

PR rtl-optimization/20070 contains a lot of material related to this (and you 
were apparently involved at some point, see comment #24).

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-23  9:25               ` Eric Botcazou
@ 2010-04-23 11:15                 ` Steven Bosscher
  2010-05-15 11:24                   ` Steven Bosscher
  0 siblings, 1 reply; 95+ messages in thread
From: Steven Bosscher @ 2010-04-23 11:15 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

On Fri, Apr 23, 2010 at 10:28 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> Would you mind posting this?
>
> PR rtl-optimization/20070 contains a lot of material related to this (and you
> were apparently involved at some point, see comment #24).

Right, and I believe the head-merging patch is on the ia64 box that I
usually perform my bootstrap&testing runs on. But I've somehow managed
to send that machine's boot image to the bits&bytes graveyard, which
is why I can't post it right now :-(

I intend to work on the PR20070 patch when I'm done with other things
I'm working on now (LTO stuff).

Ciao!
Steven

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-23 11:15                 ` Steven Bosscher
@ 2010-05-15 11:24                   ` Steven Bosscher
  2010-05-28 10:00                     ` Eric Botcazou
  0 siblings, 1 reply; 95+ messages in thread
From: Steven Bosscher @ 2010-05-15 11:24 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

On Fri, Apr 23, 2010 at 11:52 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> On Fri, Apr 23, 2010 at 10:28 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>>> Would you mind posting this?
>>
>> PR rtl-optimization/20070 contains a lot of material related to this (and you
>> were apparently involved at some point, see comment #24).
>
> Right, and I believe the head-merging patch is on the ia64 box that I
> usually perform my bootstrap&testing runs on. But I've somehow managed
> to send that machine's boot image to the bits&bytes graveyard, which
> is why I can't post it right now :-(

It looks like that machine will never come back again. It doesn't seem
right to hold up Bernd's patch for something I probably won't be able
to retrieve, and I have other plans right now than re-implementing
it...

Ciao!
Steven

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-05-15 11:24                   ` Steven Bosscher
@ 2010-05-28 10:00                     ` Eric Botcazou
  2010-05-28 11:20                       ` Steven Bosscher
  0 siblings, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-05-28 10:00 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

> It looks like that machine will never come back again. It doesn't seem
> right to hold up Bernd's patch for something I probably won't be able
> to retrieve, and I have other plans right now than re-implementing
> it...

What's the relationship with the patch you attached in comment #31 of the 
audit trail of PR rtl-optimization/20070?  There is a nice comment about an 
earlier version in comment #29.

You're still the assignee for this PR.  Do you intend to keep working on it?

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-05-28 10:00                     ` Eric Botcazou
@ 2010-05-28 11:20                       ` Steven Bosscher
  0 siblings, 0 replies; 95+ messages in thread
From: Steven Bosscher @ 2010-05-28 11:20 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Jim Wilson

On Fri, May 28, 2010 at 11:46 AM, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> It looks like that machine will never come back again. It doesn't seem
>> right to hold up Bernd's patch for something I probably won't be able
>> to retrieve, and I have other plans right now than re-implementing
>> it...
>
> What's the relationship with the patch you attached in comment #31 of the
> audit trail of PR rtl-optimization/20070?  There is a nice comment about an
> earlier version in comment #29.

It is related but not the same. The patches for PR20070 make
crossjumping work before register allocation and improve one EEMBC
benchmark by if-converting three-way branches.  Bernd's patches add
another, third feature of head-merging in ifcvt.c.  So it's related,
touches the same files, but it's three different optimizations.

> You're still the assignee for this PR.  Do you intend to keep working on it?

Right now other things are more important for me, but I will pick it
up again eventually, before the end of stage 1. I wouldn't mind if
there's a foster parent for that patch, of course.

Ciao!
Steven

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-04-19 22:05         ` Eric Botcazou
  2010-04-19 22:14           ` Steven Bosscher
  2010-04-20 12:30           ` Bernd Schmidt
@ 2010-07-20 20:43           ` Bernd Schmidt
  2010-07-22 19:47             ` Eric Botcazou
  2010-08-02 15:57             ` Jeff Law
  2 siblings, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-20 20:43 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 1341 bytes --]

On 04/20/2010 12:05 AM, Eric Botcazou wrote:
> Bernd Schmidt wrote: 
>> Here's the second part.  This one should help most architectures, not
>> just the ones with conditional execution.  I've observed it helps on
>> i686 and arm, with the following being a typical result:
>>
>>  .L18:
>>         ldr     r1, [r2, #4]
>>         cmp     r1, #34
>> -       it      hi
>> -       ldrhi   r3, .L98+12
>> -       bhi     .L28
>>         ldr     r3, .L98+12
>> +       bhi     .L28
>>         ldrb    r2, [r3, #4]    @ zero_extendqisi2
>>         cbz     r2, .L29
>>         ldr     r3, [r3, #8]
> 
> I'm uncomfortable with this patch because I'm not sure it belongs in ifcvt.c.
> Conceptually it's a reversed form of cross jumping so it could be implemented 
> more generally in cfgcleanup.c.  And other transformations should already be 
> able to apply this kind of optimizations.  Do you have testcases?

Here's a new patch.  A testcase is included; as I mentioned before this
triggers quite frequently.  This is PR44374.

I've moved and reused code from dead_or_predicable for a new function
can_move_insns_across.  The tests in dead_or_predicable were still
somewhat ad-hoc, after the patch I believe it's using the exact
necessary and sufficient conditions for moving code.

Bootstrapped and regression tested on i686-linux.  Ok?


Bernd

[-- Attachment #2: headmerge.diff --]
[-- Type: text/plain, Size: 24558 bytes --]

	PR rtl-optimization/44374
	* ifcvt.c (find_memory): Remove function.
	(dead_or_predicable): Use can_move_insns_across.
	* df.h (can_move_insns_across): Declare function.
	* cfgcleanup.c (block_was_dirty): New static variable.
	(try_head_merge_bb): New static function.
	(try_optimize_cfg): Call it.  Call df_analyze if block_was_dirty
	is set.
	* df-problems.c: Include "target.h".
	(df_simulate_find_uses): New static function.
	(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
	(find_memory, find_memory_stores): New static functions.
	(can_move_insns_across): New function.
	* Makefile.in (df-problems.o): Update dependencies.

testsuite/
	PR rtl-optimization/44374
	* gcc.target/arm/headmerge-1.c: New test.

Index: ifcvt.c
===================================================================
--- ifcvt.c.orig
+++ ifcvt.c
@@ -101,7 +101,6 @@ static int noce_find_if_block (basic_blo
 static int cond_exec_find_if_block (ce_if_block_t *);
 static int find_if_case_1 (basic_block, edge, edge);
 static int find_if_case_2 (basic_block, edge, edge);
-static int find_memory (rtx *, void *);
 static int dead_or_predicable (basic_block, basic_block, basic_block,
 			       basic_block, int);
 static void noce_emit_move_insn (rtx, rtx);
@@ -3877,15 +3876,6 @@ find_if_case_2 (basic_block test_bb, edg
   return TRUE;
 }
 
-/* A subroutine of dead_or_predicable called through for_each_rtx.
-   Return 1 if a memory is found.  */
-
-static int
-find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
-  return MEM_P (*px);
-}
-
 /* Used by the code above to perform the actual rtl transformations.
    Return TRUE if successful.
 
@@ -3987,131 +3977,32 @@ dead_or_predicable (basic_block test_bb,
       earliest = jump;
     }
 #endif
+  /* If we allocated new pseudos (e.g. in the conditional move
+     expander called from noce_emit_cmove), we must resize the
+     array first.  */
+  if (max_regno < max_reg_num ())
+    max_regno = max_reg_num ();
+
   /* Try the NCE path if the CE path did not result in any changes.  */
   if (n_validated_changes == 0)
     {
+      rtx cond;
       /* In the non-conditional execution case, we have to verify that there
 	 are no trapping operations, no calls, no references to memory, and
 	 that any registers modified are dead at the branch site.  */
 
-      rtx insn, cond, prev;
-      bitmap merge_set, merge_set_noclobber, test_live, test_set;
-      unsigned i, fail = 0;
-      bitmap_iterator bi;
-
-      /* Check for no calls or trapping operations.  */
-      for (insn = head; ; insn = NEXT_INSN (insn))
-	{
-	  if (CALL_P (insn))
-	    return FALSE;
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      if (may_trap_p (PATTERN (insn)))
-		return FALSE;
-
-	      /* ??? Even non-trapping memories such as stack frame
-		 references must be avoided.  For stores, we collect
-		 no lifetime info; for reads, we'd have to assert
-		 true_dependence false against every store in the
-		 TEST range.  */
-	      if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
-		return FALSE;
-	    }
-	  if (insn == end)
-	    break;
-	}
-
-      if (! any_condjump_p (jump))
+      if (!any_condjump_p (jump))
 	return FALSE;
 
       /* Find the extent of the conditional.  */
       cond = noce_get_condition (jump, &earliest, false);
-      if (! cond)
+      if (!cond)
 	return FALSE;
 
-      /* Collect:
-	   MERGE_SET = set of registers set in MERGE_BB
-	   MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers
-	     that are really set, not just clobbered.
-	   TEST_LIVE = set of registers live at EARLIEST
-	   TEST_SET = set of registers set between EARLIEST and the
-	     end of the block.  */
-
-      merge_set = BITMAP_ALLOC (&reg_obstack);
-      merge_set_noclobber = BITMAP_ALLOC (&reg_obstack);
-      test_live = BITMAP_ALLOC (&reg_obstack);
-      test_set = BITMAP_ALLOC (&reg_obstack);
-
-      /* ??? bb->local_set is only valid during calculate_global_regs_live,
-	 so we must recompute usage for MERGE_BB.  Not so bad, I suppose,
-         since we've already asserted that MERGE_BB is small.  */
-      /* If we allocated new pseudos (e.g. in the conditional move
-	 expander called from noce_emit_cmove), we must resize the
-	 array first.  */
-      if (max_regno < max_reg_num ())
-	max_regno = max_reg_num ();
-
-      FOR_BB_INSNS (merge_bb, insn)
-	{
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, merge_set);
-	      df_simulate_find_noclobber_defs (insn, merge_set_noclobber);
-	    }
-	}
-
-      /* For small register class machines, don't lengthen lifetimes of
-	 hard registers before reload.  */
-      if (! reload_completed
-	  && targetm.small_register_classes_for_mode_p (VOIDmode))
-	{
-          EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi)
-	    {
-	      if (i < FIRST_PSEUDO_REGISTER
-		  && ! fixed_regs[i]
-		  && ! global_regs[i])
-		fail = 1;
-	    }
-	}
-
-      /* For TEST, we're interested in a range of insns, not a whole block.
-	 Moreover, we're interested in the insns live from OTHER_BB.  */
-
-      /* The loop below takes the set of live registers
-         after JUMP, and calculates the live set before EARLIEST. */
-      bitmap_copy (test_live, df_get_live_in (other_bb));
-      df_simulate_initialize_backwards (test_bb, test_live);
-      for (insn = jump; ; insn = prev)
-	{
-	  if (INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, test_set);
-	      df_simulate_one_insn_backwards (test_bb, insn, test_live);
-	    }
-	  prev = PREV_INSN (insn);
-	  if (insn == earliest)
-	    break;
-	}
-
-      /* We can perform the transformation if
-	   MERGE_SET_NOCLOBBER & TEST_SET
-	 and
-	   MERGE_SET & TEST_LIVE)
-	 and
-	   TEST_SET & DF_LIVE_IN (merge_bb)
-	 are empty.  */
-
-      if (bitmap_intersect_p (test_set, merge_set_noclobber)
-	  || bitmap_intersect_p (test_live, merge_set)
-	  || bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
-	fail = 1;
-
-      BITMAP_FREE (merge_set_noclobber);
-      BITMAP_FREE (merge_set);
-      BITMAP_FREE (test_live);
-      BITMAP_FREE (test_set);
-
-      if (fail)
+      if (!can_move_insns_across (head, end, earliest, jump,
+				  merge_bb, df_get_live_out (merge_bb),
+				  BB_END (merge_bb),
+				  df_get_live_in (other_bb), NULL))
 	return FALSE;
     }
 
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c.orig
+++ cfgcleanup.c
@@ -66,6 +66,10 @@ static bool first_pass;
 /* Set to true if crossjumps occured in the latest run of try_optimize_cfg.  */
 static bool crossjumps_occured;
 
+/* Set to true if we couldn't run an optimization due to stale liveness
+   information; we should run df_analyze to enable more opportunities.  */
+static bool block_was_dirty;
+
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
@@ -1927,6 +1931,171 @@ try_crossjump_bb (int mode, basic_block 
   return changed;
 }
 
+/* Search the successors of BB for common insn sequences.  When found,
+   share code between them by moving it across the basic block
+   boundary.  Return true if any changes made.  */
+
+static bool
+try_head_merge_bb (basic_block bb)
+{
+  int max_match = INT_MAX;
+  edge e0;
+  bool changed;
+  unsigned ix;
+  rtx e0_last_head;
+  unsigned nedges = EDGE_COUNT (bb->succs);
+
+  /* Nothing to do if there is not at least two outgoing edges.  */
+  if (nedges < 2)
+    return false;
+
+  /* Don't crossjump if this block ends in a computed jump,
+     unless we are optimizing for size.  */
+  if (optimize_bb_for_size_p (bb)
+      && bb != EXIT_BLOCK_PTR
+      && computed_jump_p (BB_END (bb)))
+    return false;
+
+  for (ix = 0; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      basic_block other_bb = e->dest;
+      if ((e->flags & EDGE_ABNORMAL)
+	  || EDGE_COUNT (other_bb->preds) != 1)
+	return false;
+    }
+
+  e0 = EDGE_SUCC (bb, 0);
+  e0_last_head = NULL_RTX;
+  changed = false;
+  if (df_get_bb_dirty (e0->dest))
+    {
+      block_was_dirty = true;
+      return false;
+    }
+
+  for (ix = 1; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      rtx e0_last, e_last;
+      int nmatch;
+
+      nmatch = flow_find_head_matching_sequence (e0->dest, e->dest,
+						 &e0_last, &e_last, 0);
+      if (nmatch == 0)
+	return false;
+
+      if (nmatch < max_match)
+	{
+	  max_match = nmatch;
+	  e0_last_head = e0_last;
+	}
+    }
+
+  /* If we matched an entire block, we probably have to avoid moving the
+     last insn.  */
+  if (max_match > 0
+      && e0_last_head == BB_END (e0->dest)
+      && (find_reg_note (e0_last_head, REG_EH_REGION, 0)
+	  || control_flow_insn_p (e0_last_head)))
+    {
+      max_match--;
+      if (max_match == 0)
+	return false;
+      do
+	e0_last_head = prev_real_insn (e0_last_head);
+      while (DEBUG_INSN_P (e0_last_head));
+    }
+
+  if (max_match > 0)
+    {
+      bool moveall;
+      rtx jump = BB_END (bb);
+      rtx cond, move_before;
+      rtx *currptr = XNEWVEC (rtx, nedges);
+      rtx *headptr = XNEWVEC (rtx, nedges);
+
+      cond = get_condition (jump, &move_before, true, false);
+
+      if (cond == NULL_RTX)
+	move_before = jump;
+      for (ix = 0; ix < nedges; ix++)
+	{
+	  rtx head = BB_HEAD (EDGE_SUCC (bb, ix)->dest);
+	  while (!NONDEBUG_INSN_P (head))
+	    head = NEXT_INSN (head);
+	  currptr[ix] = head;
+	  headptr[ix] = head;
+	}
+
+      do
+	{
+	  rtx move_upto;
+	  moveall = can_move_insns_across (currptr[0], e0_last_head,
+					   move_before, jump, e0->dest,
+					   df_get_live_out (e0->dest),
+					   BB_END (e0->dest), NULL, &move_upto);
+	  if (!moveall && move_upto == NULL_RTX)
+	    {
+	      if (jump == move_before)
+		break;
+
+	      /* Try again, using a different insertion point.  */
+	      move_before = jump;
+	      continue;
+	    }
+
+	  changed = true;
+	  for (;;)
+	    {
+	      if (currptr[0] == move_upto)
+		break;
+	      for (ix = 0; ix < nedges; ix++)
+		{
+		  rtx curr = currptr[ix];
+		  do
+		    curr = NEXT_INSN (curr);
+		  while (!NONDEBUG_INSN_P (curr));
+		  currptr[ix] = curr;
+		}
+	    }
+
+	  reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before));
+	  df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest);
+	  df_set_bb_dirty (bb);
+	  for (ix = 1; ix < nedges; ix++)
+	    {
+	      df_set_bb_dirty (EDGE_SUCC (bb, ix)->dest);
+	      delete_insn_chain (headptr[ix], currptr[ix], false);
+	    }
+	  if (!moveall)
+	    {
+	      if (jump == move_before)
+		break;
+
+	      /* Try again, using a different insertion point.  */
+	      move_before = jump;
+	      for (ix = 0; ix < nedges; ix++)
+		{
+		  rtx curr = currptr[ix];
+		  do
+		    curr = NEXT_INSN (curr);
+		  while (!NONDEBUG_INSN_P (curr));
+		  currptr[ix] = headptr[ix] = curr;
+		}
+	    }
+	}
+      while (!moveall);
+
+      free (currptr);
+      free (headptr);
+    }
+
+  crossjumps_occured |= changed;
+
+  return changed;
+}
+
 /* Return true if BB contains just bb note, or bb note followed
    by only DEBUG_INSNs.  */
 
@@ -1972,6 +2141,7 @@ try_optimize_cfg (int mode)
 	 one predecessor, they may be combined.  */
       do
 	{
+	  block_was_dirty = false;
 	  changed = false;
 	  iterations++;
 
@@ -2170,6 +2340,13 @@ try_optimize_cfg (int mode)
 		  && try_crossjump_bb (mode, b))
 		changed_here = true;
 
+	      if ((mode & CLEANUP_CROSSJUMP)
+		  /* This can lengthen register lifetimes.  Do it only after
+		     reload.  */
+		  && reload_completed
+		  && try_head_merge_bb (b))
+		changed_here = true;
+
 	      /* Don't get confused by the index shift caused by
 		 deleting blocks.  */
 	      if (!changed_here)
@@ -2182,6 +2359,9 @@ try_optimize_cfg (int mode)
 	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
 	    changed = true;
 
+	  if (block_was_dirty)
+	    df_analyze ();
+
 #ifdef ENABLE_CHECKING
 	  if (changed)
 	    verify_flow_info ();
@@ -2366,8 +2546,7 @@ cleanup_cfg (int mode)
 	  if ((mode & CLEANUP_EXPENSIVE) && !reload_completed
 	      && !delete_trivially_dead_insns (get_insns (), max_reg_num ()))
 	    break;
-	  else if ((mode & CLEANUP_CROSSJUMP)
-		   && crossjumps_occured)
+	  if ((mode & CLEANUP_CROSSJUMP) && crossjumps_occured)
 	    run_fast_dce ();
 	}
       else
Index: df.h
===================================================================
--- df.h.orig
+++ df.h
@@ -992,7 +992,8 @@ extern void df_simulate_one_insn_backwar
 extern void df_simulate_finalize_backwards (basic_block, bitmap);
 extern void df_simulate_initialize_forwards (basic_block, bitmap);
 extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap);
-
+extern bool can_move_insns_across (rtx, rtx, rtx, rtx, basic_block, regset,
+				   rtx, regset, rtx *);
 /* Functions defined in df-scan.c.  */
 
 extern void df_scan_alloc (bitmap);
Index: df-problems.c
===================================================================
--- df-problems.c.orig
+++ df-problems.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  
 #include "basic-block.h"
 #include "sbitmap.h"
 #include "bitmap.h"
+#include "target.h"
 #include "timevar.h"
 #include "df.h"
 #include "except.h"
@@ -3804,6 +3805,27 @@ df_simulate_find_defs (rtx insn, bitmap 
     }
 }
 
+/* Find the set of uses for INSN.  This includes partial defs.  */
+
+static void
+df_simulate_find_uses (rtx insn, bitmap uses)
+{
+  df_ref *rec;
+  unsigned int uid = INSN_UID (insn);
+
+  for (rec = DF_INSN_UID_DEFS (uid); *rec; rec++)
+    {
+      df_ref def = *rec;
+      if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
+	bitmap_set_bit (uses, DF_REF_REGNO (def));
+    }
+  for (rec = DF_INSN_UID_USES (uid); *rec; rec++)
+    {
+      df_ref use = *rec;
+      bitmap_set_bit (uses, DF_REF_REGNO (use));
+    }
+}
+
 /* Find the set of real DEFs, which are not clobbers, for INSN.  */
 
 void
@@ -4031,7 +4053,272 @@ df_simulate_one_insn_forwards (basic_blo
     }
   df_simulate_fixup_sets (bb, live);
 }
+\f
+/* Used by the next two functions to encode information about the
+   memory references we found.  */
+#define MEMREF_NORMAL 1
+#define MEMREF_VOLATILE 2
+
+/* A subroutine of can_move_insns_across called through for_each_rtx.
+   Return either MEMREF_NORMAL or MEMREF_VOLATILE if a memory is found.  */
+
+static int
+find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+  rtx x = *px;
+  if (!MEM_P (x))
+    return 0;
+  return MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : MEMREF_NORMAL;
+}
+
+/* A subroutine of can_move_insns_across called through note_stores.
+   DATA points to an integer in which we set either the bit for
+   MEMREF_NORMAL or the bit for MEMREF_VOLATILE if we find a MEM
+   of either kind.  */
+
+static void
+find_memory_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
+		    void *data ATTRIBUTE_UNUSED)
+{
+  int *pflags = (int *)data;
+  if (GET_CODE (x) == SUBREG)
+    x = XEXP (x, 0);
+  /* Treat stores to SP as stores to memory, this will prevent problems
+     when there are references to the stack frame.  */
+  if (x == stack_pointer_rtx)
+    *pflags |= MEMREF_VOLATILE;
+  if (!MEM_P (x))
+    return;
+  *pflags |= MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : MEMREF_NORMAL;
+}
 
+/* Return true if it is safe to move a group of insns, described by
+   the range FROM to TO, backwards across another group of insns,
+   described by ACROSS_FROM to ACROSS_TO.  It is assumed that there
+   are no insns between ACROSS_TO and FROM, but they may be in
+   different basic blocks; MERGE_BB and ACROSS_BB say which.  The
+   caller must also pass some lifetime information; LIVE is the set of
+   live registers at a point LIVE_POINT, which may either be TO, or a
+   later insn in the same basic block, suitable for scanning backwards
+   and reaching TO.
+
+   This function may be called in one of two cases: either we try to
+   move identical instructions from all successor blocks into their
+   predecessor, or we try to move from only one successor block.  If
+   OTHER_BRANCH_LIVE is nonnull, it indicates that we're dealing with
+   the second case.  It should contain a set of registers live at the
+   end of ACROSS_TO which must not be clobbered by moving the insns.
+   In that case, we're also more careful about moving memory references
+   and trapping insns.
+
+   We return false if it is not safe to move the entire group, but it
+   may still be possible to move a subgroup.  PMOVE_UPTO, if nonnull,
+   is set to point at the last moveable insn in such a case.  */
+
+bool
+can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to,
+		       basic_block merge_bb, regset live, rtx live_point,
+		       regset other_branch_live, rtx *pmove_upto)
+{
+  rtx insn, next, max_to;
+  bitmap merge_set, merge_use, merge_live;
+  bitmap test_set, test_use;
+  unsigned i, fail = 0;
+  bitmap_iterator bi;
+  int memrefs_in_across = 0;
+  int mem_sets_in_across = 0;
+  bool trapping_insns_in_across = false;
+
+  if (pmove_upto != NULL)
+    *pmove_upto = NULL_RTX;
+
+  /* Find real bounds, ignoring debug insns.  */
+  while (!NONDEBUG_INSN_P (from) && from != to)
+    from = NEXT_INSN (from);
+  while (!NONDEBUG_INSN_P (to) && from != to)
+    to = PREV_INSN (to);
+
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  memrefs_in_across |= for_each_rtx (&PATTERN (insn), find_memory,
+					     NULL);
+	  note_stores (PATTERN (insn), find_memory_stores,
+		       &mem_sets_in_across);
+	  /* This is used just to find sets of the stack pointer.  */
+	  memrefs_in_across |= mem_sets_in_across;
+	  trapping_insns_in_across |= may_trap_p (PATTERN (insn));
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Collect:
+     MERGE_SET = set of registers set in MERGE_BB
+     MERGE_USE = set of registers used in MERGE_BB and live at its top
+     MERGE_LIVE = set of registers live at the point inside the MERGE
+     range that we've reached during scanning
+     TEST_SET = set of registers set between ACROSS_FROM and ACROSS_END.
+     TEST_USE = set of registers used between ACROSS_FROM and ACROSS_END,
+     and live before ACROSS_FROM.  */
+
+  merge_set = BITMAP_ALLOC (&reg_obstack);
+  merge_use = BITMAP_ALLOC (&reg_obstack);
+  merge_live = BITMAP_ALLOC (&reg_obstack);
+  test_set = BITMAP_ALLOC (&reg_obstack);
+  test_use = BITMAP_ALLOC (&reg_obstack);
+
+  /* Compute the set of registers set and used in the ACROSS range.  */
+  if (other_branch_live != NULL)
+    bitmap_copy (test_use, other_branch_live);
+  df_simulate_initialize_backwards (merge_bb, test_use);
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  df_simulate_find_defs (insn, test_set);
+	  df_simulate_defs (insn, test_use);
+	  df_simulate_uses (insn, test_use);
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Compute an upper bound for the amount of insns moved, by finding
+     the first insn in MERGE that sets a register in TEST_USE, or uses
+     a register in TEST_SET.  We also check for calls, trapping operations,
+     and memory references.  */
+  max_to = NULL_RTX;
+  for (insn = from; ; insn = next)
+    {
+      if (CALL_P (insn))
+	break;
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (may_trap_p (PATTERN (insn))
+	      && (trapping_insns_in_across || other_branch_live != NULL))
+	    break;
+
+	  /* We cannot move memory stores past each other, or move memory
+	     reads past stores, at least not without tracking them and
+	     calling true_dependence on every pair.
+
+	     If there is no other branch and no memory references or
+	     sets in the ACROSS range, we can move memory references
+	     freely, even volatile ones.
+
+	     Otherwise, the rules are as follows: volatile memory
+	     references and stores can't be moved at all, and any type
+	     of memory reference can't be moved if there are volatile
+	     accesses or stores in the ACROSS range.  That leaves
+	     normal reads, which can be moved, as the trapping case is
+	     dealt with elsewhere.  */
+	  if (other_branch_live != NULL || memrefs_in_across != 0)
+	    {
+	      int mem_ref_flags = 0;
+	      int mem_set_flags = 0;
+	      note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags);
+	      mem_ref_flags = for_each_rtx (&PATTERN (insn), find_memory,
+					    NULL);
+	      /* Catch sets of the stack pointer.  */
+	      mem_ref_flags |= mem_set_flags;
+
+	      if ((mem_ref_flags | mem_set_flags) & MEMREF_VOLATILE)
+		break;
+	      if ((memrefs_in_across & MEMREF_VOLATILE) && mem_ref_flags != 0)
+		break;
+	      if (mem_set_flags != 0
+		  || (mem_sets_in_across != 0 && mem_ref_flags != 0))
+		break;
+	    }
+	  df_simulate_find_uses (insn, merge_use);
+	  /* We're only interested in uses which use a value live at
+	     the top, not one previously set in this block.  */
+	  bitmap_and_compl_into (merge_use, merge_set);
+	  df_simulate_find_defs (insn, merge_set);
+	  if (bitmap_intersect_p (merge_set, test_use)
+	      || bitmap_intersect_p (merge_use, test_set))
+	    break;
+	  max_to = insn;
+	}
+      next = NEXT_INSN (insn);
+      if (insn == to)
+	break;
+    }
+  if (max_to != to)
+    fail = 1;
+
+  if (max_to == NULL_RTX || (fail && pmove_upto == NULL))
+    goto out;
+
+  /* Now, lower this upper bound by also taking into account that
+     a range of insns moved across ACROSS must not leave a register
+     live at the end that will be clobbered in ACROSS.  */
+  bitmap_copy (merge_live, live);
+  df_simulate_initialize_backwards (merge_bb, merge_live);
+  /* Scan and update life information until we reach the point we're
+     interested in.  */
+  for (insn = live_point; insn != max_to; insn = PREV_INSN (insn))
+    df_simulate_one_insn_backwards (merge_bb, insn, merge_live);
+
+  /* Now scan backwards to find a point where TEST_SET & LIVE == 0.
+     Insns in the MERGE range that set registers which are also set
+     in the ACROSS range may still be moved as long as we also move
+     later insns which use the results of the set, and make the
+     register dead again.  This is verified by the condition stated
+     above.  */
+  for (; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (!bitmap_intersect_p (test_set, merge_live))
+	    {
+	      max_to = insn;
+	      break;
+	    }
+
+	  df_simulate_one_insn_backwards (merge_bb, insn, merge_live);
+	}
+      next = PREV_INSN (insn);
+      if (insn == from)
+	{
+	  fail = 1;
+	  goto out;
+	}
+    }
+
+  if (max_to != to)
+    fail = 1;
+
+  if (pmove_upto)
+    *pmove_upto = max_to;
+
+  /* For small register class machines, don't lengthen lifetimes of
+     hard registers before reload.  */
+  if (! reload_completed
+      && targetm.small_register_classes_for_mode_p (VOIDmode))
+    {
+      EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
+	{
+	  if (i < FIRST_PSEUDO_REGISTER
+	      && ! fixed_regs[i]
+	      && ! global_regs[i])
+	    fail = 1;
+	}
+    }
+
+ out:
+  BITMAP_FREE (merge_set);
+  BITMAP_FREE (merge_use);
+  BITMAP_FREE (merge_live);
+  BITMAP_FREE (test_set);
+  BITMAP_FREE (test_use);
+
+  return !fail;
+}
 
 \f
 /*----------------------------------------------------------------------------
Index: Makefile.in
===================================================================
--- Makefile.in.orig
+++ Makefile.in
@@ -3154,7 +3154,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYST
 df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
-   $(TM_P_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
+   $(TM_P_H) $(TARGET_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
 df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
Index: testsuite/gcc.target/arm/headmerge-1.c
===================================================================
--- /dev/null
+++ testsuite/gcc.target/arm/headmerge-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "#120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+
+void t (int x, int y)
+{
+  if (y < 5)
+    foo1 (120);
+  else
+    foo2 (120);
+}

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-20 20:43           ` Bernd Schmidt
@ 2010-07-22 19:47             ` Eric Botcazou
  2010-07-22 21:09               ` Bernd Schmidt
  2010-08-02 15:57             ` Jeff Law
  1 sibling, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-07-22 19:47 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> Here's a new patch.  A testcase is included; as I mentioned before this
> triggers quite frequently.  This is PR44374.
>
> I've moved and reused code from dead_or_predicable for a new function
> can_move_insns_across.  The tests in dead_or_predicable were still
> somewhat ad-hoc, after the patch I believe it's using the exact
> necessary and sufficient conditions for moving code.

I'll look into it tomorrow.  Btw, would you mind taking a look at the audit 
trail of PR rtl-opt/44484?  TIA.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-22 19:47             ` Eric Botcazou
@ 2010-07-22 21:09               ` Bernd Schmidt
  2010-07-23 22:06                 ` Eric Botcazou
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-22 21:09 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 861 bytes --]

On 07/22/2010 09:47 PM, Eric Botcazou wrote:
>> Here's a new patch.  A testcase is included; as I mentioned before this
>> triggers quite frequently.  This is PR44374.
>>
>> I've moved and reused code from dead_or_predicable for a new function
>> can_move_insns_across.  The tests in dead_or_predicable were still
>> somewhat ad-hoc, after the patch I believe it's using the exact
>> necessary and sufficient conditions for moving code.
> 
> I'll look into it tomorrow.

Before you do that, here's a new version.  This corrects a few errors in
the register lifetime handling, and adds support for moving across two
basic blocks, which is very useful for switch statements but happens in
other cases as well.  A new testcase demonstrates the usefulness; I'm
tempted to add it to gcc.dg but I wouldn't like to try to verify that it
passes on all ports...


Bernd

[-- Attachment #2: headmerge3.diff --]
[-- Type: text/plain, Size: 29374 bytes --]

	PR rtl-optimization/44374
	* ifcvt.c (find_memory): Remove function.
	(dead_or_predicable): Use can_move_insns_across.
	* df.h (can_move_insns_across): Declare function.
	(simulate_backwards_to_point): Declare function.
	* cfgcleanup.c (block_was_dirty): New static variable.
	(try_head_merge_bb): New static function.
	(try_optimize_cfg): Call it.  Call df_analyze if block_was_dirty
	is set.
	* df-problems.c: Include "target.h".
	(df_simulate_find_uses): New static function.
	(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
	(find_memory, find_memory_stores): New static functions.
	(simulate_backwards_to_point): New function.
	(can_move_insns_across): New function.
	* Makefile.in (df-problems.o): Update dependencies.

testsuite/
	PR rtl-optimization/44374
	* gcc.target/arm/headmerge-1.c: New test.
	* gcc.target/arm/headmerge-2.c: New test.
	
Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 162372)
+++ ifcvt.c	(working copy)
@@ -101,7 +101,6 @@ static int noce_find_if_block (basic_blo
 static int cond_exec_find_if_block (ce_if_block_t *);
 static int find_if_case_1 (basic_block, edge, edge);
 static int find_if_case_2 (basic_block, edge, edge);
-static int find_memory (rtx *, void *);
 static int dead_or_predicable (basic_block, basic_block, basic_block,
 			       basic_block, int);
 static void noce_emit_move_insn (rtx, rtx);
@@ -3877,15 +3876,6 @@ find_if_case_2 (basic_block test_bb, edg
   return TRUE;
 }
 
-/* A subroutine of dead_or_predicable called through for_each_rtx.
-   Return 1 if a memory is found.  */
-
-static int
-find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
-  return MEM_P (*px);
-}
-
 /* Used by the code above to perform the actual rtl transformations.
    Return TRUE if successful.
 
@@ -3987,131 +3977,38 @@ dead_or_predicable (basic_block test_bb,
       earliest = jump;
     }
 #endif
+  /* If we allocated new pseudos (e.g. in the conditional move
+     expander called from noce_emit_cmove), we must resize the
+     array first.  */
+  if (max_regno < max_reg_num ())
+    max_regno = max_reg_num ();
+
   /* Try the NCE path if the CE path did not result in any changes.  */
   if (n_validated_changes == 0)
     {
+      rtx cond;
+      regset live;
+      bool success;
+
       /* In the non-conditional execution case, we have to verify that there
 	 are no trapping operations, no calls, no references to memory, and
 	 that any registers modified are dead at the branch site.  */
 
-      rtx insn, cond, prev;
-      bitmap merge_set, merge_set_noclobber, test_live, test_set;
-      unsigned i, fail = 0;
-      bitmap_iterator bi;
-
-      /* Check for no calls or trapping operations.  */
-      for (insn = head; ; insn = NEXT_INSN (insn))
-	{
-	  if (CALL_P (insn))
-	    return FALSE;
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      if (may_trap_p (PATTERN (insn)))
-		return FALSE;
-
-	      /* ??? Even non-trapping memories such as stack frame
-		 references must be avoided.  For stores, we collect
-		 no lifetime info; for reads, we'd have to assert
-		 true_dependence false against every store in the
-		 TEST range.  */
-	      if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
-		return FALSE;
-	    }
-	  if (insn == end)
-	    break;
-	}
-
-      if (! any_condjump_p (jump))
+      if (!any_condjump_p (jump))
 	return FALSE;
 
       /* Find the extent of the conditional.  */
       cond = noce_get_condition (jump, &earliest, false);
-      if (! cond)
+      if (!cond)
 	return FALSE;
 
-      /* Collect:
-	   MERGE_SET = set of registers set in MERGE_BB
-	   MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers
-	     that are really set, not just clobbered.
-	   TEST_LIVE = set of registers live at EARLIEST
-	   TEST_SET = set of registers set between EARLIEST and the
-	     end of the block.  */
-
-      merge_set = BITMAP_ALLOC (&reg_obstack);
-      merge_set_noclobber = BITMAP_ALLOC (&reg_obstack);
-      test_live = BITMAP_ALLOC (&reg_obstack);
-      test_set = BITMAP_ALLOC (&reg_obstack);
-
-      /* ??? bb->local_set is only valid during calculate_global_regs_live,
-	 so we must recompute usage for MERGE_BB.  Not so bad, I suppose,
-         since we've already asserted that MERGE_BB is small.  */
-      /* If we allocated new pseudos (e.g. in the conditional move
-	 expander called from noce_emit_cmove), we must resize the
-	 array first.  */
-      if (max_regno < max_reg_num ())
-	max_regno = max_reg_num ();
-
-      FOR_BB_INSNS (merge_bb, insn)
-	{
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, merge_set);
-	      df_simulate_find_noclobber_defs (insn, merge_set_noclobber);
-	    }
-	}
-
-      /* For small register class machines, don't lengthen lifetimes of
-	 hard registers before reload.  */
-      if (! reload_completed
-	  && targetm.small_register_classes_for_mode_p (VOIDmode))
-	{
-          EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi)
-	    {
-	      if (i < FIRST_PSEUDO_REGISTER
-		  && ! fixed_regs[i]
-		  && ! global_regs[i])
-		fail = 1;
-	    }
-	}
-
-      /* For TEST, we're interested in a range of insns, not a whole block.
-	 Moreover, we're interested in the insns live from OTHER_BB.  */
-
-      /* The loop below takes the set of live registers
-         after JUMP, and calculates the live set before EARLIEST. */
-      bitmap_copy (test_live, df_get_live_in (other_bb));
-      df_simulate_initialize_backwards (test_bb, test_live);
-      for (insn = jump; ; insn = prev)
-	{
-	  if (INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, test_set);
-	      df_simulate_one_insn_backwards (test_bb, insn, test_live);
-	    }
-	  prev = PREV_INSN (insn);
-	  if (insn == earliest)
-	    break;
-	}
-
-      /* We can perform the transformation if
-	   MERGE_SET_NOCLOBBER & TEST_SET
-	 and
-	   MERGE_SET & TEST_LIVE)
-	 and
-	   TEST_SET & DF_LIVE_IN (merge_bb)
-	 are empty.  */
-
-      if (bitmap_intersect_p (test_set, merge_set_noclobber)
-	  || bitmap_intersect_p (test_live, merge_set)
-	  || bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
-	fail = 1;
-
-      BITMAP_FREE (merge_set_noclobber);
-      BITMAP_FREE (merge_set);
-      BITMAP_FREE (test_live);
-      BITMAP_FREE (test_set);
-
-      if (fail)
+      live = BITMAP_ALLOC (&reg_obstack);
+      simulate_backwards_to_point (merge_bb, live, end);
+      success = can_move_insns_across (head, end, earliest, jump,
+				       merge_bb, live,
+				       df_get_live_in (other_bb), NULL);
+      BITMAP_FREE (live);
+      if (!success)
 	return FALSE;
     }
 
Index: df.h
===================================================================
--- df.h	(revision 162372)
+++ df.h	(working copy)
@@ -992,7 +992,9 @@ extern void df_simulate_one_insn_backwar
 extern void df_simulate_finalize_backwards (basic_block, bitmap);
 extern void df_simulate_initialize_forwards (basic_block, bitmap);
 extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap);
-
+extern void simulate_backwards_to_point (basic_block, regset, rtx);
+extern bool can_move_insns_across (rtx, rtx, rtx, rtx, basic_block, regset,
+				   regset, rtx *);
 /* Functions defined in df-scan.c.  */
 
 extern void df_scan_alloc (bitmap);
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 162372)
+++ cfgcleanup.c	(working copy)
@@ -66,6 +66,10 @@ static bool first_pass;
 /* Set to true if crossjumps occured in the latest run of try_optimize_cfg.  */
 static bool crossjumps_occured;
 
+/* Set to true if we couldn't run an optimization due to stale liveness
+   information; we should run df_analyze to enable more opportunities.  */
+static bool block_was_dirty;
+
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
@@ -1927,6 +1931,261 @@ try_crossjump_bb (int mode, basic_block 
   return changed;
 }
 
+/* Search the successors of BB for common insn sequences.  When found,
+   share code between them by moving it across the basic block
+   boundary.  Return true if any changes were made.  */
+
+static bool
+try_head_merge_bb (basic_block bb)
+{
+  basic_block final_dest_bb = NULL;
+  int max_match = INT_MAX;
+  edge e0;
+  rtx *headptr, *currptr;
+  bool changed, moveall;
+  unsigned ix;
+  rtx e0_last_head, cond, move_before;
+  unsigned nedges = EDGE_COUNT (bb->succs);
+  rtx jump = BB_END (bb);
+  regset live, live_union;
+
+  /* Nothing to do if there are not at least two outgoing edges.  */
+  if (nedges < 2)
+    return false;
+
+  /* Don't crossjump if this block ends in a computed jump,
+     unless we are optimizing for size.  */
+  if (optimize_bb_for_size_p (bb)
+      && bb != EXIT_BLOCK_PTR
+      && computed_jump_p (BB_END (bb)))
+    return false;
+
+  cond = get_condition (jump, &move_before, true, false);
+  if (cond == NULL_RTX)
+    move_before = jump;
+
+  for (ix = 0; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      basic_block other_bb = e->dest;
+
+      if (df_get_bb_dirty (other_bb))
+	{
+	  block_was_dirty = true;
+	  return false;
+	}
+
+      if (e->flags & EDGE_ABNORMAL)
+	return false;
+
+      /* Normally, all destination blocks must only be reachable from this
+	 block, i.e. they must have one incoming edge.
+
+	 There is one special case we can handle, that of multiple consecutive
+	 jumps where the first jumps to one of the targets of the second jump.
+	 This happens frequently in switch statements for default labels.
+	 The structure is as follows:
+	 FINAL_DEST_BB
+	 ....
+	 if (cond) jump A;
+	 fall through
+	 BB
+	 jump with targets A, B, C, D...
+	 A
+	 has two incoming edges, from FINAL_DEST_BB and BB
+
+	 In this case, we can try to move the insns through BB and into
+	 FINAL_DEST_BB.  */
+      if (EDGE_COUNT (other_bb->preds) != 1)
+	{
+	  edge incoming_edge, incoming_bb_other_edge;
+	  edge_iterator ei;
+
+	  if (final_dest_bb != NULL
+	      || EDGE_COUNT (other_bb->preds) != 2)
+	    return false;
+
+	  /* We must be able to move the insns across the whole block.  */
+	  move_before = BB_HEAD (bb);
+	  while (!NONDEBUG_INSN_P (move_before))
+	    move_before = NEXT_INSN (move_before);
+
+	  FOR_EACH_EDGE (incoming_edge, ei, bb->preds)
+	    if (incoming_edge->dest == bb)
+	      break;
+	  final_dest_bb = incoming_edge->src;
+	  if (EDGE_COUNT (final_dest_bb->succs) != 2)
+	    return false;
+	  FOR_EACH_EDGE (incoming_bb_other_edge, ei, final_dest_bb->succs)
+	    if (incoming_bb_other_edge != incoming_edge)
+	      break;
+	  if (incoming_bb_other_edge->dest != other_bb)
+	    return false;
+	}
+    }
+
+  e0 = EDGE_SUCC (bb, 0);
+  e0_last_head = NULL_RTX;
+  changed = false;
+
+  for (ix = 1; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      rtx e0_last, e_last;
+      int nmatch;
+
+      nmatch = flow_find_head_matching_sequence (e0->dest, e->dest,
+						 &e0_last, &e_last, 0);
+      if (nmatch == 0)
+	return false;
+
+      if (nmatch < max_match)
+	{
+	  max_match = nmatch;
+	  e0_last_head = e0_last;
+	}
+    }
+
+  /* If we matched an entire block, we probably have to avoid moving the
+     last insn.  */
+  if (max_match > 0
+      && e0_last_head == BB_END (e0->dest)
+      && (find_reg_note (e0_last_head, REG_EH_REGION, 0)
+	  || control_flow_insn_p (e0_last_head)))
+    {
+      max_match--;
+      if (max_match == 0)
+	return false;
+      do
+	e0_last_head = prev_real_insn (e0_last_head);
+      while (DEBUG_INSN_P (e0_last_head));
+    }
+
+  if (max_match == 0)
+    return false;
+
+  /* We must find a union of the live registers at each of the end points.  */
+  live = BITMAP_ALLOC (NULL);
+  live_union = BITMAP_ALLOC (NULL);
+
+  currptr = XNEWVEC (rtx, nedges);
+  headptr = XNEWVEC (rtx, nedges);
+
+  for (ix = 0; ix < nedges; ix++)
+    {
+      int j;
+      basic_block merge_bb = EDGE_SUCC (bb, ix)->dest;
+      rtx head = BB_HEAD (merge_bb);
+
+      while (!NONDEBUG_INSN_P (head))
+	head = NEXT_INSN (head);
+      headptr[ix] = head;
+      currptr[ix] = head;
+
+      /* Compute the end point and live information  */
+      for (j = 1; j < max_match; j++)
+	do
+	  head = NEXT_INSN (head);
+	while (!NONDEBUG_INSN_P (head));
+      simulate_backwards_to_point (merge_bb, live, head);
+      IOR_REG_SET (live_union, live);
+    }
+
+  /* If we're moving across two blocks, verify the validity of the
+     first move, then adjust the target and let the loop below deal
+     with the final move.  */
+  if (final_dest_bb != NULL)
+    {
+      rtx move_upto;
+
+      moveall = can_move_insns_across (currptr[0], e0_last_head, move_before,
+				       jump, e0->dest, live_union,
+				       NULL, &move_upto);
+      if (!moveall)
+	e0_last_head = move_upto;
+      if (e0_last_head == NULL_RTX)
+	goto out;
+
+      jump = BB_END (final_dest_bb);
+      cond = get_condition (jump, &move_before, true, false);
+      if (cond == NULL_RTX)
+	move_before = jump;
+    }
+
+  do
+    {
+      rtx move_upto;
+      moveall = can_move_insns_across (currptr[0], e0_last_head,
+				       move_before, jump, e0->dest, live_union,
+				       NULL, &move_upto);
+      if (!moveall && move_upto == NULL_RTX)
+	{
+	  if (jump == move_before)
+	    break;
+
+	  /* Try again, using a different insertion point.  */
+	  move_before = jump;
+	  continue;
+	}
+
+      if (final_dest_bb && !moveall)
+	/* We haven't checked whether a partial move would be OK for the first
+	   move, so we have to fail this case.  */
+	break;
+
+      changed = true;
+      for (;;)
+	{
+	  if (currptr[0] == move_upto)
+	    break;
+	  for (ix = 0; ix < nedges; ix++)
+	    {
+	      rtx curr = currptr[ix];
+	      do
+		curr = NEXT_INSN (curr);
+	      while (!NONDEBUG_INSN_P (curr));
+	      currptr[ix] = curr;
+	    }
+	}
+
+      reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before));
+      df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest);
+      if (final_dest_bb != NULL)
+	df_set_bb_dirty (final_dest_bb);
+      df_set_bb_dirty (bb);
+      for (ix = 1; ix < nedges; ix++)
+	{
+	  df_set_bb_dirty (EDGE_SUCC (bb, ix)->dest);
+	  delete_insn_chain (headptr[ix], currptr[ix], false);
+	}
+      if (!moveall)
+	{
+	  if (jump == move_before)
+	    break;
+
+	  /* Try again, using a different insertion point.  */
+	  move_before = jump;
+	  for (ix = 0; ix < nedges; ix++)
+	    {
+	      rtx curr = currptr[ix];
+	      do
+		curr = NEXT_INSN (curr);
+	      while (!NONDEBUG_INSN_P (curr));
+	      currptr[ix] = headptr[ix] = curr;
+	    }
+	}
+    }
+  while (!moveall);
+
+ out:
+  free (currptr);
+  free (headptr);
+
+  crossjumps_occured |= changed;
+
+  return changed;
+}
+
 /* Return true if BB contains just bb note, or bb note followed
    by only DEBUG_INSNs.  */
 
@@ -1972,6 +2231,7 @@ try_optimize_cfg (int mode)
 	 one predecessor, they may be combined.  */
       do
 	{
+	  block_was_dirty = false;
 	  changed = false;
 	  iterations++;
 
@@ -2170,6 +2430,13 @@ try_optimize_cfg (int mode)
 		  && try_crossjump_bb (mode, b))
 		changed_here = true;
 
+	      if ((mode & CLEANUP_CROSSJUMP)
+		  /* This can lengthen register lifetimes.  Do it only after
+		     reload.  */
+		  && reload_completed
+		  && try_head_merge_bb (b))
+		changed_here = true;
+
 	      /* Don't get confused by the index shift caused by
 		 deleting blocks.  */
 	      if (!changed_here)
@@ -2182,6 +2449,9 @@ try_optimize_cfg (int mode)
 	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
 	    changed = true;
 
+	  if (block_was_dirty)
+	    df_analyze ();
+
 #ifdef ENABLE_CHECKING
 	  if (changed)
 	    verify_flow_info ();
@@ -2366,8 +2636,7 @@ cleanup_cfg (int mode)
 	  if ((mode & CLEANUP_EXPENSIVE) && !reload_completed
 	      && !delete_trivially_dead_insns (get_insns (), max_reg_num ()))
 	    break;
-	  else if ((mode & CLEANUP_CROSSJUMP)
-		   && crossjumps_occured)
+	  if ((mode & CLEANUP_CROSSJUMP) && crossjumps_occured)
 	    run_fast_dce ();
 	}
       else
Index: df-problems.c
===================================================================
--- df-problems.c	(revision 162372)
+++ df-problems.c	(working copy)
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  
 #include "basic-block.h"
 #include "sbitmap.h"
 #include "bitmap.h"
+#include "target.h"
 #include "timevar.h"
 #include "df.h"
 #include "except.h"
@@ -3804,6 +3805,27 @@ df_simulate_find_defs (rtx insn, bitmap 
     }
 }
 
+/* Find the set of uses for INSN.  This includes partial defs.  */
+
+static void
+df_simulate_find_uses (rtx insn, bitmap uses)
+{
+  df_ref *rec;
+  unsigned int uid = INSN_UID (insn);
+
+  for (rec = DF_INSN_UID_DEFS (uid); *rec; rec++)
+    {
+      df_ref def = *rec;
+      if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
+	bitmap_set_bit (uses, DF_REF_REGNO (def));
+    }
+  for (rec = DF_INSN_UID_USES (uid); *rec; rec++)
+    {
+      df_ref use = *rec;
+      bitmap_set_bit (uses, DF_REF_REGNO (use));
+    }
+}
+
 /* Find the set of real DEFs, which are not clobbers, for INSN.  */
 
 void
@@ -4031,7 +4053,297 @@ df_simulate_one_insn_forwards (basic_blo
     }
   df_simulate_fixup_sets (bb, live);
 }
+\f
+/* Used by the next two functions to encode information about the
+   memory references we found.  */
+#define MEMREF_NORMAL 1
+#define MEMREF_VOLATILE 2
+
+/* A subroutine of can_move_insns_across called through for_each_rtx.
+   Return either MEMREF_NORMAL or MEMREF_VOLATILE if a memory is found.  */
+
+static int
+find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+  rtx x = *px;
+  if (!MEM_P (x))
+    return 0;
+  if (MEM_VOLATILE_P (x))
+    return MEMREF_VOLATILE;
+  if (MEM_READONLY_P (x))
+    return 0;
+
+  return MEMREF_NORMAL;
+}
+
+/* A subroutine of can_move_insns_across called through note_stores.
+   DATA points to an integer in which we set either the bit for
+   MEMREF_NORMAL or the bit for MEMREF_VOLATILE if we find a MEM
+   of either kind.  */
+
+static void
+find_memory_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
+		    void *data ATTRIBUTE_UNUSED)
+{
+  int *pflags = (int *)data;
+  if (GET_CODE (x) == SUBREG)
+    x = XEXP (x, 0);
+  /* Treat stores to SP as stores to memory, this will prevent problems
+     when there are references to the stack frame.  */
+  if (x == stack_pointer_rtx)
+    *pflags |= MEMREF_VOLATILE;
+  if (!MEM_P (x))
+    return;
+  *pflags |= MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : MEMREF_NORMAL;
+}
+
+/* Scan BB backwards, using df_simulate functions to keep track of
+   lifetimes, up to insn POINT.  The result is stored in LIVE.  */
+
+void
+simulate_backwards_to_point (basic_block bb, regset live, rtx point)
+{
+  rtx insn;
+  bitmap_copy (live, df_get_live_out (bb));
+  df_simulate_initialize_backwards (bb, live);
+
+  /* Scan and update life information until we reach the point we're
+     interested in.  */
+  for (insn = BB_END (bb); insn != point; insn = PREV_INSN (insn))
+    df_simulate_one_insn_backwards (bb, insn, live);
+}
+
+/* Return true if it is safe to move a group of insns, described by
+   the range FROM to TO, backwards across another group of insns,
+   described by ACROSS_FROM to ACROSS_TO.  It is assumed that there
+   are no insns between ACROSS_TO and FROM, but they may be in
+   different basic blocks; MERGE_BB is the block from which the
+   insns will be moved.  The caller must pass in a regset MERGE_LIVE
+   which specifies the registers live after TO.
+
+   This function may be called in one of two cases: either we try to
+   move identical instructions from all successor blocks into their
+   predecessor, or we try to move from only one successor block.  If
+   OTHER_BRANCH_LIVE is nonnull, it indicates that we're dealing with
+   the second case.  It should contain a set of registers live at the
+   end of ACROSS_TO which must not be clobbered by moving the insns.
+   In that case, we're also more careful about moving memory references
+   and trapping insns.
+
+   We return false if it is not safe to move the entire group, but it
+   may still be possible to move a subgroup.  PMOVE_UPTO, if nonnull,
+   is set to point at the last moveable insn in such a case.  */
+
+bool
+can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to,
+		       basic_block merge_bb, regset merge_live,
+		       regset other_branch_live, rtx *pmove_upto)
+{
+  rtx insn, next, max_to;
+  bitmap merge_set, merge_use, local_merge_live;
+  bitmap test_set, test_use;
+  unsigned i, fail = 0;
+  bitmap_iterator bi;
+  int memrefs_in_across = 0;
+  int mem_sets_in_across = 0;
+  bool trapping_insns_in_across = false;
+
+  if (pmove_upto != NULL)
+    *pmove_upto = NULL_RTX;
+
+  /* Find real bounds, ignoring debug insns.  */
+  while (!NONDEBUG_INSN_P (from) && from != to)
+    from = NEXT_INSN (from);
+  while (!NONDEBUG_INSN_P (to) && from != to)
+    to = PREV_INSN (to);
+
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  memrefs_in_across |= for_each_rtx (&PATTERN (insn), find_memory,
+					     NULL);
+	  note_stores (PATTERN (insn), find_memory_stores,
+		       &mem_sets_in_across);
+	  /* This is used just to find sets of the stack pointer.  */
+	  memrefs_in_across |= mem_sets_in_across;
+	  trapping_insns_in_across |= may_trap_p (PATTERN (insn));
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Collect:
+     MERGE_SET = set of registers set in MERGE_BB
+     MERGE_USE = set of registers used in MERGE_BB and live at its top
+     MERGE_LIVE = set of registers live at the point inside the MERGE
+     range that we've reached during scanning
+     TEST_SET = set of registers set between ACROSS_FROM and ACROSS_TO.
+     TEST_USE = set of registers used between ACROSS_FROM and ACROSS_TO,
+     and live before ACROSS_FROM.  */
+
+  merge_set = BITMAP_ALLOC (&reg_obstack);
+  merge_use = BITMAP_ALLOC (&reg_obstack);
+  local_merge_live = BITMAP_ALLOC (&reg_obstack);
+  test_set = BITMAP_ALLOC (&reg_obstack);
+  test_use = BITMAP_ALLOC (&reg_obstack);
+
+  /* Compute the set of registers set and used in the ACROSS range.  */
+  if (other_branch_live != NULL)
+    bitmap_copy (test_use, other_branch_live);
+  df_simulate_initialize_backwards (merge_bb, test_use);
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  df_simulate_find_defs (insn, test_set);
+	  df_simulate_defs (insn, test_use);
+	  df_simulate_uses (insn, test_use);
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Compute an upper bound for the amount of insns moved, by finding
+     the first insn in MERGE that sets a register in TEST_USE, or uses
+     a register in TEST_SET.  We also check for calls, trapping operations,
+     and memory references.  */
+  max_to = NULL_RTX;
+  for (insn = from; ; insn = next)
+    {
+      if (CALL_P (insn))
+	break;
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (may_trap_p (PATTERN (insn))
+	      && (trapping_insns_in_across || other_branch_live != NULL))
+	    break;
+
+	  /* We cannot move memory stores past each other, or move memory
+	     reads past stores, at least not without tracking them and
+	     calling true_dependence on every pair.
+
+	     If there is no other branch and no memory references or
+	     sets in the ACROSS range, we can move memory references
+	     freely, even volatile ones.
+
+	     Otherwise, the rules are as follows: volatile memory
+	     references and stores can't be moved at all, and any type
+	     of memory reference can't be moved if there are volatile
+	     accesses or stores in the ACROSS range.  That leaves
+	     normal reads, which can be moved, as the trapping case is
+	     dealt with elsewhere.  */
+	  if (other_branch_live != NULL || memrefs_in_across != 0)
+	    {
+	      int mem_ref_flags = 0;
+	      int mem_set_flags = 0;
+	      note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags);
+	      mem_ref_flags = for_each_rtx (&PATTERN (insn), find_memory,
+					    NULL);
+	      /* Catch sets of the stack pointer.  */
+	      mem_ref_flags |= mem_set_flags;
+
+	      if ((mem_ref_flags | mem_set_flags) & MEMREF_VOLATILE)
+		break;
+	      if ((memrefs_in_across & MEMREF_VOLATILE) && mem_ref_flags != 0)
+		break;
+	      if (mem_set_flags != 0
+		  || (mem_sets_in_across != 0 && mem_ref_flags != 0))
+		break;
+	    }
+	  df_simulate_find_uses (insn, merge_use);
+	  /* We're only interested in uses which use a value live at
+	     the top, not one previously set in this block.  */
+	  bitmap_and_compl_into (merge_use, merge_set);
+	  df_simulate_find_defs (insn, merge_set);
+	  if (bitmap_intersect_p (merge_set, test_use)
+	      || bitmap_intersect_p (merge_use, test_set))
+	    break;
+	  max_to = insn;
+	}
+      next = NEXT_INSN (insn);
+      if (insn == to)
+	break;
+    }
+  if (max_to != to)
+    fail = 1;
+
+  if (max_to == NULL_RTX || (fail && pmove_upto == NULL))
+    goto out;
+
+  /* Now, lower this upper bound by also taking into account that
+     a range of insns moved across ACROSS must not leave a register
+     live at the end that will be clobbered in ACROSS.  We need to
+     find a point where TEST_SET & LIVE == 0.
+
+     Insns in the MERGE range that set registers which are also set
+     in the ACROSS range may still be moved as long as we also move
+     later insns which use the results of the set, and make the
+     register dead again.  This is verified by the condition stated
+     above.  We only need to test it for registers that are set in
+     the moved region.
+
+     MERGE_LIVE is provided by the caller and holds live registers after
+     TO.  */
+  bitmap_copy (local_merge_live, merge_live);
+  for (insn = to; insn != max_to; insn = PREV_INSN (insn))
+    df_simulate_one_insn_backwards (merge_bb, insn, local_merge_live);
+
+  /* We're not interested in registers that aren't set in the moved
+     region at all.  */
+  bitmap_and_into (local_merge_live, merge_set);
+  for (;;)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (!bitmap_intersect_p (test_set, local_merge_live))
+	    {
+	      max_to = insn;
+	      break;
+	    }
+
+	  df_simulate_one_insn_backwards (merge_bb, insn,
+					  local_merge_live);
+	}
+      if (insn == from)
+	{
+	  fail = 1;
+	  goto out;
+	}
+      insn = PREV_INSN (insn);
+    }
 
+  if (max_to != to)
+    fail = 1;
+
+  if (pmove_upto)
+    *pmove_upto = max_to;
+
+  /* For small register class machines, don't lengthen lifetimes of
+     hard registers before reload.  */
+  if (! reload_completed
+      && targetm.small_register_classes_for_mode_p (VOIDmode))
+    {
+      EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
+	{
+	  if (i < FIRST_PSEUDO_REGISTER
+	      && ! fixed_regs[i]
+	      && ! global_regs[i])
+	    fail = 1;
+	}
+    }
+
+ out:
+  BITMAP_FREE (merge_set);
+  BITMAP_FREE (merge_use);
+  BITMAP_FREE (local_merge_live);
+  BITMAP_FREE (test_set);
+  BITMAP_FREE (test_use);
+
+  return !fail;
+}
 
 \f
 /*----------------------------------------------------------------------------
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 162372)
+++ Makefile.in	(working copy)
@@ -3154,7 +3154,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYST
 df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
-   $(TM_P_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
+   $(TM_P_H) $(TARGET_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
 df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
Index: testsuite/gcc.target/arm/headmerge-1.c
===================================================================
--- testsuite/gcc.target/arm/headmerge-1.c	(revision 0)
+++ testsuite/gcc.target/arm/headmerge-1.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "#120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+
+void t (int x, int y)
+{
+  if (y < 5)
+    foo1 (120);
+  else
+    foo2 (120);
+}
Index: testsuite/gcc.target/arm/headmerge-2.c
===================================================================
--- testsuite/gcc.target/arm/headmerge-2.c	(revision 0)
+++ testsuite/gcc.target/arm/headmerge-2.c	(revision 0)
@@ -0,0 +1,35 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+extern void foo3 (int);
+extern void foo4 (int);
+extern void foo5 (int);
+extern void foo6 (int);
+
+void t (int x, int y)
+{
+  switch (y)
+    {
+    case 1:
+      foo1 (120);
+      break;
+    case 5:
+      foo2 (120);
+      break;
+    case 7:
+      foo3 (120);
+      break;
+    case 10:
+      foo4 (120);
+      break;
+    case 13:
+      foo5 (120);
+      break;
+    default:
+      foo6 (120);
+      break;
+    }
+}

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-22 21:09               ` Bernd Schmidt
@ 2010-07-23 22:06                 ` Eric Botcazou
  2010-07-23 22:13                   ` Bernd Schmidt
  2010-07-27 15:31                   ` Jeff Law
  0 siblings, 2 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-07-23 22:06 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> Before you do that, here's a new version.  This corrects a few errors in
> the register lifetime handling, and adds support for moving across two
> basic blocks, which is very useful for switch statements but happens in
> other cases as well.  

This implementation really moves insns whereas cross-jumping, the reversed  
transformation, is implemented by means of operations on the CFG.  Although 
this is probably not as straightforward in this direction, did you consider 
the CFG approach instead?  Wouldn't it simplify a little the integration in 
the cfgcleanup.c framework?

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-23 22:06                 ` Eric Botcazou
@ 2010-07-23 22:13                   ` Bernd Schmidt
  2010-07-24 13:07                     ` Eric Botcazou
  2010-07-27 15:31                   ` Jeff Law
  1 sibling, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-23 22:13 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

On 07/24/2010 12:05 AM, Eric Botcazou wrote:
>> Before you do that, here's a new version.  This corrects a few errors in
>> the register lifetime handling, and adds support for moving across two
>> basic blocks, which is very useful for switch statements but happens in
>> other cases as well.  
> 
> This implementation really moves insns whereas cross-jumping, the reversed  
> transformation, is implemented by means of operations on the CFG.  Although 
> this is probably not as straightforward in this direction, did you consider 
> the CFG approach instead?  Wouldn't it simplify a little the integration in 
> the cfgcleanup.c framework?

Please be more specific about what you envision.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-23 22:13                   ` Bernd Schmidt
@ 2010-07-24 13:07                     ` Eric Botcazou
  2010-07-26  9:42                       ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-07-24 13:07 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

> Please be more specific about what you envision.

See try_crossjump_to_edge and try_crossjump_bb: no code is actually moved, 
blocks are split and edges redirected instead.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-24 13:07                     ` Eric Botcazou
@ 2010-07-26  9:42                       ` Bernd Schmidt
  2010-07-26 13:40                         ` Paolo Bonzini
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-26  9:42 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches, Steven Bosscher, Jim Wilson

On 07/24/2010 03:07 PM, Eric Botcazou wrote:
>> Please be more specific about what you envision.
> 
> See try_crossjump_to_edge and try_crossjump_bb: no code is actually moved, 
> blocks are split and edges redirected instead.

Yeah, but that can't work for this optimization. I don't see how you can
do it without moving insns across jumps.  Please give an example of how
you would transform code.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-26  9:42                       ` Bernd Schmidt
@ 2010-07-26 13:40                         ` Paolo Bonzini
  2010-07-26 13:50                           ` Paolo Bonzini
                                             ` (2 more replies)
  0 siblings, 3 replies; 95+ messages in thread
From: Paolo Bonzini @ 2010-07-26 13:40 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/26/2010 11:42 AM, Bernd Schmidt wrote:
> On 07/24/2010 03:07 PM, Eric Botcazou wrote:
>>> Please be more specific about what you envision.
>>
>> See try_crossjump_to_edge and try_crossjump_bb: no code is actually moved,
>> blocks are split and edges redirected instead.
>
> Yeah, but that can't work for this optimization. I don't see how you can
> do it without moving insns across jumps.  Please give an example of how
> you would transform code.

You could:
- split the destination BB before the jump (into BB11 and BB12)
- split the source BBs after the last moved instruction (into BB21 and 
BB22, BB31 and BB32, etc.)
- redirect the jumps to BBn1 (n>=2) to go to BBn2.
- graft BB21 between BB11 and BB12, remove all BBn1 for n>2

I don't know if this is worth it though, it can always be done later.

Paolo

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-26 13:40                         ` Paolo Bonzini
@ 2010-07-26 13:50                           ` Paolo Bonzini
  2010-07-26 13:56                           ` Bernd Schmidt
  2010-07-28 21:44                           ` Bernd Schmidt
  2 siblings, 0 replies; 95+ messages in thread
From: Paolo Bonzini @ 2010-07-26 13:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/26/2010 11:42 AM, Bernd Schmidt wrote:
> On 07/24/2010 03:07 PM, Eric Botcazou wrote:
>>> Please be more specific about what you envision.
>>
>> See try_crossjump_to_edge and try_crossjump_bb: no code is actually moved,
>> blocks are split and edges redirected instead.
>
> Yeah, but that can't work for this optimization. I don't see how you can
> do it without moving insns across jumps.  Please give an example of how
> you would transform code.

You could:
- split the destination BB before the jump (into BB11 and BB12)
- split the source BBs after the last moved instruction (into BB21 and 
BB22, BB31 and BB32, etc.)
- redirect the jumps to BBn1 (n>=2) to go to BBn2.
- graft BB21 between BB11 and BB12, remove all BBn1 for n>2

I don't know if this is worth it though, it can always be done later.

Paolo

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-26 13:40                         ` Paolo Bonzini
  2010-07-26 13:50                           ` Paolo Bonzini
@ 2010-07-26 13:56                           ` Bernd Schmidt
  2010-07-26 14:14                             ` Paolo Bonzini
  2010-07-27  8:31                             ` Eric Botcazou
  2010-07-28 21:44                           ` Bernd Schmidt
  2 siblings, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-26 13:56 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/26/2010 03:40 PM, Paolo Bonzini wrote:
> You could:
> - split the destination BB before the jump (into BB11 and BB12)
> - split the source BBs after the last moved instruction (into BB21 and
> BB22, BB31 and BB32, etc.)
> - redirect the jumps to BBn1 (n>=2) to go to BBn2.
> - graft BB21 between BB11 and BB12, remove all BBn1 for n>2

How is this simpler and better than just having a single line calling
reorder_insns?  It seems pointless given that it produces the same
result, with a lot more effort.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-26 13:56                           ` Bernd Schmidt
@ 2010-07-26 14:14                             ` Paolo Bonzini
  2010-07-27  8:31                             ` Eric Botcazou
  1 sibling, 0 replies; 95+ messages in thread
From: Paolo Bonzini @ 2010-07-26 14:14 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/26/2010 03:49 PM, Bernd Schmidt wrote:
> On 07/26/2010 03:40 PM, Paolo Bonzini wrote:
>> You could:
>> - split the destination BB before the jump (into BB11 and BB12)
>> - split the source BBs after the last moved instruction (into BB21 and
>> BB22, BB31 and BB32, etc.)
>> - redirect the jumps to BBn1 (n>=2) to go to BBn2.
>> - graft BB21 between BB11 and BB12, remove all BBn1 for n>2
>
> How is this simpler and better than just having a single line calling
> reorder_insns?  It seems pointless given that it produces the same
> result, with a lot more effort.

I can't say I disagree (even if you include in the picture deleting the 
duplicated insns in other basic blocks).

Paolo

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-26 13:56                           ` Bernd Schmidt
  2010-07-26 14:14                             ` Paolo Bonzini
@ 2010-07-27  8:31                             ` Eric Botcazou
  2010-07-27  9:37                               ` Bernd Schmidt
  1 sibling, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-07-27  8:31 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

> How is this simpler and better than just having a single line calling
> reorder_insns?  It seems pointless given that it produces the same
> result, with a lot more effort.

It's the canonical way of doing this kind of transformations these days.  The 
underlying machinery is supposed to do all the heavy lifting, you just have 
to drive it.

In particular, I want to avoid kludges like:

+/* Set to true if we couldn't run an optimization due to stale liveness
+   information; we should run df_analyze to enable more opportunities.  */
+static bool block_was_dirty;

@@ -2182,6 +2449,9 @@ try_optimize_cfg (int mode)
 	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
 	    changed = true;
 
+	  if (block_was_dirty)
+	    df_analyze ();
+
 #ifdef ENABLE_CHECKING
 	  if (changed)
 	    verify_flow_info ();

that shouldn't be necessary.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27  8:31                             ` Eric Botcazou
@ 2010-07-27  9:37                               ` Bernd Schmidt
  2010-07-27 13:35                                 ` Bernd Schmidt
                                                   ` (2 more replies)
  0 siblings, 3 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-27  9:37 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

On 07/27/2010 09:44 AM, Eric Botcazou wrote:
>> How is this simpler and better than just having a single line calling
>> reorder_insns?  It seems pointless given that it produces the same
>> result, with a lot more effort.
> 
> It's the canonical way of doing this kind of transformations these days.  The 
> underlying machinery is supposed to do all the heavy lifting, you just have 
> to drive it.

That's a non-argument, and false IMO.  Not every optimization can be
represented cleanly as a set of CFG manipulations, and this one can't.
Are you saying we should restrict ourselves to not doing it?

> In particular, I want to avoid kludges like:
> 
> +/* Set to true if we couldn't run an optimization due to stale liveness
> +   information; we should run df_analyze to enable more opportunities.  */
> +static bool block_was_dirty;
> 
> @@ -2182,6 +2449,9 @@ try_optimize_cfg (int mode)
>  	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
>  	    changed = true;
>  
> +	  if (block_was_dirty)
> +	    df_analyze ();
> +
>  #ifdef ENABLE_CHECKING
>  	  if (changed)
>  	    verify_flow_info ();
> 
> that shouldn't be necessary.

Still not an argument.  Why shouldn't it be necessary?  It is logical
that by moving code, we change the liveness of registers.  We have to
verify the liveness of registers before moving code, hence, to iterate,
we have to recompute it.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27  9:37                               ` Bernd Schmidt
@ 2010-07-27 13:35                                 ` Bernd Schmidt
  2010-07-27 22:38                                   ` Eric Botcazou
  2010-07-27 17:39                                 ` Jeff Law
  2010-07-27 22:23                                 ` Eric Botcazou
  2 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-27 13:35 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

On 07/27/2010 11:21 AM, Bernd Schmidt wrote:
> On 07/27/2010 09:44 AM, Eric Botcazou wrote:
>> In particular, I want to avoid kludges like:
>>
>> +/* Set to true if we couldn't run an optimization due to stale liveness
>> +   information; we should run df_analyze to enable more opportunities.  */
>> +static bool block_was_dirty;
>>
>> @@ -2182,6 +2449,9 @@ try_optimize_cfg (int mode)
>>  	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
>>  	    changed = true;
>>  
>> +	  if (block_was_dirty)
>> +	    df_analyze ();
>> +
>>  #ifdef ENABLE_CHECKING
>>  	  if (changed)
>>  	    verify_flow_info ();
>>
>> that shouldn't be necessary.
> 
> Still not an argument.  Why shouldn't it be necessary?  It is logical
> that by moving code, we change the liveness of registers.  We have to
> verify the liveness of registers before moving code, hence, to iterate,
> we have to recompute it.

BTW, this is essentially the same thing that's done in the main loop in
ifcvt.c (do you also see it as a kludge there?), where I originally
implemented this optimization, and you were the one who suggested I move
it to cfgcleanup.c.  Maybe you misunderstood the optimization back then
and thought it was just CFG manipulation?  That's simply not the case;
the analogy with crossjumping doesn't entirely hold.

Please explain what you are thinking.  If you have a clever way to do
it, show it.  If you have just not thought it through sufficiently,
please do not continue to hold up a useful improvement.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-23 22:06                 ` Eric Botcazou
  2010-07-23 22:13                   ` Bernd Schmidt
@ 2010-07-27 15:31                   ` Jeff Law
  2010-07-27 22:18                     ` Eric Botcazou
  1 sibling, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-07-27 15:31 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/23/10 16:05, Eric Botcazou wrote:
>> Before you do that, here's a new version.  This corrects a few errors in
>> the register lifetime handling, and adds support for moving across two
>> basic blocks, which is very useful for switch statements but happens in
>> other cases as well.
> This implementation really moves insns whereas cross-jumping, the reversed
> transformation, is implemented by means of operations on the CFG.  Although
> this is probably not as straightforward in this direction, did you consider
> the CFG approach instead?  Wouldn't it simplify a little the integration in
> the cfgcleanup.c framework?
It's probably worth noting that these optimizations are more effective 
when they're allowed to move insns.   So while limiting to CFG 
approaches may simplify things, it also leads to fewer opportunities to 
commonize code.

jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27  9:37                               ` Bernd Schmidt
  2010-07-27 13:35                                 ` Bernd Schmidt
@ 2010-07-27 17:39                                 ` Jeff Law
  2010-07-27 22:05                                   ` Bernd Schmidt
  2010-07-27 22:23                                 ` Eric Botcazou
  2 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-07-27 17:39 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/27/10 03:21, Bernd Schmidt wrote:
>> In particular, I want to avoid kludges like:
>> +/* Set to true if we couldn't run an optimization due to stale liveness
>> +   information; we should run df_analyze to enable more opportunities.  */
>> +static bool block_was_dirty;
>>
>> @@ -2182,6 +2449,9 @@ try_optimize_cfg (int mode)
>>   	&&  try_crossjump_bb (mode, EXIT_BLOCK_PTR))
>>   	    changed = true;
>>
>> +	  if (block_was_dirty)
>> +	    df_analyze ();
>> +
>>   #ifdef ENABLE_CHECKING
>>   	  if (changed)
>>   	    verify_flow_info ();
>>
>> that shouldn't be necessary.
> Still not an argument.  Why shouldn't it be necessary?  It is logical
> that by moving code, we change the liveness of registers.  We have to
> verify the liveness of registers before moving code, hence, to iterate,
> we have to recompute it.

It seems to me implementing this optimization well requires insn 
movement which is going to affect register lifetimes.   Furthermore, 
this optimization is sitting inside a while (changed) style loop.  At 
the least we need to mark blocks where the life data has become 
inaccurate so that we don't mis-optimize based on inaccurate life data.  
I haven't thought deeply about the problem, but it may well be the case 
that as the cfgcleanup loop iterates new opportunities may be exposed 
and thus it'd be useful to go ahead and update the life information.

What I'm more concerned about is placement of this optimization in 
cfgcleanup -- one could argue this optimization isn't strictly a cfg 
cleanup given the need to move insns from one block to another (contrast 
to our cross jumping implementation which just scrambles the cfg).

One could formulate a head merging algorithm which worked solely on the 
CFG, but I doubt it's going to be very effective.

jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 17:39                                 ` Jeff Law
@ 2010-07-27 22:05                                   ` Bernd Schmidt
  2010-07-27 22:40                                     ` Eric Botcazou
  2010-07-29 17:28                                     ` Jeff Law
  0 siblings, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-27 22:05 UTC (permalink / raw)
  To: Jeff Law
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

Thanks for looking at this, Jeff.

On 07/27/2010 07:09 PM, Jeff Law wrote:
> It seems to me implementing this optimization well requires insn
> movement which is going to affect register lifetimes.   Furthermore,
> this optimization is sitting inside a while (changed) style loop.  At
> the least we need to mark blocks where the life data has become
> inaccurate so that we don't mis-optimize based on inaccurate life data. 
> I haven't thought deeply about the problem, but it may well be the case
> that as the cfgcleanup loop iterates new opportunities may be exposed
> and thus it'd be useful to go ahead and update the life information.

I'm fairly certain I've observed this to happen.  Note that other
transformations done in cfgcleanup can make life information inaccurate
before we even try to run head merging, making it impossible to do the
analysis.

> What I'm more concerned about is placement of this optimization in
> cfgcleanup -- one could argue this optimization isn't strictly a cfg
> cleanup given the need to move insns from one block to another (contrast
> to our cross jumping implementation which just scrambles the cfg).

Originally I'd placed this in ifcvt.c, by analogy with find_if_case_1
and find_if_case_2, which do some very similar transformations.  Eric
requested I move it to cfgcleanup.c and now seems unhappy about the
consequences.  I can see two possible reasons for the request: handling
switch statements as well as ifs (also possible in ifcvt by placing it
right at the top of find_if_header or doing it before calling
find_if_header), and the possibility that other cfg cleanups expose more
opportunities.

I think in theory, it is probably more powerful to do it in cfg cleanup,
which is why I did not object to the request to do it there.  However, I
did not see anything wrong in principle with the original patch that did
it in ifcvt (and I think it could have been applied at the time, and
would have improved gcc).

I wouldn't necessarily mind putting it back into ifcvt, but we might
need to insert calls to cleanup_cfg to get the full benefit (possibly
restricted to situations where we made a block empty except for a jump).

> One could formulate a head merging algorithm which worked solely on the
> CFG, but I doubt it's going to be very effective.

Well, I don't know what if anything Eric has in mind, but assuming we have

BB1
lots of stuff
if (x) goto A;
BB2
y = 1;
goto C;
BB3
A: y = 1;
goto D;

how can we possibly avoid code movement?  The whole purpose is that we
want to have only one copy of the assignment, and that only works if
it's before the jump.  Never mind that we couldn't merge it into the end
of BB1 since the jump can't be in the middle of a basic block.  So it
seems fairly obvious to me that any kind of simple CFG manipulation
fails entirely to achieve the right result.

Even if it can be thought of as "reverse" cross jumping, in terms of
implementation the transformations in ifcvt are a somewhat better match
than the crossjump code.  For one thing, we need almost exactly the same
code to check liveness information.

I think we can also discard the suggestion of simulating the effect of a
single reorder_insns call with a series of complex CFG transformations,
as that seems entirely pointless.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 15:31                   ` Jeff Law
@ 2010-07-27 22:18                     ` Eric Botcazou
  2010-07-28 17:07                       ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-07-27 22:18 UTC (permalink / raw)
  To: Jeff Law; +Cc: Bernd Schmidt, gcc-patches, Steven Bosscher, Jim Wilson

> It's probably worth noting that these optimizations are more effective
> when they're allowed to move insns.   So while limiting to CFG
> approaches may simplify things, it also leads to fewer opportunities to
> commonize code.

Do you have a concrete example of such an optimization that would be doable 
with code movements but not with CFG manipulations?

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27  9:37                               ` Bernd Schmidt
  2010-07-27 13:35                                 ` Bernd Schmidt
  2010-07-27 17:39                                 ` Jeff Law
@ 2010-07-27 22:23                                 ` Eric Botcazou
  2010-07-27 23:04                                   ` Bernd Schmidt
  2010-07-27 23:08                                   ` Paolo Bonzini
  2 siblings, 2 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-07-27 22:23 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

> That's a non-argument, and false IMO.  Not every optimization can be
> represented cleanly as a set of CFG manipulations, and this one can't.

I wrote "this kind of transformations", not "all transformations".

What about the algorithm sketched by Paolo?

> Are you saying we should restrict ourselves to not doing it?

I'm saying that optimizations run in cfgcleanup.c must play by the rules.

> Still not an argument.  Why shouldn't it be necessary?  It is logical
> that by moving code, we change the liveness of registers.  We have to
> verify the liveness of registers before moving code, hence, to iterate,
> we have to recompute it.

Because this will be done automatically if you use the appropriate API.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 13:35                                 ` Bernd Schmidt
@ 2010-07-27 22:38                                   ` Eric Botcazou
  2010-07-28 16:58                                     ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-07-27 22:38 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

> BTW, this is essentially the same thing that's done in the main loop in
> ifcvt.c (do you also see it as a kludge there?), where I originally
> implemented this optimization, and you were the one who suggested I move
> it to cfgcleanup.c.  Maybe you misunderstood the optimization back then
> and thought it was just CFG manipulation?  That's simply not the case;
> the analogy with crossjumping doesn't entirely hold.

Yes, I still think that it will be more useful in cfgcleanup.c.  And it's 
another form of code commonization so I still think that implementing it using 
CFG manipulations is the best approach.

> Please explain what you are thinking.  If you have a clever way to do
> it, show it.  If you have just not thought it through sufficiently,
> please do not continue to hold up a useful improvement.

Fair enough.  Since I don't have enough time at the moment to experiment 
myself, I guess I have to withdraw my objections.

Please run this by another maintainer though.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 22:05                                   ` Bernd Schmidt
@ 2010-07-27 22:40                                     ` Eric Botcazou
  2010-07-28 17:06                                       ` Jeff Law
  2010-07-29 17:28                                     ` Jeff Law
  1 sibling, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-07-27 22:40 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

> Well, I don't know what if anything Eric has in mind, but assuming we have
>
> BB1
> lots of stuff
> if (x) goto A;
> BB2
> y = 1;
> goto C;
> BB3
> A: y = 1;
> goto D;
>
> how can we possibly avoid code movement?

Split BB2 and BB3 after "y = 1;" and redirect the edges from BB1.  Then split 
BB1 before the test and insert one instance of the common heads.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 22:23                                 ` Eric Botcazou
@ 2010-07-27 23:04                                   ` Bernd Schmidt
  2010-07-28  8:40                                     ` Eric Botcazou
  2010-07-28 18:31                                     ` Jeff Law
  2010-07-27 23:08                                   ` Paolo Bonzini
  1 sibling, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-27 23:04 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

On 07/28/2010 12:18 AM, Eric Botcazou wrote:
>> That's a non-argument, and false IMO.  Not every optimization can be
>> represented cleanly as a set of CFG manipulations, and this one can't.
> 
> I wrote "this kind of transformations", not "all transformations".
> 
> What about the algorithm sketched by Paolo?

Paolo's "algorithm" was
- split the destination BB before the jump (into BB11 and BB12)
- split the source BBs after the last moved instruction (into BB21 and
BB22, BB31 and BB32, etc.)
- redirect the jumps to BBn1 (n>=2) to go to BBn2.
- graft BB21 between BB11 and BB12, remove all BBn1 for n>2

which has exactly the effect of one call to reorder_insns and a few more
to delete_insn, except it creates lots of garbage BBs only to delete
them immediately again.

What exactly is gained by that?  Certainly not readability.  You're
moving insns, so reorder_insns is the correct API.  The suggestion is
obviously absurd in my eyes.

>> Are you saying we should restrict ourselves to not doing it?
> 
> I'm saying that optimizations run in cfgcleanup.c must play by the rules.

If your "rules" lead to an absurd result, the rules are bogus.  Who
decided those "rules" anyway?

>> Still not an argument.  Why shouldn't it be necessary?  It is logical
>> that by moving code, we change the liveness of registers.  We have to
>> verify the liveness of registers before moving code, hence, to iterate,
>> we have to recompute it.
> 
> Because this will be done automatically if you use the appropriate API.

Then I don't think we have the "appropriate API".  The CFG contortions
mentioned above certainly do nothing to solve this problem.  I still
think you're simply missing something here.

> Fair enough.  Since I don't have enough time at the moment to experiment 
> myself, I guess I have to withdraw my objections.
> 
> Please run this by another maintainer though.

Thanks.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 22:23                                 ` Eric Botcazou
  2010-07-27 23:04                                   ` Bernd Schmidt
@ 2010-07-27 23:08                                   ` Paolo Bonzini
  1 sibling, 0 replies; 95+ messages in thread
From: Paolo Bonzini @ 2010-07-27 23:08 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Steven Bosscher, Jim Wilson

On Wed, Jul 28, 2010 at 00:18, Eric Botcazou <ebotcazou@adacore.com> wrote:
>> That's a non-argument, and false IMO.  Not every optimization can be
>> represented cleanly as a set of CFG manipulations, and this one can't.
>
> I wrote "this kind of transformations", not "all transformations".
>
> What about the algorithm sketched by Paolo?

That's what Bernd referred to when he said, "I think we can also
discard the suggestion of simulating the effect of a single
reorder_insns call with a series of complex CFG transformations, as
that seems entirely pointless."

I actually agree with him.  I don't think it is _that_ complex
(particularly because my sketch did more than a single reorder_insns),
but I agree it is pointless.  It is faking that head merging is a pure
CFG transformation when in fact it isn't.

Paolo

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 23:04                                   ` Bernd Schmidt
@ 2010-07-28  8:40                                     ` Eric Botcazou
  2010-07-28 10:13                                       ` Bernd Schmidt
  2010-07-28 18:31                                     ` Jeff Law
  1 sibling, 1 reply; 95+ messages in thread
From: Eric Botcazou @ 2010-07-28  8:40 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

> What exactly is gained by that?  Certainly not readability.

Avoiding kludges like the one I already mentioned.

> You're moving insns, so reorder_insns is the correct API.  The suggestion is
> obviously absurd in my eyes.

The first sentence is equally absurd these days because of CFG layout mode.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28  8:40                                     ` Eric Botcazou
@ 2010-07-28 10:13                                       ` Bernd Schmidt
  2010-07-28 19:40                                         ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-28 10:13 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

On 07/28/2010 10:35 AM, Eric Botcazou wrote:
>> What exactly is gained by that?  Certainly not readability.
> 
> Avoiding kludges like the one I already mentioned.

As I already pointed out, it's
a) not a kludge, but quite necessary and done like that elsewhere
b) not avoided by pretending a reorder_insns operation is a CFG operation.

Hence, your comment makes no sense.  You've never replied with anything
of substance to either point a) or point b).

>> You're moving insns, so reorder_insns is the correct API.  The suggestion is
>> obviously absurd in my eyes.
> 
> The first sentence is equally absurd these days because of CFG layout mode.

Explain how that is relevant to the current discussion.  I believe it's
completely beside the point, as that's only concerned with the layout of
basic blocks, not with the placement of insns within them.  Moving insns
from one basic block to another (while explicitly avoiding touching
things like final jump insns) doesn't affect that, so IMO you're still
not making any sense.

Maybe that's the thing you're missing?  No control flow insns are ever
touched or moved at all by the patch.  The CFG is in every case the same
afterwards as it was before (although it may be cleaned up, but that's a
different job done already by the other code in cfgcleanup).  That's a
pretty strong hint that we're not dealing with a CFG operation.

Your attempts at patch review for this issue all have been drive-by
one-liners like this, which were at best insufficiently explained, and
at worst completely nonsensical.  It has been extremely frustrating.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 22:38                                   ` Eric Botcazou
@ 2010-07-28 16:58                                     ` Jeff Law
  2010-07-29  8:25                                       ` Eric Botcazou
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-07-28 16:58 UTC (permalink / raw)
  To: Eric Botcazou
  Cc: Bernd Schmidt, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/27/10 16:28, Eric Botcazou wrote:
>> BTW, this is essentially the same thing that's done in the main loop in
>> ifcvt.c (do you also see it as a kludge there?), where I originally
>> implemented this optimization, and you were the one who suggested I move
>> it to cfgcleanup.c.  Maybe you misunderstood the optimization back then
>> and thought it was just CFG manipulation?  That's simply not the case;
>> the analogy with crossjumping doesn't entirely hold.
> Yes, I still think that it will be more useful in cfgcleanup.c.
While I don't think head merging is a perfect fit for cfgcleanup.c, I 
don't think it's worth a huge argument.  I'll go along with the final 
version (whatever it looks like) in cfgcleanup.c

>   And it's
> another form of code commonization so I still think that implementing it using
> CFG manipulations is the best approach.
I think this is the root of the disagreement.  I'll keep looking at it.


> Please run this by another maintainer though.
I'll own reviewing.

Jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 22:40                                     ` Eric Botcazou
@ 2010-07-28 17:06                                       ` Jeff Law
  0 siblings, 0 replies; 95+ messages in thread
From: Jeff Law @ 2010-07-28 17:06 UTC (permalink / raw)
  To: Eric Botcazou
  Cc: Bernd Schmidt, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/27/10 16:38, Eric Botcazou wrote:
>> Well, I don't know what if anything Eric has in mind, but assuming we have
>>
>> BB1
>> lots of stuff
>> if (x) goto A;
>> BB2
>> y = 1;
>> goto C;
>> BB3
>> A: y = 1;
>> goto D;
>>
>> how can we possibly avoid code movement?
> Split BB2 and BB3 after "y = 1;" and redirect the edges from BB1.  Then split
> BB1 before the test and insert one instance of the common heads.
Which to me seems more convoluted than just moving insns implementing 
the common code.  And I think that's the whole point behind the 
disagreement.  While we *can* formulate this as a series of CFG 
manipulations I think it actually makes the resulting transformation 
more difficult to understand.

Also note that trying to pluck common insns that aren't at the head of 
the blocks is more difficult to do as a pure CFG transformation (it's 
doable, just ugly) while it ought to be trivial to just move them around.


Jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 22:18                     ` Eric Botcazou
@ 2010-07-28 17:07                       ` Jeff Law
  2010-07-28 17:38                         ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-07-28 17:07 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: Bernd Schmidt, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/27/10 16:16, Eric Botcazou wrote:
>> It's probably worth noting that these optimizations are more effective
>> when they're allowed to move insns.   So while limiting to CFG
>> approaches may simplify things, it also leads to fewer opportunities to
>> commonize code.
> Do you have a concrete example of such an optimization that would be doable
> with code movements but not with CFG manipulations?
Think about plucking a common insn from the middle of a block rather 
than strictly at the head or tail.

Jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 17:07                       ` Jeff Law
@ 2010-07-28 17:38                         ` Bernd Schmidt
  0 siblings, 0 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-28 17:38 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/28/2010 07:05 PM, Jeff Law wrote:
> On 07/27/10 16:16, Eric Botcazou wrote:
>>> It's probably worth noting that these optimizations are more
>>> effective when they're allowed to move insns.   So while limiting
>>> to CFG approaches may simplify things, it also leads to fewer
>>> opportunities to commonize code.
>> Do you have a concrete example of such an optimization that would
>> be doable with code movements but not with CFG manipulations?
> Think about plucking a common insn from the middle of a block rather 
> than strictly at the head or tail.

In a sense that's what happens here if you consider the middle as
anything between the code_label or note_insn_basic_block and the final jump.

Of course, moving out of the middle can also be disguised with a CFG
scheme like the one Paolo described (you just need to split a block
twice), but the real question is, why would anyone want to do that?
Just to follow arbitrary, misunderstood rules?

> Which to me seems more convoluted than just moving insns implementing
> the common code.  And I think that's the whole point behind the
> disagreement.

There must be something else, Eric seems to think using CFG manipulation
would somehow eliminate the need to verify register lifetimes.  If you
agree with me that this is simply impossible, I think we can bury that
part of the issue.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 23:04                                   ` Bernd Schmidt
  2010-07-28  8:40                                     ` Eric Botcazou
@ 2010-07-28 18:31                                     ` Jeff Law
  2010-07-28 18:36                                       ` Paolo Bonzini
  2010-07-29  9:07                                       ` Eric Botcazou
  1 sibling, 2 replies; 95+ messages in thread
From: Jeff Law @ 2010-07-28 18:31 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/27/10 16:45, Bernd Schmidt wrote:
> On 07/28/2010 12:18 AM, Eric Botcazou wrote:
>>> That's a non-argument, and false IMO.  Not every optimization can be
>>> represented cleanly as a set of CFG manipulations, and this one can't.
>> I wrote "this kind of transformations", not "all transformations".
>>
>> What about the algorithm sketched by Paolo?
> Paolo's "algorithm" was
> - split the destination BB before the jump (into BB11 and BB12)
> - split the source BBs after the last moved instruction (into BB21 and
> BB22, BB31 and BB32, etc.)
> - redirect the jumps to BBn1 (n>=2) to go to BBn2.
> - graft BB21 between BB11 and BB12, remove all BBn1 for n>2
>
> which has exactly the effect of one call to reorder_insns and a few more
> to delete_insn, except it creates lots of garbage BBs only to delete
> them immediately again.
>
> What exactly is gained by that?  Certainly not readability.  You're
> moving insns, so reorder_insns is the correct API.  The suggestion is
> obviously absurd in my eyes.

Can we all agree that the problem can be viewed as either cfg 
manipulations or insn movement and that what we're really arguing about 
is which is the most appropriate way to view the problem?

If so, then we really need to determine which implementation is the 
easiest to understand, implement & maintain.


>> I'm saying that optimizations run in cfgcleanup.c must play by the rules.
> If your "rules" lead to an absurd result, the rules are bogus.  Who
> decided those "rules" anyway?
I'm not aware of any such rule.   I can see the value in placing such 
rules on cfgcleanup.c's worker bees which is part of the reason why I 
originally suggested this optimization (if implemented as insn movement) 
be placed somewhere other than cfgcleanup.


Jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 18:31                                     ` Jeff Law
@ 2010-07-28 18:36                                       ` Paolo Bonzini
  2010-07-29  9:07                                       ` Eric Botcazou
  1 sibling, 0 replies; 95+ messages in thread
From: Paolo Bonzini @ 2010-07-28 18:36 UTC (permalink / raw)
  To: Jeff Law
  Cc: Bernd Schmidt, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/28/2010 08:25 PM, Jeff Law wrote:
>>> I'm saying that optimizations run in cfgcleanup.c must play by the
>>> rules.
>> If your "rules" lead to an absurd result, the rules are bogus. Who
>> decided those "rules" anyway?
>
> I'm not aware of any such rule. I can see the value in placing such rules
> on cfgcleanup.c's worker bees which is part of the reason why I
> originally suggested this optimization (if implemented as insn movement)
> be placed somewhere other than cfgcleanup.

Personally I don't care about how the pass is implemented, I think it 
fits more in cfgcleanup.c anyway than in if-conversion (because it 
doesn't remove the conditional execution).

There may be another advantage in putting it in cfgcleanup; using flags 
to control head-merging may be more suitable than the relatively rigid 
pass manager.

Paolo

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 10:13                                       ` Bernd Schmidt
@ 2010-07-28 19:40                                         ` Jeff Law
  2010-07-28 20:15                                           ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-07-28 19:40 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/28/10 04:04, Bernd Schmidt wrote:
>
> As I already pointed out, it's
> a) not a kludge, but quite necessary and done like that elsewhere
> b) not avoided by pretending a reorder_insns operation is a CFG operation.
>
> Hence, your comment makes no sense.  You've never replied with anything
> of substance to either point a) or point b).
Presumably when we split the blocks we trigger a DF update, either 
immediate or deferred.  At least that's what makes sense to me (since in 
the CFG formulation there isn't any real insn movement, just block 
splitting and edge redirection).  While I see value in having the DF 
update happen automagically as a result of our CFG manipulations, I'm 
still not seeing how CFG manipulations are a cleaner way to express this 
optimization.


>
> Maybe that's the thing you're missing?  No control flow insns are ever
> touched or moved at all by the patch.  The CFG is in every case the same
> afterwards as it was before (although it may be cleaned up, but that's a
> different job done already by the other code in cfgcleanup).  That's a
> pretty strong hint that we're not dealing with a CFG operation.
I doubt that's what Eric is missing -- there's really two ways to 
formulate this optimization, moving insns without manipulating the CFG 
and strictly with CFG manipulations.  It appears to me y'all differ on 
which of the implementations is preferred.

> Your attempts at patch review for this issue all have been drive-by
> one-liners like this, which were at best insufficiently explained, and
> at worst completely nonsensical.  It has been extremely frustrating.
I can see it's frustrating for both of you -- I'd ask that everyone 
remember that you both are advocating a solution you believe in.  It's 
not personal, but just a matter of different technical opinions.

jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 19:40                                         ` Jeff Law
@ 2010-07-28 20:15                                           ` Bernd Schmidt
  2010-07-29 16:00                                             ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-28 20:15 UTC (permalink / raw)
  To: Jeff Law
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

On 07/28/2010 09:39 PM, Jeff Law wrote:
>  On 07/28/10 04:04, Bernd Schmidt wrote:
>>
>> As I already pointed out, it's
>> a) not a kludge, but quite necessary and done like that elsewhere
>> b) not avoided by pretending a reorder_insns operation is a CFG
>> operation.
>>
>> Hence, your comment makes no sense.  You've never replied with anything
>> of substance to either point a) or point b).
> Presumably when we split the blocks we trigger a DF update, either
> immediate or deferred.

All I can see in cfgcleanup or cfgrtl are manual calls to
df_set_bb_dirty to show we don't know anything about register lives in
modified blocks.  This is also done by my new pass if it modifies stuff.
 If we want to get accurate lifetime data (and we need it to verify we
can move insns), we'll then have to call df_analyze at some point as far
as I know.  We certainly don't want to do that for every change we make,
so it has to happen at the top level when every other possibility has
been exhausted.  So I don't see how we'd avoid the "kludge".  Again, see
ifcvt.c, it's precisely the same structure.

The df_analyze call is new simply because no code in cfgcleanup.c
currently needs to look at register lifetimes.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-26 13:40                         ` Paolo Bonzini
  2010-07-26 13:50                           ` Paolo Bonzini
  2010-07-26 13:56                           ` Bernd Schmidt
@ 2010-07-28 21:44                           ` Bernd Schmidt
  2010-07-29 14:31                             ` Jeff Law
  2 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-28 21:44 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/26/2010 03:40 PM, Paolo Bonzini wrote:
> - split the destination BB before the jump (into BB11 and BB12)
> - split the source BBs after the last moved instruction (into BB21 and
> BB22, BB31 and BB32, etc.)
> - redirect the jumps to BBn1 (n>=2) to go to BBn2.
> - graft BB21 between BB11 and BB12, remove all BBn1 for n>2

The funny thing is that when you look at merge_blocks_move and its
subroutines, which presumably would be used in the last step, you'll
find it's implemented using... reorder_insns!

So, the above creates several useless BB structures, moves them about,
deletes them again, performs the roughly same number of reorder_insns
and delete_insn calls as the code it would be replacing, only to end up
with the same CFG as when it started.  All in order to pretend we're
doing a CFG operation.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 16:58                                     ` Jeff Law
@ 2010-07-29  8:25                                       ` Eric Botcazou
  0 siblings, 0 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-07-29  8:25 UTC (permalink / raw)
  To: Jeff Law
  Cc: Bernd Schmidt, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

> I think this is the root of the disagreement.  I'll keep looking at it.
>
> > Please run this by another maintainer though.
>
> I'll own reviewing.

Thanks.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 18:31                                     ` Jeff Law
  2010-07-28 18:36                                       ` Paolo Bonzini
@ 2010-07-29  9:07                                       ` Eric Botcazou
  1 sibling, 0 replies; 95+ messages in thread
From: Eric Botcazou @ 2010-07-29  9:07 UTC (permalink / raw)
  To: Jeff Law
  Cc: Bernd Schmidt, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

> Can we all agree that the problem can be viewed as either cfg
> manipulations or insn movement and that what we're really arguing about
> is which is the most appropriate way to view the problem?

Yes.

> If so, then we really need to determine which implementation is the
> easiest to understand, implement & maintain.

Yes, and I think only experiments can give a definitive answer.  That's all 
I requested from Bernd.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 21:44                           ` Bernd Schmidt
@ 2010-07-29 14:31                             ` Jeff Law
  0 siblings, 0 replies; 95+ messages in thread
From: Jeff Law @ 2010-07-29 14:31 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Paolo Bonzini, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/28/10 15:30, Bernd Schmidt wrote:
> On 07/26/2010 03:40 PM, Paolo Bonzini wrote:
>> - split the destination BB before the jump (into BB11 and BB12)
>> - split the source BBs after the last moved instruction (into BB21 and
>> BB22, BB31 and BB32, etc.)
>> - redirect the jumps to BBn1 (n>=2) to go to BBn2.
>> - graft BB21 between BB11 and BB12, remove all BBn1 for n>2
> The funny thing is that when you look at merge_blocks_move and its
> subroutines, which presumably would be used in the last step, you'll
> find it's implemented using... reorder_insns!
Quite amusing.  I guess that eliminates the need to ponder an 
implementation of reorder_insns as a CFG manipulation and how that would 
affect readability of the resulting code -- I wasn't going to suggest we 
actually make that change, only ponder the impacts as a proxy for the 
main issue we're stuck on.

Jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-28 20:15                                           ` Bernd Schmidt
@ 2010-07-29 16:00                                             ` Jeff Law
  2010-07-29 16:21                                               ` Paolo Bonzini
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-07-29 16:00 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/28/10 13:54, Bernd Schmidt wrote:
> On 07/28/2010 09:39 PM, Jeff Law wrote:
>>   On 07/28/10 04:04, Bernd Schmidt wrote:
>>> As I already pointed out, it's
>>> a) not a kludge, but quite necessary and done like that elsewhere
>>> b) not avoided by pretending a reorder_insns operation is a CFG
>>> operation.
>>>
>>> Hence, your comment makes no sense.  You've never replied with anything
>>> of substance to either point a) or point b).
>> Presumably when we split the blocks we trigger a DF update, either
>> immediate or deferred.
> All I can see in cfgcleanup or cfgrtl are manual calls to
> df_set_bb_dirty to show we don't know anything about register lives in
> modified blocks.  This is also done by my new pass if it modifies stuff.
So we've got the markers to allow us to do a deferred update, but with 
nothing needing the accurate DF info in the cfgcleanup loop, we just 
ignore the markers (within the context of that loop).    Which certainly 
makes sense if nothing in cfgcleanup has needed accurate register 
lifetime until now.

I'm having an awful hard time seeing what modeling this optimization as 
a series of CFG manipulations is going to win us right now.   It's more 
complex than using reorder_insns, it doesn't keep the DF information 
up-to-date, and generates unnecessary churn in the CFG and other data 
structures.

Jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-29 16:00                                             ` Jeff Law
@ 2010-07-29 16:21                                               ` Paolo Bonzini
  2010-07-29 17:09                                                 ` Bernd Schmidt
  2010-07-30  0:55                                                 ` Steven Bosscher
  0 siblings, 2 replies; 95+ messages in thread
From: Paolo Bonzini @ 2010-07-29 16:21 UTC (permalink / raw)
  To: Jeff Law
  Cc: Bernd Schmidt, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/29/2010 05:26 PM, Jeff Law wrote:
>>>
>> All I can see in cfgcleanup or cfgrtl are manual calls to
>> df_set_bb_dirty to show we don't know anything about register lives in
>> modified blocks.  This is also done by my new pass if it modifies stuff.
>
> So we've got the markers to allow us to do a deferred update, but with
> nothing needing the accurate DF info in the cfgcleanup loop, we just
> ignore the markers (within the context of that loop).    Which certainly
> makes sense if nothing in cfgcleanup has needed accurate register
> lifetime until now.

Correct.  _Operand scan_ can be made automatic, but not dataflow analysis.

> I'm having an awful hard time seeing what modeling this optimization as
> a series of CFG manipulations is going to win us right now.   It's more
> complex than using reorder_insns, it doesn't keep the DF information
> up-to-date, and generates unnecessary churn in the CFG and other data
> structures.

Just to state it once more, I agree.

I have two remaining doubts, which were shadowed by the discussion so 
far.  Don't worry, it's small stuff. :)

I'd like to have a note to the reader that df_analyze is only invoked 
when you do crossjumping.  Please add an assert like

   if (block_was_dirty)
     {
       gcc_assert (mode & CLEANUP_CROSSJUMP);
       df_analyze ();
     }

We do not use dataflow otherwise, and it is not necessary to call it 
gratuitously.  Passes know that CFG cleanup destroys dataflow and call 
it themselves if necessary.

Second, crossjumping is now more expensive.  Does it buy much really to 
iterate it?  Something like

   mode &= ~CLEANUP_CROSSJUMP;

just before iterating may still leave it "good enough".  Steven, do you 
remember anything?  This anyway can be done separately after the patch 
goes in.

Paolo

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-29 16:21                                               ` Paolo Bonzini
@ 2010-07-29 17:09                                                 ` Bernd Schmidt
  2010-07-29 17:13                                                   ` Paolo Bonzini
  2010-07-30  0:55                                                 ` Steven Bosscher
  1 sibling, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-29 17:09 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/29/2010 06:19 PM, Paolo Bonzini wrote:
> I'd like to have a note to the reader that df_analyze is only invoked
> when you do crossjumping.  Please add an assert like
> 
>   if (block_was_dirty)
>     {
>       gcc_assert (mode & CLEANUP_CROSSJUMP);
>       df_analyze ();
>     }

Can do.

> We do not use dataflow otherwise, and it is not necessary to call it
> gratuitously.  Passes know that CFG cleanup destroys dataflow and call
> it themselves if necessary.

Then again, we probably won't lose much by calling df_analyze during
cfgcleanup if the following pass needs it anyway - right?

> Second, crossjumping is now more expensive.  Does it buy much really to
> iterate it?  Something like
> 
>   mode &= ~CLEANUP_CROSSJUMP;
> 
> just before iterating may still leave it "good enough".

A quick experiment shows that this causes many missed opportunities.
(Placed it after the run_fast_dce call).

Another issue that I stumbled across is that cfgcleanup uses the
df_bb_dirty flag for other reasons: it uses it to test whether a block
has changed previously, and retries its optimizations only if that is
the case.  We probably need a different flag to indicate "block changed
during cfgcleanup" and set it from df_bb_dirty just before calling
df_analyze.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-29 17:09                                                 ` Bernd Schmidt
@ 2010-07-29 17:13                                                   ` Paolo Bonzini
  0 siblings, 0 replies; 95+ messages in thread
From: Paolo Bonzini @ 2010-07-29 17:13 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 07/29/2010 07:00 PM, Bernd Schmidt wrote:
> On 07/29/2010 06:19 PM, Paolo Bonzini wrote:
>> I'd like to have a note to the reader that df_analyze is only invoked
>> when you do crossjumping.  Please add an assert like
>>
>>    if (block_was_dirty)
>>      {
>>        gcc_assert (mode & CLEANUP_CROSSJUMP);
>>        df_analyze ();
>>      }
>
> Can do.
>
>> We do not use dataflow otherwise, and it is not necessary to call it
>> gratuitously.  Passes know that CFG cleanup destroys dataflow and call
>> it themselves if necessary.
>
> Then again, we probably won't lose much by calling df_analyze during
> cfgcleanup if the following pass needs it anyway - right?

What I meant is I want to document that it's for a special case.  I 
wouldn't like someone to randomly remove the if just because it happens 
to fix his bug.  Certainly I didn't want to imply any further change. :-)

>> Second, crossjumping is now more expensive.  Does it buy much really to
>> iterate it?  Something like
>>
>>    mode &= ~CLEANUP_CROSSJUMP;
>>
>> just before iterating may still leave it "good enough".
>
> A quick experiment shows that this causes many missed opportunities.
> (Placed it after the run_fast_dce call).

Thanks.  Wishful thinking.

Paolo

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-27 22:05                                   ` Bernd Schmidt
  2010-07-27 22:40                                     ` Eric Botcazou
@ 2010-07-29 17:28                                     ` Jeff Law
  2010-07-29 17:43                                       ` Bernd Schmidt
  1 sibling, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-07-29 17:28 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/27/10 15:35, Bernd Schmidt wrote:
> Thanks for looking at this, Jeff.
>
> On 07/27/2010 07:09 PM, Jeff Law wrote:
>> It seems to me implementing this optimization well requires insn
>> movement which is going to affect register lifetimes.   Furthermore,
>> this optimization is sitting inside a while (changed) style loop.  At
>> the least we need to mark blocks where the life data has become
>> inaccurate so that we don't mis-optimize based on inaccurate life data.
>> I haven't thought deeply about the problem, but it may well be the case
>> that as the cfgcleanup loop iterates new opportunities may be exposed
>> and thus it'd be useful to go ahead and update the life information.
> I'm fairly certain I've observed this to happen.  Note that other
> transformations done in cfgcleanup can make life information inaccurate
> before we even try to run head merging, making it impossible to do the
> analysis.
If that occurs, we will have set block_was_dirty and changed which tells 
head merging to do nothing.  At the bottom of the loop we call 
df_analyze to get things up-to-date, then start the next iteration which 
then allows head merging its chance at any blocks which were dirty on 
the previous iteration, right?

Presumably there's no blocks marked as dirty when cfgcleanup starts :-)

Jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-29 17:28                                     ` Jeff Law
@ 2010-07-29 17:43                                       ` Bernd Schmidt
  0 siblings, 0 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-07-29 17:43 UTC (permalink / raw)
  To: Jeff Law
  Cc: Eric Botcazou, Paolo Bonzini, gcc-patches, Steven Bosscher, Jim Wilson

On 07/29/2010 07:23 PM, Jeff Law wrote:
>  On 07/27/10 15:35, Bernd Schmidt wrote:
>> Thanks for looking at this, Jeff.
>>
>> On 07/27/2010 07:09 PM, Jeff Law wrote:
>>> It seems to me implementing this optimization well requires insn
>>> movement which is going to affect register lifetimes.   Furthermore,
>>> this optimization is sitting inside a while (changed) style loop.  At
>>> the least we need to mark blocks where the life data has become
>>> inaccurate so that we don't mis-optimize based on inaccurate life data.
>>> I haven't thought deeply about the problem, but it may well be the case
>>> that as the cfgcleanup loop iterates new opportunities may be exposed
>>> and thus it'd be useful to go ahead and update the life information.
>> I'm fairly certain I've observed this to happen.  Note that other
>> transformations done in cfgcleanup can make life information inaccurate
>> before we even try to run head merging, making it impossible to do the
>> analysis.
> If that occurs, we will have set block_was_dirty and changed which tells
> head merging to do nothing.  At the bottom of the loop we call
> df_analyze to get things up-to-date, then start the next iteration which
> then allows head merging its chance at any blocks which were dirty on
> the previous iteration, right?

That's the plan.

> Presumably there's no blocks marked as dirty when cfgcleanup starts :-)

I guess there might be if the previous pass modifies things that affect
liveness.  The effect would be to postpone head-merging until the second
pass of try_cleanup_cfg.  Also, as I said other cfgcleanup
transformations can mark blocks dirty.  If there's a lot going on we'll
just have to iterate until nothing can be improved anymore.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-29 16:21                                               ` Paolo Bonzini
  2010-07-29 17:09                                                 ` Bernd Schmidt
@ 2010-07-30  0:55                                                 ` Steven Bosscher
  1 sibling, 0 replies; 95+ messages in thread
From: Steven Bosscher @ 2010-07-30  0:55 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Jeff Law, Bernd Schmidt, Eric Botcazou, gcc-patches, Jim Wilson

On Thu, Jul 29, 2010 at 6:19 PM, Paolo Bonzini <bonzini@gnu.org> wrote:

> Second, crossjumping is now more expensive.  Does it buy much really to
> iterate it?  Something like
>
>  mode &= ~CLEANUP_CROSSJUMP;
>
> just before iterating may still leave it "good enough".  Steven, do you
> remember anything?  This anyway can be done separately after the patch goes
> in.

Iterating is often helpful. Crossjumping only merges single pairs of
basic blocks per iteration, but never across a control flow statement.
If you iterate, you usually find that the previous iteration exposed
further opportunities. And crossjumping is not very expensive anyway.

<plug>
I just hope someone picks up the patches from PR20070 for pre-reload
crossjumping, that's even more helpful than iterating.
</plug>

Ciao!
Steven

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-07-20 20:43           ` Bernd Schmidt
  2010-07-22 19:47             ` Eric Botcazou
@ 2010-08-02 15:57             ` Jeff Law
  2010-08-02 15:59               ` Bernd Schmidt
  1 sibling, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-08-02 15:57 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

  On 07/20/10 14:42, Bernd Schmidt wrote:
> On 04/20/2010 12:05 AM, Eric Botcazou wrote:
>> Bernd Schmidt wrote:
>>> Here's the second part.  This one should help most architectures, not
>>> just the ones with conditional execution.  I've observed it helps on
>>> i686 and arm, with the following being a typical result:
>>>
>>>   .L18:
>>>          ldr     r1, [r2, #4]
>>>          cmp     r1, #34
>>> -       it      hi
>>> -       ldrhi   r3, .L98+12
>>> -       bhi     .L28
>>>          ldr     r3, .L98+12
>>> +       bhi     .L28
>>>          ldrb    r2, [r3, #4]    @ zero_extendqisi2
>>>          cbz     r2, .L29
>>>          ldr     r3, [r3, #8]
>> I'm uncomfortable with this patch because I'm not sure it belongs in ifcvt.c.
>> Conceptually it's a reversed form of cross jumping so it could be implemented
>> more generally in cfgcleanup.c.  And other transformations should already be
>> able to apply this kind of optimizations.  Do you have testcases?
> Here's a new patch.  A testcase is included; as I mentioned before this
> triggers quite frequently.  This is PR44374.
>
> I've moved and reused code from dead_or_predicable for a new function
> can_move_insns_across.  The tests in dead_or_predicable were still
> somewhat ad-hoc, after the patch I believe it's using the exact
> necessary and sufficient conditions for moving code.
>
> Bootstrapped and regression tested on i686-linux.  Ok?
Was this the last version of the patch?  It looks pretty good to me.

jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-02 15:57             ` Jeff Law
@ 2010-08-02 15:59               ` Bernd Schmidt
  2010-08-02 16:05                 ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-08-02 15:59 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 08/02/2010 05:56 PM, Jeff Law wrote:
>  On 07/20/10 14:42, Bernd Schmidt wrote:
>> Bootstrapped and regression tested on i686-linux.  Ok?
> Was this the last version of the patch?  It looks pretty good to me.

Yes, but some changes are necessary and I expect I'll resubmit a new one
shortly.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-02 15:59               ` Bernd Schmidt
@ 2010-08-02 16:05                 ` Jeff Law
  2010-08-02 16:15                   ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-08-02 16:05 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

  On 08/02/10 09:59, Bernd Schmidt wrote:
> On 08/02/2010 05:56 PM, Jeff Law wrote:
>>   On 07/20/10 14:42, Bernd Schmidt wrote:
>>> Bootstrapped and regression tested on i686-linux.  Ok?
>> Was this the last version of the patch?  It looks pretty good to me.
> Yes, but some changes are necessary and I expect I'll resubmit a new one
> shortly.
>
OK.  If you could highlight in a quick blurb what changed it'd be 
appreciated -- it'll save me from having to look over the whole thing 
again to figure out what changed from the previous version.

Thanks,

jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-02 16:05                 ` Jeff Law
@ 2010-08-02 16:15                   ` Bernd Schmidt
  2010-08-03 14:10                     ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-08-02 16:15 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 08/02/2010 06:05 PM, Jeff Law wrote:
>  On 08/02/10 09:59, Bernd Schmidt wrote:
>> On 08/02/2010 05:56 PM, Jeff Law wrote:
>>>   On 07/20/10 14:42, Bernd Schmidt wrote:
>>>> Bootstrapped and regression tested on i686-linux.  Ok?
>>> Was this the last version of the patch?  It looks pretty good to me.
>> Yes, but some changes are necessary and I expect I'll resubmit a new one
>> shortly.
>>
> OK.  If you could highlight in a quick blurb what changed it'd be
> appreciated -- it'll save me from having to look over the whole thing
> again to figure out what changed from the previous version.

I intend to make the change I previously mentioned to add a per-bb flag
which notes it's been modified, so that we can use that on the second
pass to decide whether or not to try to optimize it, rather than using
df_get_bb_dirty (since that gets cleared on df_analyze).  Earlier
versions of gcc had a BB_DIRTY bit in bb->flags, I'll reintroduce that
as BB_MODIFIED.  That's cheaper to test anyway.

The other change I'll make is to be slightly more careful wrt. volatile
asms, not moving memory references across them.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-02 16:15                   ` Bernd Schmidt
@ 2010-08-03 14:10                     ` Bernd Schmidt
  2010-08-03 15:16                       ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-08-03 14:10 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 1354 bytes --]

On 08/02/2010 06:15 PM, Bernd Schmidt wrote:
> On 08/02/2010 06:05 PM, Jeff Law wrote:
>> OK.  If you could highlight in a quick blurb what changed it'd be
>> appreciated -- it'll save me from having to look over the whole thing
>> again to figure out what changed from the previous version.
> 
> I intend to make the change I previously mentioned to add a per-bb flag
> which notes it's been modified, so that we can use that on the second
> pass to decide whether or not to try to optimize it, rather than using
> df_get_bb_dirty (since that gets cleared on df_analyze).  Earlier
> versions of gcc had a BB_DIRTY bit in bb->flags, I'll reintroduce that
> as BB_MODIFIED.  That's cheaper to test anyway.
> 
> The other change I'll make is to be slightly more careful wrt. volatile
> asms, not moving memory references across them.

Did that, and also fixed a crash I saw with a PPC cross compiler -
mustn't try to look at insns in EXIT_BLOCK.  Note that there's still a
call to clear_bb_flags which I think is left over from before we were
using df_get_bb_dirty and now has a purpose again.

New patch below; search for BB_MODIFIED, ASM_OPERANDS and EXIT_BLOCK_PTR
to find these changes.  Also, added the two testcases for i386 as well
and Paolo's suggestion of a gcc_assert before df_analyze.

Bootstrapped and regression tested on i686-linux.


Bernd

[-- Attachment #2: headmerge5.diff --]
[-- Type: text/plain, Size: 31024 bytes --]

	PR rtl-optimization/44374
	* basic-block.h (enum bb_flags): Add BB_MODIFIED.
	* df-core.c (df_set_bb_dirty): Set it.
	* ifcvt.c (find_memory): Remove function.
	(dead_or_predicable): Use can_move_insns_across.
	* df.h (simulate_backwards_to_point, can_move_insns_across): Declare.
	* cfgcleanup.c (block_was_dirty): New static variable.
	(try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather
	than df_get_bb_dirty.
	(try_head_merge_bb): New static function.
	(try_optimize_cfg): Call it.  Call df_analyze if block_was_dirty
	is set.
	* df-problems.c: Include "target.h"
	(df_simulate_find_uses): New static function.
	(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
	(find_memory, find_memory_stores): New static functions.
	(simulate_backwards_to_point, can_move_insns_across): New functions.
	* Makefile.in (df-problems.o): Update dependencies.

testsuite/
	PR rtl-optimization/44374
	* gcc.target/arm/headmerge-1.c: New test.
	* gcc.target/arm/headmerge-2.c: New test.
	* gcc.target/i386/headmerge-1.c: New test.
	* gcc.target/i386/headmerge-2.c: New test.

Index: df-core.c
===================================================================
--- df-core.c	(revision 162823)
+++ df-core.c	(working copy)
@@ -1413,6 +1413,7 @@ df_get_bb_dirty (basic_block bb)
 void
 df_set_bb_dirty (basic_block bb)
 {
+  bb->flags |= BB_MODIFIED;
   if (df)
     {
       int p;
Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 162823)
+++ ifcvt.c	(working copy)
@@ -101,7 +101,6 @@ static int noce_find_if_block (basic_blo
 static int cond_exec_find_if_block (ce_if_block_t *);
 static int find_if_case_1 (basic_block, edge, edge);
 static int find_if_case_2 (basic_block, edge, edge);
-static int find_memory (rtx *, void *);
 static int dead_or_predicable (basic_block, basic_block, basic_block,
 			       basic_block, int);
 static void noce_emit_move_insn (rtx, rtx);
@@ -3875,15 +3874,6 @@ find_if_case_2 (basic_block test_bb, edg
   return TRUE;
 }
 
-/* A subroutine of dead_or_predicable called through for_each_rtx.
-   Return 1 if a memory is found.  */
-
-static int
-find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
-  return MEM_P (*px);
-}
-
 /* Used by the code above to perform the actual rtl transformations.
    Return TRUE if successful.
 
@@ -3985,131 +3975,38 @@ dead_or_predicable (basic_block test_bb,
       earliest = jump;
     }
 #endif
+  /* If we allocated new pseudos (e.g. in the conditional move
+     expander called from noce_emit_cmove), we must resize the
+     array first.  */
+  if (max_regno < max_reg_num ())
+    max_regno = max_reg_num ();
+
   /* Try the NCE path if the CE path did not result in any changes.  */
   if (n_validated_changes == 0)
     {
+      rtx cond;
+      regset live;
+      bool success;
+
       /* In the non-conditional execution case, we have to verify that there
 	 are no trapping operations, no calls, no references to memory, and
 	 that any registers modified are dead at the branch site.  */
 
-      rtx insn, cond, prev;
-      bitmap merge_set, merge_set_noclobber, test_live, test_set;
-      unsigned i, fail = 0;
-      bitmap_iterator bi;
-
-      /* Check for no calls or trapping operations.  */
-      for (insn = head; ; insn = NEXT_INSN (insn))
-	{
-	  if (CALL_P (insn))
-	    return FALSE;
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      if (may_trap_p (PATTERN (insn)))
-		return FALSE;
-
-	      /* ??? Even non-trapping memories such as stack frame
-		 references must be avoided.  For stores, we collect
-		 no lifetime info; for reads, we'd have to assert
-		 true_dependence false against every store in the
-		 TEST range.  */
-	      if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
-		return FALSE;
-	    }
-	  if (insn == end)
-	    break;
-	}
-
-      if (! any_condjump_p (jump))
+      if (!any_condjump_p (jump))
 	return FALSE;
 
       /* Find the extent of the conditional.  */
       cond = noce_get_condition (jump, &earliest, false);
-      if (! cond)
+      if (!cond)
 	return FALSE;
 
-      /* Collect:
-	   MERGE_SET = set of registers set in MERGE_BB
-	   MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers
-	     that are really set, not just clobbered.
-	   TEST_LIVE = set of registers live at EARLIEST
-	   TEST_SET = set of registers set between EARLIEST and the
-	     end of the block.  */
-
-      merge_set = BITMAP_ALLOC (&reg_obstack);
-      merge_set_noclobber = BITMAP_ALLOC (&reg_obstack);
-      test_live = BITMAP_ALLOC (&reg_obstack);
-      test_set = BITMAP_ALLOC (&reg_obstack);
-
-      /* ??? bb->local_set is only valid during calculate_global_regs_live,
-	 so we must recompute usage for MERGE_BB.  Not so bad, I suppose,
-         since we've already asserted that MERGE_BB is small.  */
-      /* If we allocated new pseudos (e.g. in the conditional move
-	 expander called from noce_emit_cmove), we must resize the
-	 array first.  */
-      if (max_regno < max_reg_num ())
-	max_regno = max_reg_num ();
-
-      FOR_BB_INSNS (merge_bb, insn)
-	{
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, merge_set);
-	      df_simulate_find_noclobber_defs (insn, merge_set_noclobber);
-	    }
-	}
-
-      /* For small register class machines, don't lengthen lifetimes of
-	 hard registers before reload.  */
-      if (! reload_completed
-	  && targetm.small_register_classes_for_mode_p (VOIDmode))
-	{
-          EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi)
-	    {
-	      if (i < FIRST_PSEUDO_REGISTER
-		  && ! fixed_regs[i]
-		  && ! global_regs[i])
-		fail = 1;
-	    }
-	}
-
-      /* For TEST, we're interested in a range of insns, not a whole block.
-	 Moreover, we're interested in the insns live from OTHER_BB.  */
-
-      /* The loop below takes the set of live registers
-         after JUMP, and calculates the live set before EARLIEST. */
-      bitmap_copy (test_live, df_get_live_in (other_bb));
-      df_simulate_initialize_backwards (test_bb, test_live);
-      for (insn = jump; ; insn = prev)
-	{
-	  if (INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, test_set);
-	      df_simulate_one_insn_backwards (test_bb, insn, test_live);
-	    }
-	  prev = PREV_INSN (insn);
-	  if (insn == earliest)
-	    break;
-	}
-
-      /* We can perform the transformation if
-	   MERGE_SET_NOCLOBBER & TEST_SET
-	 and
-	   MERGE_SET & TEST_LIVE)
-	 and
-	   TEST_SET & DF_LIVE_IN (merge_bb)
-	 are empty.  */
-
-      if (bitmap_intersect_p (test_set, merge_set_noclobber)
-	  || bitmap_intersect_p (test_live, merge_set)
-	  || bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
-	fail = 1;
-
-      BITMAP_FREE (merge_set_noclobber);
-      BITMAP_FREE (merge_set);
-      BITMAP_FREE (test_live);
-      BITMAP_FREE (test_set);
-
-      if (fail)
+      live = BITMAP_ALLOC (&reg_obstack);
+      simulate_backwards_to_point (merge_bb, live, end);
+      success = can_move_insns_across (head, end, earliest, jump,
+				       merge_bb, live,
+				       df_get_live_in (other_bb), NULL);
+      BITMAP_FREE (live);
+      if (!success)
 	return FALSE;
     }
 
Index: df.h
===================================================================
--- df.h	(revision 162823)
+++ df.h	(working copy)
@@ -985,7 +985,9 @@ extern void df_simulate_one_insn_backwar
 extern void df_simulate_finalize_backwards (basic_block, bitmap);
 extern void df_simulate_initialize_forwards (basic_block, bitmap);
 extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap);
-
+extern void simulate_backwards_to_point (basic_block, regset, rtx);
+extern bool can_move_insns_across (rtx, rtx, rtx, rtx, basic_block, regset,
+				   regset, rtx *);
 /* Functions defined in df-scan.c.  */
 
 extern void df_scan_alloc (bitmap);
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 162823)
+++ cfgcleanup.c	(working copy)
@@ -66,6 +66,10 @@ static bool first_pass;
 /* Set to true if crossjumps occured in the latest run of try_optimize_cfg.  */
 static bool crossjumps_occured;
 
+/* Set to true if we couldn't run an optimization due to stale liveness
+   information; we should run df_analyze to enable more opportunities.  */
+static bool block_was_dirty;
+
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
@@ -432,7 +436,7 @@ try_forward_edges (int mode, basic_block
       int counter, goto_locus;
       bool threaded = false;
       int nthreaded_edges = 0;
-      bool may_thread = first_pass | df_get_bb_dirty (b);
+      bool may_thread = first_pass || (b->flags & BB_MODIFIED) != 0;
 
       /* Skip complex edges because we don't know how to update them.
 
@@ -467,7 +471,7 @@ try_forward_edges (int mode, basic_block
 	{
 	  basic_block new_target = NULL;
 	  bool new_target_threaded = false;
-	  may_thread |= df_get_bb_dirty (target);
+	  may_thread |= (target->flags & BB_MODIFIED) != 0;
 
 	  if (FORWARDER_BLOCK_P (target)
 	      && !(single_succ_edge (target)->flags & EDGE_CROSSING)
@@ -1857,8 +1861,8 @@ try_crossjump_bb (int mode, basic_block 
 	  /* If nothing changed since the last attempt, there is nothing
 	     we can do.  */
 	  if (!first_pass
-	      && (!(df_get_bb_dirty (e->src))
-		  && !(df_get_bb_dirty (fallthru->src))))
+	      && !((e->src->flags & BB_MODIFIED)
+		   || (fallthru->src->flags & BB_MODIFIED)))
 	    continue;
 
 	  if (try_crossjump_to_edge (mode, e, fallthru))
@@ -1907,8 +1911,8 @@ try_crossjump_bb (int mode, basic_block 
 	  /* If nothing changed since the last attempt, there is nothing
 	     we can do.  */
 	  if (!first_pass
-	      && (!(df_get_bb_dirty (e->src))
-		  && !(df_get_bb_dirty (e2->src))))
+	      && !((e->src->flags & BB_MODIFIED)
+		   || (e2->src->flags & BB_MODIFIED)))
 	    continue;
 
 	  if (try_crossjump_to_edge (mode, e, e2))
@@ -1927,6 +1931,265 @@ try_crossjump_bb (int mode, basic_block 
   return changed;
 }
 
+/* Search the successors of BB for common insn sequences.  When found,
+   share code between them by moving it across the basic block
+   boundary.  Return true if any changes made.  */
+
+static bool
+try_head_merge_bb (basic_block bb)
+{
+  basic_block final_dest_bb = NULL;
+  int max_match = INT_MAX;
+  edge e0;
+  rtx *headptr, *currptr;
+  bool changed, moveall;
+  unsigned ix;
+  rtx e0_last_head, cond, move_before;
+  unsigned nedges = EDGE_COUNT (bb->succs);
+  rtx jump = BB_END (bb);
+  regset live, live_union;
+
+  /* Nothing to do if there is not at least two outgoing edges.  */
+  if (nedges < 2)
+    return false;
+
+  /* Don't crossjump if this block ends in a computed jump,
+     unless we are optimizing for size.  */
+  if (optimize_bb_for_size_p (bb)
+      && bb != EXIT_BLOCK_PTR
+      && computed_jump_p (BB_END (bb)))
+    return false;
+
+  cond = get_condition (jump, &move_before, true, false);
+  if (cond == NULL_RTX)
+    move_before = jump;
+
+  for (ix = 0; ix < nedges; ix++)
+    if (EDGE_SUCC (bb, ix)->dest == EXIT_BLOCK_PTR)
+      return false;
+
+  for (ix = 0; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      basic_block other_bb = e->dest;
+
+      if (df_get_bb_dirty (other_bb))
+	{
+	  block_was_dirty = true;
+	  return false;
+	}
+
+      if (e->flags & EDGE_ABNORMAL)
+	return false;
+
+      /* Normally, all destination blocks must only be reachable from this
+	 block, i.e. they must have one incoming edge.
+
+	 There is one special case we can handle, that of multiple consecutive
+	 jumps where the first jumps to one of the targets of the second jump.
+	 This happens frequently in switch statements for default labels.
+	 The structure is as follows:
+	 FINAL_DEST_BB
+	 ....
+	 if (cond) jump A;
+	 fall through
+	 BB
+	 jump with targets A, B, C, D...
+	 A
+	 has two incoming edges, from FINAL_DEST_BB and BB
+
+	 In this case, we can try to move the insns through BB and into
+	 FINAL_DEST_BB.  */
+      if (EDGE_COUNT (other_bb->preds) != 1)
+	{
+	  edge incoming_edge, incoming_bb_other_edge;
+	  edge_iterator ei;
+
+	  if (final_dest_bb != NULL
+	      || EDGE_COUNT (other_bb->preds) != 2)
+	    return false;
+
+	  /* We must be able to move the insns across the whole block.  */
+	  move_before = BB_HEAD (bb);
+	  while (!NONDEBUG_INSN_P (move_before))
+	    move_before = NEXT_INSN (move_before);
+
+	  FOR_EACH_EDGE (incoming_edge, ei, bb->preds)
+	    if (incoming_edge->dest == bb)
+	      break;
+	  final_dest_bb = incoming_edge->src;
+	  if (EDGE_COUNT (final_dest_bb->succs) != 2)
+	    return false;
+	  FOR_EACH_EDGE (incoming_bb_other_edge, ei, final_dest_bb->succs)
+	    if (incoming_bb_other_edge != incoming_edge)
+	      break;
+	  if (incoming_bb_other_edge->dest != other_bb)
+	    return false;
+	}
+    }
+
+  e0 = EDGE_SUCC (bb, 0);
+  e0_last_head = NULL_RTX;
+  changed = false;
+
+  for (ix = 1; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      rtx e0_last, e_last;
+      int nmatch;
+
+      nmatch = flow_find_head_matching_sequence (e0->dest, e->dest,
+						 &e0_last, &e_last, 0);
+      if (nmatch == 0)
+	return false;
+
+      if (nmatch < max_match)
+	{
+	  max_match = nmatch;
+	  e0_last_head = e0_last;
+	}
+    }
+
+  /* If we matched an entire block, we probably have to avoid moving the
+     last insn.  */
+  if (max_match > 0
+      && e0_last_head == BB_END (e0->dest)
+      && (find_reg_note (e0_last_head, REG_EH_REGION, 0)
+	  || control_flow_insn_p (e0_last_head)))
+    {
+      max_match--;
+      if (max_match == 0)
+	return false;
+      do
+	e0_last_head = prev_real_insn (e0_last_head);
+      while (DEBUG_INSN_P (e0_last_head));
+    }
+
+  if (max_match == 0)
+    return false;
+
+  /* We must find a union of the live registers at each of the end points.  */
+  live = BITMAP_ALLOC (NULL);
+  live_union = BITMAP_ALLOC (NULL);
+
+  currptr = XNEWVEC (rtx, nedges);
+  headptr = XNEWVEC (rtx, nedges);
+
+  for (ix = 0; ix < nedges; ix++)
+    {
+      int j;
+      basic_block merge_bb = EDGE_SUCC (bb, ix)->dest;
+      rtx head = BB_HEAD (merge_bb);
+
+      while (!NONDEBUG_INSN_P (head))
+	head = NEXT_INSN (head);
+      headptr[ix] = head;
+      currptr[ix] = head;
+
+      /* Compute the end point and live information  */
+      for (j = 1; j < max_match; j++)
+	do
+	  head = NEXT_INSN (head);
+	while (!NONDEBUG_INSN_P (head));
+      simulate_backwards_to_point (merge_bb, live, head);
+      IOR_REG_SET (live_union, live);
+    }
+
+  /* If we're moving across two blocks, verify the validity of the
+     first move, then adjust the target and let the loop below deal
+     with the final move.  */
+  if (final_dest_bb != NULL)
+    {
+      rtx move_upto;
+
+      moveall = can_move_insns_across (currptr[0], e0_last_head, move_before,
+				       jump, e0->dest, live_union,
+				       NULL, &move_upto);
+      if (!moveall)
+	e0_last_head = move_upto;
+      if (e0_last_head == NULL_RTX)
+	goto out;
+
+      jump = BB_END (final_dest_bb);
+      cond = get_condition (jump, &move_before, true, false);
+      if (cond == NULL_RTX)
+	move_before = jump;
+    }
+
+  do
+    {
+      rtx move_upto;
+      moveall = can_move_insns_across (currptr[0], e0_last_head,
+				       move_before, jump, e0->dest, live_union,
+				       NULL, &move_upto);
+      if (!moveall && move_upto == NULL_RTX)
+	{
+	  if (jump == move_before)
+	    break;
+
+	  /* Try again, using a different insertion point.  */
+	  move_before = jump;
+	  continue;
+	}
+
+      if (final_dest_bb && !moveall)
+	/* We haven't checked whether a partial move would be OK for the first
+	   move, so we have to fail this case.  */
+	break;
+
+      changed = true;
+      for (;;)
+	{
+	  if (currptr[0] == move_upto)
+	    break;
+	  for (ix = 0; ix < nedges; ix++)
+	    {
+	      rtx curr = currptr[ix];
+	      do
+		curr = NEXT_INSN (curr);
+	      while (!NONDEBUG_INSN_P (curr));
+	      currptr[ix] = curr;
+	    }
+	}
+
+      reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before));
+      df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest);
+      if (final_dest_bb != NULL)
+	df_set_bb_dirty (final_dest_bb);
+      df_set_bb_dirty (bb);
+      for (ix = 1; ix < nedges; ix++)
+	{
+	  df_set_bb_dirty (EDGE_SUCC (bb, ix)->dest);
+	  delete_insn_chain (headptr[ix], currptr[ix], false);
+	}
+      if (!moveall)
+	{
+	  if (jump == move_before)
+	    break;
+
+	  /* Try again, using a different insertion point.  */
+	  move_before = jump;
+	  for (ix = 0; ix < nedges; ix++)
+	    {
+	      rtx curr = currptr[ix];
+	      do
+		curr = NEXT_INSN (curr);
+	      while (!NONDEBUG_INSN_P (curr));
+	      currptr[ix] = headptr[ix] = curr;
+	    }
+	}
+    }
+  while (!moveall);
+
+ out:
+  free (currptr);
+  free (headptr);
+
+  crossjumps_occured |= changed;
+
+  return changed;
+}
+
 /* Return true if BB contains just bb note, or bb note followed
    by only DEBUG_INSNs.  */
 
@@ -1972,6 +2235,7 @@ try_optimize_cfg (int mode)
 	 one predecessor, they may be combined.  */
       do
 	{
+	  block_was_dirty = false;
 	  changed = false;
 	  iterations++;
 
@@ -2170,6 +2434,13 @@ try_optimize_cfg (int mode)
 		  && try_crossjump_bb (mode, b))
 		changed_here = true;
 
+	      if ((mode & CLEANUP_CROSSJUMP)
+		  /* This can lengthen register lifetimes.  Do it only after
+		     reload.  */
+		  && reload_completed
+		  && try_head_merge_bb (b))
+		changed_here = true;
+
 	      /* Don't get confused by the index shift caused by
 		 deleting blocks.  */
 	      if (!changed_here)
@@ -2182,6 +2453,13 @@ try_optimize_cfg (int mode)
 	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
 	    changed = true;
 
+	  if (block_was_dirty)
+	    {
+	      /* This should only be set by head-merging.  */
+	      gcc_assert (mode & CLEANUP_CROSSJUMP);
+	      df_analyze ();
+	    }
+
 #ifdef ENABLE_CHECKING
 	  if (changed)
 	    verify_flow_info ();
@@ -2366,8 +2644,7 @@ cleanup_cfg (int mode)
 	  if ((mode & CLEANUP_EXPENSIVE) && !reload_completed
 	      && !delete_trivially_dead_insns (get_insns (), max_reg_num ()))
 	    break;
-	  else if ((mode & CLEANUP_CROSSJUMP)
-		   && crossjumps_occured)
+	  if ((mode & CLEANUP_CROSSJUMP) && crossjumps_occured)
 	    run_fast_dce ();
 	}
       else
Index: df-problems.c
===================================================================
--- df-problems.c	(revision 162823)
+++ df-problems.c	(working copy)
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  
 #include "basic-block.h"
 #include "sbitmap.h"
 #include "bitmap.h"
+#include "target.h"
 #include "timevar.h"
 #include "df.h"
 #include "except.h"
@@ -3500,6 +3501,27 @@ df_simulate_find_defs (rtx insn, bitmap 
     }
 }
 
+/* Find the set of uses for INSN.  This includes partial defs.  */
+
+static void
+df_simulate_find_uses (rtx insn, bitmap uses)
+{
+  df_ref *rec;
+  unsigned int uid = INSN_UID (insn);
+
+  for (rec = DF_INSN_UID_DEFS (uid); *rec; rec++)
+    {
+      df_ref def = *rec;
+      if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
+	bitmap_set_bit (uses, DF_REF_REGNO (def));
+    }
+  for (rec = DF_INSN_UID_USES (uid); *rec; rec++)
+    {
+      df_ref use = *rec;
+      bitmap_set_bit (uses, DF_REF_REGNO (use));
+    }
+}
+
 /* Find the set of real DEFs, which are not clobbers, for INSN.  */
 
 void
@@ -3727,7 +3749,301 @@ df_simulate_one_insn_forwards (basic_blo
     }
   df_simulate_fixup_sets (bb, live);
 }
+\f
+/* Used by the next two functions to encode information about the
+   memory references we found.  */
+#define MEMREF_NORMAL 1
+#define MEMREF_VOLATILE 2
+
+/* A subroutine of can_move_insns_across called through for_each_rtx.
+   Return either MEMREF_NORMAL or MEMREF_VOLATILE if a memory is found.  */
+
+static int
+find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+  rtx x = *px;
+
+  if (GET_CODE (x) == ASM_OPERANDS && MEM_VOLATILE_P (x))
+    return MEMREF_VOLATILE;
+
+  if (!MEM_P (x))
+    return 0;
+  if (MEM_VOLATILE_P (x))
+    return MEMREF_VOLATILE;
+  if (MEM_READONLY_P (x))
+    return 0;
+
+  return MEMREF_NORMAL;
+}
+
+/* A subroutine of can_move_insns_across called through note_stores.
+   DATA points to an integer in which we set either the bit for
+   MEMREF_NORMAL or the bit for MEMREF_VOLATILE if we find a MEM
+   of either kind.  */
+
+static void
+find_memory_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
+		    void *data ATTRIBUTE_UNUSED)
+{
+  int *pflags = (int *)data;
+  if (GET_CODE (x) == SUBREG)
+    x = XEXP (x, 0);
+  /* Treat stores to SP as stores to memory, this will prevent problems
+     when there are references to the stack frame.  */
+  if (x == stack_pointer_rtx)
+    *pflags |= MEMREF_VOLATILE;
+  if (!MEM_P (x))
+    return;
+  *pflags |= MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : MEMREF_NORMAL;
+}
+
+/* Scan BB backwards, using df_simulate functions to keep track of
+   lifetimes, up to insn POINT.  The result is stored in LIVE.  */
+
+void
+simulate_backwards_to_point (basic_block bb, regset live, rtx point)
+{
+  rtx insn;
+  bitmap_copy (live, df_get_live_out (bb));
+  df_simulate_initialize_backwards (bb, live);
+
+  /* Scan and update life information until we reach the point we're
+     interested in.  */
+  for (insn = BB_END (bb); insn != point; insn = PREV_INSN (insn))
+    df_simulate_one_insn_backwards (bb, insn, live);
+}
+
+/* Return true if it is safe to move a group of insns, described by
+   the range FROM to TO, backwards across another group of insns,
+   described by ACROSS_FROM to ACROSS_TO.  It is assumed that there
+   are no insns between ACROSS_TO and FROM, but they may be in
+   different basic blocks; MERGE_BB is the block from which the
+   insns will be moved.  The caller must pass in a regset MERGE_LIVE
+   which specifies the registers live after TO.
+
+   This function may be called in one of two cases: either we try to
+   move identical instructions from all successor blocks into their
+   predecessor, or we try to move from only one successor block.  If
+   OTHER_BRANCH_LIVE is nonnull, it indicates that we're dealing with
+   the second case.  It should contain a set of registers live at the
+   end of ACROSS_TO which must not be clobbered by moving the insns.
+   In that case, we're also more careful about moving memory references
+   and trapping insns.
+
+   We return false if it is not safe to move the entire group, but it
+   may still be possible to move a subgroup.  PMOVE_UPTO, if nonnull,
+   is set to point at the last moveable insn in such a case.  */
+
+bool
+can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to,
+		       basic_block merge_bb, regset merge_live,
+		       regset other_branch_live, rtx *pmove_upto)
+{
+  rtx insn, next, max_to;
+  bitmap merge_set, merge_use, local_merge_live;
+  bitmap test_set, test_use;
+  unsigned i, fail = 0;
+  bitmap_iterator bi;
+  int memrefs_in_across = 0;
+  int mem_sets_in_across = 0;
+  bool trapping_insns_in_across = false;
+
+  if (pmove_upto != NULL)
+    *pmove_upto = NULL_RTX;
+
+  /* Find real bounds, ignoring debug insns.  */
+  while (!NONDEBUG_INSN_P (from) && from != to)
+    from = NEXT_INSN (from);
+  while (!NONDEBUG_INSN_P (to) && from != to)
+    to = PREV_INSN (to);
+
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  memrefs_in_across |= for_each_rtx (&PATTERN (insn), find_memory,
+					     NULL);
+	  note_stores (PATTERN (insn), find_memory_stores,
+		       &mem_sets_in_across);
+	  /* This is used just to find sets of the stack pointer.  */
+	  memrefs_in_across |= mem_sets_in_across;
+	  trapping_insns_in_across |= may_trap_p (PATTERN (insn));
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Collect:
+     MERGE_SET = set of registers set in MERGE_BB
+     MERGE_USE = set of registers used in MERGE_BB and live at its top
+     MERGE_LIVE = set of registers live at the point inside the MERGE
+     range that we've reached during scanning
+     TEST_SET = set of registers set between ACROSS_FROM and ACROSS_TO.
+     TEST_USE = set of registers used between ACROSS_FROM and ACROSS_TO,
+     and live before ACROSS_FROM.  */
+
+  merge_set = BITMAP_ALLOC (&reg_obstack);
+  merge_use = BITMAP_ALLOC (&reg_obstack);
+  local_merge_live = BITMAP_ALLOC (&reg_obstack);
+  test_set = BITMAP_ALLOC (&reg_obstack);
+  test_use = BITMAP_ALLOC (&reg_obstack);
+
+  /* Compute the set of registers set and used in the ACROSS range.  */
+  if (other_branch_live != NULL)
+    bitmap_copy (test_use, other_branch_live);
+  df_simulate_initialize_backwards (merge_bb, test_use);
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  df_simulate_find_defs (insn, test_set);
+	  df_simulate_defs (insn, test_use);
+	  df_simulate_uses (insn, test_use);
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Compute an upper bound for the number of insns moved, by finding
+     the first insn in MERGE that sets a register in TEST_USE, or uses
+     a register in TEST_SET.  We also check for calls, trapping operations,
+     and memory references.  */
+  max_to = NULL_RTX;
+  for (insn = from; ; insn = next)
+    {
+      if (CALL_P (insn))
+	break;
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (may_trap_p (PATTERN (insn))
+	      && (trapping_insns_in_across || other_branch_live != NULL))
+	    break;
+
+	  /* We cannot move memory stores past each other, or move memory
+	     reads past stores, at least not without tracking them and
+	     calling true_dependence on every pair.
+
+	     If there is no other branch and no memory references or
+	     sets in the ACROSS range, we can move memory references
+	     freely, even volatile ones.
+
+	     Otherwise, the rules are as follows: volatile memory
+	     references and stores can't be moved at all, and any type
+	     of memory reference can't be moved if there are volatile
+	     accesses or stores in the ACROSS range.  That leaves
+	     normal reads, which can be moved, as the trapping case is
+	     dealt with elsewhere.  */
+	  if (other_branch_live != NULL || memrefs_in_across != 0)
+	    {
+	      int mem_ref_flags = 0;
+	      int mem_set_flags = 0;
+	      note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags);
+	      mem_ref_flags = for_each_rtx (&PATTERN (insn), find_memory,
+					    NULL);
+	      /* Catch sets of the stack pointer.  */
+	      mem_ref_flags |= mem_set_flags;
+
+	      if ((mem_ref_flags | mem_set_flags) & MEMREF_VOLATILE)
+		break;
+	      if ((memrefs_in_across & MEMREF_VOLATILE) && mem_ref_flags != 0)
+		break;
+	      if (mem_set_flags != 0
+		  || (mem_sets_in_across != 0 && mem_ref_flags != 0))
+		break;
+	    }
+	  df_simulate_find_uses (insn, merge_use);
+	  /* We're only interested in uses which use a value live at
+	     the top, not one previously set in this block.  */
+	  bitmap_and_compl_into (merge_use, merge_set);
+	  df_simulate_find_defs (insn, merge_set);
+	  if (bitmap_intersect_p (merge_set, test_use)
+	      || bitmap_intersect_p (merge_use, test_set))
+	    break;
+	  max_to = insn;
+	}
+      next = NEXT_INSN (insn);
+      if (insn == to)
+	break;
+    }
+  if (max_to != to)
+    fail = 1;
+
+  if (max_to == NULL_RTX || (fail && pmove_upto == NULL))
+    goto out;
+
+  /* Now, lower this upper bound by also taking into account that
+     a range of insns moved across ACROSS must not leave a register
+     live at the end that will be clobbered in ACROSS.  We need to
+     find a point where TEST_SET & LIVE == 0.
+
+     Insns in the MERGE range that set registers which are also set
+     in the ACROSS range may still be moved as long as we also move
+     later insns which use the results of the set, and make the
+     register dead again.  This is verified by the condition stated
+     above.  We only need to test it for registers that are set in
+     the moved region.
 
+     MERGE_LIVE is provided by the caller and holds live registers after
+     TO.  */
+  bitmap_copy (local_merge_live, merge_live);
+  for (insn = to; insn != max_to; insn = PREV_INSN (insn))
+    df_simulate_one_insn_backwards (merge_bb, insn, local_merge_live);
+
+  /* We're not interested in registers that aren't set in the moved
+     region at all.  */
+  bitmap_and_into (local_merge_live, merge_set);
+  for (;;)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (!bitmap_intersect_p (test_set, local_merge_live))
+	    {
+	      max_to = insn;
+	      break;
+	    }
+
+	  df_simulate_one_insn_backwards (merge_bb, insn,
+					  local_merge_live);
+	}
+      if (insn == from)
+	{
+	  fail = 1;
+	  goto out;
+	}
+      insn = PREV_INSN (insn);
+    }
+
+  if (max_to != to)
+    fail = 1;
+
+  if (pmove_upto)
+    *pmove_upto = max_to;
+
+  /* For small register class machines, don't lengthen lifetimes of
+     hard registers before reload.  */
+  if (! reload_completed
+      && targetm.small_register_classes_for_mode_p (VOIDmode))
+    {
+      EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
+	{
+	  if (i < FIRST_PSEUDO_REGISTER
+	      && ! fixed_regs[i]
+	      && ! global_regs[i])
+	    fail = 1;
+	}
+    }
+
+ out:
+  BITMAP_FREE (merge_set);
+  BITMAP_FREE (merge_use);
+  BITMAP_FREE (local_merge_live);
+  BITMAP_FREE (test_set);
+  BITMAP_FREE (test_use);
+
+  return !fail;
+}
 
 \f
 /*----------------------------------------------------------------------------
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 162823)
+++ Makefile.in	(working copy)
@@ -3160,7 +3160,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYST
 df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
-   $(TM_P_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
+   $(TM_P_H) $(TARGET_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
 df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 162823)
+++ basic-block.h	(working copy)
@@ -246,7 +246,12 @@ enum bb_flags
 
   /* Set on blocks that cannot be threaded through.
      Only used in cfgcleanup.c.  */
-  BB_NONTHREADABLE_BLOCK = 1 << 11
+  BB_NONTHREADABLE_BLOCK = 1 << 11,
+
+  /* Set on blocks that were modified in some way.  Used during
+     try_optimize_cfg to determine when to retry optimizations during
+     the second and later passes.  It is set in df_set_bb_dirty.  */
+  BB_MODIFIED = 1 << 12
 };
 
 /* Dummy flag for convenience in the hot/cold partitioning code.  */

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-03 14:10                     ` Bernd Schmidt
@ 2010-08-03 15:16                       ` Jeff Law
  2010-08-03 15:31                         ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-08-03 15:16 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

  On 08/03/10 08:09, Bernd Schmidt wrote:
> On 08/02/2010 06:15 PM, Bernd Schmidt wrote:
>> On 08/02/2010 06:05 PM, Jeff Law wrote:
>>> OK.  If you could highlight in a quick blurb what changed it'd be
>>> appreciated -- it'll save me from having to look over the whole thing
>>> again to figure out what changed from the previous version.
>> I intend to make the change I previously mentioned to add a per-bb flag
>> which notes it's been modified, so that we can use that on the second
>> pass to decide whether or not to try to optimize it, rather than using
>> df_get_bb_dirty (since that gets cleared on df_analyze).  Earlier
>> versions of gcc had a BB_DIRTY bit in bb->flags, I'll reintroduce that
>> as BB_MODIFIED.  That's cheaper to test anyway.
>>
>> The other change I'll make is to be slightly more careful wrt. volatile
>> asms, not moving memory references across them.
> Did that, and also fixed a crash I saw with a PPC cross compiler -
> mustn't try to look at insns in EXIT_BLOCK.  Note that there's still a
> call to clear_bb_flags which I think is left over from before we were
> using df_get_bb_dirty and now has a purpose again.
>
> New patch below; search for BB_MODIFIED, ASM_OPERANDS and EXIT_BLOCK_PTR
> to find these changes.  Also, added the two testcases for i386 as well
> and Paolo's suggestion of a gcc_assert before df_analyze.
>
> Bootstrapped and regression tested on i686-linux.
>
>
> Bernd
The testsuite changes weren't attached to the patch.

I guess one could ask whether or not we really need to carry a bit in 
the BB structure if its only use is head merging -- we could just as 
easily have a bitmap indicating what blocks changed that we allocate & 
free within cfgcleanup.

If we go with the BB_MODIFIED bit in the BB structure, then I would 
suggest documenting BB_MODIFIED slightly better; you describe how it's 
currently used, but I think it's just as important to describe why one 
would use it rather than check if the block is dirty.

I don't see anything which ever clears BB_MODIFIED, so that bit is going 
to accumulate in the block over time.  I don't think it's a correctness 
issue right now, but it could be one day.  With its "like bb_dirty, but 
not quite" semantics we ought to have some clarity on when it's cleared 
so that nobody gets surprised in the future.

jeff




^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-03 15:16                       ` Jeff Law
@ 2010-08-03 15:31                         ` Bernd Schmidt
  2010-08-03 17:13                           ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-08-03 15:31 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 2221 bytes --]

On 08/03/2010 05:15 PM, Jeff Law wrote:
>  On 08/03/10 08:09, Bernd Schmidt wrote:
>> On 08/02/2010 06:15 PM, Bernd Schmidt wrote:
>>> On 08/02/2010 06:05 PM, Jeff Law wrote:
>>>> OK.  If you could highlight in a quick blurb what changed it'd be
>>>> appreciated -- it'll save me from having to look over the whole thing
>>>> again to figure out what changed from the previous version.
>>> I intend to make the change I previously mentioned to add a per-bb flag
>>> which notes it's been modified, so that we can use that on the second
>>> pass to decide whether or not to try to optimize it, rather than using
>>> df_get_bb_dirty (since that gets cleared on df_analyze).  Earlier
>>> versions of gcc had a BB_DIRTY bit in bb->flags, I'll reintroduce that
>>> as BB_MODIFIED.  That's cheaper to test anyway.
>>>
>>> The other change I'll make is to be slightly more careful wrt. volatile
>>> asms, not moving memory references across them.
>> Did that, and also fixed a crash I saw with a PPC cross compiler -
>> mustn't try to look at insns in EXIT_BLOCK.  Note that there's still a
>> call to clear_bb_flags which I think is left over from before we were
>> using df_get_bb_dirty and now has a purpose again.
>>
>> New patch below; search for BB_MODIFIED, ASM_OPERANDS and EXIT_BLOCK_PTR
>> to find these changes.  Also, added the two testcases for i386 as well
>> and Paolo's suggestion of a gcc_assert before df_analyze.
>>
>> Bootstrapped and regression tested on i686-linux.
>>
>>
>> Bernd
> The testsuite changes weren't attached to the patch.

quilt vs svn problem; now svn added them.

> I guess one could ask whether or not we really need to carry a bit in
> the BB structure if its only use is head merging -- we could just as
> easily have a bitmap indicating what blocks changed that we allocate &
> free within cfgcleanup.

We have a flags word anyway, so I think using that is the simplest way.
 That way we also don't have to worry about BB numbers being stable.
I'm open to suggestions for a better comment.

> I don't see anything which ever clears BB_MODIFIED, so that bit is going
> to accumulate in the block over time.

That's what I meant when I mentioned the call to clear_bb_flags above.


Bernd

[-- Attachment #2: hmtest.diff --]
[-- Type: text/plain, Size: 2669 bytes --]

Index: testsuite/gcc.target/arm/headmerge-1.c
===================================================================
--- testsuite/gcc.target/arm/headmerge-1.c	(revision 0)
+++ testsuite/gcc.target/arm/headmerge-1.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "#120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+
+void t (int x, int y)
+{
+  if (y < 5)
+    foo1 (120);
+  else
+    foo2 (120);
+}
Index: testsuite/gcc.target/arm/headmerge-2.c
===================================================================
--- testsuite/gcc.target/arm/headmerge-2.c	(revision 0)
+++ testsuite/gcc.target/arm/headmerge-2.c	(revision 0)
@@ -0,0 +1,35 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+extern void foo3 (int);
+extern void foo4 (int);
+extern void foo5 (int);
+extern void foo6 (int);
+
+void t (int x, int y)
+{
+  switch (y)
+    {
+    case 1:
+      foo1 (120);
+      break;
+    case 5:
+      foo2 (120);
+      break;
+    case 7:
+      foo3 (120);
+      break;
+    case 10:
+      foo4 (120);
+      break;
+    case 13:
+      foo5 (120);
+      break;
+    default:
+      foo6 (120);
+      break;
+    }
+}
Index: testsuite/gcc.target/i386/headmerge-1.c
===================================================================
--- testsuite/gcc.target/i386/headmerge-1.c	(revision 0)
+++ testsuite/gcc.target/i386/headmerge-1.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+
+void t (int x, int y)
+{
+  if (y < 5)
+    foo1 (120);
+  else
+    foo2 (120);
+}
Index: testsuite/gcc.target/i386/headmerge-2.c
===================================================================
--- testsuite/gcc.target/i386/headmerge-2.c	(revision 0)
+++ testsuite/gcc.target/i386/headmerge-2.c	(revision 0)
@@ -0,0 +1,35 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+extern void foo3 (int);
+extern void foo4 (int);
+extern void foo5 (int);
+extern void foo6 (int);
+
+void t (int x, int y)
+{
+  switch (y)
+    {
+    case 1:
+      foo1 (120);
+      break;
+    case 5:
+      foo2 (120);
+      break;
+    case 7:
+      foo3 (120);
+      break;
+    case 10:
+      foo4 (120);
+      break;
+    case 13:
+      foo5 (120);
+      break;
+    default:
+      foo6 (120);
+      break;
+    }
+}

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-03 15:31                         ` Bernd Schmidt
@ 2010-08-03 17:13                           ` Jeff Law
  2010-08-04 13:36                             ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-08-03 17:13 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

  On 08/03/10 09:31, Bernd Schmidt wrote:
> On 08/03/2010 05:15 PM, Jeff Law wrote:
>>   On 08/03/10 08:09, Bernd Schmidt wrote:
>>> On 08/02/2010 06:15 PM, Bernd Schmidt wrote:
>>>> On 08/02/2010 06:05 PM, Jeff Law wrote:
>>>>> OK.  If you could highlight in a quick blurb what changed it'd be
>>>>> appreciated -- it'll save me from having to look over the whole thing
>>>>> again to figure out what changed from the previous version.
>>>> I intend to make the change I previously mentioned to add a per-bb flag
>>>> which notes it's been modified, so that we can use that on the second
>>>> pass to decide whether or not to try to optimize it, rather than using
>>>> df_get_bb_dirty (since that gets cleared on df_analyze).  Earlier
>>>> versions of gcc had a BB_DIRTY bit in bb->flags, I'll reintroduce that
>>>> as BB_MODIFIED.  That's cheaper to test anyway.
>>>>
>>>> The other change I'll make is to be slightly more careful wrt. volatile
>>>> asms, not moving memory references across them.
>>> Did that, and also fixed a crash I saw with a PPC cross compiler -
>>> mustn't try to look at insns in EXIT_BLOCK.  Note that there's still a
>>> call to clear_bb_flags which I think is left over from before we were
>>> using df_get_bb_dirty and now has a purpose again.
>>>
>>> New patch below; search for BB_MODIFIED, ASM_OPERANDS and EXIT_BLOCK_PTR
>>> to find these changes.  Also, added the two testcases for i386 as well
>>> and Paolo's suggestion of a gcc_assert before df_analyze.
>>>
>>> Bootstrapped and regression tested on i686-linux.
>>>
>>>
>>> Bernd
>> The testsuite changes weren't attached to the patch.
> quilt vs svn problem; now svn added them.
np.  They're clearly not the meat of the patch :-)

>> I guess one could ask whether or not we really need to carry a bit in
>> the BB structure if its only use is head merging -- we could just as
>> easily have a bitmap indicating what blocks changed that we allocate &
>> free within cfgcleanup.
> We have a flags word anyway, so I think using that is the simplest way.
>   That way we also don't have to worry about BB numbers being stable.
> I'm open to suggestions for a better comment.
OK.  WRT the comment, we might want to just say that BB_MODIFIED is set 
at the same time as a block is marked dirty, but is not cleared during a 
df_analyze allowing a pass to update the DF information and still know 
what blocks were modified.

>> I don't see anything which ever clears BB_MODIFIED, so that bit is going
>> to accumulate in the block over time.
> That's what I meant when I mentioned the call to clear_bb_flags above.
Duh.  I shouldn't review code first thing in the morning.

jeff


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-03 17:13                           ` Jeff Law
@ 2010-08-04 13:36                             ` Bernd Schmidt
  2010-08-30 16:00                               ` Bernd Schmidt
  2010-10-02 13:07                               ` ifcvt/crossjump patch: Fix PR 42496, 21803 H.J. Lu
  0 siblings, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-08-04 13:36 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 317 bytes --]

On 08/03/2010 07:12 PM, Jeff Law wrote:
> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
> at the same time as a block is marked dirty, but is not cleared during a
> df_analyze allowing a pass to update the DF information and still know
> what blocks were modified.

New patch below.


Bernd

[-- Attachment #2: headmerge6.diff --]
[-- Type: text/plain, Size: 33717 bytes --]

	PR rtl-optimization/44374
	* basic-block.h (enum bb_flags): Add BB_MODIFIED.
	* df-core.c (df_set_bb_dirty): Set it.
	* ifcvt.c (find_memory): Remove function.
	(dead_or_predicable): Use can_move_insns_across.
	* df.h (can_move_insns_across): Declare function.
	* cfgcleanup.c (block_was_dirty): New static variable.
	(try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather
	than df_get_bb_dirty.
	(try_head_merge_bb): New static function.
	(try_optimize_cfg): Call it.  Call df_analyze if block_was_dirty
	is set.
	* df-problems.c: Include "target.h".
	(df_simulate_find_uses): New static function.
	(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
	(find_memory, find_memory_stores): New static functions.
	(can_move_insns_across): New function.
	* Makefile.in (df-problems.o): Update dependencies.

testsuite/
	PR rtl-optimization/44374
	* gcc.target/arm/headmerge-1.c: New test.
	* gcc.target/arm/headmerge-2.c: New test.
	* gcc.target/i386/headmerge-1.c: New test.
	* gcc.target/i386/headmerge-2.c: New test.

Index: testsuite/gcc.target/arm/headmerge-2.c
===================================================================
--- testsuite/gcc.target/arm/headmerge-2.c	(revision 0)
+++ testsuite/gcc.target/arm/headmerge-2.c	(revision 0)
@@ -0,0 +1,35 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+extern void foo3 (int);
+extern void foo4 (int);
+extern void foo5 (int);
+extern void foo6 (int);
+
+void t (int x, int y)
+{
+  switch (y)
+    {
+    case 1:
+      foo1 (120);
+      break;
+    case 5:
+      foo2 (120);
+      break;
+    case 7:
+      foo3 (120);
+      break;
+    case 10:
+      foo4 (120);
+      break;
+    case 13:
+      foo5 (120);
+      break;
+    default:
+      foo6 (120);
+      break;
+    }
+}
Index: testsuite/gcc.target/arm/headmerge-1.c
===================================================================
--- testsuite/gcc.target/arm/headmerge-1.c	(revision 0)
+++ testsuite/gcc.target/arm/headmerge-1.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "#120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+
+void t (int x, int y)
+{
+  if (y < 5)
+    foo1 (120);
+  else
+    foo2 (120);
+}
Index: testsuite/gcc.target/i386/headmerge-1.c
===================================================================
--- testsuite/gcc.target/i386/headmerge-1.c	(revision 0)
+++ testsuite/gcc.target/i386/headmerge-1.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+
+void t (int x, int y)
+{
+  if (y < 5)
+    foo1 (120);
+  else
+    foo2 (120);
+}
Index: testsuite/gcc.target/i386/headmerge-2.c
===================================================================
--- testsuite/gcc.target/i386/headmerge-2.c	(revision 0)
+++ testsuite/gcc.target/i386/headmerge-2.c	(revision 0)
@@ -0,0 +1,35 @@
+/* { dg-do compile }  */
+/* { dg-options "-O2" }  */
+/* { dg-final { scan-assembler-times "120" 1 } } */
+
+extern void foo1 (int);
+extern void foo2 (int);
+extern void foo3 (int);
+extern void foo4 (int);
+extern void foo5 (int);
+extern void foo6 (int);
+
+void t (int x, int y)
+{
+  switch (y)
+    {
+    case 1:
+      foo1 (120);
+      break;
+    case 5:
+      foo2 (120);
+      break;
+    case 7:
+      foo3 (120);
+      break;
+    case 10:
+      foo4 (120);
+      break;
+    case 13:
+      foo5 (120);
+      break;
+    default:
+      foo6 (120);
+      break;
+    }
+}
Index: df-core.c
===================================================================
--- df-core.c	(revision 162823)
+++ df-core.c	(working copy)
@@ -1413,6 +1413,7 @@ df_get_bb_dirty (basic_block bb)
 void
 df_set_bb_dirty (basic_block bb)
 {
+  bb->flags |= BB_MODIFIED;
   if (df)
     {
       int p;
Index: ifcvt.c
===================================================================
--- ifcvt.c	(revision 162823)
+++ ifcvt.c	(working copy)
@@ -101,7 +101,6 @@ static int noce_find_if_block (basic_blo
 static int cond_exec_find_if_block (ce_if_block_t *);
 static int find_if_case_1 (basic_block, edge, edge);
 static int find_if_case_2 (basic_block, edge, edge);
-static int find_memory (rtx *, void *);
 static int dead_or_predicable (basic_block, basic_block, basic_block,
 			       basic_block, int);
 static void noce_emit_move_insn (rtx, rtx);
@@ -3875,15 +3874,6 @@ find_if_case_2 (basic_block test_bb, edg
   return TRUE;
 }
 
-/* A subroutine of dead_or_predicable called through for_each_rtx.
-   Return 1 if a memory is found.  */
-
-static int
-find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
-  return MEM_P (*px);
-}
-
 /* Used by the code above to perform the actual rtl transformations.
    Return TRUE if successful.
 
@@ -3985,131 +3975,38 @@ dead_or_predicable (basic_block test_bb,
       earliest = jump;
     }
 #endif
+  /* If we allocated new pseudos (e.g. in the conditional move
+     expander called from noce_emit_cmove), we must resize the
+     array first.  */
+  if (max_regno < max_reg_num ())
+    max_regno = max_reg_num ();
+
   /* Try the NCE path if the CE path did not result in any changes.  */
   if (n_validated_changes == 0)
     {
+      rtx cond;
+      regset live;
+      bool success;
+
       /* In the non-conditional execution case, we have to verify that there
 	 are no trapping operations, no calls, no references to memory, and
 	 that any registers modified are dead at the branch site.  */
 
-      rtx insn, cond, prev;
-      bitmap merge_set, merge_set_noclobber, test_live, test_set;
-      unsigned i, fail = 0;
-      bitmap_iterator bi;
-
-      /* Check for no calls or trapping operations.  */
-      for (insn = head; ; insn = NEXT_INSN (insn))
-	{
-	  if (CALL_P (insn))
-	    return FALSE;
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      if (may_trap_p (PATTERN (insn)))
-		return FALSE;
-
-	      /* ??? Even non-trapping memories such as stack frame
-		 references must be avoided.  For stores, we collect
-		 no lifetime info; for reads, we'd have to assert
-		 true_dependence false against every store in the
-		 TEST range.  */
-	      if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
-		return FALSE;
-	    }
-	  if (insn == end)
-	    break;
-	}
-
-      if (! any_condjump_p (jump))
+      if (!any_condjump_p (jump))
 	return FALSE;
 
       /* Find the extent of the conditional.  */
       cond = noce_get_condition (jump, &earliest, false);
-      if (! cond)
+      if (!cond)
 	return FALSE;
 
-      /* Collect:
-	   MERGE_SET = set of registers set in MERGE_BB
-	   MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers
-	     that are really set, not just clobbered.
-	   TEST_LIVE = set of registers live at EARLIEST
-	   TEST_SET = set of registers set between EARLIEST and the
-	     end of the block.  */
-
-      merge_set = BITMAP_ALLOC (&reg_obstack);
-      merge_set_noclobber = BITMAP_ALLOC (&reg_obstack);
-      test_live = BITMAP_ALLOC (&reg_obstack);
-      test_set = BITMAP_ALLOC (&reg_obstack);
-
-      /* ??? bb->local_set is only valid during calculate_global_regs_live,
-	 so we must recompute usage for MERGE_BB.  Not so bad, I suppose,
-         since we've already asserted that MERGE_BB is small.  */
-      /* If we allocated new pseudos (e.g. in the conditional move
-	 expander called from noce_emit_cmove), we must resize the
-	 array first.  */
-      if (max_regno < max_reg_num ())
-	max_regno = max_reg_num ();
-
-      FOR_BB_INSNS (merge_bb, insn)
-	{
-	  if (NONDEBUG_INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, merge_set);
-	      df_simulate_find_noclobber_defs (insn, merge_set_noclobber);
-	    }
-	}
-
-      /* For small register class machines, don't lengthen lifetimes of
-	 hard registers before reload.  */
-      if (! reload_completed
-	  && targetm.small_register_classes_for_mode_p (VOIDmode))
-	{
-          EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi)
-	    {
-	      if (i < FIRST_PSEUDO_REGISTER
-		  && ! fixed_regs[i]
-		  && ! global_regs[i])
-		fail = 1;
-	    }
-	}
-
-      /* For TEST, we're interested in a range of insns, not a whole block.
-	 Moreover, we're interested in the insns live from OTHER_BB.  */
-
-      /* The loop below takes the set of live registers
-         after JUMP, and calculates the live set before EARLIEST. */
-      bitmap_copy (test_live, df_get_live_in (other_bb));
-      df_simulate_initialize_backwards (test_bb, test_live);
-      for (insn = jump; ; insn = prev)
-	{
-	  if (INSN_P (insn))
-	    {
-	      df_simulate_find_defs (insn, test_set);
-	      df_simulate_one_insn_backwards (test_bb, insn, test_live);
-	    }
-	  prev = PREV_INSN (insn);
-	  if (insn == earliest)
-	    break;
-	}
-
-      /* We can perform the transformation if
-	   MERGE_SET_NOCLOBBER & TEST_SET
-	 and
-	   MERGE_SET & TEST_LIVE)
-	 and
-	   TEST_SET & DF_LIVE_IN (merge_bb)
-	 are empty.  */
-
-      if (bitmap_intersect_p (test_set, merge_set_noclobber)
-	  || bitmap_intersect_p (test_live, merge_set)
-	  || bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
-	fail = 1;
-
-      BITMAP_FREE (merge_set_noclobber);
-      BITMAP_FREE (merge_set);
-      BITMAP_FREE (test_live);
-      BITMAP_FREE (test_set);
-
-      if (fail)
+      live = BITMAP_ALLOC (&reg_obstack);
+      simulate_backwards_to_point (merge_bb, live, end);
+      success = can_move_insns_across (head, end, earliest, jump,
+				       merge_bb, live,
+				       df_get_live_in (other_bb), NULL);
+      BITMAP_FREE (live);
+      if (!success)
 	return FALSE;
     }
 
Index: df.h
===================================================================
--- df.h	(revision 162823)
+++ df.h	(working copy)
@@ -985,7 +985,9 @@ extern void df_simulate_one_insn_backwar
 extern void df_simulate_finalize_backwards (basic_block, bitmap);
 extern void df_simulate_initialize_forwards (basic_block, bitmap);
 extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap);
-
+extern void simulate_backwards_to_point (basic_block, regset, rtx);
+extern bool can_move_insns_across (rtx, rtx, rtx, rtx, basic_block, regset,
+				   regset, rtx *);
 /* Functions defined in df-scan.c.  */
 
 extern void df_scan_alloc (bitmap);
Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 162823)
+++ cfgcleanup.c	(working copy)
@@ -66,6 +66,10 @@ static bool first_pass;
 /* Set to true if crossjumps occured in the latest run of try_optimize_cfg.  */
 static bool crossjumps_occured;
 
+/* Set to true if we couldn't run an optimization due to stale liveness
+   information; we should run df_analyze to enable more opportunities.  */
+static bool block_was_dirty;
+
 static bool try_crossjump_to_edge (int, edge, edge);
 static bool try_crossjump_bb (int, basic_block);
 static bool outgoing_edges_match (int, basic_block, basic_block);
@@ -432,7 +436,7 @@ try_forward_edges (int mode, basic_block
       int counter, goto_locus;
       bool threaded = false;
       int nthreaded_edges = 0;
-      bool may_thread = first_pass | df_get_bb_dirty (b);
+      bool may_thread = first_pass || (b->flags & BB_MODIFIED) != 0;
 
       /* Skip complex edges because we don't know how to update them.
 
@@ -467,7 +471,7 @@ try_forward_edges (int mode, basic_block
 	{
 	  basic_block new_target = NULL;
 	  bool new_target_threaded = false;
-	  may_thread |= df_get_bb_dirty (target);
+	  may_thread |= (target->flags & BB_MODIFIED) != 0;
 
 	  if (FORWARDER_BLOCK_P (target)
 	      && !(single_succ_edge (target)->flags & EDGE_CROSSING)
@@ -1857,8 +1861,8 @@ try_crossjump_bb (int mode, basic_block 
 	  /* If nothing changed since the last attempt, there is nothing
 	     we can do.  */
 	  if (!first_pass
-	      && (!(df_get_bb_dirty (e->src))
-		  && !(df_get_bb_dirty (fallthru->src))))
+	      && !((e->src->flags & BB_MODIFIED)
+		   || (fallthru->src->flags & BB_MODIFIED)))
 	    continue;
 
 	  if (try_crossjump_to_edge (mode, e, fallthru))
@@ -1907,8 +1911,8 @@ try_crossjump_bb (int mode, basic_block 
 	  /* If nothing changed since the last attempt, there is nothing
 	     we can do.  */
 	  if (!first_pass
-	      && (!(df_get_bb_dirty (e->src))
-		  && !(df_get_bb_dirty (e2->src))))
+	      && !((e->src->flags & BB_MODIFIED)
+		   || (e2->src->flags & BB_MODIFIED)))
 	    continue;
 
 	  if (try_crossjump_to_edge (mode, e, e2))
@@ -1927,6 +1931,265 @@ try_crossjump_bb (int mode, basic_block 
   return changed;
 }
 
+/* Search the successors of BB for common insn sequences at the start
+   of each successor.  When found, share code between them by moving
+   it across the basic block boundary into BB (or, for the two-jump
+   case described below, into BB's single predecessor).  Return true
+   if any changes were made.  */
+
+static bool
+try_head_merge_bb (basic_block bb)
+{
+  basic_block final_dest_bb = NULL;
+  int max_match = INT_MAX;
+  edge e0;
+  rtx *headptr, *currptr;
+  bool changed, moveall;
+  unsigned ix;
+  rtx e0_last_head, cond, move_before;
+  unsigned nedges = EDGE_COUNT (bb->succs);
+  rtx jump = BB_END (bb);
+  regset live, live_union;
+
+  /* Nothing to do if there are not at least two outgoing edges.  */
+  if (nedges < 2)
+    return false;
+
+  /* Don't crossjump if this block ends in a computed jump, unless we are
+     optimizing for size.  NOTE(review): test looks inverted; confirm.  */
+  if (optimize_bb_for_size_p (bb)
+      && bb != EXIT_BLOCK_PTR
+      && computed_jump_p (BB_END (bb)))
+    return false;
+
+  cond = get_condition (jump, &move_before, true, false);
+  if (cond == NULL_RTX)
+    move_before = jump;
+
+  for (ix = 0; ix < nedges; ix++)
+    if (EDGE_SUCC (bb, ix)->dest == EXIT_BLOCK_PTR)
+      return false;
+
+  for (ix = 0; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      basic_block other_bb = e->dest;
+
+      if (df_get_bb_dirty (other_bb))
+	{
+	  block_was_dirty = true;
+	  return false;
+	}
+
+      if (e->flags & EDGE_ABNORMAL)
+	return false;
+
+      /* Normally, all destination blocks must only be reachable from this
+	 block, i.e. they must have one incoming edge.
+
+	 There is one special case we can handle, that of multiple consecutive
+	 jumps where the first jumps to one of the targets of the second jump.
+	 This happens frequently in switch statements for default labels.
+	 The structure is as follows:
+	 FINAL_DEST_BB
+	 ....
+	 if (cond) jump A;
+	 fall through
+	 BB
+	 jump with targets A, B, C, D...
+	 A
+	 has two incoming edges, from FINAL_DEST_BB and BB
+
+	 In this case, we can try to move the insns through BB and into
+	 FINAL_DEST_BB.  */
+      if (EDGE_COUNT (other_bb->preds) != 1)
+	{
+	  edge incoming_edge, incoming_bb_other_edge;
+	  edge_iterator ei;
+
+	  if (final_dest_bb != NULL
+	      || EDGE_COUNT (other_bb->preds) != 2)
+	    return false;
+
+	  /* We must be able to move the insns across the whole block.  */
+	  move_before = BB_HEAD (bb);
+	  while (!NONDEBUG_INSN_P (move_before))
+	    move_before = NEXT_INSN (move_before);
+
+	  /* FINAL_DEST_BB must be the only predecessor of BB.  */
+	  if (EDGE_COUNT (bb->preds) != 1)
+	    return false;
+	  incoming_edge = EDGE_PRED (bb, 0);
+	  final_dest_bb = incoming_edge->src;
+	  if (EDGE_COUNT (final_dest_bb->succs) != 2)
+	    return false;
+	  FOR_EACH_EDGE (incoming_bb_other_edge, ei, final_dest_bb->succs)
+	    if (incoming_bb_other_edge != incoming_edge)
+	      break;
+	  if (incoming_bb_other_edge->dest != other_bb)
+	    return false;
+	}
+    }
+
+  e0 = EDGE_SUCC (bb, 0);
+  e0_last_head = NULL_RTX;
+  changed = false;
+
+  for (ix = 1; ix < nedges; ix++)
+    {
+      edge e = EDGE_SUCC (bb, ix);
+      rtx e0_last, e_last;
+      int nmatch;
+
+      nmatch = flow_find_head_matching_sequence (e0->dest, e->dest,
+						 &e0_last, &e_last, 0);
+      if (nmatch == 0)
+	return false;
+
+      if (nmatch < max_match)
+	{
+	  max_match = nmatch;
+	  e0_last_head = e0_last;
+	}
+    }
+
+  /* If we matched an entire block, we probably have to avoid moving the
+     last insn.  */
+  if (max_match > 0
+      && e0_last_head == BB_END (e0->dest)
+      && (find_reg_note (e0_last_head, REG_EH_REGION, 0)
+	  || control_flow_insn_p (e0_last_head)))
+    {
+      max_match--;
+      if (max_match == 0)
+	return false;
+      do
+	e0_last_head = prev_real_insn (e0_last_head);
+      while (DEBUG_INSN_P (e0_last_head));
+    }
+
+  /* We must find a union of the live registers at each of the end points.  */
+  live = BITMAP_ALLOC (NULL);
+  live_union = BITMAP_ALLOC (NULL);
+
+  currptr = XNEWVEC (rtx, nedges);
+  headptr = XNEWVEC (rtx, nedges);
+
+  for (ix = 0; ix < nedges; ix++)
+    {
+      int j;
+      basic_block merge_bb = EDGE_SUCC (bb, ix)->dest;
+      rtx head = BB_HEAD (merge_bb);
+
+      while (!NONDEBUG_INSN_P (head))
+	head = NEXT_INSN (head);
+      headptr[ix] = head;
+      currptr[ix] = head;
+
+      /* Compute the end point and live information  */
+      for (j = 1; j < max_match; j++)
+	do
+	  head = NEXT_INSN (head);
+	while (!NONDEBUG_INSN_P (head));
+      simulate_backwards_to_point (merge_bb, live, head);
+      IOR_REG_SET (live_union, live);
+    }
+
+  /* If we're moving across two blocks, verify the validity of the
+     first move, then adjust the target and let the loop below deal
+     with the final move.  */
+  if (final_dest_bb != NULL)
+    {
+      rtx move_upto;
+
+      moveall = can_move_insns_across (currptr[0], e0_last_head, move_before,
+				       jump, e0->dest, live_union,
+				       NULL, &move_upto);
+      if (!moveall)
+	e0_last_head = move_upto;
+      if (e0_last_head == NULL_RTX)
+	goto out;
+
+      jump = BB_END (final_dest_bb);
+      cond = get_condition (jump, &move_before, true, false);
+      if (cond == NULL_RTX)
+	move_before = jump;
+    }
+
+  do
+    {
+      rtx move_upto;
+      moveall = can_move_insns_across (currptr[0], e0_last_head,
+				       move_before, jump, e0->dest, live_union,
+				       NULL, &move_upto);
+      if (!moveall && move_upto == NULL_RTX)
+	{
+	  if (jump == move_before)
+	    break;
+
+	  /* Try again, using a different insertion point.  */
+	  move_before = jump;
+	  continue;
+	}
+
+      if (final_dest_bb && !moveall)
+	/* We haven't checked whether a partial move would be OK for the first
+	   move, so we have to fail this case.  */
+	break;
+
+      changed = true;
+      while (currptr[0] != move_upto)
+	{
+	  for (ix = 0; ix < nedges; ix++)
+	    {
+	      rtx curr = currptr[ix];
+	      do
+		curr = NEXT_INSN (curr);
+	      while (!NONDEBUG_INSN_P (curr));
+	      currptr[ix] = curr;
+	    }
+	}
+
+      reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before));
+      df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest);
+      if (final_dest_bb != NULL)
+	df_set_bb_dirty (final_dest_bb);
+      df_set_bb_dirty (bb);
+      for (ix = 1; ix < nedges; ix++)
+	{
+	  df_set_bb_dirty (EDGE_SUCC (bb, ix)->dest);
+	  delete_insn_chain (headptr[ix], currptr[ix], false);
+	}
+      if (!moveall)
+	{
+	  if (jump == move_before)
+	    break;
+
+	  /* Try again, using a different insertion point.  */
+	  move_before = jump;
+	  for (ix = 0; ix < nedges; ix++)
+	    {
+	      rtx curr = currptr[ix];
+	      do
+		curr = NEXT_INSN (curr);
+	      while (!NONDEBUG_INSN_P (curr));
+	      currptr[ix] = headptr[ix] = curr;
+	    }
+	}
+    }
+  while (!moveall);
+
+ out:
+  free (currptr);
+  free (headptr);
+  BITMAP_FREE (live);
+  BITMAP_FREE (live_union);
+
+  crossjumps_occured |= changed;
+
+  return changed;
+}
+
 /* Return true if BB contains just bb note, or bb note followed
    by only DEBUG_INSNs.  */
 
@@ -1972,6 +2235,7 @@ try_optimize_cfg (int mode)
 	 one predecessor, they may be combined.  */
       do
 	{
+	  block_was_dirty = false;
 	  changed = false;
 	  iterations++;
 
@@ -2170,6 +2434,13 @@ try_optimize_cfg (int mode)
 		  && try_crossjump_bb (mode, b))
 		changed_here = true;
 
+	      if ((mode & CLEANUP_CROSSJUMP)
+		  /* This can lengthen register lifetimes.  Do it only after
+		     reload.  */
+		  && reload_completed
+		  && try_head_merge_bb (b))
+		changed_here = true;
+
 	      /* Don't get confused by the index shift caused by
 		 deleting blocks.  */
 	      if (!changed_here)
@@ -2182,6 +2453,13 @@ try_optimize_cfg (int mode)
 	      && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
 	    changed = true;
 
+	  if (block_was_dirty)
+	    {
+	      /* This should only be set by head-merging.  */
+	      gcc_assert (mode & CLEANUP_CROSSJUMP);
+	      df_analyze ();
+	    }
+
 #ifdef ENABLE_CHECKING
 	  if (changed)
 	    verify_flow_info ();
@@ -2366,8 +2644,7 @@ cleanup_cfg (int mode)
 	  if ((mode & CLEANUP_EXPENSIVE) && !reload_completed
 	      && !delete_trivially_dead_insns (get_insns (), max_reg_num ()))
 	    break;
-	  else if ((mode & CLEANUP_CROSSJUMP)
-		   && crossjumps_occured)
+	  if ((mode & CLEANUP_CROSSJUMP) && crossjumps_occured)
 	    run_fast_dce ();
 	}
       else
Index: df-problems.c
===================================================================
--- df-problems.c	(revision 162823)
+++ df-problems.c	(working copy)
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  
 #include "basic-block.h"
 #include "sbitmap.h"
 #include "bitmap.h"
+#include "target.h"
 #include "timevar.h"
 #include "df.h"
 #include "except.h"
@@ -3500,6 +3501,27 @@ df_simulate_find_defs (rtx insn, bitmap 
     }
 }
 
+/* Add to USES the register number of every use recorded for INSN, and
+   of every def of INSN flagged DF_REF_PARTIAL or DF_REF_CONDITIONAL.  */
+
+static void
+df_simulate_find_uses (rtx insn, bitmap uses)
+{
+  unsigned int uid = INSN_UID (insn);
+  df_ref *def_rec = DF_INSN_UID_DEFS (uid);
+  df_ref *use_rec = DF_INSN_UID_USES (uid);
+
+  /* Partial and conditional defs are recorded alongside the uses.  */
+  while (*def_rec)
+    {
+      if (DF_REF_FLAGS (*def_rec) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))
+	bitmap_set_bit (uses, DF_REF_REGNO (*def_rec));
+      def_rec++;
+    }
+  for (; *use_rec; use_rec++)
+    bitmap_set_bit (uses, DF_REF_REGNO (*use_rec));
+}
+
 /* Find the set of real DEFs, which are not clobbers, for INSN.  */
 
 void
@@ -3727,7 +3749,301 @@ df_simulate_one_insn_forwards (basic_blo
     }
   df_simulate_fixup_sets (bb, live);
 }
+\f
+/* Used by the next two functions to encode information about the
+   memory references we found.  */
+#define MEMREF_NORMAL 1
+#define MEMREF_VOLATILE 2
+
+/* Callback for for_each_rtx, used by can_move_insns_across.  Classify *PX:
+   return MEMREF_VOLATILE for a volatile MEM or volatile ASM_OPERANDS,
+   MEMREF_NORMAL for any other MEM that is not read-only, else zero.  */
+
+static int
+find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+  rtx x = *px;
+
+  /* A volatile asm is reported the same way as a volatile MEM.  */
+  if (GET_CODE (x) == ASM_OPERANDS)
+    return MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : 0;
+
+  if (!MEM_P (x))
+    return 0;
+  if (MEM_VOLATILE_P (x))
+    return MEMREF_VOLATILE;
+
+  return MEM_READONLY_P (x) ? 0 : MEMREF_NORMAL;
+}
+
+/* A subroutine of can_move_insns_across called through note_stores.
+   DATA points to an integer in which we set the MEMREF_NORMAL bit if X
+   (looking through a SUBREG) is a non-volatile MEM, or the MEMREF_VOLATILE
+   bit if it is a volatile MEM or the stack pointer.  */
+
+static void
+find_memory_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
+		    void *data)
+{
+  int *pflags = (int *) data;
+  if (GET_CODE (x) == SUBREG)
+    x = XEXP (x, 0);
+  /* Treat stores to SP as volatile stores to memory; this will prevent
+     problems when there are references to the stack frame.  */
+  if (x == stack_pointer_rtx)
+    *pflags |= MEMREF_VOLATILE;
+  if (!MEM_P (x))
+    return;
+  *pflags |= MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : MEMREF_NORMAL;
+}
+
+/* Set LIVE to the live-out set of BB, then simulate the insns of BB
+   backwards from BB_END down to, but not including, POINT.  POINT must
+   be reachable from BB_END via PREV_INSN; nothing else ends the walk.  */
+
+void
+simulate_backwards_to_point (basic_block bb, regset live, rtx point)
+{
+  rtx insn = BB_END (bb);
+
+  bitmap_copy (live, df_get_live_out (bb));
+  df_simulate_initialize_backwards (bb, live);
+
+  for (; insn != point; insn = PREV_INSN (insn))
+    df_simulate_one_insn_backwards (bb, insn, live);
+}
+
+/* Return true if it is safe to move a group of insns, described by
+   the range FROM to TO, backwards across another group of insns,
+   described by ACROSS_FROM to ACROSS_TO.  It is assumed that there
+   are no insns between ACROSS_TO and FROM, but they may be in
+   different basic blocks; MERGE_BB is the block from which the
+   insns will be moved.  The caller must pass in a regset MERGE_LIVE
+   which specifies the registers live after TO.
+
+   This function may be called in one of two cases: either we try to
+   move identical instructions from all successor blocks into their
+   predecessor, or we try to move from only one successor block.  If
+   OTHER_BRANCH_LIVE is nonnull, it indicates that we're dealing with
+   the second case.  It should contain a set of registers live at the
+   end of ACROSS_TO which must not be clobbered by moving the insns.
+   In that case, we're also more careful about moving memory references
+   and trapping insns.
+
+   We return false if it is not safe to move the entire group, but it
+   may still be possible to move a subgroup.  PMOVE_UPTO, if nonnull, is
+   set to the last moveable insn, or to NULL_RTX if nothing can be moved.  */
+
+bool
+can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to,
+		       basic_block merge_bb, regset merge_live,
+		       regset other_branch_live, rtx *pmove_upto)
+{
+  rtx insn, next, max_to;
+  bitmap merge_set, merge_use, local_merge_live;
+  bitmap test_set, test_use;
+  unsigned i, fail = 0;
+  bitmap_iterator bi;
+  int memrefs_in_across = 0;
+  int mem_sets_in_across = 0;
+  bool trapping_insns_in_across = false;
+
+  if (pmove_upto != NULL)
+    *pmove_upto = NULL_RTX;
+
+  /* Find real bounds, ignoring debug insns.  */
+  while (!NONDEBUG_INSN_P (from) && from != to)
+    from = NEXT_INSN (from);
+  while (!NONDEBUG_INSN_P (to) && from != to)
+    to = PREV_INSN (to);
+
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  memrefs_in_across |= for_each_rtx (&PATTERN (insn), find_memory,
+					     NULL);
+	  note_stores (PATTERN (insn), find_memory_stores,
+		       &mem_sets_in_across);
+	  /* This is used just to find sets of the stack pointer.  */
+	  memrefs_in_across |= mem_sets_in_across;
+	  trapping_insns_in_across |= may_trap_p (PATTERN (insn));
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Collect:
+     MERGE_SET = set of registers set in MERGE_BB
+     MERGE_USE = set of registers used in MERGE_BB and live at its top
+     MERGE_LIVE = set of registers live at the point inside the MERGE
+     range that we've reached during scanning
+     TEST_SET = set of registers set between ACROSS_FROM and ACROSS_TO.
+     TEST_USE = set of registers used between ACROSS_FROM and ACROSS_TO,
+     and live before ACROSS_FROM.  */
+
+  merge_set = BITMAP_ALLOC (&reg_obstack);
+  merge_use = BITMAP_ALLOC (&reg_obstack);
+  local_merge_live = BITMAP_ALLOC (&reg_obstack);
+  test_set = BITMAP_ALLOC (&reg_obstack);
+  test_use = BITMAP_ALLOC (&reg_obstack);
+
+  /* Compute the set of registers set and used in the ACROSS range.  */
+  if (other_branch_live != NULL)
+    bitmap_copy (test_use, other_branch_live);
+  df_simulate_initialize_backwards (merge_bb, test_use);
+  for (insn = across_to; ; insn = next)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  df_simulate_find_defs (insn, test_set);
+	  df_simulate_defs (insn, test_use);
+	  df_simulate_uses (insn, test_use);
+	}
+      next = PREV_INSN (insn);
+      if (insn == across_from)
+	break;
+    }
+
+  /* Compute an upper bound for the amount of insns moved, by finding
+     the first insn in MERGE that sets a register in TEST_USE, or uses
+     a register in TEST_SET.  We also check for calls, trapping operations,
+     and memory references.  */
+  max_to = NULL_RTX;
+  for (insn = from; ; insn = next)
+    {
+      if (CALL_P (insn))
+	break;
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (may_trap_p (PATTERN (insn))
+	      && (trapping_insns_in_across || other_branch_live != NULL))
+	    break;
+
+	  /* We cannot move memory stores past each other, or move memory
+	     reads past stores, at least not without tracking them and
+	     calling true_dependence on every pair.
+
+	     If there is no other branch and no memory references or
+	     sets in the ACROSS range, we can move memory references
+	     freely, even volatile ones.
+
+	     Otherwise, the rules are as follows: volatile memory
+	     references and stores can't be moved at all, and any type
+	     of memory reference can't be moved if there are volatile
+	     accesses or stores in the ACROSS range.  That leaves
+	     normal reads, which can be moved, as the trapping case is
+	     dealt with elsewhere.  */
+	  if (other_branch_live != NULL || memrefs_in_across != 0)
+	    {
+	      int mem_ref_flags = 0;
+	      int mem_set_flags = 0;
+	      note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags);
+	      mem_ref_flags = for_each_rtx (&PATTERN (insn), find_memory,
+					    NULL);
+	      /* Catch sets of the stack pointer.  */
+	      mem_ref_flags |= mem_set_flags;
+
+	      if ((mem_ref_flags | mem_set_flags) & MEMREF_VOLATILE)
+		break;
+	      if ((memrefs_in_across & MEMREF_VOLATILE) && mem_ref_flags != 0)
+		break;
+	      if (mem_set_flags != 0
+		  || (mem_sets_in_across != 0 && mem_ref_flags != 0))
+		break;
+	    }
+	  df_simulate_find_uses (insn, merge_use);
+	  /* We're only interested in uses which use a value live at
+	     the top, not one previously set in this block.  */
+	  bitmap_and_compl_into (merge_use, merge_set);
+	  df_simulate_find_defs (insn, merge_set);
+	  if (bitmap_intersect_p (merge_set, test_use)
+	      || bitmap_intersect_p (merge_use, test_set))
+	    break;
+	  max_to = insn;
+	}
+      next = NEXT_INSN (insn);
+      if (insn == to)
+	break;
+    }
+  if (max_to != to)
+    fail = 1;
+
+  if (max_to == NULL_RTX || (fail && pmove_upto == NULL))
+    goto out;
+
+  /* Now, lower this upper bound by also taking into account that
+     a range of insns moved across ACROSS must not leave a register
+     live at the end that will be clobbered in ACROSS.  We need to
+     find a point where TEST_SET & LIVE == 0.
+
+     Insns in the MERGE range that set registers which are also set
+     in the ACROSS range may still be moved as long as we also move
+     later insns which use the results of the set, and make the
+     register dead again.  This is verified by the condition stated
+     above.  We only need to test it for registers that are set in
+     the moved region.
 
+     MERGE_LIVE is provided by the caller and holds live registers after
+     TO.  */
+  bitmap_copy (local_merge_live, merge_live);
+  for (insn = to; insn != max_to; insn = PREV_INSN (insn))
+    df_simulate_one_insn_backwards (merge_bb, insn, local_merge_live);
+
+  /* We're not interested in registers that aren't set in the moved
+     region at all.  */
+  bitmap_and_into (local_merge_live, merge_set);
+  for (;;)
+    {
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  if (!bitmap_intersect_p (test_set, local_merge_live))
+	    {
+	      max_to = insn;
+	      break;
+	    }
+
+	  df_simulate_one_insn_backwards (merge_bb, insn,
+					  local_merge_live);
+	}
+      if (insn == from)
+	{
+	  fail = 1;
+	  goto out;
+	}
+      insn = PREV_INSN (insn);
+    }
+
+  if (max_to != to)
+    fail = 1;
+
+  if (pmove_upto)
+    *pmove_upto = max_to;
+
+  /* For small register class machines, don't lengthen lifetimes of
+     hard registers before reload.  */
+  if (! reload_completed
+      && targetm.small_register_classes_for_mode_p (VOIDmode))
+    {
+      EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
+	{
+	  if (i < FIRST_PSEUDO_REGISTER
+	      && ! fixed_regs[i]
+	      && ! global_regs[i])
+	    fail = 1;
+	}
+    }
+
+ out:
+  BITMAP_FREE (merge_set);
+  BITMAP_FREE (merge_use);
+  BITMAP_FREE (local_merge_live);
+  BITMAP_FREE (test_set);
+  BITMAP_FREE (test_use);
+
+  return !fail;
+}
 
 \f
 /*----------------------------------------------------------------------------
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 162823)
+++ Makefile.in	(working copy)
@@ -3160,7 +3160,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYST
 df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
-   $(TM_P_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
+   $(TM_P_H) $(TARGET_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h
 df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 162823)
+++ basic-block.h	(working copy)
@@ -246,7 +246,13 @@ enum bb_flags
 
   /* Set on blocks that cannot be threaded through.
      Only used in cfgcleanup.c.  */
-  BB_NONTHREADABLE_BLOCK = 1 << 11
+  BB_NONTHREADABLE_BLOCK = 1 << 11,
+
+  /* Set on blocks that were modified in some way.  This bit is set in
+     df_set_bb_dirty, but not cleared by df_analyze, so it can be used
+     to test whether a block has been modified prior to a df_analyze
+     call.  */
+  BB_MODIFIED = 1 << 12
 };
 
 /* Dummy flag for convenience in the hot/cold partitioning code.  */

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-04 13:36                             ` Bernd Schmidt
@ 2010-08-30 16:00                               ` Bernd Schmidt
  2010-09-20 10:23                                 ` Bernd Schmidt
  2010-10-02 13:07                               ` ifcvt/crossjump patch: Fix PR 42496, 21803 H.J. Lu
  1 sibling, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-08-30 16:00 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 08/04/2010 03:35 PM, Bernd Schmidt wrote:
> On 08/03/2010 07:12 PM, Jeff Law wrote:
>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
>> at the same time as a block is marked dirty, but is not cleared during a
>> df_analyze allowing a pass to update the DF information and still know
>> what blocks were modified.
> 
> New patch below.

Ping?  You said the previous version looked pretty good - is this one OK?


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-30 16:00                               ` Bernd Schmidt
@ 2010-09-20 10:23                                 ` Bernd Schmidt
  2010-09-20 16:25                                   ` Jeff Law
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-09-20 10:23 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 08/30/2010 05:23 PM, Bernd Schmidt wrote:
> On 08/04/2010 03:35 PM, Bernd Schmidt wrote:
>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
>>> at the same time as a block is marked dirty, but is not cleared during a
>>> df_analyze allowing a pass to update the DF information and still know
>>> what blocks were modified.
>>
>> New patch below.

Ping^2.
http://gcc.gnu.org/ml/gcc-patches/2010-08/msg00280.html


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-09-20 10:23                                 ` Bernd Schmidt
@ 2010-09-20 16:25                                   ` Jeff Law
  2010-09-23 15:53                                     ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Jeff Law @ 2010-09-20 16:25 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

  On 09/20/10 04:03, Bernd Schmidt wrote:
> On 08/30/2010 05:23 PM, Bernd Schmidt wrote:
>> On 08/04/2010 03:35 PM, Bernd Schmidt wrote:
>>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is 
>>>> set
>>>> at the same time as a block is marked dirty, but is not cleared 
>>>> during a
>>>> df_analyze allowing a pass to update the DF information and still know
>>>> what blocks were modified.
>>>
>>> New patch below.
>
> Ping^2.
> http://gcc.gnu.org/ml/gcc-patches/2010-08/msg00280.html
Sorry, I thought this had been approved long ago.  Approved :-)
jeff

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-09-20 16:25                                   ` Jeff Law
@ 2010-09-23 15:53                                     ` Bernd Schmidt
  2010-09-23 22:00                                       ` Richard Guenther
  2010-09-27 15:56                                       ` Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803" Hans-Peter Nilsson
  0 siblings, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-09-23 15:53 UTC (permalink / raw)
  To: Jeff Law; +Cc: Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On 09/20/2010 05:56 PM, Jeff Law wrote:
>  On 09/20/10 04:03, Bernd Schmidt wrote:
>> On 08/30/2010 05:23 PM, Bernd Schmidt wrote:
>>> On 08/04/2010 03:35 PM, Bernd Schmidt wrote:
>>>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is
>>>>> set
>>>>> at the same time as a block is marked dirty, but is not cleared
>>>>> during a
>>>>> df_analyze allowing a pass to update the DF information and still know
>>>>> what blocks were modified.
>>>>
>>>> New patch below.
>>
>> Ping^2.
>> http://gcc.gnu.org/ml/gcc-patches/2010-08/msg00280.html
> Sorry, I thought this had been approved long ago.  Approved :-)

Thanks.  Committed with a small fix found while testing on x86_64: In
try_head_merge_bb, in the case where we try to move across multiple
blocks to optimize for a switch statement, the final destination block
must be the only predecessor of the block we're looking at.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-09-23 15:53                                     ` Bernd Schmidt
@ 2010-09-23 22:00                                       ` Richard Guenther
  2010-09-23 22:03                                         ` Richard Guenther
  2010-09-27 15:56                                       ` Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803" Hans-Peter Nilsson
  1 sibling, 1 reply; 95+ messages in thread
From: Richard Guenther @ 2010-09-23 22:00 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On Thu, Sep 23, 2010 at 12:07 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> On 09/20/2010 05:56 PM, Jeff Law wrote:
>>  On 09/20/10 04:03, Bernd Schmidt wrote:
>>> On 08/30/2010 05:23 PM, Bernd Schmidt wrote:
>>>> On 08/04/2010 03:35 PM, Bernd Schmidt wrote:
>>>>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>>>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is
>>>>>> set
>>>>>> at the same time as a block is marked dirty, but is not cleared
>>>>>> during a
>>>>>> df_analyze allowing a pass to update the DF information and still know
>>>>>> what blocks were modified.
>>>>>
>>>>> New patch below.
>>>
>>> Ping^2.
>>> http://gcc.gnu.org/ml/gcc-patches/2010-08/msg00280.html
>> Sorry, I thought this had been approved long ago.  Approved :-)
>
> Thanks.  Committed with a small fix found while testing on x86_64: In
> try_head_merge_bb, in the case where we try to move across multiple
> blocks to optimize for a switch statement, the final destination block
> must be the only predecessor of the block we're looking at.

I now get a bootstrap failure building Ada on x86_64-linux in

Program received signal SIGSEGV, Segmentation fault.
0x0000000000c9ebf6 in df_simulate_one_insn_backwards (bb=0x7ffff3f0ad68,
    insn=0x0, live=0x50bab30)
    at /space/rguenther/src/svn/trunk/gcc/df-problems.c:3640
3640      if (!NONDEBUG_INSN_P (insn))
(gdb) up
#1  0x0000000000c9f112 in simulate_backwards_to_point (bb=0x7ffff3f0ad68,
    live=0x50bab30, point=0x7ffff3a461b0)
    at /space/rguenther/src/svn/trunk/gcc/df-problems.c:3813
3813        df_simulate_one_insn_backwards (bb, insn, live);
#2  0x0000000002d2fe2f in try_head_merge_bb (bb=0x7ffff3f0ad00)
    at /space/rguenther/src/svn/trunk/gcc/cfgcleanup.c:2094
#3  0x0000000002d30c2e in try_optimize_cfg (mode=2)
    at /space/rguenther/src/svn/trunk/gcc/cfgcleanup.c:2437
#4  0x0000000002d311a1 in cleanup_cfg (mode=2)
    at /space/rguenther/src/svn/trunk/gcc/cfgcleanup.c:2627
#5  0x0000000002d842ad in rest_of_handle_stack_adjustments ()
    at /space/rguenther/src/svn/trunk/gcc/combine-stack-adj.c:555

and your patch is in range and looks related.  Yes I do have another
patch applied, but that affects profiling only which is disabled.

Checking w/o now.

Richard.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-09-23 22:00                                       ` Richard Guenther
@ 2010-09-23 22:03                                         ` Richard Guenther
  2010-09-23 22:18                                           ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Richard Guenther @ 2010-09-23 22:03 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On Thu, Sep 23, 2010 at 4:34 PM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Thu, Sep 23, 2010 at 12:07 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>> On 09/20/2010 05:56 PM, Jeff Law wrote:
>>>  On 09/20/10 04:03, Bernd Schmidt wrote:
>>>> On 08/30/2010 05:23 PM, Bernd Schmidt wrote:
>>>>> On 08/04/2010 03:35 PM, Bernd Schmidt wrote:
>>>>>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>>>>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is
>>>>>>> set
>>>>>>> at the same time as a block is marked dirty, but is not cleared
>>>>>>> during a
>>>>>>> df_analyze allowing a pass to update the DF information and still know
>>>>>>> what blocks were modified.
>>>>>>
>>>>>> New patch below.
>>>>
>>>> Ping^2.
>>>> http://gcc.gnu.org/ml/gcc-patches/2010-08/msg00280.html
>>> Sorry, I thought this had been approved long ago.  Approved :-)
>>
>> Thanks.  Committed with a small fix found while testing on x86_64: In
>> try_head_merge_bb, in the case where we try to move across multiple
>> blocks to optimize for a switch statement, the final destination block
>> must be the only predecessor of the block we're looking at.
>
> I now get an bootstrap fail building Ada on x86_64-linux in
>
> Program received signal SIGSEGV, Segmentation fault.
> 0x0000000000c9ebf6 in df_simulate_one_insn_backwards (bb=0x7ffff3f0ad68,
>    insn=0x0, live=0x50bab30)
>    at /space/rguenther/src/svn/trunk/gcc/df-problems.c:3640
> 3640      if (!NONDEBUG_INSN_P (insn))
> (gdb) up
> #1  0x0000000000c9f112 in simulate_backwards_to_point (bb=0x7ffff3f0ad68,
>    live=0x50bab30, point=0x7ffff3a461b0)
>    at /space/rguenther/src/svn/trunk/gcc/df-problems.c:3813
> 3813        df_simulate_one_insn_backwards (bb, insn, live);
> #2  0x0000000002d2fe2f in try_head_merge_bb (bb=0x7ffff3f0ad00)
>    at /space/rguenther/src/svn/trunk/gcc/cfgcleanup.c:2094
> #3  0x0000000002d30c2e in try_optimize_cfg (mode=2)
>    at /space/rguenther/src/svn/trunk/gcc/cfgcleanup.c:2437
> #4  0x0000000002d311a1 in cleanup_cfg (mode=2)
>    at /space/rguenther/src/svn/trunk/gcc/cfgcleanup.c:2627
> #5  0x0000000002d842ad in rest_of_handle_stack_adjustments ()
>    at /space/rguenther/src/svn/trunk/gcc/combine-stack-adj.c:555
>
> and your patch is in range and looks related.  Yes I do have another
> patch applied, but that affects profiling only which is disabled.
>
> Checking w/o now.

Same issue with clean r164564.

raised STORAGE_ERROR : stack overflow (or erroneous memory access)
make[3]: *** [ada/exp_ch3.o] Error 1
make[3]: *** Waiting for unfinished jobs....
rm gfdl.pod cpp.pod fsf-funding.pod gcc.pod gcov.pod
make[3]: Leaving directory `/home/abuild/rguenther/obj/gcc'
make[2]: *** [all-stage2-gcc] Error 2
make[2]: Leaving directory `/home/abuild/rguenther/obj'
make[1]: *** [stage2-bubble] Error 2
make[1]: Leaving directory `/home/abuild/rguenther/obj'
make: *** [all] Error 2

Richard.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-09-23 22:03                                         ` Richard Guenther
@ 2010-09-23 22:18                                           ` Bernd Schmidt
  2010-09-24 11:29                                             ` Richard Guenther
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-09-23 22:18 UTC (permalink / raw)
  To: Richard Guenther
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 671 bytes --]

On 09/23/2010 04:41 PM, Richard Guenther wrote:
>> I now get an bootstrap fail building Ada on x86_64-linux in
>>
>> Program received signal SIGSEGV, Segmentation fault.
>> 0x0000000000c9ebf6 in df_simulate_one_insn_backwards (bb=0x7ffff3f0ad68,
>>    insn=0x0, live=0x50bab30)
>>    at /space/rguenther/src/svn/trunk/gcc/df-problems.c:3640
>> 3640      if (!NONDEBUG_INSN_P (insn))

It found two matching blocks, each of which had 5 insns, but didn't stop
there since BB_END was a DEBUG_INSN.  The first insns in the two
following basic blocks matched, so flow_find_head_matching_sequence
returned 6.

The following patch should fix it; ok after bootstrap/test?


Bernd

[-- Attachment #2: past-end.diff --]
[-- Type: text/plain, Size: 834 bytes --]

	* cfgcleanup.c (flow_find_head_matching_sequence): Terminate when
	reaching the end of a block if it occurs at a DEBUG_INSN.

Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 164552)
+++ cfgcleanup.c	(working copy)
@@ -1184,7 +1184,6 @@ flow_find_head_matching_sequence (basic_
 
   while (true)
     {
-
       /* Ignore notes.  */
       while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1))
 	i1 = NEXT_INSN (i1);
@@ -1192,6 +1191,10 @@ flow_find_head_matching_sequence (basic_
       while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2))
 	i2 = NEXT_INSN (i2);
 
+      if ((i1 == BB_END (bb1) && !NONDEBUG_INSN_P (i1))
+	  || (i2 == BB_END (bb2) && !NONDEBUG_INSN_P (i2)))
+	break;
+
       if (NOTE_P (i1) || NOTE_P (i2)
 	  || JUMP_P (i1) || JUMP_P (i2))
 	break;

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-09-23 22:18                                           ` Bernd Schmidt
@ 2010-09-24 11:29                                             ` Richard Guenther
  0 siblings, 0 replies; 95+ messages in thread
From: Richard Guenther @ 2010-09-24 11:29 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On Thu, Sep 23, 2010 at 5:50 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> On 09/23/2010 04:41 PM, Richard Guenther wrote:
>>> I now get an bootstrap fail building Ada on x86_64-linux in
>>>
>>> Program received signal SIGSEGV, Segmentation fault.
>>> 0x0000000000c9ebf6 in df_simulate_one_insn_backwards (bb=0x7ffff3f0ad68,
>>>    insn=0x0, live=0x50bab30)
>>>    at /space/rguenther/src/svn/trunk/gcc/df-problems.c:3640
>>> 3640      if (!NONDEBUG_INSN_P (insn))
>
> It found two matching blocks, each of which had 5 insns, but didn't stop
> there since BB_END was a DEBUG_INSN.  The first insns in the two
> following basic blocks matched, so flow_find_head_matching_sequence
> returned 6.
>
> The following patch should fix it; ok after bootstrap/test?

Ok.

Thanks,
Richard.

>
> Bernd
>

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803"
  2010-09-23 15:53                                     ` Bernd Schmidt
  2010-09-23 22:00                                       ` Richard Guenther
@ 2010-09-27 15:56                                       ` Hans-Peter Nilsson
  2010-09-27 20:34                                         ` Bernd Schmidt
  1 sibling, 1 reply; 95+ messages in thread
From: Hans-Peter Nilsson @ 2010-09-27 15:56 UTC (permalink / raw)
  To: bernds; +Cc: gcc-patches

(I'm replying to the recent message regarding the patch.)
The title was missing PR44374, I'm not sure whether there is
overlap or just a cutnpasto.

> Date: Thu, 23 Sep 2010 12:07:51 +0200
> From: Bernd Schmidt <bernds@codesourcery.com>

> Thanks.  Committed with a small fix found while testing on x86_64: In
> try_head_merge_bb, in the case where we try to move across multiple
> blocks to optimize for a switch statement, the final destination block
> must be the only predecessor of the block we're looking at.

Revision 164552 broke build for cris-elf, see PR45792.

Looking at the patch, I can't see how the "Try again"-codepath
(see patch below) can ever work; the retry will try merging the
insns starting with the NEXT_INSN after the old, merged, insns.
But, the insns are merged, so the old NEXT_INSN is lost; the new
NEXT_INSN is now move_before (the compare insn for the cbranch
sequence), so you'll try moving the compare and jump to before
the jump.  Which will cause reorder_insns to get stuck in a
loop, after having set PREV_INSN(cmp) = NEXT_INSN (cmp) = cmp...

How did that pass testing?  Is there something I (we both) miss
causing that code-path to never execute for non-cc0 targets?

Is the following what you had in mind?  I don't like adding
more #ifdef HAVE_cc0, but it's either there or in
can_move_insns_across, and the latter seems like the worse choice.
I'll test this native x86_64 and cross to cris-elf, but perhaps
you meant something different altogether so I thought better
give early notice.  Maybe reconsider and lose the loopness of
the do...while(!moveall) loop, as that apparently doesn't happen
very often, at least not successfully. :)

Perhaps a sanity-check for reorder_insns would be in order.
(With it, I couldn't spot any slowdown for the .52s user time
that the test-case compiled, considering that reorder_insns_nobb
is now linear in the insn range length, but I didn't check
further.)

Ok to commit, either or all parts, after testing?

gcc/ChangeLog:
	PR rtl-optimization/45792
	* cfgcleanup.c (try_head_merge_bb): New rtx vector nextptr.
	If not all insns are to be merged, for each edge, stash the
	NEXT_INSNs after the to-be-merged insns before doing the merge,
	and use them for the retry at the new insertion point.

	* emit-rtl.c (reorder_insns_nobb) [ENABLE_CHECKING]: Sanity-check
	that AFTER is not in the range FROM..TO, inclusive.

--- gcc/cfgcleanup.c~	Sun Sep 26 08:24:42 2010
+++ gcc/cfgcleanup.c	Mon Sep 27 07:33:36 2010
@@ -1944,7 +1941,7 @@ try_head_merge_bb (basic_block bb)
   basic_block final_dest_bb = NULL;
   int max_match = INT_MAX;
   edge e0;
-  rtx *headptr, *currptr;
+  rtx *headptr, *currptr, *nextptr;
   bool changed, moveall;
   unsigned ix;
   rtx e0_last_head, cond, move_before;
@@ -2077,6 +2074,7 @@ try_head_merge_bb (basic_block bb)
 
   currptr = XNEWVEC (rtx, nedges);
   headptr = XNEWVEC (rtx, nedges);
+  nextptr = XNEWVEC (rtx, nedges);
 
   for (ix = 0; ix < nedges; ix++)
     {
@@ -2132,6 +2130,14 @@ try_head_merge_bb (basic_block bb)
 
 	  /* Try again, using a different insertion point.  */
 	  move_before = jump;
+
+#ifdef HAVE_cc0
+	  /* Don't try moving before a cc0 user, as that may invalidate
+	     the cc0.  */
+	  if (reg_mentioned_p (cc0_rtx, jump))
+	    break;
+#endif
+
 	  continue;
 	}
 
@@ -2155,6 +2161,12 @@ try_head_merge_bb (basic_block bb)
 	    }
 	}
 
+      /* If we can't currently move all of the identical insns, remember
+	 each insn after the range that we'll merge.  */
+      if (!moveall)
+	for (ix = 0; ix < nedges; ix++)
+	  nextptr[ix] = NEXT_INSN (currptr[ix]);
+
       reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before));
       df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest);
       if (final_dest_bb != NULL)
@@ -2170,14 +2182,21 @@ try_head_merge_bb (basic_block bb)
 	  if (jump == move_before)
 	    break;
 
-	  /* Try again, using a different insertion point.  */
+	  /* For the unmerged insns, try a different insertion point.  */
 	  move_before = jump;
+
+#ifdef HAVE_cc0
+	  /* Don't try moving before a cc0 user, as that may invalidate
+	     the cc0.  */
+	  if (reg_mentioned_p (cc0_rtx, jump))
+	    break;
+#endif
+
 	  for (ix = 0; ix < nedges; ix++)
 	    {
-	      rtx curr = currptr[ix];
-	      do
-		curr = NEXT_INSN (curr);
+	      rtx curr = nextptr[ix];
 	      while (!NONDEBUG_INSN_P (curr));
+		curr = NEXT_INSN (curr);
 	      currptr[ix] = headptr[ix] = curr;
 	    }
 	}
--- gcc/emit-rtl.c~	Sun Sep 26 08:24:42 2010
+++ gcc/emit-rtl.c	Mon Sep 27 08:19:20 2010
@@ -3972,16 +3972,23 @@ delete_insns_since (rtx from)
    AFTER must not be FROM or TO or any insn in between.
 
    This function does not know about SEQUENCEs and hence should not be
    called after delay-slot filling has been done.  */
 
 void
 reorder_insns_nobb (rtx from, rtx to, rtx after)
 {
+#ifdef ENABLE_CHECKING
+  rtx x;
+  for (x = from; x != to; x = NEXT_INSN (x))
+    gcc_assert (after != x);
+  gcc_assert (after != to);
+#endif
+
   /* Splice this bunch out of where it is now.  */
   if (PREV_INSN (from))
     NEXT_INSN (PREV_INSN (from)) = NEXT_INSN (to);
   if (NEXT_INSN (to))
     PREV_INSN (NEXT_INSN (to)) = PREV_INSN (from);
   if (get_last_insn () == to)
     set_last_insn (PREV_INSN (from));
   if (get_insns () == from)

brgds, H-P

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803"
  2010-09-27 15:56                                       ` Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803" Hans-Peter Nilsson
@ 2010-09-27 20:34                                         ` Bernd Schmidt
  2010-09-27 23:38                                           ` Hans-Peter Nilsson
  0 siblings, 1 reply; 95+ messages in thread
From: Bernd Schmidt @ 2010-09-27 20:34 UTC (permalink / raw)
  To: Hans-Peter Nilsson; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1236 bytes --]

On 09/27/2010 11:11 AM, Hans-Peter Nilsson wrote:
> Revision 164552 broke build for cris-elf, see PR45792.
> 
> Looking at the patch, I can't see how the "Try again"-codepath
> (see patch below) can ever work; the retry will try merging the
> insns starting with the NEXT_INSN after the old, merged, insns.
> But, the insns are merged, so the old NEXT_INSN is lost; the new
> NEXT_INSN is now move_before (the compare insn for the cbranch
> sequence), so you'll try moving the compare and jump to before
> the jump.  Which will cause reorder_insns to get stuck in a
> loop, after having set PREV_INSN(cmp) = NEXT_INSN (cmp) = cmp...
> 
> How did that pass testing?  Is there something I (we both) miss
> causing that code-path to never execute for non-cc0 targets?

It appears that it never triggers.  I'm pretty sure I tested that path
at some point, but I may have changed the currptr etc. bookkeeping
afterwards and broken it without noticing.

Here's a slightly different version, with the entire loop moved to an
earlier place (I saw crashes with your patch), and with an additional
free of the nextptr array.  This makes the code trigger a few times.
I've tested it on i686-linux; could you try on cris?  Ok if it passes.


Bernd

[-- Attachment #2: hp-cris.diff --]
[-- Type: text/plain, Size: 2400 bytes --]

Index: cfgcleanup.c
===================================================================
--- cfgcleanup.c	(revision 164589)
+++ cfgcleanup.c	(working copy)
@@ -1944,7 +1944,7 @@ try_head_merge_bb (basic_block bb)
   basic_block final_dest_bb = NULL;
   int max_match = INT_MAX;
   edge e0;
-  rtx *headptr, *currptr;
+  rtx *headptr, *currptr, *nextptr;
   bool changed, moveall;
   unsigned ix;
   rtx e0_last_head, cond, move_before;
@@ -2077,6 +2077,7 @@ try_head_merge_bb (basic_block bb)
 
   currptr = XNEWVEC (rtx, nedges);
   headptr = XNEWVEC (rtx, nedges);
+  nextptr = XNEWVEC (rtx, nedges);
 
   for (ix = 0; ix < nedges; ix++)
     {
@@ -2132,6 +2133,14 @@ try_head_merge_bb (basic_block bb)
 
 	  /* Try again, using a different insertion point.  */
 	  move_before = jump;
+
+#ifdef HAVE_cc0
+	  /* Don't try moving before a cc0 user, as that may invalidate
+	     the cc0.  */
+	  if (reg_mentioned_p (cc0_rtx, jump))
+	    break;
+#endif
+
 	  continue;
 	}
 
@@ -2155,6 +2164,18 @@ try_head_merge_bb (basic_block bb)
 	    }
 	}
 
+      /* If we can't currently move all of the identical insns, remember
+	 each insn after the range that we'll merge.  */
+      if (!moveall)
+	for (ix = 0; ix < nedges; ix++)
+	  {
+	    rtx curr = currptr[ix];
+	    do
+	      curr = NEXT_INSN (curr);
+	    while (!NONDEBUG_INSN_P (curr));
+	    nextptr[ix] = curr;
+	  }
+
       reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before));
       df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest);
       if (final_dest_bb != NULL)
@@ -2170,16 +2191,18 @@ try_head_merge_bb (basic_block bb)
 	  if (jump == move_before)
 	    break;
 
-	  /* Try again, using a different insertion point.  */
+	  /* For the unmerged insns, try a different insertion point.  */
 	  move_before = jump;
+
+#ifdef HAVE_cc0
+	  /* Don't try moving before a cc0 user, as that may invalidate
+	     the cc0.  */
+	  if (reg_mentioned_p (cc0_rtx, jump))
+	    break;
+#endif
+
 	  for (ix = 0; ix < nedges; ix++)
-	    {
-	      rtx curr = currptr[ix];
-	      do
-		curr = NEXT_INSN (curr);
-	      while (!NONDEBUG_INSN_P (curr));
-	      currptr[ix] = headptr[ix] = curr;
-	    }
+	    currptr[ix] = headptr[ix] = nextptr[ix];
 	}
     }
   while (!moveall);
@@ -2187,6 +2210,7 @@ try_head_merge_bb (basic_block bb)
  out:
   free (currptr);
   free (headptr);
+  free (nextptr);
 
   crossjumps_occured |= changed;
 

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803"
  2010-09-27 20:34                                         ` Bernd Schmidt
@ 2010-09-27 23:38                                           ` Hans-Peter Nilsson
  2010-09-28  0:07                                             ` Bernd Schmidt
  0 siblings, 1 reply; 95+ messages in thread
From: Hans-Peter Nilsson @ 2010-09-27 23:38 UTC (permalink / raw)
  To: bernds; +Cc: hp, gcc-patches

> Date: Mon, 27 Sep 2010 16:16:46 +0200
> From: Bernd Schmidt <bernds@codesourcery.com>

> Here's a slightly different version, with the entire loop moved to an
> earlier place (I saw crashes with your patch), and with an additional
> free of the nextptr array.

Oops, I missed that part.  No regressions for cris-elf though.

> This makes the code trigger a few times.
> I've tested it on i686-linux; could you try on cris?  Ok if it passes.

In progress, at r164560.  Ready in a few hours; will commit then
if successful.  Thanks.

In the meantime, what about the emit-rtl.c:reorder_insns_nobb
sanity-check?

brgds, H-P

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803"
  2010-09-27 23:38                                           ` Hans-Peter Nilsson
@ 2010-09-28  0:07                                             ` Bernd Schmidt
  0 siblings, 0 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-09-28  0:07 UTC (permalink / raw)
  To: Hans-Peter Nilsson; +Cc: hp, gcc-patches

On 09/27/2010 05:53 PM, Hans-Peter Nilsson wrote:
> In the meantime, what about the emit-rtl.c:reorder_insns_nobb
> sanity-check?

Sure, why not.  I left that out of the diff by accident.


Bernd

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-08-04 13:36                             ` Bernd Schmidt
  2010-08-30 16:00                               ` Bernd Schmidt
@ 2010-10-02 13:07                               ` H.J. Lu
  2010-10-03 11:33                                 ` Bernd Schmidt
  1 sibling, 1 reply; 95+ messages in thread
From: H.J. Lu @ 2010-10-02 13:07 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On Wed, Aug 4, 2010 at 6:35 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> On 08/03/2010 07:12 PM, Jeff Law wrote:
>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
>> at the same time as a block is marked dirty, but is not cleared during a
>> df_analyze allowing a pass to update the DF information and still know
>> what blocks were modified.
>
> New patch below.
>

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45865


-- 
H.J.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-10-02 13:07                               ` ifcvt/crossjump patch: Fix PR 42496, 21803 H.J. Lu
@ 2010-10-03 11:33                                 ` Bernd Schmidt
  2010-10-03 11:39                                   ` H.J. Lu
  2010-10-06  1:12                                   ` H.J. Lu
  0 siblings, 2 replies; 95+ messages in thread
From: Bernd Schmidt @ 2010-10-03 11:33 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 655 bytes --]

On 10/02/2010 03:07 PM, H.J. Lu wrote:
> On Wed, Aug 4, 2010 at 6:35 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
>>> at the same time as a block is marked dirty, but is not cleared during a
>>> df_analyze allowing a pass to update the DF information and still know
>>> what blocks were modified.
>>
>> New patch below.
>>
> 
> This caused:
> 
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45865

This should fix it, refusing to move NOTE_INSN_EPILOGUE_BEG and anything
beyond it.

Bootstrapped and tested on i686-linux, ok?


Bernd

[-- Attachment #2: epilogue.diff --]
[-- Type: text/plain, Size: 496 bytes --]

	* df-problems.c (can_move_insns_across): Stop at
	NOTE_INSN_EPILOGUE_BEG.
	
Index: df-problems.c
===================================================================
--- df-problems.c	(revision 164552)
+++ df-problems.c	(working copy)
@@ -3915,6 +3915,8 @@ can_move_insns_across (rtx from, rtx to,
     {
       if (CALL_P (insn))
 	break;
+      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
+	break;
       if (NONDEBUG_INSN_P (insn))
 	{
 	  if (may_trap_p (PATTERN (insn))

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-10-03 11:33                                 ` Bernd Schmidt
@ 2010-10-03 11:39                                   ` H.J. Lu
  2010-10-06  1:12                                   ` H.J. Lu
  1 sibling, 0 replies; 95+ messages in thread
From: H.J. Lu @ 2010-10-03 11:39 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On Sun, Oct 3, 2010 at 4:34 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> On 10/02/2010 03:07 PM, H.J. Lu wrote:
>> On Wed, Aug 4, 2010 at 6:35 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
>>>> at the same time as a block is marked dirty, but is not cleared during a
>>>> df_analyze allowing a pass to update the DF information and still know
>>>> what blocks were modified.
>>>
>>> New patch below.
>>>
>>
>> This caused:
>>
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45865
>
> This should fix it, refusing to move NOTE_EPILOGUE_BEGIN and anything
> beyond it.
>
> Bootstrapped and tested on i686-linux, ok?
>

Can you include a testcase?

Thanks.


-- 
H.J.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-10-03 11:33                                 ` Bernd Schmidt
  2010-10-03 11:39                                   ` H.J. Lu
@ 2010-10-06  1:12                                   ` H.J. Lu
  2010-10-06  2:46                                     ` H.J. Lu
  1 sibling, 1 reply; 95+ messages in thread
From: H.J. Lu @ 2010-10-06  1:12 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

On Sun, Oct 3, 2010 at 4:34 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> On 10/02/2010 03:07 PM, H.J. Lu wrote:
>> On Wed, Aug 4, 2010 at 6:35 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
>>>> at the same time as a block is marked dirty, but is not cleared during a
>>>> df_analyze allowing a pass to update the DF information and still know
>>>> what blocks were modified.
>>>
>>> New patch below.
>>>
>>
>> This caused:
>>
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45865
>
> This should fix it, refusing to move NOTE_EPILOGUE_BEGIN and anything
> beyond it.
>
> Bootstrapped and tested on i686-linux, ok?
>

It still failed this testcase.

-- 
H.J.
---
[hjl@gnu-32 rrs]$ cat pr45865.c
typedef struct rtx_def *rtx;
enum machine_mode {
  VOIDmode,
  CCFPmode,
  CCFPUmode,
  MAX_MACHINE_MODE
};
enum mode_class {
  MODE_CC,
  MODE_FLOAT,
  MODE_COMPLEX_FLOAT,
  MODE_VECTOR_FLOAT
};
extern const enum mode_class mode_class[(int) MAX_MACHINE_MODE];
enum rtx_code {
  UNKNOWN,
  GEU,
  ORDERED,
  CONST_INT
};
struct rtx_def {
  unsigned int code: 16;
  unsigned int mode : 8;
};
extern enum rtx_code reverse_condition (enum rtx_code);
enum rtx_code
reversed_comparison_code_parts (enum rtx_code code, rtx insn, rtx arg0,
				rtx arg1)
{
  enum machine_mode mode;
  mode = (enum machine_mode) (arg0)->mode;
  if (mode == VOIDmode)
    mode = (enum machine_mode) (arg1)->mode;
  if ((mode_class[(int) (mode)]) == MODE_CC)
    return (mode != CCFPmode && mode != CCFPUmode
	    ? reverse_condition (code)
	    : reverse_condition_maybe_unordered (code));
  switch (code)
    {
    case GEU:
      return reverse_condition (code);
    case ORDERED:
      return UNKNOWN;
    }
  if (((enum rtx_code) (arg0)->code) == CONST_INT
      || (((enum machine_mode) (arg0)->mode) != VOIDmode
	  && ! ((mode_class[(int) (mode)]) == MODE_FLOAT
		|| (mode_class[(int) (mode)]) == MODE_COMPLEX_FLOAT
		|| (mode_class[(int) (mode)]) == MODE_VECTOR_FLOAT)))
    return reverse_condition (code);
  return UNKNOWN;
}
[hjl@gnu-32 rrs]$ /export/gnu/import/rrs/164914/usr/bin/gcc -O2 -S
-m32 pr45865.c
pr45865.c: In function ‘reversed_comparison_code_parts’:
pr45865.c:52:1: internal compiler error: in
dwarf2out_cfi_begin_epilogue, at dwarf2out.c:2930
Please submit a full bug report,
with preprocessed source if appropriate.
See <http://gcc.gnu.org/bugs.html> for instructions.
[hjl@gnu-32 rrs]$

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: ifcvt/crossjump patch: Fix PR 42496, 21803
  2010-10-06  1:12                                   ` H.J. Lu
@ 2010-10-06  2:46                                     ` H.J. Lu
  0 siblings, 0 replies; 95+ messages in thread
From: H.J. Lu @ 2010-10-06  2:46 UTC (permalink / raw)
  To: Bernd Schmidt
  Cc: Jeff Law, Eric Botcazou, gcc-patches, Steven Bosscher, Jim Wilson

[-- Attachment #1: Type: text/plain, Size: 3490 bytes --]

On Tue, Oct 5, 2010 at 6:12 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sun, Oct 3, 2010 at 4:34 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>> On 10/02/2010 03:07 PM, H.J. Lu wrote:
>>> On Wed, Aug 4, 2010 at 6:35 AM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>>>> On 08/03/2010 07:12 PM, Jeff Law wrote:
>>>>> OK.  WRT the comment, we might want to just say that BB_MODIFIED is set
>>>>> at the same time as a block is marked dirty, but is not cleared during a
>>>>> df_analyze allowing a pass to update the DF information and still know
>>>>> what blocks were modified.
>>>>
>>>> New patch below.
>>>>
>>>
>>> This caused:
>>>
>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45865
>>
>> This should fix it, refusing to move NOTE_EPILOGUE_BEGIN and anything
>> beyond it.
>>
>> Bootstrapped and tested on i686-linux, ok?
>>
>
> It still failed this testcase.
>
> --
> H.J.
> ---
> [hjl@gnu-32 rrs]$ cat pr45865.c
> typedef struct rtx_def *rtx;
> enum machine_mode {
>  VOIDmode,
>  CCFPmode,
>  CCFPUmode,
>  MAX_MACHINE_MODE
> };
> enum mode_class {
>  MODE_CC,
>  MODE_FLOAT,
>  MODE_COMPLEX_FLOAT,
>  MODE_VECTOR_FLOAT
> };
> extern const enum mode_class mode_class[(int) MAX_MACHINE_MODE];
> enum rtx_code {
>  UNKNOWN,
>  GEU,
>  ORDERED,
>  CONST_INT
> };
> struct rtx_def {
>  unsigned int code: 16;
>  unsigned int mode : 8;
> };
> extern enum rtx_code reverse_condition (enum rtx_code);
> enum rtx_code
> reversed_comparison_code_parts (enum rtx_code code, rtx insn, rtx arg0,
>                                rtx arg1)
> {
>  enum machine_mode mode;
>  mode = (enum machine_mode) (arg0)->mode;
>  if (mode == VOIDmode)
>    mode = (enum machine_mode) (arg1)->mode;
>  if ((mode_class[(int) (mode)]) == MODE_CC)
>    return (mode != CCFPmode && mode != CCFPUmode
>            ? reverse_condition (code)
>            : reverse_condition_maybe_unordered (code));
>  switch (code)
>    {
>    case GEU:
>      return reverse_condition (code);
>    case ORDERED:
>      return UNKNOWN;
>    }
>  if (((enum rtx_code) (arg0)->code) == CONST_INT
>      || (((enum machine_mode) (arg0)->mode) != VOIDmode
>          && ! ((mode_class[(int) (mode)]) == MODE_FLOAT
>                || (mode_class[(int) (mode)]) == MODE_COMPLEX_FLOAT
>                || (mode_class[(int) (mode)]) == MODE_VECTOR_FLOAT)))
>    return reverse_condition (code);
>  return UNKNOWN;
> }
> [hjl@gnu-32 rrs]$ /export/gnu/import/rrs/164914/usr/bin/gcc -O2 -S
> -m32 pr45865.c
> pr45865.c: In function ‘reversed_comparison_code_parts’:
> pr45865.c:52:1: internal compiler error: in
> dwarf2out_cfi_begin_epilogue, at dwarf2out.c:2930
> Please submit a full bug report,
> with preprocessed source if appropriate.
> See <http://gcc.gnu.org/bugs.html> for instructions.
> [hjl@gnu-32 rrs]$
>

One problem is:

if (max_to == NULL_RTX || (fail && pmove_upto == NULL))

When pmove_upto isn't NULL, *pmove_upto is initialized to NULL, so
that must be checked as well.  This patch works for me.  OK for trunk?

Thanks.

-- 
H.J.
---
gcc/

2010-10-05  Bernd Schmidt  <bernds@codesourcery.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	 PR rtl-optimization/45865
	 * df-problems.c (can_move_insns_across): Don't move
	 NOTE_EPILOGUE_BEGIN and anything beyond it.  Properly check
	 pmove_upto.

gcc/testsuite/

2010-10-05  H.J. Lu  <hongjiu.lu@intel.com>

	 PR rtl-optimization/45865
	 * gcc.dg/torture/pr45865.c: New.

[-- Attachment #2: gcc-pr45865-1.patch --]
[-- Type: text/plain, Size: 2693 bytes --]

gcc/

2010-10-05  Bernd Schmidt  <bernds@codesourcery.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	 PR rtl-optimization/45865
	 * df-problems.c (can_move_insns_across): Don't move
	 NOTE_EPILOGUE_BEGIN and anything beyond it.  Properly check
	 pmove_upto.

2010-10-05  H.J. Lu  <hongjiu.lu@intel.com>

	 PR rtl-optimization/45865
	 * gcc.dg/torture/pr45865.c: New.

diff --git a/gcc/df-problems.c b/gcc/df-problems.c
index 82a0d0b..932fa5f 100644
--- a/gcc/df-problems.c
+++ b/gcc/df-problems.c
@@ -3915,6 +3915,8 @@ can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to,
     {
       if (CALL_P (insn))
 	break;
+      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
+	break;
       if (NONDEBUG_INSN_P (insn))
 	{
 	  if (may_trap_p (PATTERN (insn))
@@ -3970,7 +3972,8 @@ can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to,
   if (max_to != to)
     fail = 1;
 
-  if (max_to == NULL_RTX || (fail && pmove_upto == NULL))
+  if (max_to == NULL_RTX
+      || (fail && (pmove_upto == NULL || *pmove_upto == NULL)))
     goto out;
 
   /* Now, lower this upper bound by also taking into account that
--- /dev/null	2010-09-09 09:16:30.485584932 -0700
+++ gcc/gcc/testsuite/gcc.dg/torture/pr45865.c	2010-10-05 19:36:04.918278945 -0700
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+
+typedef struct rtx_def *rtx;
+enum machine_mode {
+  VOIDmode,
+  CCFPmode,
+  CCFPUmode,
+  MAX_MACHINE_MODE
+};
+enum mode_class {
+  MODE_CC,
+  MODE_FLOAT,
+  MODE_COMPLEX_FLOAT,
+  MODE_VECTOR_FLOAT
+};
+extern const enum mode_class mode_class[(int) MAX_MACHINE_MODE];
+enum rtx_code {
+  UNKNOWN,
+  GEU,
+  ORDERED,
+  CONST_INT
+};
+struct rtx_def {
+  unsigned int code: 16;
+  unsigned int mode : 8;
+};
+extern enum rtx_code reverse_condition (enum rtx_code);
+enum rtx_code
+reversed_comparison_code_parts (enum rtx_code code, rtx insn, rtx arg0,
+				rtx arg1)
+{
+  enum machine_mode mode;
+  mode = (enum machine_mode) (arg0)->mode;
+  if (mode == VOIDmode)
+    mode = (enum machine_mode) (arg1)->mode;
+  if ((mode_class[(int) (mode)]) == MODE_CC)
+    return (mode != CCFPmode && mode != CCFPUmode
+	    ? reverse_condition (code)
+	    : reverse_condition_maybe_unordered (code));
+  switch (code) 
+    {
+    case GEU:
+      return reverse_condition (code);
+    case ORDERED:
+      return UNKNOWN;
+    }
+  if (((enum rtx_code) (arg0)->code) == CONST_INT
+      || (((enum machine_mode) (arg0)->mode) != VOIDmode
+	  && ! ((mode_class[(int) (mode)]) == MODE_FLOAT
+		|| (mode_class[(int) (mode)]) == MODE_COMPLEX_FLOAT
+		|| (mode_class[(int) (mode)]) == MODE_VECTOR_FLOAT)))
+    return reverse_condition (code);
+  return UNKNOWN;
+}

^ permalink raw reply	[flat|nested] 95+ messages in thread

end of thread, other threads:[~2010-10-06  2:46 UTC | newest]

Thread overview: 95+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-31 22:08 ifcvt/crossjump patch: Fix PR 42496, 21803 Bernd Schmidt
2010-04-01 18:00 ` Steven Bosscher
2010-04-01 18:01   ` Bernd Schmidt
2010-04-02  9:45   ` Bernd Schmidt
2010-04-06  9:21   ` Bernd Schmidt
2010-04-10 10:37     ` Eric Botcazou
2010-04-12 23:34       ` Bernd Schmidt
2010-04-13 21:14         ` Eric Botcazou
2010-04-13 21:36           ` Bernd Schmidt
2010-04-13 21:51             ` Eric Botcazou
2010-04-14 20:51           ` Bernd Schmidt
2010-04-14 21:09       ` Bernd Schmidt
2010-04-19 22:05         ` Eric Botcazou
2010-04-19 22:14           ` Steven Bosscher
2010-04-19 22:18             ` Steven Bosscher
2010-04-19 22:47               ` Steven Bosscher
2010-04-20 10:34             ` Eric Botcazou
2010-04-20 11:26             ` Bernd Schmidt
2010-04-23  9:25               ` Eric Botcazou
2010-04-23 11:15                 ` Steven Bosscher
2010-05-15 11:24                   ` Steven Bosscher
2010-05-28 10:00                     ` Eric Botcazou
2010-05-28 11:20                       ` Steven Bosscher
2010-04-20 12:30           ` Bernd Schmidt
2010-07-20 20:43           ` Bernd Schmidt
2010-07-22 19:47             ` Eric Botcazou
2010-07-22 21:09               ` Bernd Schmidt
2010-07-23 22:06                 ` Eric Botcazou
2010-07-23 22:13                   ` Bernd Schmidt
2010-07-24 13:07                     ` Eric Botcazou
2010-07-26  9:42                       ` Bernd Schmidt
2010-07-26 13:40                         ` Paolo Bonzini
2010-07-26 13:50                           ` Paolo Bonzini
2010-07-26 13:56                           ` Bernd Schmidt
2010-07-26 14:14                             ` Paolo Bonzini
2010-07-27  8:31                             ` Eric Botcazou
2010-07-27  9:37                               ` Bernd Schmidt
2010-07-27 13:35                                 ` Bernd Schmidt
2010-07-27 22:38                                   ` Eric Botcazou
2010-07-28 16:58                                     ` Jeff Law
2010-07-29  8:25                                       ` Eric Botcazou
2010-07-27 17:39                                 ` Jeff Law
2010-07-27 22:05                                   ` Bernd Schmidt
2010-07-27 22:40                                     ` Eric Botcazou
2010-07-28 17:06                                       ` Jeff Law
2010-07-29 17:28                                     ` Jeff Law
2010-07-29 17:43                                       ` Bernd Schmidt
2010-07-27 22:23                                 ` Eric Botcazou
2010-07-27 23:04                                   ` Bernd Schmidt
2010-07-28  8:40                                     ` Eric Botcazou
2010-07-28 10:13                                       ` Bernd Schmidt
2010-07-28 19:40                                         ` Jeff Law
2010-07-28 20:15                                           ` Bernd Schmidt
2010-07-29 16:00                                             ` Jeff Law
2010-07-29 16:21                                               ` Paolo Bonzini
2010-07-29 17:09                                                 ` Bernd Schmidt
2010-07-29 17:13                                                   ` Paolo Bonzini
2010-07-30  0:55                                                 ` Steven Bosscher
2010-07-28 18:31                                     ` Jeff Law
2010-07-28 18:36                                       ` Paolo Bonzini
2010-07-29  9:07                                       ` Eric Botcazou
2010-07-27 23:08                                   ` Paolo Bonzini
2010-07-28 21:44                           ` Bernd Schmidt
2010-07-29 14:31                             ` Jeff Law
2010-07-27 15:31                   ` Jeff Law
2010-07-27 22:18                     ` Eric Botcazou
2010-07-28 17:07                       ` Jeff Law
2010-07-28 17:38                         ` Bernd Schmidt
2010-08-02 15:57             ` Jeff Law
2010-08-02 15:59               ` Bernd Schmidt
2010-08-02 16:05                 ` Jeff Law
2010-08-02 16:15                   ` Bernd Schmidt
2010-08-03 14:10                     ` Bernd Schmidt
2010-08-03 15:16                       ` Jeff Law
2010-08-03 15:31                         ` Bernd Schmidt
2010-08-03 17:13                           ` Jeff Law
2010-08-04 13:36                             ` Bernd Schmidt
2010-08-30 16:00                               ` Bernd Schmidt
2010-09-20 10:23                                 ` Bernd Schmidt
2010-09-20 16:25                                   ` Jeff Law
2010-09-23 15:53                                     ` Bernd Schmidt
2010-09-23 22:00                                       ` Richard Guenther
2010-09-23 22:03                                         ` Richard Guenther
2010-09-23 22:18                                           ` Bernd Schmidt
2010-09-24 11:29                                             ` Richard Guenther
2010-09-27 15:56                                       ` Fix PR45792, cris-elf build breakage from PR44374-fix "ifcvt/crossjump patch: Fix PR 42496, 21803" Hans-Peter Nilsson
2010-09-27 20:34                                         ` Bernd Schmidt
2010-09-27 23:38                                           ` Hans-Peter Nilsson
2010-09-28  0:07                                             ` Bernd Schmidt
2010-10-02 13:07                               ` ifcvt/crossjump patch: Fix PR 42496, 21803 H.J. Lu
2010-10-03 11:33                                 ` Bernd Schmidt
2010-10-03 11:39                                   ` H.J. Lu
2010-10-06  1:12                                   ` H.J. Lu
2010-10-06  2:46                                     ` H.J. Lu
2010-04-12 20:43     ` Jim Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).