public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][RFC] Fix DF sub-CFG analysis slowness (PR39326)
@ 2014-01-15 10:53 Richard Biener
  2014-01-15 12:41 ` Steven Bosscher
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2014-01-15 10:53 UTC (permalink / raw)
  To: gcc-patches


This improves compile time of the testcase in PR39326 at -O3 from

 loop invariant motion   :  24.41 (32%) usr   0.03 ( 4%) sys  24.43 (32%) 
wall     148 kB ( 0%) ggc
 loop unswitching        :  15.16 (20%) usr   0.02 ( 3%) sys  15.30 (20%) 
wall       0 kB ( 0%) ggc
 TOTAL                 :  76.51             0.72            77.23             
456534 kB

to

 loop invariant motion   :   0.18 ( 0%) usr   0.00 ( 0%) sys   0.16 ( 0%) 
wall     148 kB ( 0%) ggc
 loop unswitching        :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) 
wall       0 kB ( 0%) ggc
 TOTAL                 :  37.14             0.67            37.80             
456534 kB

The problem with DF sub-CFG analysis (df_set_blocks) is that while
the actual analysis works on the sub-CFG the function first computes
inverted postorder and postorder of the whole function and then
prunes it, which takes 99% of the compile time for the testcase
with a lot of very simple loops.

There are two users of df_set_blocks, RTL loop invariant motion,
doing a df_analyze on each loop, and RTL loop IV analysis
(done on each loop by unswitching).  The following patch fixes
the slowness when the sub-CFG is the body of a single loop
as it's reasonably easy to implement (inverted) postorder
computation for a SEME region.

Now the question is what the desired interface of this should be
(the patch is a kludge here) and if the generic df_set_blocks
interface should be retained (it doesn't prohibit you to
analyze unconnected parts of a CFG for example) - it also retains
its slowness for simple analysis cases.

I can split off a df_analyze_1 and have a df_analyze () doing what
it does now and a df_analyze_loop () combining df_set_blocks ()
and df_analyze () for example.

Any preference?  Or do we want a df_set_loop ()-like interface?
df_analyze seems to be the only public interface caring about
blocks (its comment even suggests that the sub-CFG bitmap was
even part of its interface at some point).

I'll probably move the CFG order computes to cfgloop.c and
eventually make it use flow_bb_inside_loop_p instead of
the bitmap with all loop blocks (even though that will be
a little slower in the end).

Thanks,
Richard.

Index: gcc/df.h
===================================================================
*** gcc/df.h	(revision 206624)
--- gcc/df.h	(working copy)
*************** extern void df_set_blocks (bitmap);
*** 900,906 ****
  extern void df_remove_problem (struct dataflow *);
  extern void df_finish_pass (bool);
  extern void df_analyze_problem (struct dataflow *, bitmap, int *, int);
! extern void df_analyze (void);
  extern int df_get_n_blocks (enum df_flow_dir);
  extern int *df_get_postorder (enum df_flow_dir);
  extern void df_simple_dataflow (enum df_flow_dir, df_init_function,
--- 900,906 ----
  extern void df_remove_problem (struct dataflow *);
  extern void df_finish_pass (bool);
  extern void df_analyze_problem (struct dataflow *, bitmap, int *, int);
! extern void df_analyze (struct loop *loop = NULL);
  extern int df_get_n_blocks (enum df_flow_dir);
  extern int *df_get_postorder (enum df_flow_dir);
  extern void df_simple_dataflow (enum df_flow_dir, df_init_function,
Index: gcc/df-core.c
===================================================================
*** gcc/df-core.c	(revision 206624)
--- gcc/df-core.c	(working copy)
*************** are write-only operations.
*** 393,398 ****
--- 393,399 ----
  #include "df.h"
  #include "tree-pass.h"
  #include "params.h"
+ #include "cfgloop.h"
  
  static void *df_get_bb_info (struct dataflow *, unsigned int);
  static void df_set_bb_info (struct dataflow *, unsigned int, void *);
*************** df_analyze_problem (struct dataflow *dfl
*** 1225,1235 ****
  }
  
  
  /* Analyze dataflow info for the basic blocks specified by the bitmap
     BLOCKS, or for the whole CFG if BLOCKS is zero.  */
  
  void
! df_analyze (void)
  {
    bitmap current_all_blocks = BITMAP_ALLOC (&df_bitmap_obstack);
    bool everything;
--- 1226,1383 ----
  }
  
  
+ static int
+ loop_post_order_compute (int *post_order, struct loop *loop,
+ 			 bitmap loop_blocks)
+ {
+   edge_iterator *stack;
+   int sp;
+   int post_order_num = 0;
+   bitmap visited;
+ 
+   /* Allocate stack for back-tracking up CFG.  */
+   stack = XNEWVEC (edge_iterator, loop->num_nodes + 1);
+   sp = 0;
+ 
+   /* Allocate bitmap to track nodes that have been visited.  */
+   visited = BITMAP_ALLOC (NULL);
+ 
+   /* Push the first edge on to the stack.  */
+   stack[sp++] = ei_start (loop_preheader_edge (loop)->src->succs);
+ 
+   while (sp)
+     {
+       edge_iterator ei;
+       basic_block src;
+       basic_block dest;
+ 
+       /* Look at the edge on the top of the stack.  */
+       ei = stack[sp - 1];
+       src = ei_edge (ei)->src;
+       dest = ei_edge (ei)->dest;
+ 
+       /* Check if the edge destination has been visited yet and mark it
+          if not so.  */
+       if (bitmap_bit_p (loop_blocks, dest->index)
+ 	  && bitmap_set_bit (visited, dest->index))
+ 	{
+ 	  if (EDGE_COUNT (dest->succs) > 0)
+ 	    /* Since the DEST node has been visited for the first
+ 	       time, check its successors.  */
+ 	    stack[sp++] = ei_start (dest->succs);
+ 	  else
+ 	    post_order[post_order_num++] = dest->index;
+ 	}
+       else
+ 	{
+ 	  if (ei_one_before_end_p (ei)
+ 	      && src != loop_preheader_edge (loop)->src)
+ 	    post_order[post_order_num++] = src->index;
+ 
+ 	  if (!ei_one_before_end_p (ei))
+ 	    ei_next (&stack[sp - 1]);
+ 	  else
+ 	    sp--;
+ 	}
+     }
+ 
+   free (stack);
+   BITMAP_FREE (visited);
+ 
+   return post_order_num;
+ }
+ 
+ int
+ loop_inverted_post_order_compute (int *post_order, struct loop *loop,
+ 				  bitmap loop_blocks)
+ {
+   basic_block bb;
+   edge_iterator *stack;
+   int sp;
+   int post_order_num = 0;
+   bitmap visited;
+   unsigned i;
+ 
+   vec<edge> exits = get_loop_exit_edges (loop);
+ 
+   /* Allocate stack for back-tracking up CFG.  */
+   stack = XNEWVEC (edge_iterator, loop->num_nodes + exits.length () + 1);
+   sp = 0;
+ 
+   /* Allocate bitmap to track nodes that have been visited.  */
+   visited = BITMAP_ALLOC (NULL);
+ 
+   /* Put all loop exit blocks into the initial work list or the latches,
+      if there is no exit.  */
+   if (!exits.is_empty ())
+     {
+       edge e;
+       for (i = 0; exits.iterate (i, &e); ++i)
+ 	stack[sp++] = ei_start (e->dest->preds);
+     }
+   else
+     {
+       stack[sp++] = ei_start (loop->header->preds);
+       bitmap_set_bit (visited, loop->header->index);
+     }
+   exits.release ();
+ 
+   /* The inverted traversal loop. */
+   while (sp)
+     {
+       edge_iterator ei;
+       basic_block pred;
+ 
+       /* Look at the edge on the top of the stack.  */
+       ei = stack[sp - 1];
+       bb = ei_edge (ei)->dest;
+       pred = ei_edge (ei)->src;
+ 
+       /* Check if the predecessor has been visited yet and mark it
+ 	 if not so.  */
+       if (bitmap_bit_p (loop_blocks, pred->index)
+ 	  && bitmap_set_bit (visited, pred->index))
+ 	{
+ 	  if (EDGE_COUNT (pred->preds) > 0)
+ 	    /* Since the predecessor node has been visited for the first
+ 	       time, check its predecessors.  */
+ 	    stack[sp++] = ei_start (pred->preds);
+ 	  else
+ 	    post_order[post_order_num++] = pred->index;
+ 	}
+       else
+ 	{
+ 	  if (bitmap_bit_p (loop_blocks, bb->index)
+ 	      && ei_one_before_end_p (ei))
+ 	    post_order[post_order_num++] = bb->index;
+ 
+ 	  if (!ei_one_before_end_p (ei))
+ 	    ei_next (&stack[sp - 1]);
+ 	  else
+ 	    sp--;
+ 	}
+     }
+ 
+   /* Detect any infinite loop and activate the kludge.
+      Note that this doesn't check EXIT_BLOCK itself
+      since EXIT_BLOCK is always added after the outer do-while loop.  */
+   /* ???  Assert this doesn't happen, thus that we visited all
+      loop_blocks.  */
+   bitmap_compl_and_into (visited, loop_blocks);
+   gcc_assert (bitmap_empty_p (visited));
+ 
+   free (stack);
+   BITMAP_FREE (visited);
+   return post_order_num;
+ }
+ 
+ 
+ 
  /* Analyze dataflow info for the basic blocks specified by the bitmap
     BLOCKS, or for the whole CFG if BLOCKS is zero.  */
  
  void
! df_analyze (struct loop *loop)
  {
    bitmap current_all_blocks = BITMAP_ALLOC (&df_bitmap_obstack);
    bool everything;
*************** df_analyze (void)
*** 1237,1246 ****
  
    free (df->postorder);
    free (df->postorder_inverted);
!   df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
!   df->postorder_inverted = XNEWVEC (int, last_basic_block_for_fn (cfun));
!   df->n_blocks = post_order_compute (df->postorder, true, true);
!   df->n_blocks_inverted = inverted_post_order_compute (df->postorder_inverted);
  
    /* These should be the same.  */
    gcc_assert (df->n_blocks == df->n_blocks_inverted);
--- 1385,1412 ----
  
    free (df->postorder);
    free (df->postorder_inverted);
!   if (loop)
!     {
!       gcc_assert (df->analyze_subset);
!       gcc_checking_assert (bitmap_count_bits (df->blocks_to_analyze)
! 			   == loop->num_nodes);
!       df->postorder = XNEWVEC (int, loop->num_nodes);
!       df->postorder_inverted = XNEWVEC (int, loop->num_nodes);
!       df->n_blocks = loop_post_order_compute (df->postorder, loop,
! 					      df->blocks_to_analyze);
!       df->n_blocks_inverted
! 	= loop_inverted_post_order_compute (df->postorder_inverted, loop,
! 					    df->blocks_to_analyze);
!       gcc_assert ((unsigned) df->n_blocks == loop->num_nodes);
!       gcc_assert ((unsigned) df->n_blocks_inverted == loop->num_nodes);
!     }
!   else
!     {
!       df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
!       df->postorder_inverted = XNEWVEC (int, last_basic_block_for_fn (cfun));
!       df->n_blocks = post_order_compute (df->postorder, true, true);
!       df->n_blocks_inverted = inverted_post_order_compute (df->postorder_inverted);
!     }
  
    /* These should be the same.  */
    gcc_assert (df->n_blocks == df->n_blocks_inverted);
*************** df_analyze (void)
*** 1273,1284 ****
    if (df->analyze_subset)
      {
        everything = false;
!       bitmap_and_into (df->blocks_to_analyze, current_all_blocks);
!       df->n_blocks = df_prune_to_subcfg (df->postorder,
! 					 df->n_blocks, df->blocks_to_analyze);
!       df->n_blocks_inverted = df_prune_to_subcfg (df->postorder_inverted,
! 			                          df->n_blocks_inverted,
!                                                   df->blocks_to_analyze);
        BITMAP_FREE (current_all_blocks);
      }
    else
--- 1439,1456 ----
    if (df->analyze_subset)
      {
        everything = false;
!       if (!loop)
! 	{
! 	  bitmap_and_into (df->blocks_to_analyze, current_all_blocks);
! 	  df->n_blocks = df_prune_to_subcfg (df->postorder,
! 					     df->n_blocks, df->blocks_to_analyze);
! 	  df->n_blocks_inverted = df_prune_to_subcfg (df->postorder_inverted,
! 						      df->n_blocks_inverted,
! 						      df->blocks_to_analyze);
! 	}
!       else
! 	{
! 	}
        BITMAP_FREE (current_all_blocks);
      }
    else
Index: gcc/loop-invariant.c
===================================================================
*** gcc/loop-invariant.c	(revision 206624)
--- gcc/loop-invariant.c	(working copy)
*************** find_defs (struct loop *loop, basic_bloc
*** 672,678 ****
    df_chain_add_problem (DF_UD_CHAIN);
    df_set_blocks (blocks);
    df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
!   df_analyze ();
    check_invariant_table_size ();
  
    if (dump_file)
--- 672,678 ----
    df_chain_add_problem (DF_UD_CHAIN);
    df_set_blocks (blocks);
    df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
!   df_analyze (loop);
    check_invariant_table_size ();
  
    if (dump_file)
Index: gcc/loop-iv.c
===================================================================
*** gcc/loop-iv.c	(revision 206624)
--- gcc/loop-iv.c	(working copy)
*************** iv_analysis_loop_init (struct loop *loop
*** 307,313 ****
    df_chain_add_problem (DF_UD_CHAIN);
    df_note_add_problem ();
    df_set_blocks (blocks);
!   df_analyze ();
    if (dump_file)
      df_dump_region (dump_file);
  
--- 307,313 ----
    df_chain_add_problem (DF_UD_CHAIN);
    df_note_add_problem ();
    df_set_blocks (blocks);
!   df_analyze (loop);
    if (dump_file)
      df_dump_region (dump_file);
  

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][RFC] Fix DF sub-CFG analysis slowness (PR39326)
  2014-01-15 10:53 [PATCH][RFC] Fix DF sub-CFG analysis slowness (PR39326) Richard Biener
@ 2014-01-15 12:41 ` Steven Bosscher
  2014-01-15 12:53   ` Richard Biener
  2014-01-15 14:03   ` [PATCH] " Richard Biener
  0 siblings, 2 replies; 4+ messages in thread
From: Steven Bosscher @ 2014-01-15 12:41 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Wed, Jan 15, 2014 at 11:52 AM, Richard Biener wrote:
> I can split off a df_analyze_1 and have a df_analyze () doing what
> it does now and a df_analyze_loop () combining df_set_blocks ()
> and df_analyze () for example.

df_analyze_loop sounds good to me.

But shouldn't it be possible to avoid the re-computation of the
pre/postorders? If the CFG hasn't changed, the orders also haven't
changed. It should be possible to communicate that to DF from the CFG
hooks somehow.

AFAIR loop-invariant.c doesn't modify the CFG (and if it does then
it's easily avoided) and RTL loop unswitching should just go away.

Ciao!
Steven

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][RFC] Fix DF sub-CFG analysis slowness (PR39326)
  2014-01-15 12:41 ` Steven Bosscher
@ 2014-01-15 12:53   ` Richard Biener
  2014-01-15 14:03   ` [PATCH] " Richard Biener
  1 sibling, 0 replies; 4+ messages in thread
From: Richard Biener @ 2014-01-15 12:53 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: gcc-patches

On Wed, 15 Jan 2014, Steven Bosscher wrote:

> On Wed, Jan 15, 2014 at 11:52 AM, Richard Biener wrote:
> > I can split off a df_analyze_1 and have a df_analyze () doing what
> > it does now and a df_analyze_loop () combining df_set_blocks ()
> > and df_analyze () for example.
> 
> df_analyze_loop sounds good to me.
> 
> But shouldn't it be possible to avoid the re-computation of the
> pre/postorders? If the CFG hasn't changed, the orders also haven't
> changed. It should be possible to communicate that to DF from the CFG
> hooks somehow.

But in these cases it doesn't really help as the pre/postorders
are pruned to the sub-CFG (so they are lost).  But yeah, with
more refactoring in this area this could be improved.

> AFAIR loop-invariant.c doesn't modify the CFG (and if it does then
> it's easily avoided) and RTL loop unswitching should just go away.

Right.  Even better would be if loop-invariant.c could keep the
DF info up-to-date.  After all it just moves stmts and seems to
be interested in UD chains only ... (which shouldn't change at all?)

Richard.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] Fix DF sub-CFG analysis slowness (PR39326)
  2014-01-15 12:41 ` Steven Bosscher
  2014-01-15 12:53   ` Richard Biener
@ 2014-01-15 14:03   ` Richard Biener
  1 sibling, 0 replies; 4+ messages in thread
From: Richard Biener @ 2014-01-15 14:03 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: gcc-patches

On Wed, 15 Jan 2014, Steven Bosscher wrote:

> On Wed, Jan 15, 2014 at 11:52 AM, Richard Biener wrote:
> > I can split off a df_analyze_1 and have a df_analyze () doing what
> > it does now and a df_analyze_loop () combining df_set_blocks ()
> > and df_analyze () for example.
> 
> df_analyze_loop sounds good to me.

Like the following.  I've kept the sub-CFG postorder compute
routines in df-core.c as they are quite special and don't
match the get_loop_body_in_XXX_order ones we have in cfgloop.c.

Bootstrap and regtest running on x86_64-unknown-linux-gnu
(succeeded with the hackish patch).

Ok for trunk?

Thanks,
Richard.

2014-01-15  Richard Biener  <rguenther@suse.de>

	PR rtl-optimization/38518
	* df.h (df_analyze_loop): Declare.
	* df-core.c: Include cfgloop.h.
	(df_analyze_1): Split out main part of df_analyze.
	(df_analyze): Adjust.
	(loop_inverted_post_order_compute): New function.
	(loop_post_order_compute): Likewise.
	(df_analyze_loop): New function avoiding whole-function
	postorder computes.
	* loop-invariant.c (find_defs): Use df_analyze_loop.
	(find_invariants): Adjust.
	* loop-iv.c (iv_analysis_loop_init): Use df_analyze_loop.

Index: gcc/df.h
===================================================================
*** gcc/df.h.orig	2014-01-15 14:04:21.175544448 +0100
--- gcc/df.h	2014-01-15 14:06:59.851533523 +0100
*************** extern void df_set_blocks (bitmap);
*** 900,906 ****
  extern void df_remove_problem (struct dataflow *);
  extern void df_finish_pass (bool);
  extern void df_analyze_problem (struct dataflow *, bitmap, int *, int);
! extern void df_analyze (void);
  extern int df_get_n_blocks (enum df_flow_dir);
  extern int *df_get_postorder (enum df_flow_dir);
  extern void df_simple_dataflow (enum df_flow_dir, df_init_function,
--- 900,907 ----
  extern void df_remove_problem (struct dataflow *);
  extern void df_finish_pass (bool);
  extern void df_analyze_problem (struct dataflow *, bitmap, int *, int);
! extern void df_analyze ();
! extern void df_analyze_loop (struct loop *);
  extern int df_get_n_blocks (enum df_flow_dir);
  extern int *df_get_postorder (enum df_flow_dir);
  extern void df_simple_dataflow (enum df_flow_dir, df_init_function,
Index: gcc/df-core.c
===================================================================
*** gcc/df-core.c.orig	2014-01-15 14:04:21.188544447 +0100
--- gcc/df-core.c	2014-01-15 14:48:25.089362417 +0100
*************** are write-only operations.
*** 393,398 ****
--- 393,399 ----
  #include "df.h"
  #include "tree-pass.h"
  #include "params.h"
+ #include "cfgloop.h"
  
  static void *df_get_bb_info (struct dataflow *, unsigned int);
  static void df_set_bb_info (struct dataflow *, unsigned int, void *);
*************** df_analyze_problem (struct dataflow *dfl
*** 1225,1247 ****
  }
  
  
! /* Analyze dataflow info for the basic blocks specified by the bitmap
!    BLOCKS, or for the whole CFG if BLOCKS is zero.  */
  
! void
! df_analyze (void)
  {
-   bitmap current_all_blocks = BITMAP_ALLOC (&df_bitmap_obstack);
-   bool everything;
    int i;
  
-   free (df->postorder);
-   free (df->postorder_inverted);
-   df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
-   df->postorder_inverted = XNEWVEC (int, last_basic_block_for_fn (cfun));
-   df->n_blocks = post_order_compute (df->postorder, true, true);
-   df->n_blocks_inverted = inverted_post_order_compute (df->postorder_inverted);
- 
    /* These should be the same.  */
    gcc_assert (df->n_blocks == df->n_blocks_inverted);
  
--- 1226,1238 ----
  }
  
  
! /* Analyze dataflow info.  */
  
! static void
! df_analyze_1 (void)
  {
    int i;
  
    /* These should be the same.  */
    gcc_assert (df->n_blocks == df->n_blocks_inverted);
  
*************** df_analyze (void)
*** 1258,1263 ****
--- 1249,1299 ----
  #endif
      df_verify ();
  
+   /* Skip over the DF_SCAN problem. */
+   for (i = 1; i < df->num_problems_defined; i++)
+     {
+       struct dataflow *dflow = df->problems_in_order[i];
+       if (dflow->solutions_dirty)
+         {
+           if (dflow->problem->dir == DF_FORWARD)
+             df_analyze_problem (dflow,
+                                 df->blocks_to_analyze,
+                                 df->postorder_inverted,
+                                 df->n_blocks_inverted);
+           else
+             df_analyze_problem (dflow,
+                                 df->blocks_to_analyze,
+                                 df->postorder,
+                                 df->n_blocks);
+         }
+     }
+ 
+   if (!df->analyze_subset)
+     {
+       BITMAP_FREE (df->blocks_to_analyze);
+       df->blocks_to_analyze = NULL;
+     }
+ 
+ #ifdef DF_DEBUG_CFG
+   df_set_clean_cfg ();
+ #endif
+ }
+ 
+ /* Analyze dataflow info.  */
+ 
+ void
+ df_analyze (void)
+ {
+   bitmap current_all_blocks = BITMAP_ALLOC (&df_bitmap_obstack);
+   int i;
+ 
+   free (df->postorder);
+   free (df->postorder_inverted);
+   df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
+   df->postorder_inverted = XNEWVEC (int, last_basic_block_for_fn (cfun));
+   df->n_blocks = post_order_compute (df->postorder, true, true);
+   df->n_blocks_inverted = inverted_post_order_compute (df->postorder_inverted);
+ 
    for (i = 0; i < df->n_blocks; i++)
      bitmap_set_bit (current_all_blocks, df->postorder[i]);
  
*************** df_analyze (void)
*** 1272,1321 ****
       sets.  */
    if (df->analyze_subset)
      {
-       everything = false;
        bitmap_and_into (df->blocks_to_analyze, current_all_blocks);
        df->n_blocks = df_prune_to_subcfg (df->postorder,
  					 df->n_blocks, df->blocks_to_analyze);
        df->n_blocks_inverted = df_prune_to_subcfg (df->postorder_inverted,
! 			                          df->n_blocks_inverted,
!                                                   df->blocks_to_analyze);
        BITMAP_FREE (current_all_blocks);
      }
    else
      {
-       everything = true;
        df->blocks_to_analyze = current_all_blocks;
        current_all_blocks = NULL;
      }
  
!   /* Skip over the DF_SCAN problem. */
!   for (i = 1; i < df->num_problems_defined; i++)
      {
!       struct dataflow *dflow = df->problems_in_order[i];
!       if (dflow->solutions_dirty)
!         {
!           if (dflow->problem->dir == DF_FORWARD)
!             df_analyze_problem (dflow,
!                                 df->blocks_to_analyze,
!                                 df->postorder_inverted,
!                                 df->n_blocks_inverted);
!           else
!             df_analyze_problem (dflow,
!                                 df->blocks_to_analyze,
!                                 df->postorder,
!                                 df->n_blocks);
!         }
      }
  
!   if (everything)
      {
!       BITMAP_FREE (df->blocks_to_analyze);
!       df->blocks_to_analyze = NULL;
      }
  
! #ifdef DF_DEBUG_CFG
!   df_set_clean_cfg ();
! #endif
  }
  
  
--- 1308,1484 ----
       sets.  */
    if (df->analyze_subset)
      {
        bitmap_and_into (df->blocks_to_analyze, current_all_blocks);
        df->n_blocks = df_prune_to_subcfg (df->postorder,
  					 df->n_blocks, df->blocks_to_analyze);
        df->n_blocks_inverted = df_prune_to_subcfg (df->postorder_inverted,
! 						  df->n_blocks_inverted,
! 						  df->blocks_to_analyze);
        BITMAP_FREE (current_all_blocks);
      }
    else
      {
        df->blocks_to_analyze = current_all_blocks;
        current_all_blocks = NULL;
      }
  
!   df_analyze_1 ();
! }
! 
! /* Compute the reverse top sort order of the sub-CFG specified by LOOP.
!    Returns the number of blocks which is always loop->num_nodes.  */
! 
! static int
! loop_post_order_compute (int *post_order, struct loop *loop)
! {
!   edge_iterator *stack;
!   int sp;
!   int post_order_num = 0;
!   bitmap visited;
! 
!   /* Allocate stack for back-tracking up CFG.  */
!   stack = XNEWVEC (edge_iterator, loop->num_nodes + 1);
!   sp = 0;
! 
!   /* Allocate bitmap to track nodes that have been visited.  */
!   visited = BITMAP_ALLOC (NULL);
! 
!   /* Push the first edge on to the stack.  */
!   stack[sp++] = ei_start (loop_preheader_edge (loop)->src->succs);
! 
!   while (sp)
      {
!       edge_iterator ei;
!       basic_block src;
!       basic_block dest;
! 
!       /* Look at the edge on the top of the stack.  */
!       ei = stack[sp - 1];
!       src = ei_edge (ei)->src;
!       dest = ei_edge (ei)->dest;
! 
!       /* Check if the edge destination has been visited yet and mark it
!          if not so.  */
!       if (flow_bb_inside_loop_p (loop, dest)
! 	  && bitmap_set_bit (visited, dest->index))
! 	{
! 	  if (EDGE_COUNT (dest->succs) > 0)
! 	    /* Since the DEST node has been visited for the first
! 	       time, check its successors.  */
! 	    stack[sp++] = ei_start (dest->succs);
! 	  else
! 	    post_order[post_order_num++] = dest->index;
! 	}
!       else
! 	{
! 	  if (ei_one_before_end_p (ei)
! 	      && src != loop_preheader_edge (loop)->src)
! 	    post_order[post_order_num++] = src->index;
! 
! 	  if (!ei_one_before_end_p (ei))
! 	    ei_next (&stack[sp - 1]);
! 	  else
! 	    sp--;
! 	}
      }
  
!   free (stack);
!   BITMAP_FREE (visited);
! 
!   return post_order_num;
! }
! 
! /* Compute the reverse top sort order of the inverted sub-CFG specified
!    by LOOP.  Returns the number of blocks which is always loop->num_nodes.  */
! 
! static int
! loop_inverted_post_order_compute (int *post_order, struct loop *loop)
! {
!   basic_block bb;
!   edge_iterator *stack;
!   int sp;
!   int post_order_num = 0;
!   bitmap visited;
! 
!   /* Allocate stack for back-tracking up CFG.  */
!   stack = XNEWVEC (edge_iterator, loop->num_nodes + 1);
!   sp = 0;
! 
!   /* Allocate bitmap to track nodes that have been visited.  */
!   visited = BITMAP_ALLOC (NULL);
! 
!   /* Put all latches into the initial work list.  In theory we'd want
!      to start from loop exits but then we'd have the special case of
!      endless loops.  It doesn't really matter for DF iteration order and
!      handling latches last is probably even better.  */
!   stack[sp++] = ei_start (loop->header->preds);
!   bitmap_set_bit (visited, loop->header->index);
! 
!   /* The inverted traversal loop. */
!   while (sp)
      {
!       edge_iterator ei;
!       basic_block pred;
! 
!       /* Look at the edge on the top of the stack.  */
!       ei = stack[sp - 1];
!       bb = ei_edge (ei)->dest;
!       pred = ei_edge (ei)->src;
! 
!       /* Check if the predecessor has been visited yet and mark it
! 	 if not so.  */
!       if (flow_bb_inside_loop_p (loop, pred)
! 	  && bitmap_set_bit (visited, pred->index))
! 	{
! 	  if (EDGE_COUNT (pred->preds) > 0)
! 	    /* Since the predecessor node has been visited for the first
! 	       time, check its predecessors.  */
! 	    stack[sp++] = ei_start (pred->preds);
! 	  else
! 	    post_order[post_order_num++] = pred->index;
! 	}
!       else
! 	{
! 	  if (flow_bb_inside_loop_p (loop, bb)
! 	      && ei_one_before_end_p (ei))
! 	    post_order[post_order_num++] = bb->index;
! 
! 	  if (!ei_one_before_end_p (ei))
! 	    ei_next (&stack[sp - 1]);
! 	  else
! 	    sp--;
! 	}
      }
  
!   free (stack);
!   BITMAP_FREE (visited);
!   return post_order_num;
! }
! 
! 
! /* Analyze dataflow info for the basic blocks contained in LOOP.  */
! 
! void
! df_analyze_loop (struct loop *loop)
! {
!   free (df->postorder);
!   free (df->postorder_inverted);
! 
!   df->postorder = XNEWVEC (int, loop->num_nodes);
!   df->postorder_inverted = XNEWVEC (int, loop->num_nodes);
!   df->n_blocks = loop_post_order_compute (df->postorder, loop);
!   df->n_blocks_inverted
!     = loop_inverted_post_order_compute (df->postorder_inverted, loop);
!   gcc_assert ((unsigned) df->n_blocks == loop->num_nodes);
!   gcc_assert ((unsigned) df->n_blocks_inverted == loop->num_nodes);
! 
!   bitmap blocks = BITMAP_ALLOC (&df_bitmap_obstack);
!   for (int i = 0; i < df->n_blocks; ++i)
!     bitmap_set_bit (blocks, df->postorder[i]);
!   df_set_blocks (blocks);
!   BITMAP_FREE (blocks);
! 
!   df_analyze_1 ();
  }
  
  
Index: gcc/loop-invariant.c
===================================================================
*** gcc/loop-invariant.c.orig	2014-01-15 14:04:21.200544446 +0100
--- gcc/loop-invariant.c	2014-01-15 14:54:15.047338323 +0100
*************** may_assign_reg_p (rtx x)
*** 652,665 ****
     BODY.  */
  
  static void
! find_defs (struct loop *loop, basic_block *body)
  {
-   unsigned i;
-   bitmap blocks = BITMAP_ALLOC (NULL);
- 
-   for (i = 0; i < loop->num_nodes; i++)
-     bitmap_set_bit (blocks, body[i]->index);
- 
    if (dump_file)
      {
        fprintf (dump_file,
--- 652,659 ----
     BODY.  */
  
  static void
! find_defs (struct loop *loop)
  {
    if (dump_file)
      {
        fprintf (dump_file,
*************** find_defs (struct loop *loop, basic_bloc
*** 670,678 ****
    df_remove_problem (df_chain);
    df_process_deferred_rescans ();
    df_chain_add_problem (DF_UD_CHAIN);
-   df_set_blocks (blocks);
    df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
!   df_analyze ();
    check_invariant_table_size ();
  
    if (dump_file)
--- 664,671 ----
    df_remove_problem (df_chain);
    df_process_deferred_rescans ();
    df_chain_add_problem (DF_UD_CHAIN);
    df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
!   df_analyze_loop (loop);
    check_invariant_table_size ();
  
    if (dump_file)
*************** find_defs (struct loop *loop, basic_bloc
*** 682,689 ****
  	       "*****ending processing of loop %d ******\n",
  	       loop->num);
      }
- 
-   BITMAP_FREE (blocks);
  }
  
  /* Creates a new invariant for definition DEF in INSN, depending on invariants
--- 675,680 ----
*************** find_invariants (struct loop *loop)
*** 1005,1011 ****
    compute_always_reached (loop, body, may_exit, always_reached);
    compute_always_reached (loop, body, has_exit, always_executed);
  
!   find_defs (loop, body);
    find_invariants_body (loop, body, always_reached, always_executed);
    merge_identical_invariants ();
  
--- 996,1002 ----
    compute_always_reached (loop, body, may_exit, always_reached);
    compute_always_reached (loop, body, has_exit, always_executed);
  
!   find_defs (loop);
    find_invariants_body (loop, body, always_reached, always_executed);
    merge_identical_invariants ();
  
Index: gcc/loop-iv.c
===================================================================
*** gcc/loop-iv.c.orig	2014-01-15 14:04:21.200544446 +0100
--- gcc/loop-iv.c	2014-01-15 14:08:27.429527493 +0100
*************** clear_iv_info (void)
*** 278,287 ****
  void
  iv_analysis_loop_init (struct loop *loop)
  {
-   basic_block *body = get_loop_body_in_dom_order (loop), bb;
-   bitmap blocks = BITMAP_ALLOC (NULL);
-   unsigned i;
- 
    current_loop = loop;
  
    /* Clear the information from the analysis of the previous loop.  */
--- 278,283 ----
*************** iv_analysis_loop_init (struct loop *loop
*** 294,304 ****
    else
      clear_iv_info ();
  
-   for (i = 0; i < loop->num_nodes; i++)
-     {
-       bb = body[i];
-       bitmap_set_bit (blocks, bb->index);
-     }
    /* Get rid of the ud chains before processing the rescans.  Then add
       the problem back.  */
    df_remove_problem (df_chain);
--- 290,295 ----
*************** iv_analysis_loop_init (struct loop *loop
*** 306,319 ****
    df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
    df_chain_add_problem (DF_UD_CHAIN);
    df_note_add_problem ();
!   df_set_blocks (blocks);
!   df_analyze ();
    if (dump_file)
      df_dump_region (dump_file);
  
    check_iv_ref_table_size ();
-   BITMAP_FREE (blocks);
-   free (body);
  }
  
  /* Finds the definition of REG that dominates loop latch and stores
--- 297,307 ----
    df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
    df_chain_add_problem (DF_UD_CHAIN);
    df_note_add_problem ();
!   df_analyze_loop (loop);
    if (dump_file)
      df_dump_region (dump_file);
  
    check_iv_ref_table_size ();
  }
  
  /* Finds the definition of REG that dominates loop latch and stores

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-01-15 14:03 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-15 10:53 [PATCH][RFC] Fix DF sub-CFG analysis slowness (PR39326) Richard Biener
2014-01-15 12:41 ` Steven Bosscher
2014-01-15 12:53   ` Richard Biener
2014-01-15 14:03   ` [PATCH] " Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).