public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][2/n] tree LIM TLC
@ 2013-03-12 12:32 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2013-03-12 12:32 UTC (permalink / raw)
  To: gcc-patches


This makes LIM work per outermost loop, reducing peak memory use.

Not necessarily 2/n, but I've completed and tested it
on x86_64-unknown-linux-gnu.  Queued for 4.9.

Richard.

2013-03-12  Richard Biener  <rguenther@suse.de>

	* tree-ssa-loop-im.c (determine_invariantness_stmt): Rename to ...
	(determine_invariantness_bb): ... this.  Adjust for ...
	(determine_invariantness): ... walk all blocks of the loop
	we process.
	(move_computations_stmt): Rename to ...
	(move_computations_bb): ... this.  Adjust for ...
	(move_computations): ... walk all blocks of the loop we process.
	(analyze_memory_references): Likewise.
	(store_motion): Process all sub-loops of the loop we process.
	(fill_always_executed_in): Likewise.
	(tree_ssa_lim_initialize): Move global bits to tree_ssa_lim.
	(tree_ssa_lim_finalize): Likewise.
	(tree_ssa_lim_1): Split out from ...
	(tree_ssa_lim): ... this.  Perform global init and iterate over
	all outermost loops.

Index: gcc/tree-ssa-loop-im.c
===================================================================
*** gcc/tree-ssa-loop-im.c.orig	2013-03-11 16:11:02.000000000 +0100
--- gcc/tree-ssa-loop-im.c	2013-03-12 10:09:58.923878391 +0100
*************** rewrite_bittest (gimple_stmt_iterator *b
*** 1040,1047 ****
     Callback for walk_dominator_tree.  */
  
  static void
! determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
! 			      basic_block bb)
  {
    enum move_pos pos;
    gimple_stmt_iterator bsi;
--- 1040,1046 ----
     Callback for walk_dominator_tree.  */
  
  static void
! determine_invariantness_bb (basic_block bb)
  {
    enum move_pos pos;
    gimple_stmt_iterator bsi;
*************** determine_invariantness_stmt (struct dom
*** 1050,1058 ****
    struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
    struct lim_aux_data *lim_data;
  
-   if (!loop_outer (bb->loop_father))
-     return;
- 
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
  	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
--- 1049,1054 ----
*************** determine_invariantness_stmt (struct dom
*** 1177,1211 ****
     each statement.  */
  
  static void
! determine_invariantness (void)
  {
!   struct dom_walk_data walk_data;
! 
!   memset (&walk_data, 0, sizeof (struct dom_walk_data));
!   walk_data.dom_direction = CDI_DOMINATORS;
!   walk_data.before_dom_children = determine_invariantness_stmt;
  
!   init_walk_dominator_tree (&walk_data);
!   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
!   fini_walk_dominator_tree (&walk_data);
  }
  
  /* Hoist the statements in basic block BB out of the loops prescribed by
     data stored in LIM_DATA structures associated with each statement.  Callback
     for walk_dominator_tree.  */
  
! static void
! move_computations_stmt (struct dom_walk_data *dw_data,
! 			basic_block bb)
  {
    struct loop *level;
    gimple_stmt_iterator bsi;
    gimple stmt;
    unsigned cost = 0;
    struct lim_aux_data *lim_data;
! 
!   if (!loop_outer (bb->loop_father))
!     return;
  
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
      {
--- 1173,1202 ----
     each statement.  */
  
  static void
! determine_invariantness (struct loop *loop, basic_block *bbs)
  {
!   unsigned i;
  
!   for (i = 0; i < loop->num_nodes; ++i)
!     {
!       basic_block bb = bbs[i];
!       determine_invariantness_bb (bb);
!     }
  }
  
  /* Hoist the statements in basic block BB out of the loops prescribed by
     data stored in LIM_DATA structures associated with each statement.  Callback
     for walk_dominator_tree.  */
  
! static unsigned
! move_computations_bb (basic_block bb)
  {
    struct loop *level;
    gimple_stmt_iterator bsi;
    gimple stmt;
    unsigned cost = 0;
    struct lim_aux_data *lim_data;
!   unsigned todo = 0;
  
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
      {
*************** move_computations_stmt (struct dom_walk_
*** 1260,1266 ****
  						   gimple_phi_result (stmt),
  						   t, arg0, arg1);
  	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
! 	  *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
  	}
        gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
        remove_phi_node (&bsi, false);
--- 1251,1257 ----
  						   gimple_phi_result (stmt),
  						   t, arg0, arg1);
  	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
! 	  todo |= TODO_cleanup_cfg;
  	}
        gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
        remove_phi_node (&bsi, false);
*************** move_computations_stmt (struct dom_walk_
*** 1323,1351 ****
        gsi_remove (&bsi, false);
        gsi_insert_on_edge (e, stmt);
      }
  }
  
  /* Hoist the statements out of the loops prescribed by data stored in
!    LIM_DATA structures associated with each statement.*/
  
  static unsigned int
! move_computations (void)
  {
!   struct dom_walk_data walk_data;
!   unsigned int todo = 0;
! 
!   memset (&walk_data, 0, sizeof (struct dom_walk_data));
!   walk_data.global_data = &todo;
!   walk_data.dom_direction = CDI_DOMINATORS;
!   walk_data.before_dom_children = move_computations_stmt;
! 
!   init_walk_dominator_tree (&walk_data);
!   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
!   fini_walk_dominator_tree (&walk_data);
  
!   gsi_commit_edge_inserts ();
!   if (need_ssa_update_p (cfun))
!     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
  
    return todo;
  }
--- 1314,1337 ----
        gsi_remove (&bsi, false);
        gsi_insert_on_edge (e, stmt);
      }
+ 
+   return todo;
  }
  
  /* Hoist the statements out of the loops prescribed by data stored in
!    LIM_DATA structures associated with each statement.  */
  
  static unsigned int
! move_computations (struct loop *loop, basic_block *bbs)
  {
!   unsigned i;
!   unsigned todo = 0;
  
!   for (i = 0; i < loop->num_nodes; ++i)
!     {
!       basic_block bb = bbs[i];
!       todo |= move_computations_bb (bb);
!     }
  
    return todo;
  }
*************** gather_mem_refs_stmt (struct loop *loop,
*** 1612,1618 ****
  /* Gathers memory references in loops.  */
  
  static void
! analyze_memory_references (void)
  {
    gimple_stmt_iterator bsi;
    basic_block bb;
--- 1598,1604 ----
  /* Gathers memory references in loops.  */
  
  static void
! analyze_memory_references (struct loop *outer, basic_block *bbs)
  {
    gimple_stmt_iterator bsi;
    basic_block bb;
*************** analyze_memory_references (void)
*** 1620,1630 ****
    loop_iterator li;
    bitmap lrefs, alrefs, alrefso;
  
!   FOR_EACH_BB (bb)
      {
        loop = bb->loop_father;
-       if (loop == current_loops->tree_root)
- 	continue;
  
        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
  	gather_mem_refs_stmt (loop, gsi_stmt (bsi));
--- 1606,1615 ----
    loop_iterator li;
    bitmap lrefs, alrefs, alrefso;
  
!   for (unsigned i = 0; i < outer->num_nodes; ++i)
      {
+       bb = bbs[i];
        loop = bb->loop_father;
  
        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
  	gather_mem_refs_stmt (loop, gsi_stmt (bsi));
*************** store_motion_loop (struct loop *loop, bi
*** 2388,2403 ****
     loops.  */
  
  static void
! store_motion (void)
  {
-   struct loop *loop;
    bitmap sm_executed = BITMAP_ALLOC (NULL);
! 
!   for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
!     store_motion_loop (loop, sm_executed);
! 
    BITMAP_FREE (sm_executed);
-   gsi_commit_edge_inserts ();
  }
  
  /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
--- 2373,2383 ----
     loops.  */
  
  static void
! store_motion (struct loop *loop)
  {
    bitmap sm_executed = BITMAP_ALLOC (NULL);
!   store_motion_loop (loop, sm_executed);
    BITMAP_FREE (sm_executed);
  }
  
  /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
*************** fill_always_executed_in_1 (struct loop *
*** 2473,2488 ****
     of its header implies execution of bb.  */
  
  static void
! fill_always_executed_in (void)
  {
    sbitmap contains_call = sbitmap_alloc (last_basic_block);
!   basic_block bb;
!   struct loop *loop;
  
!   bitmap_clear (contains_call);
!   FOR_EACH_BB (bb)
      {
        gimple_stmt_iterator gsi;
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  	{
  	  if (nonpure_call_p (gsi_stmt (gsi)))
--- 2453,2470 ----
     of its header implies execution of bb.  */
  
  static void
! fill_always_executed_in (struct loop *loop, basic_block *bbs)
  {
    sbitmap contains_call = sbitmap_alloc (last_basic_block);
!   unsigned i;
  
!   /* We don't need to clear the sbitmap, instead we set/clear all
!      bits we eventually will look at.
!      ???  Eventually this will lead to tons of false valgrind errors...  */
!   for (i = 0; i < loop->num_nodes; ++i)
      {
        gimple_stmt_iterator gsi;
+       basic_block bb = bbs[i];
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  	{
  	  if (nonpure_call_p (gsi_stmt (gsi)))
*************** fill_always_executed_in (void)
*** 2491,2500 ****
  
        if (!gsi_end_p (gsi))
  	bitmap_set_bit (contains_call, bb->index);
      }
  
!   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
!     fill_always_executed_in_1 (loop, contains_call);
  
    sbitmap_free (contains_call);
  }
--- 2473,2483 ----
  
        if (!gsi_end_p (gsi))
  	bitmap_set_bit (contains_call, bb->index);
+       else
+ 	bitmap_clear_bit (contains_call, bb->index);
      }
  
!   fill_always_executed_in_1 (loop, contains_call);
  
    sbitmap_free (contains_call);
  }
*************** tree_ssa_lim_initialize (void)
*** 2511,2521 ****
  
    lim_aux_data_map = pointer_map_create ();
  
-   if (flag_tm)
-     compute_transaction_bits ();
- 
-   alloc_aux_for_edges (0);
- 
    memory_accesses.refs = htab_create (100, memref_hash, memref_eq, NULL);
    memory_accesses.refs_list.create (100);
    memory_accesses.refs_in_loop.create (number_of_loops ());
--- 2494,2499 ----
*************** tree_ssa_lim_initialize (void)
*** 2538,2554 ****
  /* Cleans up after the invariant motion pass.  */
  
  static void
! tree_ssa_lim_finalize (void)
  {
-   basic_block bb;
    unsigned i;
    mem_ref_p ref;
  
-   free_aux_for_edges ();
- 
-   FOR_EACH_BB (bb)
-     SET_ALWAYS_EXECUTED_IN (bb, NULL);
- 
    bitmap_obstack_release (&lim_bitmap_obstack);
    pointer_map_destroy (lim_aux_data_map);
  
--- 2516,2526 ----
  /* Cleans up after the invariant motion pass.  */
  
  static void
! tree_ssa_lim_finalize ()
  {
    unsigned i;
    mem_ref_p ref;
  
    bitmap_obstack_release (&lim_bitmap_obstack);
    pointer_map_destroy (lim_aux_data_map);
  
*************** tree_ssa_lim_finalize (void)
*** 2569,2599 ****
  /* Moves invariants from loops.  Only "expensive" invariants are moved out --
     i.e. those that are likely to be win regardless of the register pressure.  */
  
! unsigned int
! tree_ssa_lim (void)
  {
    unsigned int todo;
  
    tree_ssa_lim_initialize ();
  
    /* Gathers information about memory accesses in the loops.  */
!   analyze_memory_references ();
  
    /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
!   fill_always_executed_in ();
  
    /* For each statement determine the outermost loop in that it is
       invariant and cost for computing the invariant.  */
!   determine_invariantness ();
  
    /* Execute store motion.  Force the necessary invariants to be moved
       out of the loops as well.  */
!   store_motion ();
  
    /* Move the expressions that are expensive enough.  */
!   todo = move_computations ();
  
    tree_ssa_lim_finalize ();
  
    return todo;
  }
--- 2541,2612 ----
  /* Moves invariants from loops.  Only "expensive" invariants are moved out --
     i.e. those that are likely to be win regardless of the register pressure.  */
  
! static unsigned int
! tree_ssa_lim_1 (struct loop *loop)
  {
    unsigned int todo;
+   basic_block *bbs;
  
    tree_ssa_lim_initialize ();
  
+   bbs = get_loop_body_in_dom_order (loop);
+ 
    /* Gathers information about memory accesses in the loops.  */
!   analyze_memory_references (loop, bbs);
  
    /* Fills ALWAYS_EXECUTED_IN information for basic blocks.  */
!   fill_always_executed_in (loop, bbs);
  
    /* For each statement determine the outermost loop in that it is
       invariant and cost for computing the invariant.  */
!   determine_invariantness (loop, bbs);
  
    /* Execute store motion.  Force the necessary invariants to be moved
       out of the loops as well.  */
!   store_motion (loop);
! 
!   /* ???  Somehow edge inserts of SM get lost if we don't commit them
!      here.  */
!   free (bbs);
!   gsi_commit_edge_inserts ();
!   bbs = get_loop_body_in_dom_order (loop);
  
    /* Move the expressions that are expensive enough.  */
!   todo = move_computations (loop, bbs);
! 
!   free (bbs);
!   gsi_commit_edge_inserts ();
  
    tree_ssa_lim_finalize ();
  
    return todo;
  }
+ 
+ unsigned int
+ tree_ssa_lim (void)
+ {
+   struct loop *loop;
+   basic_block bb;
+   unsigned todo = 0;
+ 
+   if (flag_tm)
+     compute_transaction_bits ();
+   alloc_aux_for_edges (0);
+ 
+   /* Process outermost loops independently.  */
+   loop = current_loops->tree_root->inner;
+   while (loop)
+     {
+       todo |= tree_ssa_lim_1 (loop);
+       loop = loop->next;
+     }
+ 
+   free_aux_for_edges ();
+   FOR_EACH_BB (bb)
+     SET_ALWAYS_EXECUTED_IN (bb, NULL);
+ 
+   if (need_ssa_update_p (cfun))
+     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+ 
+   return todo;
+ }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2013-03-12 12:32 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-12 12:32 [PATCH][2/n] tree LIM TLC Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).