* [PATCH][2/n] tree LIM TLC
@ 2013-03-12 12:32 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2013-03-12 12:32 UTC (permalink / raw)
To: gcc-patches
This makes LIM work per outermost loop, reducing peak memory use.
Not necessarily 2/n, but I've completed and tested it
on x86_64-unknown-linux-gnu. Queued for 4.9.
Richard.
2013-03-12 Richard Biener <rguenther@suse.de>
* tree-ssa-loop-im.c (determine_invariantness_stmt): Rename to ...
(determine_invariantness_bb): ... this. Adjust for ...
(determine_invariantness): ... walk all blocks of the loop
we process.
(move_computations_stmt): Rename to ...
(move_computations_bb): ... this. Adjust for ...
(move_computations): ... walk all blocks of the loop we process.
(analyze_memory_references): Likewise.
(store_motion): Process all sub-loops of the loop we process.
(fill_always_executed_in): Likewise.
(tree_ssa_lim_initialize): Move global bits to tree_ssa_lim.
(tree_ssa_lim_finalize): Likewise.
(tree_ssa_lim_1): Split out from ...
(tree_ssa_lim): ... this. Perform global init and iterate over
all outermost loops.
Index: gcc/tree-ssa-loop-im.c
===================================================================
*** gcc/tree-ssa-loop-im.c.orig 2013-03-11 16:11:02.000000000 +0100
--- gcc/tree-ssa-loop-im.c 2013-03-12 10:09:58.923878391 +0100
*************** rewrite_bittest (gimple_stmt_iterator *b
*** 1040,1047 ****
Callback for walk_dominator_tree. */
static void
! determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
! basic_block bb)
{
enum move_pos pos;
gimple_stmt_iterator bsi;
--- 1040,1046 ----
Callback for walk_dominator_tree. */
static void
! determine_invariantness_bb (basic_block bb)
{
enum move_pos pos;
gimple_stmt_iterator bsi;
*************** determine_invariantness_stmt (struct dom
*** 1050,1058 ****
struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
struct lim_aux_data *lim_data;
- if (!loop_outer (bb->loop_father))
- return;
-
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
--- 1049,1054 ----
*************** determine_invariantness_stmt (struct dom
*** 1177,1211 ****
each statement. */
static void
! determine_invariantness (void)
{
! struct dom_walk_data walk_data;
!
! memset (&walk_data, 0, sizeof (struct dom_walk_data));
! walk_data.dom_direction = CDI_DOMINATORS;
! walk_data.before_dom_children = determine_invariantness_stmt;
! init_walk_dominator_tree (&walk_data);
! walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
! fini_walk_dominator_tree (&walk_data);
}
/* Hoist the statements in basic block BB out of the loops prescribed by
data stored in LIM_DATA structures associated with each statement. Callback
for walk_dominator_tree. */
! static void
! move_computations_stmt (struct dom_walk_data *dw_data,
! basic_block bb)
{
struct loop *level;
gimple_stmt_iterator bsi;
gimple stmt;
unsigned cost = 0;
struct lim_aux_data *lim_data;
!
! if (!loop_outer (bb->loop_father))
! return;
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
{
--- 1173,1202 ----
each statement. */
static void
! determine_invariantness (struct loop *loop, basic_block *bbs)
{
! unsigned i;
! for (i = 0; i < loop->num_nodes; ++i)
! {
! basic_block bb = bbs[i];
! determine_invariantness_bb (bb);
! }
}
/* Hoist the statements in basic block BB out of the loops prescribed by
data stored in LIM_DATA structures associated with each statement. Callback
for walk_dominator_tree. */
! static unsigned
! move_computations_bb (basic_block bb)
{
struct loop *level;
gimple_stmt_iterator bsi;
gimple stmt;
unsigned cost = 0;
struct lim_aux_data *lim_data;
! unsigned todo = 0;
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
{
*************** move_computations_stmt (struct dom_walk_
*** 1260,1266 ****
gimple_phi_result (stmt),
t, arg0, arg1);
SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
! *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
}
gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
remove_phi_node (&bsi, false);
--- 1251,1257 ----
gimple_phi_result (stmt),
t, arg0, arg1);
SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
! todo |= TODO_cleanup_cfg;
}
gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
remove_phi_node (&bsi, false);
*************** move_computations_stmt (struct dom_walk_
*** 1323,1351 ****
gsi_remove (&bsi, false);
gsi_insert_on_edge (e, stmt);
}
}
/* Hoist the statements out of the loops prescribed by data stored in
! LIM_DATA structures associated with each statement.*/
static unsigned int
! move_computations (void)
{
! struct dom_walk_data walk_data;
! unsigned int todo = 0;
!
! memset (&walk_data, 0, sizeof (struct dom_walk_data));
! walk_data.global_data = &todo;
! walk_data.dom_direction = CDI_DOMINATORS;
! walk_data.before_dom_children = move_computations_stmt;
!
! init_walk_dominator_tree (&walk_data);
! walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
! fini_walk_dominator_tree (&walk_data);
! gsi_commit_edge_inserts ();
! if (need_ssa_update_p (cfun))
! rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
return todo;
}
--- 1314,1337 ----
gsi_remove (&bsi, false);
gsi_insert_on_edge (e, stmt);
}
+
+ return todo;
}
/* Hoist the statements out of the loops prescribed by data stored in
! LIM_DATA structures associated with each statement. */
static unsigned int
! move_computations (struct loop *loop, basic_block *bbs)
{
! unsigned i;
! unsigned todo = 0;
! for (i = 0; i < loop->num_nodes; ++i)
! {
! basic_block bb = bbs[i];
! todo |= move_computations_bb (bb);
! }
return todo;
}
*************** gather_mem_refs_stmt (struct loop *loop,
*** 1612,1618 ****
/* Gathers memory references in loops. */
static void
! analyze_memory_references (void)
{
gimple_stmt_iterator bsi;
basic_block bb;
--- 1598,1604 ----
/* Gathers memory references in loops. */
static void
! analyze_memory_references (struct loop *outer, basic_block *bbs)
{
gimple_stmt_iterator bsi;
basic_block bb;
*************** analyze_memory_references (void)
*** 1620,1630 ****
loop_iterator li;
bitmap lrefs, alrefs, alrefso;
! FOR_EACH_BB (bb)
{
loop = bb->loop_father;
- if (loop == current_loops->tree_root)
- continue;
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
gather_mem_refs_stmt (loop, gsi_stmt (bsi));
--- 1606,1615 ----
loop_iterator li;
bitmap lrefs, alrefs, alrefso;
! for (unsigned i = 0; i < outer->num_nodes; ++i)
{
+ bb = bbs[i];
loop = bb->loop_father;
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
gather_mem_refs_stmt (loop, gsi_stmt (bsi));
*************** store_motion_loop (struct loop *loop, bi
*** 2388,2403 ****
loops. */
static void
! store_motion (void)
{
- struct loop *loop;
bitmap sm_executed = BITMAP_ALLOC (NULL);
!
! for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
! store_motion_loop (loop, sm_executed);
!
BITMAP_FREE (sm_executed);
- gsi_commit_edge_inserts ();
}
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
--- 2373,2383 ----
loops. */
static void
! store_motion (struct loop *loop)
{
bitmap sm_executed = BITMAP_ALLOC (NULL);
! store_motion_loop (loop, sm_executed);
BITMAP_FREE (sm_executed);
}
/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
*************** fill_always_executed_in_1 (struct loop *
*** 2473,2488 ****
of its header implies execution of bb. */
static void
! fill_always_executed_in (void)
{
sbitmap contains_call = sbitmap_alloc (last_basic_block);
! basic_block bb;
! struct loop *loop;
! bitmap_clear (contains_call);
! FOR_EACH_BB (bb)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
if (nonpure_call_p (gsi_stmt (gsi)))
--- 2453,2470 ----
of its header implies execution of bb. */
static void
! fill_always_executed_in (struct loop *loop, basic_block *bbs)
{
sbitmap contains_call = sbitmap_alloc (last_basic_block);
! unsigned i;
! /* We don't need to clear the sbitmap, instead we set/clear all
! bits we eventually will look at.
! ??? Eventually this will lead to tons of false valgrind errors... */
! for (i = 0; i < loop->num_nodes; ++i)
{
gimple_stmt_iterator gsi;
+ basic_block bb = bbs[i];
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
if (nonpure_call_p (gsi_stmt (gsi)))
*************** fill_always_executed_in (void)
*** 2491,2500 ****
if (!gsi_end_p (gsi))
bitmap_set_bit (contains_call, bb->index);
}
! for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
! fill_always_executed_in_1 (loop, contains_call);
sbitmap_free (contains_call);
}
--- 2473,2483 ----
if (!gsi_end_p (gsi))
bitmap_set_bit (contains_call, bb->index);
+ else
+ bitmap_clear_bit (contains_call, bb->index);
}
! fill_always_executed_in_1 (loop, contains_call);
sbitmap_free (contains_call);
}
*************** tree_ssa_lim_initialize (void)
*** 2511,2521 ****
lim_aux_data_map = pointer_map_create ();
- if (flag_tm)
- compute_transaction_bits ();
-
- alloc_aux_for_edges (0);
-
memory_accesses.refs = htab_create (100, memref_hash, memref_eq, NULL);
memory_accesses.refs_list.create (100);
memory_accesses.refs_in_loop.create (number_of_loops ());
--- 2494,2499 ----
*************** tree_ssa_lim_initialize (void)
*** 2538,2554 ****
/* Cleans up after the invariant motion pass. */
static void
! tree_ssa_lim_finalize (void)
{
- basic_block bb;
unsigned i;
mem_ref_p ref;
- free_aux_for_edges ();
-
- FOR_EACH_BB (bb)
- SET_ALWAYS_EXECUTED_IN (bb, NULL);
-
bitmap_obstack_release (&lim_bitmap_obstack);
pointer_map_destroy (lim_aux_data_map);
--- 2516,2526 ----
/* Cleans up after the invariant motion pass. */
static void
! tree_ssa_lim_finalize ()
{
unsigned i;
mem_ref_p ref;
bitmap_obstack_release (&lim_bitmap_obstack);
pointer_map_destroy (lim_aux_data_map);
*************** tree_ssa_lim_finalize (void)
*** 2569,2599 ****
/* Moves invariants from loops. Only "expensive" invariants are moved out --
i.e. those that are likely to be win regardless of the register pressure. */
! unsigned int
! tree_ssa_lim (void)
{
unsigned int todo;
tree_ssa_lim_initialize ();
/* Gathers information about memory accesses in the loops. */
! analyze_memory_references ();
/* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
! fill_always_executed_in ();
/* For each statement determine the outermost loop in that it is
invariant and cost for computing the invariant. */
! determine_invariantness ();
/* Execute store motion. Force the necessary invariants to be moved
out of the loops as well. */
! store_motion ();
/* Move the expressions that are expensive enough. */
! todo = move_computations ();
tree_ssa_lim_finalize ();
return todo;
}
--- 2541,2612 ----
/* Moves invariants from loops. Only "expensive" invariants are moved out --
i.e. those that are likely to be win regardless of the register pressure. */
! static unsigned int
! tree_ssa_lim_1 (struct loop *loop)
{
unsigned int todo;
+ basic_block *bbs;
tree_ssa_lim_initialize ();
+ bbs = get_loop_body_in_dom_order (loop);
+
/* Gathers information about memory accesses in the loops. */
! analyze_memory_references (loop, bbs);
/* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
! fill_always_executed_in (loop, bbs);
/* For each statement determine the outermost loop in that it is
invariant and cost for computing the invariant. */
! determine_invariantness (loop, bbs);
/* Execute store motion. Force the necessary invariants to be moved
out of the loops as well. */
! store_motion (loop);
!
! /* ??? Somehow edge inserts of SM get lost if we don't commit them
! here. */
! free (bbs);
! gsi_commit_edge_inserts ();
! bbs = get_loop_body_in_dom_order (loop);
/* Move the expressions that are expensive enough. */
! todo = move_computations (loop, bbs);
!
! free (bbs);
! gsi_commit_edge_inserts ();
tree_ssa_lim_finalize ();
return todo;
}
+
+ unsigned int
+ tree_ssa_lim (void)
+ {
+ struct loop *loop;
+ basic_block bb;
+ unsigned todo = 0;
+
+ if (flag_tm)
+ compute_transaction_bits ();
+ alloc_aux_for_edges (0);
+
+ /* Process outermost loops independently. */
+ loop = current_loops->tree_root->inner;
+ while (loop)
+ {
+ todo |= tree_ssa_lim_1 (loop);
+ loop = loop->next;
+ }
+
+ free_aux_for_edges ();
+ FOR_EACH_BB (bb)
+ SET_ALWAYS_EXECUTED_IN (bb, NULL);
+
+ if (need_ssa_update_p (cfun))
+ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+
+ return todo;
+ }
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2013-03-12 12:32 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-12 12:32 [PATCH][2/n] tree LIM TLC Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).