From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 26631 invoked by alias); 12 Mar 2013 12:32:59 -0000 Received: (qmail 26612 invoked by uid 22791); 12 Mar 2013 12:32:57 -0000 X-SWARE-Spam-Status: No, hits=-6.7 required=5.0 tests=AWL,BAYES_00,KHOP_RCVD_UNTRUST,RCVD_IN_DNSWL_HI,RP_MATCHES_RCVD,TW_TM X-Spam-Check-By: sourceware.org Received: from cantor2.suse.de (HELO mx2.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 12 Mar 2013 12:32:37 +0000 Received: from relay2.suse.de (unknown [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id BB3E7A4E82 for ; Tue, 12 Mar 2013 13:32:35 +0100 (CET) Date: Tue, 12 Mar 2013 12:32:00 -0000 From: Richard Biener To: gcc-patches@gcc.gnu.org Subject: [PATCH][2/n] tree LIM TLC Message-ID: User-Agent: Alpine 2.00 (LNX 1167 2008-08-23) MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2013-03/txt/msg00429.txt.bz2 This makes LIM work per outermost loop, reducing peak memory use. Not necessarily 2/n, but I've completed and tested it on x86_64-unknown-linux-gnu. Queued for 4.9. Richard. 2013-03-12 Richard Biener * tree-ssa-loop-im.c (determine_invariantness_stmt): Rename to ... (determine_invariantness_bb): ... this. Adjust for ... (determine_invariantness): ... walk all blocks of the loop we process. (move_computations_stmt): Rename to ... (move_computations_bb): ... this. Adjust for ... (move_computations): ... walk all blocks of the loop we process. (analyze_memory_references): Likewise. (store_motion): Process all sub-loops of the loop we process. (fill_always_executed_in): Likewise. (tree_ssa_lim_initialize): Move global bits to tree_ssa_lim. (tree_ssa_lim_finalize): Likewise. (tree_ssa_lim_1): Split out from ... (tree_ssa_lim): ... this. Perform global init and iterate over all outermost loops. Index: gcc/tree-ssa-loop-im.c =================================================================== *** gcc/tree-ssa-loop-im.c.orig 2013-03-11 16:11:02.000000000 +0100 --- gcc/tree-ssa-loop-im.c 2013-03-12 10:09:58.923878391 +0100 *************** rewrite_bittest (gimple_stmt_iterator *b *** 1040,1047 **** Callback for walk_dominator_tree. */ static void ! determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED, ! basic_block bb) { enum move_pos pos; gimple_stmt_iterator bsi; --- 1040,1046 ---- Callback for walk_dominator_tree. */ static void ! determine_invariantness_bb (basic_block bb) { enum move_pos pos; gimple_stmt_iterator bsi; *************** determine_invariantness_stmt (struct dom *** 1050,1058 **** struct loop *outermost = ALWAYS_EXECUTED_IN (bb); struct lim_aux_data *lim_data; - if (!loop_outer (bb->loop_father)) - return; - if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n", bb->index, bb->loop_father->num, loop_depth (bb->loop_father)); --- 1049,1054 ---- *************** determine_invariantness_stmt (struct dom *** 1177,1211 **** each statement. */ static void ! determine_invariantness (void) { ! struct dom_walk_data walk_data; ! ! memset (&walk_data, 0, sizeof (struct dom_walk_data)); ! walk_data.dom_direction = CDI_DOMINATORS; ! walk_data.before_dom_children = determine_invariantness_stmt; ! init_walk_dominator_tree (&walk_data); ! walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); ! fini_walk_dominator_tree (&walk_data); } /* Hoist the statements in basic block BB out of the loops prescribed by data stored in LIM_DATA structures associated with each statement. Callback for walk_dominator_tree. */ ! static void ! move_computations_stmt (struct dom_walk_data *dw_data, ! basic_block bb) { struct loop *level; gimple_stmt_iterator bsi; gimple stmt; unsigned cost = 0; struct lim_aux_data *lim_data; ! ! if (!loop_outer (bb->loop_father)) ! return; for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); ) { --- 1173,1202 ---- each statement. */ static void ! determine_invariantness (struct loop *loop, basic_block *bbs) { ! unsigned i; ! for (i = 0; i < loop->num_nodes; ++i) ! { ! basic_block bb = bbs[i]; ! determine_invariantness_bb (bb); ! } } /* Hoist the statements in basic block BB out of the loops prescribed by data stored in LIM_DATA structures associated with each statement. Callback for walk_dominator_tree. */ ! static unsigned ! move_computations_bb (basic_block bb) { struct loop *level; gimple_stmt_iterator bsi; gimple stmt; unsigned cost = 0; struct lim_aux_data *lim_data; ! unsigned todo = 0; for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); ) { *************** move_computations_stmt (struct dom_walk_ *** 1260,1266 **** gimple_phi_result (stmt), t, arg0, arg1); SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt; ! *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg; } gsi_insert_on_edge (loop_preheader_edge (level), new_stmt); remove_phi_node (&bsi, false); --- 1251,1257 ---- gimple_phi_result (stmt), t, arg0, arg1); SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt; ! todo |= TODO_cleanup_cfg; } gsi_insert_on_edge (loop_preheader_edge (level), new_stmt); remove_phi_node (&bsi, false); *************** move_computations_stmt (struct dom_walk_ *** 1323,1351 **** gsi_remove (&bsi, false); gsi_insert_on_edge (e, stmt); } } /* Hoist the statements out of the loops prescribed by data stored in ! LIM_DATA structures associated with each statement.*/ static unsigned int ! move_computations (void) { ! struct dom_walk_data walk_data; ! unsigned int todo = 0; ! ! memset (&walk_data, 0, sizeof (struct dom_walk_data)); ! walk_data.global_data = &todo; ! walk_data.dom_direction = CDI_DOMINATORS; ! walk_data.before_dom_children = move_computations_stmt; ! ! init_walk_dominator_tree (&walk_data); ! walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); ! fini_walk_dominator_tree (&walk_data); ! gsi_commit_edge_inserts (); ! if (need_ssa_update_p (cfun)) ! rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); return todo; } --- 1314,1337 ---- gsi_remove (&bsi, false); gsi_insert_on_edge (e, stmt); } + + return todo; } /* Hoist the statements out of the loops prescribed by data stored in ! LIM_DATA structures associated with each statement. */ static unsigned int ! move_computations (struct loop *loop, basic_block *bbs) { ! unsigned i; ! unsigned todo = 0; ! for (i = 0; i < loop->num_nodes; ++i) ! { ! basic_block bb = bbs[i]; ! todo |= move_computations_bb (bb); ! } return todo; } *************** gather_mem_refs_stmt (struct loop *loop, *** 1612,1618 **** /* Gathers memory references in loops. */ static void ! analyze_memory_references (void) { gimple_stmt_iterator bsi; basic_block bb; --- 1598,1604 ---- /* Gathers memory references in loops. */ static void ! analyze_memory_references (struct loop *outer, basic_block *bbs) { gimple_stmt_iterator bsi; basic_block bb; *************** analyze_memory_references (void) *** 1620,1630 **** loop_iterator li; bitmap lrefs, alrefs, alrefso; ! FOR_EACH_BB (bb) { loop = bb->loop_father; - if (loop == current_loops->tree_root) - continue; for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) gather_mem_refs_stmt (loop, gsi_stmt (bsi)); --- 1606,1615 ---- loop_iterator li; bitmap lrefs, alrefs, alrefso; ! for (unsigned i = 0; i < outer->num_nodes; ++i) { + bb = bbs[i]; loop = bb->loop_father; for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) gather_mem_refs_stmt (loop, gsi_stmt (bsi)); *************** store_motion_loop (struct loop *loop, bi *** 2388,2403 **** loops. */ static void ! store_motion (void) { - struct loop *loop; bitmap sm_executed = BITMAP_ALLOC (NULL); ! ! for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next) ! store_motion_loop (loop, sm_executed); ! BITMAP_FREE (sm_executed); - gsi_commit_edge_inserts (); } /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e. --- 2373,2383 ---- loops. */ static void ! store_motion (struct loop *loop) { bitmap sm_executed = BITMAP_ALLOC (NULL); ! store_motion_loop (loop, sm_executed); BITMAP_FREE (sm_executed); } /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e. *************** fill_always_executed_in_1 (struct loop * *** 2473,2488 **** of its header implies execution of bb. */ static void ! fill_always_executed_in (void) { sbitmap contains_call = sbitmap_alloc (last_basic_block); ! basic_block bb; ! struct loop *loop; ! bitmap_clear (contains_call); ! FOR_EACH_BB (bb) { gimple_stmt_iterator gsi; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { if (nonpure_call_p (gsi_stmt (gsi))) --- 2453,2470 ---- of its header implies execution of bb. */ static void ! fill_always_executed_in (struct loop *loop, basic_block *bbs) { sbitmap contains_call = sbitmap_alloc (last_basic_block); ! unsigned i; ! /* We don't need to clear the sbitmap, instead we set/clear all ! bits we eventually will look at. ! ??? Eventually this will lead to tons of false valgrind errors... */ ! for (i = 0; i < loop->num_nodes; ++i) { gimple_stmt_iterator gsi; + basic_block bb = bbs[i]; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { if (nonpure_call_p (gsi_stmt (gsi))) *************** fill_always_executed_in (void) *** 2491,2500 **** if (!gsi_end_p (gsi)) bitmap_set_bit (contains_call, bb->index); } ! for (loop = current_loops->tree_root->inner; loop; loop = loop->next) ! fill_always_executed_in_1 (loop, contains_call); sbitmap_free (contains_call); } --- 2473,2483 ---- if (!gsi_end_p (gsi)) bitmap_set_bit (contains_call, bb->index); + else + bitmap_clear_bit (contains_call, bb->index); } ! fill_always_executed_in_1 (loop, contains_call); sbitmap_free (contains_call); } *************** tree_ssa_lim_initialize (void) *** 2511,2521 **** lim_aux_data_map = pointer_map_create (); - if (flag_tm) - compute_transaction_bits (); - - alloc_aux_for_edges (0); - memory_accesses.refs = htab_create (100, memref_hash, memref_eq, NULL); memory_accesses.refs_list.create (100); memory_accesses.refs_in_loop.create (number_of_loops ()); --- 2494,2499 ---- *************** tree_ssa_lim_initialize (void) *** 2538,2554 **** /* Cleans up after the invariant motion pass. */ static void ! tree_ssa_lim_finalize (void) { - basic_block bb; unsigned i; mem_ref_p ref; - free_aux_for_edges (); - - FOR_EACH_BB (bb) - SET_ALWAYS_EXECUTED_IN (bb, NULL); - bitmap_obstack_release (&lim_bitmap_obstack); pointer_map_destroy (lim_aux_data_map); --- 2516,2526 ---- /* Cleans up after the invariant motion pass. */ static void ! tree_ssa_lim_finalize () { unsigned i; mem_ref_p ref; bitmap_obstack_release (&lim_bitmap_obstack); pointer_map_destroy (lim_aux_data_map); *************** tree_ssa_lim_finalize (void) *** 2569,2599 **** /* Moves invariants from loops. Only "expensive" invariants are moved out -- i.e. those that are likely to be win regardless of the register pressure. */ ! unsigned int ! tree_ssa_lim (void) { unsigned int todo; tree_ssa_lim_initialize (); /* Gathers information about memory accesses in the loops. */ ! analyze_memory_references (); /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ ! fill_always_executed_in (); /* For each statement determine the outermost loop in that it is invariant and cost for computing the invariant. */ ! determine_invariantness (); /* Execute store motion. Force the necessary invariants to be moved out of the loops as well. */ ! store_motion (); /* Move the expressions that are expensive enough. */ ! todo = move_computations (); tree_ssa_lim_finalize (); return todo; } --- 2541,2612 ---- /* Moves invariants from loops. Only "expensive" invariants are moved out -- i.e. those that are likely to be win regardless of the register pressure. */ ! static unsigned int ! tree_ssa_lim_1 (struct loop *loop) { unsigned int todo; + basic_block *bbs; tree_ssa_lim_initialize (); + bbs = get_loop_body_in_dom_order (loop); + /* Gathers information about memory accesses in the loops. */ ! analyze_memory_references (loop, bbs); /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */ ! fill_always_executed_in (loop, bbs); /* For each statement determine the outermost loop in that it is invariant and cost for computing the invariant. */ ! determine_invariantness (loop, bbs); /* Execute store motion. Force the necessary invariants to be moved out of the loops as well. */ ! store_motion (loop); ! ! /* ??? Somehow edge inserts of SM get lost if we don't commit them ! here. */ ! free (bbs); ! gsi_commit_edge_inserts (); ! bbs = get_loop_body_in_dom_order (loop); /* Move the expressions that are expensive enough. */ ! todo = move_computations (loop, bbs); ! ! free (bbs); ! gsi_commit_edge_inserts (); tree_ssa_lim_finalize (); return todo; } + + unsigned int + tree_ssa_lim (void) + { + struct loop *loop; + basic_block bb; + unsigned todo = 0; + + if (flag_tm) + compute_transaction_bits (); + alloc_aux_for_edges (0); + + /* Process outermost loops independently. */ + loop = current_loops->tree_root->inner; + while (loop) + { + todo |= tree_ssa_lim_1 (loop); + loop = loop->next; + } + + free_aux_for_edges (); + FOR_EACH_BB (bb) + SET_ALWAYS_EXECUTED_IN (bb, NULL); + + if (need_ssa_update_p (cfun)) + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + + return todo; + }