From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 118995 invoked by alias); 5 Dec 2017 13:02:41 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 118977 invoked by uid 89); 5 Dec 2017 13:02:40 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-11.9 required=5.0 tests=BAYES_00,GIT_PATCH_2,GIT_PATCH_3,SPF_PASS,T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy=114411, rectangle, firstly X-HELO: mx2.suse.de Received: from mx2.suse.de (HELO mx2.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 05 Dec 2017 13:02:37 +0000 Received: from relay2.suse.de (charybdis-ext.suse.de [195.135.220.254]) by mx2.suse.de (Postfix) with ESMTP id E1AABADC8; Tue, 5 Dec 2017 13:02:34 +0000 (UTC) Date: Tue, 05 Dec 2017 13:02:00 -0000 From: Richard Biener To: gcc-patches@gcc.gnu.org cc: "Bin.Cheng" Subject: [PATCH][gimple-interchange] Final cleanup stuff Message-ID: User-Agent: Alpine 2.20 (LSU 67 2015-01-07) MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII X-SW-Source: 2017-12/txt/msg00217.txt.bz2 This is my final sweep through the code doing cleanup on-the-fly. I think the code is ready to go now (after you committed your changes). What I'd eventually like to see is merge the two loop_cand objects into a single interchange_cand object, having two really confuses me when reading and trying to understand code. But let's defer this for next stage1. A change that's still required is adjusting the graphite testcases to use -floop-nest-optimize (you promised to do that) and to enable the interchange pass at -O3 by default. With that, can you prepare a patch that merges the changes on the branch to trunk and provide updated performance / statistics for SPEC (maybe also SPEC compile-time figures with/without the patch? it's enough to look at the Elapsed Compile time numbers in the log file IMHO). Thanks, Richard. 2017-12-05 Richard Biener * gimple-loop-interchange.cc (AVG_LOOP_NITER): Remove. (loop_cand::supported_operations): Simplify. (loop_cand::analyze_iloop_reduction_var): Use m_exit. (loop_cand::analyze_oloop_reduction_var): Likewise. (loop_cand::analyze_lcssa_phis): Likewise. (find_deps_in_bb_for_stmt): Use gimple_seq_add_stmt_without_update. (loop_cand::undo_simple_reduction): Likewise, properly release virtual defs. (tree_loop_interchange::interchange_loops): Likewise. Move code to innner loop here. (tree_loop_interchange::map_inductions_to_loop): Remove code moving code to inner loop. (insert_pos_at_inner_loop): Inline into single caller... (tree_loop_interchange::move_code_to_inner): ...here. Properly release virtual defs. (proper_loop_form_for_interchange): Properly analyze/instantiate SCEV. (prepare_perfect_loop_nest): Do not explicitely allocate vectors. Index: gcc/gimple-loop-interchange.cc =================================================================== --- gcc/gimple-loop-interchange.cc (revision 255414) +++ gcc/gimple-loop-interchange.cc (working copy) @@ -81,8 +81,6 @@ along with GCC; see the file COPYING3. #define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS)) /* Maximum number of data references in loop nest. */ #define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) -/* Default average number of loop iterations. */ -#define AVG_LOOP_NITER (PARAM_VALUE (PARAM_AVG_LOOP_NITER)) /* Comparison ratio of access stride between inner/outer loops to be interchanged. This is the minimum stride ratio for loop interchange @@ -105,7 +103,7 @@ typedef struct induction /* IV's base and step part of SCEV. */ tree base; tree step; -}* induction_p; +} *induction_p; /* Enum type for loop reduction variable. */ enum reduction_type @@ -136,7 +134,7 @@ typedef struct reduction reference. */ tree fini_ref; enum reduction_type type; -}* reduction_p; +} *reduction_p; /* Dump reduction RE. */ @@ -302,24 +300,17 @@ loop_cand::supported_operations (basic_b if (is_gimple_debug (stmt)) continue; - if (gimple_has_volatile_ops (stmt) - || gimple_has_side_effects (stmt)) + if (gimple_has_side_effects (stmt)) return false; bb_num_stmts++; - if (is_gimple_call (stmt)) + if (gcall *call = dyn_cast (stmt)) { - int cflags = gimple_call_flags (stmt); - /* Only support const/pure calls. */ - if (!(cflags & (ECF_CONST | ECF_PURE))) - return false; - /* In basic block of outer loop, the call should be cheap since it will be moved to inner loop. */ if (iloop != NULL - && !gimple_inexpensive_call_p (as_a (stmt))) + && !gimple_inexpensive_call_p (call)) return false; - continue; } @@ -334,6 +325,7 @@ loop_cand::supported_operations (basic_b tree lhs; /* Support loop invariant memory reference if it's only used once by inner loop. */ + /* ??? How's this checking for invariantness? */ if (gimple_assign_single_p (stmt) && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE && TREE_CODE (lhs) == SSA_NAME @@ -347,7 +339,7 @@ loop_cand::supported_operations (basic_b /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer loop's header, or PHI nodes in dest bb of inner loop's exit edge. */ if (!iloop || bb == m_loop->header - || bb == single_dom_exit (iloop->m_loop)->dest) + || bb == iloop->m_exit->dest) return true; /* Don't allow any other PHI nodes. */ @@ -484,7 +476,6 @@ loop_cand::analyze_iloop_reduction_var ( gphi *lcssa_phi = NULL, *use_phi; tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop)); tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop)); - edge e = single_exit (m_loop); reduction_p re; gimple *stmt, *next_def, *single_use = NULL; use_operand_p use_p; @@ -571,8 +562,8 @@ loop_cand::analyze_iloop_reduction_var ( if (use_phi != NULL && lcssa_phi == NULL - && gimple_bb (stmt) == e->dest - && PHI_ARG_DEF_FROM_EDGE (use_phi, e) == next) + && gimple_bb (stmt) == m_exit->dest + && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next) lcssa_phi = use_phi; else return false; @@ -621,7 +612,6 @@ loop_cand::analyze_oloop_reduction_var ( gphi *lcssa_phi = NULL, *use_phi; tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop)); tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop)); - edge e = single_exit (m_loop); reduction_p re; gimple *stmt, *next_def; use_operand_p use_p; @@ -671,8 +661,8 @@ loop_cand::analyze_oloop_reduction_var ( if (lcssa_phi == NULL && use_phi != NULL - && gimple_bb (stmt) == e->dest - && PHI_ARG_DEF_FROM_EDGE (use_phi, e) == next) + && gimple_bb (stmt) == m_exit->dest + && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next) lcssa_phi = use_phi; else return false; @@ -781,10 +771,8 @@ loop_cand::analyze_carried_vars (loop_ca bool loop_cand::analyze_lcssa_phis (void) { - edge e = single_exit (m_loop); gphi_iterator gsi; - - for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) + for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi)) { gphi *phi = gsi.phi (); @@ -810,7 +798,7 @@ loop_cand::analyze_lcssa_phis (void) static void find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer) { - auto_vec worklist; + auto_vec worklist; use_operand_p use_p; ssa_op_iter iter; gimple *stmt, *def_stmt; @@ -845,7 +833,7 @@ find_deps_in_bb_for_stmt (gimple_seq *st if (gimple_plf (stmt, GF_PLF_1)) { gsi_remove (&gsi, false); - gimple_seq_add_stmt (stmts, stmt); + gimple_seq_add_stmt_without_update (stmts, stmt); } else gsi_next_nondebug (&gsi); @@ -898,7 +886,7 @@ loop_cand::undo_simple_reduction (reduct gimple_set_vuse (re->producer, NULL_TREE); from = gsi_for_stmt (re->producer); gsi_remove (&from, false); - gimple_seq_add_stmt (&stmts, re->producer); + gimple_seq_add_stmt_without_update (&stmts, re->producer); new_var = re->init; } else @@ -908,7 +896,7 @@ loop_cand::undo_simple_reduction (reduct /* Because we generate new stmt loading from the MEM_REF to TMP. */ tree tmp = copy_ssa_name (re->var); stmt = gimple_build_assign (tmp, re->init_ref); - gimple_seq_add_stmt (&stmts, stmt); + gimple_seq_add_stmt_without_update (&stmts, stmt); /* Init new_var to MEM_REF or CONST depending on if it is the first iteration. */ @@ -916,7 +904,7 @@ loop_cand::undo_simple_reduction (reduct tree cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init); new_var = copy_ssa_name (re->var); stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init); - gimple_seq_add_stmt (&stmts, stmt); + gimple_seq_add_stmt_without_update (&stmts, stmt); } gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT); @@ -932,10 +920,11 @@ loop_cand::undo_simple_reduction (reduct } /* Move consumer stmt into inner loop, just after reduction next's def. */ + unlink_stmt_vdef (re->consumer); + release_ssa_name (gimple_vdef (re->consumer)); gimple_set_vdef (re->consumer, NULL_TREE); gimple_set_vuse (re->consumer, NULL_TREE); gimple_assign_set_rhs1 (re->consumer, re->next); - update_stmt (re->consumer); from = gsi_for_stmt (re->consumer); to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next)); gsi_move_after (&from, &to); @@ -1129,6 +1118,10 @@ tree_loop_interchange::interchange_loops o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true, NULL_TREE, false, GSI_CONTINUE_LINKING); + /* Move src's code to tgt loop. This is necessary when src is the outer + loop and tgt is the inner loop. */ + move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs); + /* Map outer loop's IV to inner loop, and vice versa. */ map_inductions_to_loop (oloop, iloop); map_inductions_to_loop (iloop, oloop); @@ -1137,10 +1130,10 @@ tree_loop_interchange::interchange_loops loop is actually from inner/outer loop. Also we record the new IV created for the outer loop so that it can be skipped in later loop interchange. */ - create_canonical_iv (oloop.m_loop, single_exit (oloop.m_loop), + create_canonical_iv (oloop.m_loop, oloop.m_exit, i_niters, &m_niters_iv_var, &var_after); bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); - create_canonical_iv (iloop.m_loop, single_exit (iloop.m_loop), + create_canonical_iv (iloop.m_loop, iloop.m_exit, o_niters, NULL, &var_after); bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); @@ -1151,6 +1144,11 @@ tree_loop_interchange::interchange_loops oloop.m_loop->any_upper_bound = false; oloop.m_loop->any_likely_upper_bound = false; free_numbers_of_iterations_estimates (oloop.m_loop); + + /* ??? The association between the loop data structure and the + CFG changed, so what was loop N at the source level is now + loop M. We should think of retaining the association or breaking + it fully by creating a new loop instead of re-using the "wrong" one. */ } /* Map induction variables of SRC loop to TGT loop. The function firstly @@ -1180,14 +1178,8 @@ void tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt) { induction_p iv; - edge e = single_exit (tgt.m_loop); + edge e = tgt.m_exit; gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi; - bool move_code_p = flow_loop_nested_p (src.m_loop, tgt.m_loop); - - /* Move src's code to tgt loop. This is necessary when src is the outer - loop and tgt is the inner loop. */ - if (move_code_p) - move_code_to_inner_loop (src.m_loop, tgt.m_loop, src.m_bbs); /* Map source loop's IV to target loop. */ for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i) @@ -1234,26 +1226,6 @@ tree_loop_interchange::map_inductions_to } } -/* Compute the insert position at inner loop when moving code from outer - loop to inner one. */ - -static inline void -insert_pos_at_inner_loop (struct loop *outer, struct loop *inner, - basic_block bb, gimple_stmt_iterator *pos) -{ - /* Move code from header/latch to header/latch. */ - if (bb == outer->header) - *pos = gsi_after_labels (inner->header); - else if (bb == outer->latch) - *pos = gsi_after_labels (inner->latch); - else - { - /* Otherwise, simply move to exit->src. */ - edge e = single_exit (inner); - *pos = gsi_last_bb (e->src); - } -} - /* Move stmts of outer loop to inner loop. */ void @@ -1272,7 +1244,15 @@ tree_loop_interchange::move_code_to_inne if (flow_bb_inside_loop_p (inner, bb)) continue; - insert_pos_at_inner_loop (outer, inner, bb, &to); + /* Move code from header/latch to header/latch. */ + if (bb == outer->header) + to = gsi_after_labels (inner->header); + else if (bb == outer->latch) + to = gsi_after_labels (inner->latch); + else + /* Otherwise, simply move to exit->src. */ + to = gsi_last_bb (single_exit (inner)->src); + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) { gimple *stmt = gsi_stmt (gsi); @@ -1287,7 +1267,11 @@ tree_loop_interchange::move_code_to_inne if (gimple_vuse (stmt)) gimple_set_vuse (stmt, NULL_TREE); if (gimple_vdef (stmt)) - gimple_set_vdef (stmt, NULL_TREE); + { + unlink_stmt_vdef (stmt); + release_ssa_name (gimple_vdef (stmt)); + gimple_set_vdef (stmt, NULL_TREE); + } reset_debug_uses (stmt); gsi_move_before (&gsi, &to); @@ -1756,18 +1740,20 @@ proper_loop_form_for_interchange (struct free (bbs); tree niters = number_of_latch_executions (loop); + niters = analyze_scalar_evolution (loop_outer (loop), niters); if (!niters || chrec_contains_undetermined (niters)) return false; /* Record the innermost outer loop that doesn't form rectangle loop nest. */ - for (loop = loop_outer (loop); - loop && flow_loop_nested_p (*min_outer, loop); - loop = loop_outer (loop)) - { - niters = instantiate_scev (loop_preheader_edge (loop), loop, niters); - if (!evolution_function_is_invariant_p (niters, loop->num)) + for (loop_p loop2 = loop_outer (loop); + loop2 && flow_loop_nested_p (*min_outer, loop2); + loop2 = loop_outer (loop2)) + { + niters = instantiate_scev (loop_preheader_edge (loop2), + loop_outer (loop), niters); + if (!evolution_function_is_invariant_p (niters, loop2->num)) { - *min_outer = loop; + *min_outer = loop2; break; } } @@ -1968,7 +1954,6 @@ prepare_perfect_loop_nest (struct loop * /* Prepare the data reference vector for the loop nest, pruning outer loops we cannot handle. */ - datarefs->create (20); start_loop = prepare_data_references (start_loop, datarefs); if (!start_loop /* Check if there is no data reference. */ @@ -1990,9 +1975,7 @@ prepare_perfect_loop_nest (struct loop * /* Check if data dependences can be computed for loop nest starting from start_loop. */ loop = start_loop; - loop_nest->create (3); do { - ddrs->create (20); loop_nest->truncate (0); if (loop != start_loop) @@ -2015,8 +1998,6 @@ prepare_perfect_loop_nest (struct loop * return true; } - /* ??? We should be able to adjust DDRs we already computed by - truncating distance vectors. */ free_dependence_relations (*ddrs); *ddrs = vNULL; /* Try to compute data dependences with the outermost loop stripped. */