From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1984) id 7CD213858C36; Wed, 15 Nov 2023 14:54:52 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 7CD213858C36 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1700060092; bh=A1Rjq/xUX6sPjqpSWkHYO2FxvVzzsxhdgU4Tmfp7bGg=; h=From:To:Subject:Date:From; b=nTW96nurkLH52QjLG8jZW78ZgM0jwZs9dNCPnF9/kPWV2mXcLEDrsBWrjKmPUyeO4 8eHGyi1JCo+KXPa3aGMy6agyCqS71gw4lXsfIGp6FN5j/ZeUfq/JNERIgJlirCXTkZ vhg9NYkhhOllF7XFE4rtT71cYQ9apFtNCwVA9hhY= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Tamar Christina To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/tnfchris/heads/gcc-14-early-break)] middle-end: update loop peeling code to maintain LCSSA form for early breaks X-Act-Checkin: gcc X-Git-Author: Tamar Christina X-Git-Refname: refs/users/tnfchris/heads/gcc-14-early-break X-Git-Oldrev: 4dd62ce6c19171e0d7c8e690998afe0c57ce5564 X-Git-Newrev: d03c5411959a2756a3e80bc8e88d1a076e2f5314 Message-Id: <20231115145452.7CD213858C36@sourceware.org> Date: Wed, 15 Nov 2023 14:54:52 +0000 (GMT) List-Id: https://gcc.gnu.org/g:d03c5411959a2756a3e80bc8e88d1a076e2f5314 commit d03c5411959a2756a3e80bc8e88d1a076e2f5314 Author: Tamar Christina Date: Thu Nov 2 15:30:55 2023 +0000 middle-end: update loop peeling code to maintain LCSSA form for early breaks Reviewed at https://reviewboard.gnu.aws.arm.com/r/17964/ Diff: --- gcc/tree-vect-loop-manip.cc | 287 ++++++++++++++++++++++++++++++-------------- gcc/tree-vectorizer.h | 6 +- 2 files changed, 204 insertions(+), 89 deletions(-) diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index b9161274ce4..fafbf924e8d 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -1392,6 +1392,153 @@ vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo (gimple *) cond_stmt); } +/* Determine if the exit choosen by the loop vectorizer differs from the + natural loop exit. i.e. if the exit leads to the loop patch or not. + When this happens we need to flip the understanding of main and other + exits by peeling and IV updates. */ + +bool inline +vect_is_loop_exit_latch_pred (edge loop_exit, class loop *loop) +{ + return single_pred (loop->latch) == loop_exit->src; +} + +/* Perform peeling for when the peeled loop is placed after the original loop. + This maintains LCSSA and creates the appropriate blocks for multiple exit + vectorization. */ + +void static +slpeel_tree_duplicate_loop_for_vectorization (class loop *loop, edge loop_exit, + vec &loop_exits, + class loop *new_loop, + bool flow_loops, + basic_block new_preheader) +{ + bool multiple_exits_p = loop_exits.length () > 1; + basic_block main_loop_exit_block = new_preheader; + if (multiple_exits_p) + { + edge loop_entry = single_succ_edge (new_preheader); + new_preheader = split_edge (loop_entry); + } + + auto_vec new_phis; + hash_map new_phi_args; + /* First create the empty phi nodes so that when we flush the + statements they can be filled in. However because there is no order + between the PHI nodes in the exits and the loop headers we need to + order them base on the order of the two headers. First record the new + phi nodes. Then redirect the edges and flush the changes. This writes out + the new SSA names. */ + for (auto gsi_from = gsi_start_phis (loop_exit->dest); + !gsi_end_p (gsi_from); gsi_next (&gsi_from)) + { + gimple *from_phi = gsi_stmt (gsi_from); + tree new_res = copy_ssa_name (gimple_phi_result (from_phi)); + gphi *res = create_phi_node (new_res, main_loop_exit_block); + new_phis.safe_push (res); + } + + for (auto exit : loop_exits) + { + basic_block dest + = exit == loop_exit ? main_loop_exit_block : new_preheader; + redirect_edge_and_branch (exit, dest); + } + + /* Only fush the main exit, the remaining exits we need to match the order + in the loop->header which with multiple exits may not be the same. */ + flush_pending_stmts (loop_exit); + + /* Record the new SSA names in the cache so that we can skip materializing + them again when we fill in the rest of the LCSSA variables. */ + for (auto phi : new_phis) + { + tree new_arg = gimple_phi_arg (phi, 0)->def; + + if (!SSA_VAR_P (new_arg)) + continue; + + /* If the PHI MEM node dominates the loop then we shouldn't create + a new LC-SSSA PHI for it in the intermediate block. */ + /* A MEM phi that consitutes a new DEF for the vUSE chain can either + be a .VDEF or a PHI that operates on MEM. And said definition + must not be inside the main loop. Or we must be a parameter. + In the last two cases we may remove a non-MEM PHI node, but since + they dominate both loops the removal is unlikely to cause trouble + as the exits must already be using them. */ + if (virtual_operand_p (new_arg) + && (SSA_NAME_IS_DEFAULT_DEF (new_arg) + || !flow_bb_inside_loop_p (loop, + gimple_bb (SSA_NAME_DEF_STMT (new_arg))))) + { + auto gsi = gsi_for_stmt (phi); + remove_phi_node (&gsi, true); + continue; + } + + /* If we decide to remove the PHI node we should also not + rematerialize it later on. */ + new_phi_args.put (new_arg, gimple_phi_result (phi)); + + if (TREE_CODE (new_arg) != SSA_NAME) + continue; + } + + /* Copy the current loop LC PHI nodes between the original loop exit + block and the new loop header. This allows us to later split the + preheader block and still find the right LC nodes. */ + edge loop_entry = single_succ_edge (new_preheader); + if (flow_loops) + for (auto gsi_from = gsi_start_phis (loop->header), + gsi_to = gsi_start_phis (new_loop->header); + !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to); + gsi_next (&gsi_from), gsi_next (&gsi_to)) + { + gimple *from_phi = gsi_stmt (gsi_from); + gimple *to_phi = gsi_stmt (gsi_to); + tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, loop_latch_edge (loop)); + tree *res = NULL; + + /* Check if we've already created a new phi node during edge + redirection. If we have, only propagate the value downwards. */ + if ((res = new_phi_args.get (new_arg))) + new_arg = *res; + + /* All other exits use the previous iters. */ + if (multiple_exits_p) + { + tree alt_arg = gimple_phi_result (from_phi); + tree alt_res = copy_ssa_name (alt_arg); + gphi *alt_lcssa_phi = create_phi_node (alt_res, new_preheader); + edge main_e = single_succ_edge (main_loop_exit_block); + for (edge e : loop_exits) + if (e != loop_exit) + { + add_phi_arg (alt_lcssa_phi, alt_arg, e, UNKNOWN_LOCATION); + SET_PHI_ARG_DEF (alt_lcssa_phi, main_e->dest_idx, new_arg); + } + new_arg = alt_res; /* Push it down to the new_loop header. */ + } else if (!res) { + /* For non-early break we need to keep the possibly live values in + the exit block. For early break these are kept in the merge + block in the code above. */ + tree new_res = copy_ssa_name (gimple_phi_result (from_phi)); + gphi *lcssa_phi = create_phi_node (new_res, new_preheader); + + /* Main loop exit should use the final iter value. */ + add_phi_arg (lcssa_phi, new_arg, loop_exit, UNKNOWN_LOCATION); + new_arg = new_res; + } + + adjust_phi_and_debug_stmts (to_phi, loop_entry, new_arg); + } + + /* Now clear all the redirect maps. */ + for (auto exit : loop_exits) + redirect_edge_var_map_clear (exit); +} + /* Given LOOP this function generates a new copy of it and puts it on E which is either the entry or exit of LOOP. If SCALAR_LOOP is non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the @@ -1403,13 +1550,16 @@ vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo copies remains the same. If UPDATED_DOMS is not NULL it is update with the list of basic blocks whoms - dominators were updated during the peeling. */ + dominators were updated during the peeling. When doing early break vectorization + then LOOP_VINFO needs to be provided and is used to keep track of any newly created + memory references that need to be updated should we decide to vectorize. */ class loop * slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, class loop *scalar_loop, edge scalar_exit, edge e, edge *new_e, - bool flow_loops) + bool flow_loops, + vec *updated_doms) { class loop *new_loop; basic_block *new_bbs, *bbs, *pbbs; @@ -1526,7 +1676,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, } auto loop_exits = get_loop_exit_edges (loop); + bool multiple_exits_p = loop_exits.length () > 1; auto_vec doms; + class loop *update_loop = NULL; if (at_exit) /* Add the loop copy at exit. */ { @@ -1536,91 +1688,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, flush_pending_stmts (new_exit); } - auto_vec new_phis; - hash_map new_phi_args; - /* First create the empty phi nodes so that when we flush the - statements they can be filled in. However because there is no order - between the PHI nodes in the exits and the loop headers we need to - order them base on the order of the two headers. First record the new - phi nodes. */ - for (auto gsi_from = gsi_start_phis (scalar_exit->dest); - !gsi_end_p (gsi_from); gsi_next (&gsi_from)) - { - gimple *from_phi = gsi_stmt (gsi_from); - tree new_res = copy_ssa_name (gimple_phi_result (from_phi)); - gphi *res = create_phi_node (new_res, new_preheader); - new_phis.safe_push (res); - } - - /* Then redirect the edges and flush the changes. This writes out the new - SSA names. */ - for (edge exit : loop_exits) - { - edge temp_e = redirect_edge_and_branch (exit, new_preheader); - flush_pending_stmts (temp_e); - } - /* Record the new SSA names in the cache so that we can skip materializing - them again when we fill in the rest of the LCSSA variables. */ - for (auto phi : new_phis) - { - tree new_arg = gimple_phi_arg (phi, 0)->def; - - if (!SSA_VAR_P (new_arg)) - continue; - /* If the PHI MEM node dominates the loop then we shouldn't create - a new LC-SSSA PHI for it in the intermediate block. */ - /* A MEM phi that consitutes a new DEF for the vUSE chain can either - be a .VDEF or a PHI that operates on MEM. And said definition - must not be inside the main loop. Or we must be a parameter. - In the last two cases we may remove a non-MEM PHI node, but since - they dominate both loops the removal is unlikely to cause trouble - as the exits must already be using them. */ - if (virtual_operand_p (new_arg) - && (SSA_NAME_IS_DEFAULT_DEF (new_arg) - || !flow_bb_inside_loop_p (loop, - gimple_bb (SSA_NAME_DEF_STMT (new_arg))))) - { - auto gsi = gsi_for_stmt (phi); - remove_phi_node (&gsi, true); - continue; - } - new_phi_args.put (new_arg, gimple_phi_result (phi)); - - if (TREE_CODE (new_arg) != SSA_NAME) - continue; - } - - /* Copy the current loop LC PHI nodes between the original loop exit - block and the new loop header. This allows us to later split the - preheader block and still find the right LC nodes. */ - edge loop_entry = single_succ_edge (new_preheader); - if (flow_loops) - for (auto gsi_from = gsi_start_phis (loop->header), - gsi_to = gsi_start_phis (new_loop->header); - !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to); - gsi_next (&gsi_from), gsi_next (&gsi_to)) - { - gimple *from_phi = gsi_stmt (gsi_from); - gimple *to_phi = gsi_stmt (gsi_to); - tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, - loop_latch_edge (loop)); - - /* Check if we've already created a new phi node during edge - redirection. If we have, only propagate the value downwards. */ - if (tree *res = new_phi_args.get (new_arg)) - { - adjust_phi_and_debug_stmts (to_phi, loop_entry, *res); - continue; - } - - tree new_res = copy_ssa_name (gimple_phi_result (from_phi)); - gphi *lcssa_phi = create_phi_node (new_res, new_preheader); - - /* Main loop exit should use the final iter value. */ - add_phi_arg (lcssa_phi, new_arg, loop_exit, UNKNOWN_LOCATION); - - adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res); - } + slpeel_tree_duplicate_loop_for_vectorization (loop, loop_exit, loop_exits, + new_loop, flow_loops, + new_preheader); set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src); @@ -1634,6 +1704,21 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, delete_basic_block (preheader); set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header, loop_preheader_edge (scalar_loop)->src); + + /* Finally after wiring the new epilogue we need to update its main exit + to the original function exit we recorded. Other exits are already + correct. */ + if (multiple_exits_p) + { + update_loop = new_loop; + for (edge e : get_loop_exit_edges (loop)) + doms.safe_push (e->dest); + doms.safe_push (exit_dest); + + /* Likely a fall-through edge, so update if needed. */ + if (single_succ_p (exit_dest)) + doms.safe_push (single_succ (exit_dest)); + } } else /* Add the copy at entry. */ { @@ -1681,6 +1766,34 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, delete_basic_block (new_preheader); set_immediate_dominator (CDI_DOMINATORS, new_loop->header, loop_preheader_edge (new_loop)->src); + + if (multiple_exits_p) + update_loop = loop; + } + + if (multiple_exits_p) + { + for (edge e : get_loop_exit_edges (update_loop)) + { + edge ex; + edge_iterator ei; + FOR_EACH_EDGE (ex, ei, e->dest->succs) + { + /* Find the first non-fallthrough block as fall-throughs can't + dominate other blocks. */ + if (single_succ_p (ex->dest)) + { + doms.safe_push (ex->dest); + ex = single_succ_edge (ex->dest); + } + doms.safe_push (ex->dest); + } + doms.safe_push (e->dest); + } + + iterate_fix_dominators (CDI_DOMINATORS, doms, false); + if (updated_doms) + updated_doms->safe_splice (doms); } free (new_bbs); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 76451a7fefe..b9a71a0b5f5 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1821,7 +1821,7 @@ is_loop_header_bb_p (basic_block bb) { if (bb == (bb->loop_father)->header) return true; - gcc_checking_assert (EDGE_COUNT (bb->preds) == 1); + return false; } @@ -2212,7 +2212,8 @@ extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge, const_edge); class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge, class loop *, edge, - edge, edge *, bool = true); + edge, edge *, bool = true, + vec * = NULL); class loop *vect_loop_versioning (loop_vec_info, gimple *); extern class loop *vect_do_peeling (loop_vec_info, tree, tree, tree *, tree *, tree *, int, bool, bool, @@ -2223,6 +2224,7 @@ extern dump_user_location_t find_loop_location (class loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); extern edge vec_init_loop_exit_info (class loop *); +extern bool vect_is_loop_exit_latch_pred (edge, class loop *); /* In tree-vect-stmts.cc. */ extern tree get_related_vectype_for_scalar_type (machine_mode, tree,