public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/tnfchris/heads/gcc-14-early-break)] middle-end: update loop peeling code to maintain LCSSA form for early breaks
@ 2023-11-15 14:54 Tamar Christina
0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2023-11-15 14:54 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d03c5411959a2756a3e80bc8e88d1a076e2f5314
commit d03c5411959a2756a3e80bc8e88d1a076e2f5314
Author: Tamar Christina <tamar.christina@arm.com>
Date: Thu Nov 2 15:30:55 2023 +0000
middle-end: update loop peeling code to maintain LCSSA form for early breaks
Reviewed at https://reviewboard.gnu.aws.arm.com/r/17964/
Diff:
---
gcc/tree-vect-loop-manip.cc | 287 ++++++++++++++++++++++++++++++--------------
gcc/tree-vectorizer.h | 6 +-
2 files changed, 204 insertions(+), 89 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index b9161274ce4..fafbf924e8d 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1392,6 +1392,153 @@ vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo
(gimple *) cond_stmt);
}
+/* Determine if the exit choosen by the loop vectorizer differs from the
+ natural loop exit. i.e. if the exit leads to the loop patch or not.
+ When this happens we need to flip the understanding of main and other
+ exits by peeling and IV updates. */
+
+bool inline
+vect_is_loop_exit_latch_pred (edge loop_exit, class loop *loop)
+{
+ return single_pred (loop->latch) == loop_exit->src;
+}
+
+/* Perform peeling for when the peeled loop is placed after the original loop.
+ This maintains LCSSA and creates the appropriate blocks for multiple exit
+ vectorization. */
+
+void static
+slpeel_tree_duplicate_loop_for_vectorization (class loop *loop, edge loop_exit,
+ vec<edge> &loop_exits,
+ class loop *new_loop,
+ bool flow_loops,
+ basic_block new_preheader)
+{
+ bool multiple_exits_p = loop_exits.length () > 1;
+ basic_block main_loop_exit_block = new_preheader;
+ if (multiple_exits_p)
+ {
+ edge loop_entry = single_succ_edge (new_preheader);
+ new_preheader = split_edge (loop_entry);
+ }
+
+ auto_vec <gimple *> new_phis;
+ hash_map <tree, tree> new_phi_args;
+ /* First create the empty phi nodes so that when we flush the
+ statements they can be filled in. However because there is no order
+ between the PHI nodes in the exits and the loop headers we need to
+ order them base on the order of the two headers. First record the new
+ phi nodes. Then redirect the edges and flush the changes. This writes out
+ the new SSA names. */
+ for (auto gsi_from = gsi_start_phis (loop_exit->dest);
+ !gsi_end_p (gsi_from); gsi_next (&gsi_from))
+ {
+ gimple *from_phi = gsi_stmt (gsi_from);
+ tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
+ gphi *res = create_phi_node (new_res, main_loop_exit_block);
+ new_phis.safe_push (res);
+ }
+
+ for (auto exit : loop_exits)
+ {
+ basic_block dest
+ = exit == loop_exit ? main_loop_exit_block : new_preheader;
+ redirect_edge_and_branch (exit, dest);
+ }
+
+ /* Only fush the main exit, the remaining exits we need to match the order
+ in the loop->header which with multiple exits may not be the same. */
+ flush_pending_stmts (loop_exit);
+
+ /* Record the new SSA names in the cache so that we can skip materializing
+ them again when we fill in the rest of the LCSSA variables. */
+ for (auto phi : new_phis)
+ {
+ tree new_arg = gimple_phi_arg (phi, 0)->def;
+
+ if (!SSA_VAR_P (new_arg))
+ continue;
+
+ /* If the PHI MEM node dominates the loop then we shouldn't create
+ a new LC-SSSA PHI for it in the intermediate block. */
+ /* A MEM phi that consitutes a new DEF for the vUSE chain can either
+ be a .VDEF or a PHI that operates on MEM. And said definition
+ must not be inside the main loop. Or we must be a parameter.
+ In the last two cases we may remove a non-MEM PHI node, but since
+ they dominate both loops the removal is unlikely to cause trouble
+ as the exits must already be using them. */
+ if (virtual_operand_p (new_arg)
+ && (SSA_NAME_IS_DEFAULT_DEF (new_arg)
+ || !flow_bb_inside_loop_p (loop,
+ gimple_bb (SSA_NAME_DEF_STMT (new_arg)))))
+ {
+ auto gsi = gsi_for_stmt (phi);
+ remove_phi_node (&gsi, true);
+ continue;
+ }
+
+ /* If we decide to remove the PHI node we should also not
+ rematerialize it later on. */
+ new_phi_args.put (new_arg, gimple_phi_result (phi));
+
+ if (TREE_CODE (new_arg) != SSA_NAME)
+ continue;
+ }
+
+ /* Copy the current loop LC PHI nodes between the original loop exit
+ block and the new loop header. This allows us to later split the
+ preheader block and still find the right LC nodes. */
+ edge loop_entry = single_succ_edge (new_preheader);
+ if (flow_loops)
+ for (auto gsi_from = gsi_start_phis (loop->header),
+ gsi_to = gsi_start_phis (new_loop->header);
+ !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
+ gsi_next (&gsi_from), gsi_next (&gsi_to))
+ {
+ gimple *from_phi = gsi_stmt (gsi_from);
+ gimple *to_phi = gsi_stmt (gsi_to);
+ tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, loop_latch_edge (loop));
+ tree *res = NULL;
+
+ /* Check if we've already created a new phi node during edge
+ redirection. If we have, only propagate the value downwards. */
+ if ((res = new_phi_args.get (new_arg)))
+ new_arg = *res;
+
+ /* All other exits use the previous iters. */
+ if (multiple_exits_p)
+ {
+ tree alt_arg = gimple_phi_result (from_phi);
+ tree alt_res = copy_ssa_name (alt_arg);
+ gphi *alt_lcssa_phi = create_phi_node (alt_res, new_preheader);
+ edge main_e = single_succ_edge (main_loop_exit_block);
+ for (edge e : loop_exits)
+ if (e != loop_exit)
+ {
+ add_phi_arg (alt_lcssa_phi, alt_arg, e, UNKNOWN_LOCATION);
+ SET_PHI_ARG_DEF (alt_lcssa_phi, main_e->dest_idx, new_arg);
+ }
+ new_arg = alt_res; /* Push it down to the new_loop header. */
+ } else if (!res) {
+ /* For non-early break we need to keep the possibly live values in
+ the exit block. For early break these are kept in the merge
+ block in the code above. */
+ tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
+ gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
+
+ /* Main loop exit should use the final iter value. */
+ add_phi_arg (lcssa_phi, new_arg, loop_exit, UNKNOWN_LOCATION);
+ new_arg = new_res;
+ }
+
+ adjust_phi_and_debug_stmts (to_phi, loop_entry, new_arg);
+ }
+
+ /* Now clear all the redirect maps. */
+ for (auto exit : loop_exits)
+ redirect_edge_var_map_clear (exit);
+}
+
/* Given LOOP this function generates a new copy of it and puts it
on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
@@ -1403,13 +1550,16 @@ vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo
copies remains the same.
If UPDATED_DOMS is not NULL it is update with the list of basic blocks whoms
- dominators were updated during the peeling. */
+ dominators were updated during the peeling. When doing early break vectorization
+ then LOOP_VINFO needs to be provided and is used to keep track of any newly created
+ memory references that need to be updated should we decide to vectorize. */
class loop *
slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
class loop *scalar_loop,
edge scalar_exit, edge e, edge *new_e,
- bool flow_loops)
+ bool flow_loops,
+ vec<basic_block> *updated_doms)
{
class loop *new_loop;
basic_block *new_bbs, *bbs, *pbbs;
@@ -1526,7 +1676,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
}
auto loop_exits = get_loop_exit_edges (loop);
+ bool multiple_exits_p = loop_exits.length () > 1;
auto_vec<basic_block> doms;
+ class loop *update_loop = NULL;
if (at_exit) /* Add the loop copy at exit. */
{
@@ -1536,91 +1688,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
flush_pending_stmts (new_exit);
}
- auto_vec <gimple *> new_phis;
- hash_map <tree, tree> new_phi_args;
- /* First create the empty phi nodes so that when we flush the
- statements they can be filled in. However because there is no order
- between the PHI nodes in the exits and the loop headers we need to
- order them base on the order of the two headers. First record the new
- phi nodes. */
- for (auto gsi_from = gsi_start_phis (scalar_exit->dest);
- !gsi_end_p (gsi_from); gsi_next (&gsi_from))
- {
- gimple *from_phi = gsi_stmt (gsi_from);
- tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
- gphi *res = create_phi_node (new_res, new_preheader);
- new_phis.safe_push (res);
- }
-
- /* Then redirect the edges and flush the changes. This writes out the new
- SSA names. */
- for (edge exit : loop_exits)
- {
- edge temp_e = redirect_edge_and_branch (exit, new_preheader);
- flush_pending_stmts (temp_e);
- }
- /* Record the new SSA names in the cache so that we can skip materializing
- them again when we fill in the rest of the LCSSA variables. */
- for (auto phi : new_phis)
- {
- tree new_arg = gimple_phi_arg (phi, 0)->def;
-
- if (!SSA_VAR_P (new_arg))
- continue;
- /* If the PHI MEM node dominates the loop then we shouldn't create
- a new LC-SSSA PHI for it in the intermediate block. */
- /* A MEM phi that consitutes a new DEF for the vUSE chain can either
- be a .VDEF or a PHI that operates on MEM. And said definition
- must not be inside the main loop. Or we must be a parameter.
- In the last two cases we may remove a non-MEM PHI node, but since
- they dominate both loops the removal is unlikely to cause trouble
- as the exits must already be using them. */
- if (virtual_operand_p (new_arg)
- && (SSA_NAME_IS_DEFAULT_DEF (new_arg)
- || !flow_bb_inside_loop_p (loop,
- gimple_bb (SSA_NAME_DEF_STMT (new_arg)))))
- {
- auto gsi = gsi_for_stmt (phi);
- remove_phi_node (&gsi, true);
- continue;
- }
- new_phi_args.put (new_arg, gimple_phi_result (phi));
-
- if (TREE_CODE (new_arg) != SSA_NAME)
- continue;
- }
-
- /* Copy the current loop LC PHI nodes between the original loop exit
- block and the new loop header. This allows us to later split the
- preheader block and still find the right LC nodes. */
- edge loop_entry = single_succ_edge (new_preheader);
- if (flow_loops)
- for (auto gsi_from = gsi_start_phis (loop->header),
- gsi_to = gsi_start_phis (new_loop->header);
- !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
- gsi_next (&gsi_from), gsi_next (&gsi_to))
- {
- gimple *from_phi = gsi_stmt (gsi_from);
- gimple *to_phi = gsi_stmt (gsi_to);
- tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
- loop_latch_edge (loop));
-
- /* Check if we've already created a new phi node during edge
- redirection. If we have, only propagate the value downwards. */
- if (tree *res = new_phi_args.get (new_arg))
- {
- adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
- continue;
- }
-
- tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
- gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
-
- /* Main loop exit should use the final iter value. */
- add_phi_arg (lcssa_phi, new_arg, loop_exit, UNKNOWN_LOCATION);
-
- adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
- }
+ slpeel_tree_duplicate_loop_for_vectorization (loop, loop_exit, loop_exits,
+ new_loop, flow_loops,
+ new_preheader);
set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
@@ -1634,6 +1704,21 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
delete_basic_block (preheader);
set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
loop_preheader_edge (scalar_loop)->src);
+
+ /* Finally after wiring the new epilogue we need to update its main exit
+ to the original function exit we recorded. Other exits are already
+ correct. */
+ if (multiple_exits_p)
+ {
+ update_loop = new_loop;
+ for (edge e : get_loop_exit_edges (loop))
+ doms.safe_push (e->dest);
+ doms.safe_push (exit_dest);
+
+ /* Likely a fall-through edge, so update if needed. */
+ if (single_succ_p (exit_dest))
+ doms.safe_push (single_succ (exit_dest));
+ }
}
else /* Add the copy at entry. */
{
@@ -1681,6 +1766,34 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
delete_basic_block (new_preheader);
set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
loop_preheader_edge (new_loop)->src);
+
+ if (multiple_exits_p)
+ update_loop = loop;
+ }
+
+ if (multiple_exits_p)
+ {
+ for (edge e : get_loop_exit_edges (update_loop))
+ {
+ edge ex;
+ edge_iterator ei;
+ FOR_EACH_EDGE (ex, ei, e->dest->succs)
+ {
+ /* Find the first non-fallthrough block as fall-throughs can't
+ dominate other blocks. */
+ if (single_succ_p (ex->dest))
+ {
+ doms.safe_push (ex->dest);
+ ex = single_succ_edge (ex->dest);
+ }
+ doms.safe_push (ex->dest);
+ }
+ doms.safe_push (e->dest);
+ }
+
+ iterate_fix_dominators (CDI_DOMINATORS, doms, false);
+ if (updated_doms)
+ updated_doms->safe_splice (doms);
}
free (new_bbs);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 76451a7fefe..b9a71a0b5f5 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1821,7 +1821,7 @@ is_loop_header_bb_p (basic_block bb)
{
if (bb == (bb->loop_father)->header)
return true;
- gcc_checking_assert (EDGE_COUNT (bb->preds) == 1);
+
return false;
}
@@ -2212,7 +2212,8 @@ extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge,
const_edge);
class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge,
class loop *, edge,
- edge, edge *, bool = true);
+ edge, edge *, bool = true,
+ vec<basic_block> * = NULL);
class loop *vect_loop_versioning (loop_vec_info, gimple *);
extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
tree *, tree *, tree *, int, bool, bool,
@@ -2223,6 +2224,7 @@ extern dump_user_location_t find_loop_location (class loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info);
extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
extern edge vec_init_loop_exit_info (class loop *);
+extern bool vect_is_loop_exit_latch_pred (edge, class loop *);
/* In tree-vect-stmts.cc. */
extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-11-15 14:54 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-15 14:54 [gcc(refs/users/tnfchris/heads/gcc-14-early-break)] middle-end: update loop peeling code to maintain LCSSA form for early breaks Tamar Christina
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).