public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/tnfchris/heads/gcc-14-early-break)] middle-end: Implement code motion and dependency analysis for early breaks
@ 2023-11-15 14:54 Tamar Christina
From: Tamar Christina @ 2023-11-15 14:54 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:4dd62ce6c19171e0d7c8e690998afe0c57ce5564
commit 4dd62ce6c19171e0d7c8e690998afe0c57ce5564
Author: Tamar Christina <tamar.christina@arm.com>
Date: Thu Nov 2 15:18:45 2023 +0000
middle-end: Implement code motion and dependency analysis for early breaks
Diff:
---
gcc/tree-vect-data-refs.cc | 330 +++++++++++++++++++++++++++++++++++++++++++++
gcc/tree-vect-loop.cc | 55 ++++++++
gcc/tree-vect-stmts.cc | 3 +
gcc/tree-vectorizer.h | 23 ++++
4 files changed, 411 insertions(+)
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index d5c9c4a11c2..9549570bac8 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -613,6 +613,331 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
return opt_result::success ();
}
+/* This function tries to validate whether an early break vectorization
+ is possible for the current instruction sequence. Returns True if
+ possible, otherwise False.
+
+ Requirements:
+ - Any memory access must be to a fixed size buffer.
+ - There must not be any loads and stores to the same object.
+ - Multiple loads are allowed as long as they don't alias.
+
+ NOTE:
+ This implementation is very conservative. Any overlapping loads/stores
+ that take place before the early break statement are rejected, aside from
+ WAR dependencies.
+
+ i.e.:
+
+ a[i] = 8
+ c = a[i]
+ if (b[i])
+ ...
+
+ is not allowed, but
+
+ c = a[i]
+ a[i] = 8
+ if (b[i])
+ ...
+
+ is allowed, which is the common case.
+
+ Arguments:
+ - LOOP_VINFO: loop information for the current loop.
+ - CHAIN: Currently detected sequence of instructions that need to be moved
+ if we are to vectorize this early break.
+ - FIXED: Sequences of SSA_NAMEs that must not be moved; they are reachable
+ from one or more cond conditions. If this set overlaps with CHAIN then
+ FIXED takes precedence. This deals with non-single-use cases.
+ - LOADS: List of all loads found during traversal.
+ - BASES: List of all load data references found during traversal.
+ - GSTMT: Current position to inspect for validity. The sequence
+ will be moved upwards from this point.
+ - REACHING_VUSE: The dominating VUSE found so far. */
+
+static bool
+validate_early_exit_stmts (loop_vec_info loop_vinfo, hash_set<tree> *chain,
+ hash_set<tree> *fixed, vec<tree> *loads,
+ vec<data_reference *> *bases, tree *reaching_vuse,
+ gimple_stmt_iterator *gstmt)
+{
+ if (gsi_end_p (*gstmt))
+ return true;
+
+ gimple *stmt = gsi_stmt (*gstmt);
+ /* ??? Do we need to move debug statements? Not quite sure. */
+ if (gimple_has_ops (stmt)
+ && !is_gimple_debug (stmt))
+ {
+ tree dest = NULL_TREE;
+ /* Try to find the SSA_NAME being defined. For statements with an LHS
+ use the LHS; if not, assume that the first argument of a call is the
+ value being defined, e.g. MASKED_LOAD etc. */
+ if (gimple_has_lhs (stmt))
+ dest = gimple_get_lhs (stmt);
+ else if (const gcall *call = dyn_cast <const gcall *> (stmt))
+ dest = gimple_arg (call, 0);
+ else if (const gcond *cond = dyn_cast <const gcond *> (stmt))
+ {
+ /* Operands of conds are ones we can't move. */
+ fixed->add (gimple_cond_lhs (cond));
+ fixed->add (gimple_cond_rhs (cond));
+ }
+
+ bool move = false;
+
+ stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
+ if (!stmt_vinfo)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks not supported. Unknown"
+ " statement: %G", stmt);
+ return false;
+ }
+
+ auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
+ if (dr_ref)
+ {
+ /* We currently only support statically allocated objects due to
+ not having support for first-faulting loads or peeling for
+ alignment. Compute the size of the referenced object (it could be
+ dynamically allocated). */
+ tree obj = DR_BASE_ADDRESS (dr_ref);
+ if (!obj || TREE_CODE (obj) != ADDR_EXPR)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks only supported on statically"
+ " allocated objects.\n");
+ return false;
+ }
+
+ tree refop = TREE_OPERAND (obj, 0);
+ tree refbase = get_base_address (refop);
+ if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
+ || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks only supported on statically"
+ " allocated objects.\n");
+ return false;
+ }
+
+ if (DR_IS_READ (dr_ref))
+ {
+ loads->safe_push (dest);
+ bases->safe_push (dr_ref);
+ }
+ else if (DR_IS_WRITE (dr_ref))
+ {
+ for (auto dr : bases)
+ if (same_data_refs_base_objects (dr, dr_ref))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "early breaks only supported,"
+ " overlapping loads and stores found"
+ " before the break statement.\n");
+ return false;
+ }
+ /* Any write starts a new chain. */
+ move = true;
+ }
+ }
+
+ /* If a statement is live and escapes the loop through usage in the loop
+ epilogue then we can't move it since we need to maintain its
+ reachability through all exits. */
+ bool skip = false;
+ if (STMT_VINFO_LIVE_P (stmt_vinfo)
+ && !(dr_ref && DR_IS_WRITE (dr_ref)))
+ {
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, dest)
+ {
+ basic_block bb = gimple_bb (USE_STMT (use_p));
+ skip = bb == LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
+ if (skip)
+ break;
+ }
+ }
+
+ /* If we found the defining statement of something that's part of the
+ chain then expand the chain with the new SSA_NAMEs being used. */
+ if (!skip && (chain->contains (dest) || move))
+ {
+ move = true;
+ for (unsigned x = 0; x < gimple_num_args (stmt); x++)
+ {
+ tree var = gimple_arg (stmt, x);
+ if (TREE_CODE (var) == SSA_NAME)
+ {
+ if (fixed->contains (dest))
+ {
+ move = false;
+ fixed->add (var);
+ }
+ else
+ chain->add (var);
+ }
+ else
+ {
+ use_operand_p use_p;
+ ssa_op_iter iter;
+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+ {
+ tree op = USE_FROM_PTR (use_p);
+ gcc_assert (TREE_CODE (op) == SSA_NAME);
+ if (fixed->contains (dest))
+ {
+ move = false;
+ fixed->add (op);
+ }
+ else
+ chain->add (op);
+ }
+ }
+ }
+
+ if (dump_enabled_p ())
+ {
+ if (move)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "found chain %G", stmt);
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "ignored chain %G, not single use", stmt);
+ }
+ }
+
+ if (move)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "==> recording stmt %G", stmt);
+
+ for (tree ref : loads)
+ if (stmt_may_clobber_ref_p (stmt, ref, true))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "early breaks not supported as memory used"
+ " may alias.\n");
+ return false;
+ }
+
+ /* If we've moved a VDEF, extract the defining MEM and update
+ usages of it. */
+ tree vdef;
+ if ((vdef = gimple_vdef (stmt)))
+ {
+ /* This statement is to be moved. */
+ LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).safe_push (stmt);
+ *reaching_vuse = gimple_vuse (stmt);
+ }
+ }
+ }
+
+ gsi_prev (gstmt);
+
+ if (!validate_early_exit_stmts (loop_vinfo, chain, fixed, loads, bases,
+ reaching_vuse, gstmt))
+ return false;
+
+ if (gimple_vuse (stmt) && !gimple_vdef (stmt))
+ {
+ LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_push (stmt);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "marked statement for vUSE update: %G", stmt);
+ }
+
+ return true;
+}
+
+/* Function vect_analyze_early_break_dependences.
+
+ Examine all the data references in the loop and make sure that, if we have
+ multiple exits, we are able to safely move stores such that they become
+ safe for vectorization. The function also calculates where to move the
+ instructions to and computes what the new vUSE chain should be.
+
+ This works in tandem with the CFG that will be produced by
+ slpeel_tree_duplicate_loop_to_edge_cfg later on. */
+
+static opt_result
+vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
+{
+ DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
+
+ hash_set<tree> chain, fixed;
+ auto_vec<tree> loads;
+ auto_vec<data_reference *> bases;
+ basic_block dest_bb = NULL;
+ tree vuse = NULL;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "loop contains multiple exits, analyzing"
+ " statement dependencies.\n");
+
+ for (gcond *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
+ {
+ stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c);
+ if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type)
+ continue;
+
+ gimple *stmt = STMT_VINFO_STMT (loop_cond_info);
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+
+ /* Initialize the vUSE chain with the one at the early break. */
+ if (!vuse)
+ vuse = gimple_vuse (c);
+
+ if (!validate_early_exit_stmts (loop_vinfo, &chain, &fixed, &loads,
+ &bases, &vuse, &gsi))
+ return opt_result::failure_at (stmt,
+ "can't safely apply code motion to "
+ "dependencies of %G to vectorize "
+ "the early exit.\n", stmt);
+
+ /* Save the destination as we go; BBs are visited in order and the last
+ one is where statements should be moved to. */
+ if (!dest_bb)
+ dest_bb = gimple_bb (c);
+ else
+ {
+ basic_block curr_bb = gimple_bb (c);
+ if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb))
+ dest_bb = curr_bb;
+ }
+ }
+
+ dest_bb = FALLTHRU_EDGE (dest_bb)->dest;
+ gcc_assert (dest_bb);
+ LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
+
+ for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "updated use: %T, mem_ref: %G",
+ vuse, g);
+ }
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "recorded statements to be moved to BB %d\n",
+ LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
+
+ return opt_result::success ();
+}
+
/* Function vect_analyze_data_ref_dependences.
Examine all the data references in the loop, and make sure there do not
@@ -657,6 +982,11 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
return res;
}
+ /* If we have early break statements in the loop, check to see if they
+ are of a form we can vectorize. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ return vect_analyze_early_break_dependences (loop_vinfo);
+
return opt_result::success ();
}
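
To make the dependency rules above concrete, consider the following pair of
loops (an illustrative, hand-written sketch; not taken from the patch or its
testsuite). The first shows the rejected overlap case, the second the
accepted WAR case where the store can safely be sunk below the break:

unsigned a[1024], b[1024];

/* Rejected: the load of a[i] observes the store to a[i] before the break,
   so sinking the store below the exit test would change the value read.  */
unsigned
rejected (void)
{
  unsigned c = 0;
  for (unsigned i = 0; i < 1024; i++)
    {
      a[i] = 8;
      c = a[i];
      if (b[i])
        break;
    }
  return c;
}

/* Accepted (a WAR dependence): the load happens before the store, so the
   store can be sunk below the exit test without changing what c sees.  */
unsigned
accepted (void)
{
  unsigned c = 0;
  for (unsigned i = 0; i < 1024; i++)
    {
      c = a[i];
      a[i] = 8;
      if (b[i])
        break;
    }
  return c;
}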
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5213aa0169c..4cf7f65dc16 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1040,6 +1040,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
partial_load_store_bias (0),
peeling_for_gaps (false),
peeling_for_niter (false),
+ early_breaks (false),
no_data_dependencies (false),
has_mask_store (false),
scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -11492,6 +11493,55 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
epilogue_vinfo->shared->save_datarefs ();
}
+/* When vectorizing early break statements, instructions that happen before
+ the early break in the current BB need to be moved to after the early
+ break. This function deals with that and assumes that any validity
+ checks have already been performed.
+
+ While moving the instructions, if it encounters a VUSE or VDEF it then
+ corrects the VUSEs as it moves the statements along. The destination
+ block LOOP_VINFO_EARLY_BRK_DEST_BB is the location in which to insert
+ the new statements. */
+
+static void
+move_early_exit_stmts (loop_vec_info loop_vinfo)
+{
+ if (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).is_empty ())
+ return;
+
+ /* Move all stmts that need moving. */
+ basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo);
+ gimple_stmt_iterator dest_gsi = gsi_start_bb (dest_bb);
+
+ for (gimple *stmt : LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo))
+ {
+ /* Check to see if the statement is still required for vectorization or
+ has been elided. */
+ auto stmt_info = loop_vinfo->lookup_stmt (stmt);
+ if (!stmt_info)
+ continue;
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt);
+
+ gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
+ gsi_move_before (&stmt_gsi, &dest_gsi);
+ gsi_prev (&dest_gsi);
+ update_stmt (stmt);
+ }
+
+ /* Update all the stmts with their new reaching VUSES. */
+ tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).last ());
+ for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "updating vuse to %T for stmt %G", vuse, p);
+ unlink_stmt_vdef (p);
+ gimple_set_vuse (p, vuse);
+ update_stmt (p);
+ }
+}
+
/* Function vect_transform_loop.
The analysis phase has determined that the loop is vectorizable.
@@ -11641,6 +11691,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
}
+ /* Handle any code motion that we need to do for early-break vectorization
+ after we've done peeling but just before we start vectorizing. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+ move_early_exit_stmts (loop_vinfo);
+
/* FORNOW: the vectorizer supports only loops which body consist
of one basic block (header + empty latch). When the vectorizer will
support more involved loop forms, the order by which the BBs are
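
As a rough illustration of what move_early_exit_stmts does (again a
hand-written sketch; the function operates on GIMPLE statements, not source
code), the "before" function below shows the shape flagged by the analysis
and the "after" function shows the shape of the vectorized loop body once
the store has been moved into LOOP_VINFO_EARLY_BRK_DEST_BB. Note that the
two functions are not equivalent as scalar code: the iterations at and after
the break are finished off by the scalar epilogue loop created during
peeling, which is what keeps the overall semantics intact.

unsigned a[1024], b[1024];

/* Before: the store to a[i] sits above the exit test and is recorded in
   LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS during analysis.  */
unsigned
before (unsigned x)
{
  for (unsigned i = 0; i < 1024; i++)
    {
      a[i] = x;
      if (b[i] > x)
        return i;
    }
  return ~0u;
}

/* After: the store has been moved below the exit test and its virtual
   operands re-linked, so the load of b[i] now carries the reaching VUSE
   from before the store.  */
unsigned
after (unsigned x)
{
  for (unsigned i = 0; i < 1024; i++)
    {
      if (b[i] > x)
        return i;
      a[i] = x;
    }
  return ~0u;
}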
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 65883e04ad7..3a22bf02f5a 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -13545,6 +13545,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
case vect_first_order_recurrence:
dump_printf (MSG_NOTE, "first order recurrence\n");
break;
+ case vect_early_exit_def:
+ dump_printf (MSG_NOTE, "early exit\n");
+ break;
case vect_unknown_def_type:
dump_printf (MSG_NOTE, "unknown\n");
break;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index d2ddc2e4ad5..76451a7fefe 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -66,6 +66,7 @@ enum vect_def_type {
vect_double_reduction_def,
vect_nested_cycle,
vect_first_order_recurrence,
+ vect_early_exit_def,
vect_unknown_def_type
};
@@ -888,6 +889,10 @@ public:
we need to peel off iterations at the end to form an epilogue loop. */
bool peeling_for_niter;
+ /* When the loop has early breaks that we can vectorize, we need to peel
+ the loop for the break-finding loop. */
+ bool early_breaks;
+
/* List of loop additional IV conditionals found in the loop. */
auto_vec<gcond *> conds;
@@ -942,6 +947,20 @@ public:
/* The controlling loop IV for the scalar loop being vectorized. This IV
controls the natural exits of the loop. */
edge scalar_loop_iv_exit;
+
+ /* Used to store the list of statements needing to be moved if doing early
+ break vectorization as they would violate the scalar loop semantics if
+ vectorized in their current location. These are stored in the order in
+ which they need to be moved. */
+ auto_vec<gimple *> early_break_conflict;
+
+ /* The final basic block to move statements to. In the case of
+ multiple exits this could be pretty far away. */
+ basic_block early_break_dest_bb;
+
+ /* Statements whose VUSES need updating if early break vectorization is to
+ happen. */
+ auto_vec<gimple*> early_break_vuses;
} *loop_vec_info;
/* Access Functions. */
@@ -996,6 +1015,10 @@ public:
#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
#define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
+#define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks
+#define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict
+#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
+#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
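
Finally, the early_break_dest_bb field above is easiest to see with more
than one exit (one more illustrative sketch, not from the patch): when a
loop has two early exits, any flagged statement must end up below the last
exit test, which is why vect_analyze_early_break_dependences keeps the
dominance-latest exit block and records its fall-through destination in
LOOP_VINFO_EARLY_BRK_DEST_BB.

unsigned a[1024], b[1024], c[1024];

/* Two early exits: the store to c[i] must be moved below both exit tests,
   i.e. into the fall-through destination of the second exit's block.  */
unsigned
two_exits (unsigned x)
{
  for (unsigned i = 0; i < 1024; i++)
    {
      c[i] = x;
      if (a[i] > x)
        return i;
      if (b[i] > x)
        return i + 1;
    }
  return ~0u;
}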