diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index fcc950f528b2d1e044be12424c2df11f692ee8ba..240bd7a86233f6b907816f812681e4cd778ecaae 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -568,6 +568,278 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
   return opt_result::success ();
 }
 
+/* This function tries to validate whether an early break vectorization
+   is possible for the current instruction sequence.  Returns true if
+   possible, otherwise false.
+
+   Requirements:
+     - Any memory access must be to a fixed size buffer.
+     - There must not be any loads and stores to the same object.
+     - Multiple loads are allowed as long as they don't alias.
+
+   NOTE:
+     This implementation is very conservative.  Any overlapping loads/stores
+     that take place before the early break statement gets rejected aside
+     from WAR dependencies.
+
+     i.e.:
+
+	a[i] = 8
+	c = a[i]
+	if (b[i])
+	  ...
+
+     is not allowed, but
+
+	c = a[i]
+	a[i] = 8
+	if (b[i])
+	  ...
+
+     is, which is the common case.
+
+   Arguments:
+     - LOOP_VINFO: loop information for the current loop.
+     - CHAIN: Currently detected sequence of instructions that need to be
+	      moved if we are to vectorize this early break.
+     - FIXED: Sequences of SSA_NAMEs that must not be moved, they are
+	      reachable from one or more cond conditions.  If this set
+	      overlaps with CHAIN then FIXED takes precedence.  This deals
+	      with non-single use cases.
+     - LOADS: List of all loads found during traversal.
+     - BASES: List of all load data references found during traversal.
+     - GSTMT: Current position to inspect for validity.  The sequence
+	      will be moved upwards from this point.
+     - REACHING_VUSE: The dominating VUSE found so far.
+     - CURRENT_VDEF: The last VDEF we've seen.  These are updated in
+		     pre-order and updated in post-order after moving the
+		     instruction.  */
+
+static bool
+validate_early_exit_stmts (loop_vec_info loop_vinfo, hash_set<tree> *chain,
+			   hash_set<tree> *fixed, vec<tree> *loads,
+			   vec<data_reference *> *bases, tree *reaching_vuse,
+			   tree *current_vdef, gimple_stmt_iterator *gstmt,
+			   hash_map<tree, tree> *renames)
+{
+  if (gsi_end_p (*gstmt))
+    return true;
+
+  gimple *stmt = gsi_stmt (*gstmt);
+  if (gimple_has_ops (stmt))
+    {
+      tree dest = NULL_TREE;
+      /* Try to find the SSA_NAME being defined.  For Statements with an LHS
+	 use the LHS, if not, assume that the first argument of a call is the
+	 value being defined.  e.g. MASKED_LOAD etc.  */
+      if (gimple_has_lhs (stmt))
+	{
+	  if (is_gimple_assign (stmt))
+	    dest = gimple_assign_lhs (stmt);
+	  else if (const gcall *call = dyn_cast <const gcall *> (stmt))
+	    dest = gimple_call_lhs (call);
+	}
+      else if (const gcall *call = dyn_cast <const gcall *> (stmt))
+	dest = gimple_arg (call, 0);
+      else if (const gcond *cond = dyn_cast <const gcond *> (stmt))
+	{
+	  /* Operands of conds are ones we can't move.  */
+	  fixed->add (gimple_cond_lhs (cond));
+	  fixed->add (gimple_cond_rhs (cond));
+	}
+
+      bool move = false;
+
+      stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
+      if (!stmt_vinfo)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "early breaks only supported. Unknown"
+			     " statement: %G", stmt);
+	  return false;
+	}
+
+      auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
+      if (dr_ref)
+	{
+	  /* We currently only support statically allocated objects due to
+	     not having first-faulting loads support or peeling for
+	     alignment support.  Compute the size of the referenced object
+	     (it could be dynamically allocated).  */
+	  tree obj = DR_BASE_ADDRESS (dr_ref);
+	  if (!obj || TREE_CODE (obj) != ADDR_EXPR)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "early breaks only supported on statically"
+				 " allocated objects.\n");
+	      return false;
+	    }
+
+	  tree refop = TREE_OPERAND (obj, 0);
+	  tree refbase = get_base_address (refop);
+	  if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
+	      || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "early breaks only supported on statically"
+				 " allocated objects.\n");
+	      return false;
+	    }
+
+	  if (DR_IS_READ (dr_ref))
+	    {
+	      loads->safe_push (dest);
+	      bases->safe_push (dr_ref);
+	    }
+	  else if (DR_IS_WRITE (dr_ref))
+	    {
+	      for (auto dr : *bases)
+		if (same_data_refs_base_objects (dr, dr_ref))
+		  {
+		    if (dump_enabled_p ())
+		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+				       vect_location,
+				       "early breaks only supported,"
+				       " overlapping loads and stores found"
+				       " before the break statement.\n");
+		    return false;
+		  }
+	      /* Any write starts a new chain.  */
+	      move = true;
+	    }
+	}
+
+      /* If a statement is live and escapes the loop through usage in the
+	 loop epilogue then we can't move it since we need to maintain its
+	 reachability through all exits.  */
+      bool skip = false;
+      if (STMT_VINFO_LIVE_P (stmt_vinfo)
+	  && !(dr_ref && DR_IS_WRITE (dr_ref)))
+	{
+	  imm_use_iterator imm_iter;
+	  use_operand_p use_p;
+	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, dest)
+	    {
+	      basic_block bb = gimple_bb (USE_STMT (use_p));
+	      skip = bb == LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
+	      if (skip)
+		break;
+	    }
+	}
+
+      /* If we found the defining statement of something that's part of the
+	 chain then expand the chain with the new SSA_VARs being used.  */
+      if (!skip && (chain->contains (dest) || move))
+	{
+	  move = true;
+	  for (unsigned x = 0; x < gimple_num_args (stmt); x++)
+	    {
+	      tree var = gimple_arg (stmt, x);
+	      if (TREE_CODE (var) == SSA_NAME)
+		{
+		  if (fixed->contains (dest))
+		    {
+		      move = false;
+		      fixed->add (var);
+		    }
+		  else
+		    chain->add (var);
+		}
+	      else
+		{
+		  use_operand_p use_p;
+		  ssa_op_iter iter;
+		  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+		    {
+		      tree op = USE_FROM_PTR (use_p);
+		      gcc_assert (TREE_CODE (op) == SSA_NAME);
+		      if (fixed->contains (dest))
+			{
+			  move = false;
+			  fixed->add (op);
+			}
+		      else
+			chain->add (op);
+		    }
+		}
+	    }
+
+	  if (dump_enabled_p ())
+	    {
+	      if (move)
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "found chain %G", stmt);
+	      else
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "ignored chain %G, not single use", stmt);
+	    }
+	}
+
+      if (move)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "==> recording stmt %G", stmt);
+
+	  for (tree ref : *loads)
+	    if (stmt_may_clobber_ref_p (stmt, ref, true))
+	      {
+		if (dump_enabled_p ())
+		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				   "early breaks not supported as memory"
+				   " used may alias.\n");
+		return false;
+	      }
+
+	  /* This statement is to be moved.  */
+	  LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).safe_push (stmt);
+
+	  /* If we've moved a VDEF, extract the defining MEM and update
+	     usages of it.  */
+	  tree vdef;
+	  if ((vdef = gimple_vdef (stmt)))
+	    {
+	      *current_vdef = vdef;
+	      *reaching_vuse = gimple_vuse (stmt);
+	    }
+	}
+    }
+
+  gsi_prev (gstmt);
+
+  if (!validate_early_exit_stmts (loop_vinfo, chain, fixed, loads, bases,
+				  reaching_vuse, current_vdef, gstmt,
+				  renames))
+    return false;
+
+  if (gimple_vuse (stmt)
+      && reaching_vuse && *reaching_vuse
+      && gimple_vuse (stmt) == *current_vdef)
+    {
+      tree new_vuse = *reaching_vuse;
+      tree *renamed = renames->get (new_vuse);
+      if (renamed)
+	new_vuse = *renamed;
+
+      LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_push ({stmt, new_vuse});
+
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "current_use: %T, new_use: %T, mem_ref: %G",
+			 *current_vdef, new_vuse, stmt);
+
+      if (!renamed)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "stored: %T -> %T\n", *current_vdef, new_vuse);
+
+	  renames->put (*current_vdef, new_vuse);
+	}
+    }
+
+  return true;
+}
+
 /* Function vect_analyze_data_ref_dependences.
 
    Examine all the data references in the loop, and make sure there do not
@@ -612,6 +884,84 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
 	  return res;
       }
 
+  /* If we have early break statements in the loop, check to see if they
+     are of a form we can vectorize.  */
+  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+    {
+      hash_set<tree> chain, fixed;
+      auto_vec<tree> loads;
+      auto_vec<data_reference *> bases;
+      hash_map<tree, tree> renames;
+      basic_block dest_bb = NULL;
+      tree vdef = NULL;
+      tree vuse = NULL;
+
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "loop contains multiple exits, analyzing"
+			 " statement dependencies.\n");
+
+      for (gcond *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
+	{
+	  stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c);
+	  if (STMT_VINFO_TYPE (loop_cond_info)
+	      != loop_exit_ctrl_vec_info_type)
+	    continue;
+
+	  gimple *stmt = STMT_VINFO_STMT (loop_cond_info);
+	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+
+	  /* Initialize the vuse chain with the one at the early break.  */
+	  if (!vuse)
+	    vuse = gimple_vuse (c);
+
+	  if (!validate_early_exit_stmts (loop_vinfo, &chain, &fixed,
+					  &loads, &bases, &vuse, &vdef,
+					  &gsi, &renames))
+	    return opt_result::failure_at (stmt,
+					   "can't safely apply code motion to "
+					   "dependencies of %G to vectorize "
+					   "the early exit.\n", stmt);
+
+	  /* Save destination as we go, BB are visited in order and the last
+	     one is where statements should be moved to.  */
+	  if (!dest_bb)
+	    dest_bb = gimple_bb (c);
+	  else
+	    {
+	      basic_block curr_bb = gimple_bb (c);
+	      if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb))
+		dest_bb = curr_bb;
+	    }
+	}
+
+      dest_bb = FALLTHRU_EDGE (dest_bb)->dest;
+      gcc_assert (dest_bb);
+      LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
+
+      /* Do some renaming to update the uses chain.  */
+      for (unsigned i = 0;
+	   i < LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).length (); i++)
+	{
+	  auto g = LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)[i];
+	  tree *tmp = renames.get (g.second);
+	  if (tmp)
+	    LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)[i]
+	      = std::make_pair (g.first, *tmp);
+	}
+
+      /* TODO: Remove?  It's a useful debug statement but may be too
+	 much.  */
+      for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "overrode use: %T, mem_ref: %G",
+			     g.second, g.first);
+	}
+
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "recorded statements to be moved to BB %d\n",
+			 LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
+    }
+
   return opt_result::success ();
 }
 
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 9065811b3b9c2a550baf44768603172b9e26b94b..b4a98de80aa39057fc9b17977dd0e347b4f0fb5d 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11192,6 +11192,45 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
   epilogue_vinfo->shared->save_datarefs ();
 }
 
+/* When vectorizing early break statements, instructions that happen before
+   the early break in the current BB need to be moved to after the early
+   break.  This function deals with that and assumes that any validity
+   checks have already been performed.
+
+   While moving the instructions, if it encounters a VUSE or VDEF it
+   corrects the VUSEs as it moves the statements along.  The destination
+   block for the moved statements is LOOP_VINFO_EARLY_BRK_DEST_BB.  */
+
+static void
+move_early_exit_stmts (loop_vec_info loop_vinfo)
+{
+  /* Move all stmts that need moving.  */
+  basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo);
+  gimple_stmt_iterator dest_gsi = gsi_start_bb (dest_bb);
+
+  for (gimple *stmt : LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt);
+
+      gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
+      gsi_move_before (&stmt_gsi, &dest_gsi);
+      gsi_prev (&dest_gsi);
+      update_stmt (stmt);
+    }
+
+  /* Update all the stmts with their new reaching VUSES.  */
+  for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "updating vuse %G", p.first);
+      unlink_stmt_vdef (p.first);
+      gimple_set_vuse (p.first, p.second);
+      update_stmt (p.first);
+    }
+}
+
 /* Function vect_transform_loop.
 
    The analysis phase has determined that the loop is vectorizable.
@@ -11330,6 +11369,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
       vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
     }
 
+  /* Handle any code motion that we need to for early-break vectorization
+     after we've done peeling but just before we start vectorizing.  */
+  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+    move_early_exit_stmts (loop_vinfo);
+
   /* FORNOW: the vectorizer supports only loops which body consist
      of one basic block (header + empty latch). When the vectorizer will
      support more involved loop forms, the order by which the BBs are
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 1cc003c12e2447eca878f56cb019236f56e96f85..ec65b65b5910e9cbad0a8c7e83c950b6168b98bf 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -919,6 +919,19 @@ public:
      analysis.  */
   vec<_loop_vec_info *> epilogue_vinfos;
 
+  /* Used to store the list of statements needing to be moved if doing early
+     break vectorization as they would violate the scalar loop semantics if
+     vectorized in their current location.  */
+  auto_vec<gimple *> early_break_conflict;
+
+  /* The final basic block where to move statements to.  In the case of
+     multiple exits this could be pretty far away.  */
+  basic_block early_break_dest_bb;
+
+  /* Statements whose VUSES need updating if early break vectorization is
+     to happen.  */
+  auto_vec<std::pair<gimple *, tree>> early_break_vuses;
+
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -972,6 +985,9 @@ public:
 #define LOOP_VINFO_REDUCTION_CHAINS(L)     (L)->reduction_chains
 #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
+#define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict
+#define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
+#define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
 #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
 #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
 #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies