Hi, Having simplified peeling, this patch becomes smaller as well: This changes the PHI node updates to support early breaks. It has to support both the case where the loop's exit matches the normal loop exit and one where the early exit is "inverted", i.e. it's an early exit edge. In the latter case we must always restart the loop for VF iterations. For an early exit the reason is obvious, but there are cases where the "normal" exit is located before the early one. This exit then does a check on ivtmp resulting in us leaving the loop since it thinks we're done. In these cases we may still have side-effects to perform so we also go to the scalar loop. For the "normal" exit niters has already been adjusted for peeling; for the early exits we must find out how many iterations we actually did. So we have to recalculate the new position for each exit. For the "inverse" case we essentially peel a vector iteration *after* the vector loop has finished, i.e. conceptually it's the same as vect epilogue peeling but without generating code for the peeled iteration. That'll be handled by the scalar loop. To do this we just adjust niters_vector_mult_vf and remove one VF, and for masked cases we do the same with final_iv. The normal IV update code will then generate the correct values for us. Eventually VRP will simplify the constant bounds and we get the proper scalar unrolling. This means we don't have to make any changes at all to vect_update_ivs_after_vectorizer other than dropping some asserts. Ok for master? Thanks, Tamar gcc/ChangeLog: * tree-vect-loop-manip.cc (vect_set_loop_condition_partial_vectors, vect_set_loop_condition_partial_vectors_avx512, vect_gen_vector_loop_niters_mult_vf): Support peeling a vector iteration. (vect_update_ivs_after_vectorizer): Drop asserts. (vect_do_peeling): Skip forwarder edge. (vect_is_loop_exit_latch_pred): New. * tree-vectorizer.h (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED): New. 
--- inline copy of patch --- diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index d61d7c3a189b279fc3bcbb58c3c0e32521db3cf8..476be8a0bb6da2d06c4ca7052cb07bacecca60b1 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -951,7 +951,18 @@ vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge, if (final_iv) { - gassign *assign = gimple_build_assign (final_iv, orig_niters); + gassign *assign; + /* If vectorizing an inverted early break loop we have to restart the + scalar loop at niters - vf. This matches what we do in + vect_gen_vector_loop_niters_mult_vf for non-masked loops. */ + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) + { + tree ftype = TREE_TYPE (orig_niters); + tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf); + } + else + assign = gimple_build_assign (final_iv, orig_niters); gsi_insert_on_edge_immediate (exit_edge, assign); } @@ -1188,8 +1199,19 @@ vect_set_loop_condition_partial_vectors_avx512 (class loop *loop, if (final_iv) { - gassign *assign = gimple_build_assign (final_iv, orig_niters); - gsi_insert_on_edge_immediate (single_exit (loop), assign); + gassign *assign; + /* If vectorizing an inverted early break loop we have to restart the + scalar loop at niters - vf. This matches what we do in + vect_gen_vector_loop_niters_mult_vf for non-masked loops. 
*/ + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) + { + tree ftype = TREE_TYPE (orig_niters); + tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf); + } + else + assign = gimple_build_assign (final_iv, orig_niters); + gsi_insert_on_edge_immediate (exit_edge, assign); } return cond_stmt; @@ -2157,11 +2179,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, gphi_iterator gsi, gsi1; class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block update_bb = update_e->dest; - basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; - - /* Make sure there exists a single-predecessor exit bb: */ - gcc_assert (single_pred_p (exit_bb)); + gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb); for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb); !gsi_end_p (gsi) && !gsi_end_p (gsi1); @@ -2171,7 +2190,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree step_expr, off; tree type; tree var, ni, ni_name; - gimple_stmt_iterator last_gsi; gphi *phi = gsi.phi (); gphi *phi1 = gsi1.phi (); @@ -2207,7 +2225,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, { tree stype = TREE_TYPE (step_expr); off = fold_build2 (MULT_EXPR, stype, - fold_convert (stype, niters), step_expr); + fold_convert (stype, niters), step_expr); + if (POINTER_TYPE_P (type)) ni = fold_build_pointer_plus (init_expr, off); else @@ -2226,9 +2245,9 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, var = create_tmp_var (type, "tmp"); - last_gsi = gsi_last_bb (exit_bb); gimple_seq new_stmts = NULL; ni_name = force_gimple_operand (ni, &new_stmts, false, var); + /* Exit_bb shouldn't be empty. 
*/ if (!gsi_end_p (last_gsi)) { @@ -2726,11 +2745,19 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo, int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant (); tree type = TREE_TYPE (niters_vector); tree log_vf = build_int_cst (type, exact_log2 (vf)); + tree tree_vf = build_int_cst (type, vf); basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; gcc_assert (niters_vector_mult_vf_ptr != NULL); tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type, niters_vector, log_vf); + + /* If we've peeled a vector iteration then subtract one full vector + iteration. */ + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) + niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type, + niters_vector_mult_vf, tree_vf); + if (!is_gimple_val (niters_vector_mult_vf)) { tree var = create_tmp_var (type, "niters_vector_mult_vf"); @@ -3328,6 +3355,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, niters_vector_mult_vf steps. */ gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); update_e = skip_vector ? e : loop_preheader_edge (epilog); + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + update_e = single_succ_edge (e->dest); + + /* Update the main exit. 
*/ vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf, update_e); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 39aa4d1250efe308acccf484d370f8adfd1ba843..de60da31e2a3030a7fbc302d3f676af9683fd019 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1016,6 +1016,8 @@ public: #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter #define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks +#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \ + (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src) #define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses @@ -2224,6 +2226,7 @@ extern dump_user_location_t find_loop_location (class loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); extern edge vec_init_loop_exit_info (class loop *); +extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool *); /* In tree-vect-stmts.cc. */ extern tree get_related_vectype_for_scalar_type (machine_mode, tree,