diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 0b0154ffd7bf031a005de993b101d9db6dd98c43..d01512ea46467f1cf77793bdc75b48e71b0b9641 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_CFGLOOP_H
 
 #include "cfgloopmanip.h"
+#include "target.h"
 
 /* Structure to hold decision about unrolling/peeling.  */
 enum lpt_dec
@@ -268,6 +269,9 @@ public:
      the basic-block from being collected but its index can still be
      reused.  */
   basic_block former_header;
+
+  /* Keep track of vector sizes we know we can vectorize the epilogue with.  */
+  vector_sizes epilogue_vsizes;
 };
 
 /* Set if the loop is known to be infinite.  */
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 4ad1f658708f83dbd8789666c26d4bd056837bc6..f3e81bcd00b3f125389aa15b12dc5201b3578d20 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -198,6 +198,7 @@ flow_loop_free (class loop *loop)
 	  exit->prev = exit;
 	}
 
+      loop->epilogue_vsizes.release();
       ggc_free (loop->exits);
       ggc_free (loop);
     }
@@ -355,6 +356,7 @@ alloc_loop (void)
   loop->nb_iterations_upper_bound = 0;
   loop->nb_iterations_likely_upper_bound = 0;
   loop->nb_iterations_estimate = 0;
+  loop->epilogue_vsizes.create(8);
 
   return loop;
 }
diff --git a/gcc/gengtype.c b/gcc/gengtype.c
index 53317337cf8c8e8caefd6b819d28b3bba301e755..80fb6ef71465b24e034fa45d69fec56be6b2e7f8 100644
--- a/gcc/gengtype.c
+++ b/gcc/gengtype.c
@@ -5197,6 +5197,7 @@ main (int argc, char **argv)
       POS_HERE (do_scalar_typedef ("widest_int", &pos));
       POS_HERE (do_scalar_typedef ("int64_t", &pos));
       POS_HERE (do_scalar_typedef ("poly_int64", &pos));
+      POS_HERE (do_scalar_typedef ("poly_uint64", &pos));
       POS_HERE (do_scalar_typedef ("uint64_t", &pos));
       POS_HERE (do_scalar_typedef ("uint8", &pos));
       POS_HERE (do_scalar_typedef ("uintptr_t", &pos));
@@ -5206,6 +5207,7 @@ main (int argc, char **argv)
       POS_HERE (do_scalar_typedef ("machine_mode", &pos));
       POS_HERE (do_scalar_typedef ("fixed_size_mode", &pos));
       POS_HERE (do_scalar_typedef ("CONSTEXPR", &pos));
+      POS_HERE (do_scalar_typedef ("vector_sizes", &pos));
       POS_HERE (do_typedef ("PTR",
 			    create_pointer (resolve_typedef ("void", &pos)),
 			    &pos));
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 5c25441c70a271f04730486e513437fffa75b7e3..189f7458b1b20be06a9a20d3ee05e74bc176434c 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree.h"
 #include "gimple.h"
 #include "cfghooks.h"
+#include "tree-if-conv.h"
 #include "tree-pass.h"
 #include "ssa.h"
 #include "fold-const.h"
@@ -1724,7 +1725,7 @@ vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
    Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
    CODE and NITERS are as for vect_update_inits_of_dr.  */
 
-static void
+void
 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
 			  tree_code code)
 {
@@ -1736,19 +1737,7 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
 
   /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
-    {
-      gimple_seq seq;
-      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
-      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
-
-      niters = fold_convert (sizetype, niters);
-      niters = force_gimple_operand (niters, &seq, false, var);
-      if (seq)
-	{
-	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
-	  gcc_assert (!new_bb);
-	}
-    }
+    niters = fold_convert (sizetype, niters);
 
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
@@ -2401,14 +2390,18 @@ class loop *
 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 		 tree *niters_vector, tree *step_vector,
 		 tree *niters_vector_mult_vf_var, int th,
-		 bool check_profitability, bool niters_no_overflow)
+		 bool check_profitability, bool niters_no_overflow,
+		 tree *advance)
 {
   edge e, guard_e;
-  tree type = TREE_TYPE (niters), guard_cond;
+  tree type = TREE_TYPE (niters), guard_cond, advance_guard = NULL;
   basic_block guard_bb, guard_to;
   profile_probability prob_prolog, prob_vector, prob_epilog;
   int estimated_vf;
   int prolog_peeling = 0;
+  bool vect_epilogues
+    = loop_vinfo->epilogue_vinfos.length () > 0
+      && !LOOP_VINFO_EPILOGUE_P (loop_vinfo);
   /* We currently do not support prolog peeling if the target alignment is not
      known at compile time.  'vect_gen_prolog_loop_niters' depends on the
      target alignment being constant.  */
@@ -2466,15 +2459,61 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   else
     niters_prolog = build_int_cst (type, 0);
 
+  loop_vec_info epilogue_vinfo = NULL;
+  if (vect_epilogues)
+    {
+      epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
+      loop_vinfo->epilogue_vinfos.ordered_remove (0);
+
+      /* Don't vectorize epilogues if this is not the innermost loop or if
+	 the epilogue loop may need to be peeled for alignment.  */
+      if (loop->inner != NULL
+	  || LOOP_VINFO_PEELING_FOR_ALIGNMENT (epilogue_vinfo))
+	vect_epilogues = false;
+    }
+
+  unsigned int lowest_vf = constant_lower_bound (vf);
+  bool epilogue_any_upper_bound = false;
+  unsigned HOST_WIDE_INT eiters = 0;
+  tree niters_vector_mult_vf;
+
+  /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
+     on niters already adjusted for the iterations of the prologue.  */
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && known_eq (vf, lowest_vf))
+    {
+      vector_sizes vector_sizes = loop->epilogue_vsizes;
+      unsigned next_size = 0;
+      eiters = (LOOP_VINFO_INT_NITERS (loop_vinfo)
+		- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
+
+      if (prolog_peeling > 0)
+	eiters -= prolog_peeling;
+      eiters
+	= eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
+      epilogue_any_upper_bound = true;
+
+      unsigned int ratio;
+      while (next_size < vector_sizes.length ()
+	     && !(constant_multiple_p (current_vector_size,
+				       vector_sizes[next_size], &ratio)
+		  && eiters >= lowest_vf / ratio))
+	next_size += 1;
+
+      if (next_size == vector_sizes.length ())
+	vect_epilogues = false;
+    }
+
   /* Prolog loop may be skipped.  */
   bool skip_prolog = (prolog_peeling != 0);
   /* Skip to epilog if scalar loop may be preferred.  It's only needed
-     when we peel for epilog loop and when it hasn't been checked with
-     loop versioning.  */
+     when we peel for epilog loop or when we version the loop.  */
   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
 		      ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
 				  bound_prolog + bound_epilog)
-		      : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
+		      : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
+			 || vect_epilogues));
   /* Epilog loop must be executed if the number of iterations for epilog
      loop is known at compile time, otherwise we need to add a check at
      the end of vector loop and skip to the end of epilog loop.  */
@@ -2503,7 +2542,17 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
     }
 
   dump_user_location_t loop_loc = find_loop_location (loop);
-  class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
+  class loop *scalar_loop;
+  if (vect_epilogues)
+    {
+      scalar_loop = get_loop_copy (loop);
+      LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
+	= LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
+      LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = NULL;
+    }
+  else
+    scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
+
   if (prolog_peeling)
     {
       e = loop_preheader_edge (loop);
@@ -2586,12 +2635,24 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 	}
       /* Peel epilog and put it on exit edge of loop.  */
       epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
+
      if (!epilog)
 	{
 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
 	  gcc_unreachable ();
 	}
+
+      if (epilogue_any_upper_bound && prolog_peeling >= 0)
+	{
+	  epilog->any_upper_bound = true;
+	  epilog->nb_iterations_upper_bound = eiters + 1;
+	}
+      else if (prolog_peeling < 0)
+	{
+	  epilog->any_upper_bound = false;
+	}
+
       epilog->force_vectorize = false;
       slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
@@ -2608,6 +2669,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 				       check_profitability);
 	  /* Build guard against NITERSM1 since NITERS may overflow.  */
 	  guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
+	  advance_guard = guard_cond;
 	  guard_bb = anchor;
 	  guard_to = split_edge (loop_preheader_edge (epilog));
 	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
@@ -2635,7 +2697,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 	}
 
       basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
-      tree niters_vector_mult_vf;
       /* If loop is peeled for non-zero constant times, now niters refers to
	 orig_niters - prolog_peeling, it won't overflow even the orig_niters
	 overflows.  */
@@ -2699,10 +2760,105 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       adjust_vec_debug_stmts ();
       scev_reset ();
     }
+
+  if (vect_epilogues)
+    {
+      epilog->aux = epilogue_vinfo;
+      LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
+
+      loop_constraint_clear (epilog, LOOP_C_INFINITE);
+
+      /* We now must calculate the number of iterations for our epilogue.  */
+      tree cond_niters, niters;
+
+      /* Depending on whether we peel for gaps we take niters or niters - 1,
+	 we will refer to this as N - G, where both N and G are the NITERS
+	 and GAP for the original loop.  */
+      niters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+	       ? LOOP_VINFO_NITERSM1 (loop_vinfo)
+	       : LOOP_VINFO_NITERS (loop_vinfo);
+
+      /* Here we build a vector factorization mask:
+	 vf_mask = ~(VF - 1), where VF is the Vectorization Factor.  */
+      tree vf_mask = build_int_cst (TREE_TYPE (niters),
+				    LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+      vf_mask = fold_build2 (MINUS_EXPR, TREE_TYPE (vf_mask),
+			     vf_mask,
+			     build_one_cst (TREE_TYPE (vf_mask)));
+      vf_mask = fold_build1 (BIT_NOT_EXPR, TREE_TYPE (niters), vf_mask);
+
+      /* Here we calculate:
+	 niters = N - ((N - G) & ~(VF - 1))  */
+      niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
+			    LOOP_VINFO_NITERS (loop_vinfo),
+			    fold_build2 (BIT_AND_EXPR, TREE_TYPE (niters),
					 niters,
					 vf_mask));
+
+      if (skip_vector)
+	{
+	  /* We do this by constructing:
	     cond_niters = !do_we_enter_main_loop ? N + niters_prolog : niters
	     We add npeel, the number of peeled iterations for alignment, to N
	     in case we don't enter the main loop, as these have already been
	     subtracted from N (the number of iterations of the main loop).
	     Since the prolog peeling is also skipped if we skip the
	     vectorization we must add them back.  */
+	  cond_niters
+	    = fold_build3 (COND_EXPR, TREE_TYPE (niters),
+			   advance_guard,
+			   fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
					LOOP_VINFO_NITERS (loop_vinfo),
					fold_convert (TREE_TYPE (niters),
						      niters_prolog)),
+			   niters);
+	}
+      else
+	cond_niters = niters;
+
+      LOOP_VINFO_NITERS (epilogue_vinfo) = cond_niters;
+      LOOP_VINFO_NITERSM1 (epilogue_vinfo)
+	= fold_build2 (MINUS_EXPR, TREE_TYPE (cond_niters),
+		       cond_niters, build_one_cst (TREE_TYPE (cond_niters)));
+
+      /* We now calculate the number of iterations we must advance our
	 epilogue's data references by.
	 Make sure to use sizetype here as we might use a negative constant
	 if the loop peels for alignment.  If the target is 64-bit this can
	 go wrong if the computation is not done in sizetype.  */
+      *advance = fold_convert (sizetype, niters);
+
+      *advance = fold_build2 (MINUS_EXPR, TREE_TYPE (*advance),
+			      *advance,
+			      fold_convert (sizetype,
					    LOOP_VINFO_NITERS (loop_vinfo)));
+      *advance = fold_build2 (MINUS_EXPR, TREE_TYPE (*advance),
+			      build_zero_cst (TREE_TYPE (*advance)),
+			      *advance);
+
+      if (skip_vector)
+	{
+	  *advance
+	    = fold_build3 (COND_EXPR, TREE_TYPE (*advance),
+			   advance_guard,
+			   fold_build2 (MINUS_EXPR, TREE_TYPE (*advance),
					build_zero_cst (TREE_TYPE (*advance)),
					fold_convert (TREE_TYPE (*advance),
						      niters_prolog)),
+			   *advance);
+	}
+
+      /* Redo the peeling for niter analysis as the NITERS and the need for
	 alignment have been updated to take the main loop into account.  */
+      LOOP_VINFO_PEELING_FOR_NITER (epilogue_vinfo) = false;
+      determine_peel_for_niter (epilogue_vinfo);
+    }
+
   adjust_vec.release ();
   free_original_copy_tables ();
-  return epilog;
+  return vect_epilogues ? epilog : NULL;
 }
 
 /* Function vect_create_cond_for_niters_checks.
@@ -2966,9 +3122,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
    *COND_EXPR_STMT_LIST.  */
 
 class loop *
-vect_loop_versioning (loop_vec_info loop_vinfo,
-		      unsigned int th, bool check_profitability,
-		      poly_uint64 versioning_threshold)
+vect_loop_versioning (loop_vec_info loop_vinfo)
 {
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
   class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
@@ -2988,10 +3142,15 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
+  poly_uint64 versioning_threshold
+    = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
   tree version_simd_if_cond
     = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
+  unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
 
-  if (check_profitability)
+  if (th >= vect_vf_for_cost (loop_vinfo)
+      && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && !ordered_p (th, versioning_threshold))
     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
 			     build_int_cst (TREE_TYPE (scalar_loop_iters),
					    th - 1));
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index b0cbbac0cb5ba1ffce706715d3dbb9139063803d..6dbde0fe35c29d0357cf5c6e7ab5599957a8242a 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -885,6 +885,8 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
 	    }
 	}
     }
+
+  epilogue_vinfos.create (6);
 }
 
 /* Free all levels of MASKS.  */
@@ -960,6 +962,7 @@ _loop_vec_info::~_loop_vec_info ()
   release_vec_loop_masks (&masks);
   delete ivexpr_map;
   delete scan_map;
+  epilogue_vinfos.release ();
 
   loop->aux = NULL;
 }
@@ -1726,7 +1729,13 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
       return 0;
     }
 
-  HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop);
+  HOST_WIDE_INT estimated_niter = -1;
+
+  if (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
+    estimated_niter
+      = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1;
+  if (estimated_niter == -1)
+    estimated_niter = estimated_stmt_executions_int (loop);
   if (estimated_niter == -1)
     estimated_niter = likely_max_stmt_executions_int (loop);
   if (estimated_niter != -1
@@ -1852,6 +1861,56 @@ vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
 	}
     }
 }
 
+
+/* Decide whether we need to create an epilogue loop to handle
+   remaining scalar iterations and set PEELING_FOR_NITERS accordingly.  */
+
+void
+determine_peel_for_niter (loop_vec_info loop_vinfo)
+{
+  unsigned HOST_WIDE_INT const_vf;
+  HOST_WIDE_INT max_niter
+    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+  unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
+  if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
+    th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
					  (loop_vinfo));
+
+  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+    /* The main loop handles all iterations.  */
+    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
+  else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+    {
+      /* Work out the (constant) number of iterations that need to be
	 peeled for reasons other than niters.  */
+      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+	peel_niter += 1;
+      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
+		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+    }
+  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+	   /* ??? When peeling for gaps but not alignment, we could
	      try to check whether the (variable) niters is known to be
	      VF * N + 1.  That's something of a niche case though.  */
+	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
+	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
		< (unsigned) exact_log2 (const_vf))
+	       /* In case of versioning, check if the maximum number of
		  iterations is greater than th.  If they are identical,
		  the epilogue is unnecessary.  */
+	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
+		   || ((unsigned HOST_WIDE_INT) max_niter
		       > (th / const_vf) * const_vf))))
+    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+}
+
+
 /* Function vect_analyze_loop_2.
 
    Apply a set of analyses on LOOP, and create a loop_vec_info struct
@@ -1864,6 +1923,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
   int res;
   unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
   poly_uint64 min_vf = 2;
+  loop_vec_info orig_loop_vinfo = NULL;
 
   /* The first group of checks is independent of the vector size.  */
   fatal = true;
@@ -1979,7 +2039,6 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
   vect_compute_single_scalar_iteration_cost (loop_vinfo);
 
   poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  unsigned th;
 
   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
   ok = vect_analyze_slp (loop_vinfo, *n_stmts);
@@ -2019,9 +2078,6 @@ start_over:
 			   LOOP_VINFO_INT_NITERS (loop_vinfo));
     }
 
-  HOST_WIDE_INT max_niter
-    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
-
   /* Analyze the alignment of the data-refs in the loop.
      Fail if a data reference is found that cannot be vectorized.  */
 
@@ -2125,42 +2181,7 @@ start_over:
     return opt_result::failure_at (vect_location,
				   "Loop costings not worthwhile.\n");
 
-  /* Decide whether we need to create an epilogue loop to handle
-     remaining scalar iterations.  */
-  th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
-
-  unsigned HOST_WIDE_INT const_vf;
-  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-    /* The main loop handles all iterations.  */
-    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
-  else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
-    {
-      /* Work out the (constant) number of iterations that need to be
-	 peeled for reasons other than niters.  */
-      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
-      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
-	peel_niter += 1;
-      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
-		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
-	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
-    }
-  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
-	   /* ??? When peeling for gaps but not alignment, we could
-	      try to check whether the (variable) niters is known to be
-	      VF * N + 1.  That's something of a niche case though.  */
-	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
-	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-		< (unsigned) exact_log2 (const_vf))
-	       /* In case of versioning, check if the maximum number of
-		  iterations is greater than th.  If they are identical,
-		  the epilogue is unnecessary.  */
-	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
-		   || ((unsigned HOST_WIDE_INT) max_niter
-		       > (th / const_vf) * const_vf))))
-    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
-
+  determine_peel_for_niter (loop_vinfo);
   /* If an epilogue loop is required make sure we can create one.  */
   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
@@ -2183,9 +2204,12 @@ start_over:
      enough for both peeled prolog loop and vector loop.  This check can be
      merged along with threshold check of loop versioning, so increase
     threshold for this case if necessary.  */
-  if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
+  if (LOOP_REQUIRES_VERSIONING (loop_vinfo)
+      || ((orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
+	  && LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)))
     {
       poly_uint64 niters_th = 0;
+      unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
 
       if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
 	{
@@ -2206,6 +2230,14 @@ start_over:
       /* One additional iteration because of peeling for gap.  */
       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	niters_th += 1;
+
+      /* Use the same condition as vect_transform_loop to decide when to use
	 the cost to determine a versioning threshold.  */
+      if (th >= vect_vf_for_cost (loop_vinfo)
+	  && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+	  && ordered_p (th, niters_th))
+	niters_th = ordered_max (poly_uint64 (th), niters_th);
+
       LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
     }
 
@@ -2329,14 +2361,8 @@ again:
    be vectorized.  */
 opt_loop_vec_info
 vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo,
-		   vec_info_shared *shared)
+		   vec_info_shared *shared, vector_sizes vector_sizes)
 {
-  auto_vector_sizes vector_sizes;
-
-  /* Autodetect first vector size we try.  */
-  current_vector_size = 0;
-  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes,
-						loop->simdlen != 0);
   unsigned int next_size = 0;
 
   DUMP_VECT_SCOPE ("analyze_loop_nest");
@@ -2357,6 +2383,9 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo,
   poly_uint64 autodetected_vector_size = 0;
   opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
   poly_uint64 first_vector_size = 0;
+  poly_uint64 lowest_th = 0;
+  unsigned vectorized_loops = 0;
+  bool vect_epilogues = !loop->simdlen && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK);
   while (1)
     {
       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
@@ -2375,24 +2404,54 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo,
 
       if (orig_loop_vinfo)
	LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
+      else if (vect_epilogues && first_loop_vinfo)
+	{
+	  LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
+	}
 
       opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
       if (res)
	{
	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
+	  vectorized_loops++;
 
-	  if (loop->simdlen
-	      && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
-			   (unsigned HOST_WIDE_INT) loop->simdlen))
+	  if ((loop->simdlen
+	       && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+			    (unsigned HOST_WIDE_INT) loop->simdlen))
+	      || vect_epilogues)
	    {
	      if (first_loop_vinfo == NULL)
		{
		  first_loop_vinfo = loop_vinfo;
+		  lowest_th
+		    = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
		  first_vector_size = current_vector_size;
		  loop->aux = NULL;
		}
	      else
-		delete loop_vinfo;
+		{
+		  /* Keep track of vector sizes that we know we can vectorize
		     the epilogue with.  */
+		  if (vect_epilogues)
+		    {
+		      loop->aux = NULL;
+		      loop->epilogue_vsizes.reserve (1);
+		      loop->epilogue_vsizes.quick_push (current_vector_size);
+		      first_loop_vinfo->epilogue_vinfos.reserve (1);
+		      first_loop_vinfo->epilogue_vinfos.quick_push (loop_vinfo);
+		      LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
+		      poly_uint64 th
+			= LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
+		      gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
				  || maybe_ne (lowest_th, 0U));
+		      /* Keep track of the known smallest versioning
			 threshold.  */
+		      if (ordered_p (lowest_th, th))
+			lowest_th = ordered_min (lowest_th, th);
+		    }
+		  else
+		    delete loop_vinfo;
+		}
	    }
	  else
	    {
@@ -2430,6 +2489,8 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo,
	  dump_dec (MSG_NOTE, current_vector_size);
	  dump_printf (MSG_NOTE, "\n");
	}
+      LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
+
       return first_loop_vinfo;
     }
   else
@@ -8460,6 +8521,33 @@ vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
     *seen_store = stmt_info;
 }
+
+
+static tree
+replace_ops (tree op, hash_map<tree, tree> &mapping)
+{
+  if (!op)
+    return NULL;
+
+  tree *new_op;
+  tree ret = NULL;
+  for (int j = 0; j < TREE_OPERAND_LENGTH (op); ++j)
+    {
+      if ((new_op = mapping.get (TREE_OPERAND (op, j))))
+	{
+	  TREE_OPERAND (op, j) = *new_op;
+	  ret = *new_op;
+	}
+      else
+	ret = replace_ops (TREE_OPERAND (op, j), mapping);
+
+      if (ret)
+	return ret;
+    }
+
+  return NULL;
+}
 
 /* Function vect_transform_loop.
 
    The analysis phase has determined that the loop is vectorizable.
@@ -8483,6 +8571,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   gimple *stmt;
   bool check_profitability = false;
   unsigned int th;
+  auto_vec<gimple *> orig_stmts;
+  auto_vec<dr_vec_info *> gather_scatter_drs;
+  auto_vec<gimple *> gather_scatter_stmts;
 
   DUMP_VECT_SCOPE ("vec_transform_loop");
@@ -8497,11 +8588,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   if (th >= vect_vf_for_cost (loop_vinfo)
       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
     {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "Profitability threshold is %d loop iterations.\n",
-                         th);
-      check_profitability = true;
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "Profitability threshold is %d loop iterations.\n",
+			 th);
+      check_profitability = true;
     }
 
   /* Make sure there exists a single-predecessor exit bb.  Do this before
     versioning.  */
@@ -8519,18 +8610,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     {
-      poly_uint64 versioning_threshold
-	= LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
-      if (check_profitability
-	  && ordered_p (poly_uint64 (th), versioning_threshold))
-	{
-	  versioning_threshold = ordered_max (poly_uint64 (th),
-					      versioning_threshold);
-	  check_profitability = false;
-	}
       class loop *sloop
-	= vect_loop_versioning (loop_vinfo, th, check_profitability,
-				versioning_threshold);
+	= vect_loop_versioning (loop_vinfo);
       sloop->force_vectorize = false;
       check_profitability = false;
     }
@@ -8555,9 +8636,58 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
   tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
   bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
+  tree advance;
   epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
			      &step_vector, &niters_vector_mult_vf, th,
-			      check_profitability, niters_no_overflow);
+			      check_profitability, niters_no_overflow,
+			      &advance);
+
+  if (epilogue)
+    {
+      basic_block *orig_bbs = get_loop_body (loop);
+      loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue);
+
+      gimple_stmt_iterator orig_gsi;
+      gphi_iterator orig_phi_gsi;
+      gimple *stmt;
+      stmt_vec_info stmt_vinfo;
+
+      /* The stmt_vec_info's of the epilogue were constructed for the main
	 loop and need to be updated to refer to the cloned variables used
	 in the epilogue loop.  We do this by assuming the original main
	 loop and the epilogue loop are identical (aside from the different
	 SSA names).  This means we assume we can go through each BB in the
	 loop and each STMT in each BB and map them 1:1, replacing the
	 STMT_VINFO_STMT of each stmt_vec_info in the epilogue's
	 loop_vec_info.  Here we only keep track of the original state of
	 the main loop, before vectorization.  After vectorization we
	 proceed to update the epilogue's stmt_vec_info information.  We
	 also update the references in PATTERN_DEF_SEQ's, RELATED_STMT's
	 and data_references.  Mainly the latter has to be updated after we
	 are done vectorizing the main loop, as the data_references are
	 shared between main and epilogue.  */
+      for (unsigned i = 0; i < loop->num_nodes; ++i)
+	{
+	  for (orig_phi_gsi = gsi_start_phis (orig_bbs[i]);
	       !gsi_end_p (orig_phi_gsi); gsi_next (&orig_phi_gsi))
+	    orig_stmts.safe_push (orig_phi_gsi.phi ());
+	  for (orig_gsi = gsi_start_bb (orig_bbs[i]);
	       !gsi_end_p (orig_gsi); gsi_next (&orig_gsi))
+	    {
+	      stmt = gsi_stmt (orig_gsi);
+	      orig_stmts.safe_push (stmt);
+	      stmt_vinfo = epilogue_vinfo->lookup_stmt (stmt);
+	      /* Data references pointing to gather loads and scatter stores
		 require special treatment because the address computation
		 happens in a different gimple node, pointed to by DR_REF.
		 This is in contrast to normal loads and stores, where we
		 only need to update the offset of the data reference.  */
+	      if (stmt_vinfo
		  && STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
+		gather_scatter_drs.safe_push (STMT_VINFO_DR_INFO (stmt_vinfo));
+	    }
+	}
+    }
+
   if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)
       && LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo).initialized_p ())
     scale_loop_frequencies (LOOP_VINFO_SCALAR_LOOP (loop_vinfo),
@@ -8814,57 +8944,157 @@ vect_transform_loop (loop_vec_info loop_vinfo)
      since vectorized loop can have loop-carried dependencies.  */
   loop->safelen = 0;
 
-  /* Don't vectorize epilogue for epilogue.  */
-  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
-    epilogue = NULL;
-
-  if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
-    epilogue = NULL;
-
   if (epilogue)
     {
-      auto_vector_sizes vector_sizes;
-      targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
-      unsigned int next_size = 0;
-
-      /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
-	 on niters already ajusted for the iterations of the prologue.  */
-      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-	  && known_eq (vf, lowest_vf))
-	{
-	  unsigned HOST_WIDE_INT eiters
-	    = (LOOP_VINFO_INT_NITERS (loop_vinfo)
-	       - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
-	  eiters
-	    = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
-	  epilogue->nb_iterations_upper_bound = eiters - 1;
-	  epilogue->any_upper_bound = true;
-
-	  unsigned int ratio;
-	  while (next_size < vector_sizes.length ()
-		 && !(constant_multiple_p (current_vector_size,
-					   vector_sizes[next_size], &ratio)
-		      && eiters >= lowest_vf / ratio))
-	    next_size += 1;
-	}
-      else
-	while (next_size < vector_sizes.length ()
-	       && maybe_lt (current_vector_size, vector_sizes[next_size]))
-	  next_size += 1;
+      loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue);
+      vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR);
 
-      if (next_size == vector_sizes.length ())
-	epilogue = NULL;
-    }
+      auto_vec<stmt_vec_info> pattern_worklist, related_worklist;
+      hash_map<tree, tree> mapping;
+      gimple * orig_stmt, * new_stmt;
+      gimple_stmt_iterator epilogue_gsi;
+      gphi_iterator epilogue_phi_gsi;
+      stmt_vec_info stmt_vinfo = NULL, related_vinfo;
+      basic_block *epilogue_bbs = get_loop_body (epilogue);
 
-  if (epilogue)
-    {
+      epilogue->simduid = loop->simduid;
       epilogue->force_vectorize = loop->force_vectorize;
       epilogue->safelen = loop->safelen;
       epilogue->dont_vectorize = false;
+      LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs;
+
+      /* We are done vectorizing the main loop, so now we update the
	 epilogue's stmt_vec_info's.  At the same time we set the gimple
	 UID of each statement in the epilogue, as these are used to look
	 them up in the epilogue's loop_vec_info later.  We also keep track
	 of which stmt_vec_info's have PATTERN_DEF_SEQ's and RELATED_STMT's
	 that might need updating, and we construct a mapping between
	 variables defined in the main loop and their corresponding names
	 in the epilogue.  */
+      for (unsigned i = 0; i < loop->num_nodes; ++i)
+	{
+	  for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]);
	       !gsi_end_p (epilogue_phi_gsi); gsi_next (&epilogue_phi_gsi))
+	    {
+	      orig_stmt = orig_stmts[0];
+	      orig_stmts.ordered_remove (0);
+	      new_stmt = epilogue_phi_gsi.phi ();
+
+	      stmt_vinfo
		= epilogue_vinfo->lookup_stmt (orig_stmt);
+
+	      STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
+	      gimple_set_uid (new_stmt, gimple_uid (orig_stmt));
+
+	      mapping.put (gimple_phi_result (orig_stmt),
			   gimple_phi_result (new_stmt));
+
+	      if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo))
		pattern_worklist.safe_push (stmt_vinfo);
+
+	      related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
+	      while (related_vinfo && related_vinfo != stmt_vinfo)
		{
		  related_worklist.safe_push (related_vinfo);
		  /* Set BB such that the assert in
		     'get_initial_def_for_reduction' is able to determine
		     that the BB of the related stmt is inside this loop.  */
		  gimple_set_bb (STMT_VINFO_STMT (related_vinfo),
				 gimple_bb (new_stmt));
		  related_vinfo = STMT_VINFO_RELATED_STMT (related_vinfo);
		}
	    }
+
+	  for (epilogue_gsi = gsi_start_bb (epilogue_bbs[i]);
	       !gsi_end_p (epilogue_gsi); gsi_next (&epilogue_gsi))
+	    {
+	      orig_stmt = orig_stmts[0];
+	      orig_stmts.ordered_remove (0);
+	      new_stmt = gsi_stmt (epilogue_gsi);
+
+	      stmt_vinfo
		= epilogue_vinfo->lookup_stmt (orig_stmt);
+
+	      STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
+	      gimple_set_uid (new_stmt, gimple_uid (orig_stmt));
+
+	      if (is_gimple_assign (orig_stmt))
		{
		  gcc_assert (is_gimple_assign (new_stmt));
		  mapping.put (gimple_assign_lhs (orig_stmt),
			       gimple_assign_lhs (new_stmt));
		}
+
+	      if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo))
		pattern_worklist.safe_push (stmt_vinfo);
+
+	      related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
+	      while (related_vinfo && related_vinfo != stmt_vinfo)
		{
		  related_worklist.safe_push (related_vinfo);
		  /* Set BB such that the assert in
		     'get_initial_def_for_reduction' is able to determine
		     that the BB of the related stmt is inside this loop.  */
		  gimple_set_bb (STMT_VINFO_STMT (related_vinfo),
				 gimple_bb (new_stmt));
		  related_vinfo = STMT_VINFO_RELATED_STMT (related_vinfo);
		}
	    }
	  gcc_assert (orig_stmts.length () == 0);
	}
+
+      /* The PATTERN_DEF_SEQ's in the epilogue were constructed using the
	 original main loop and thus need to be updated to refer to the
	 cloned variables used in the epilogue.  */
+      for (unsigned i = 0; i < pattern_worklist.length (); ++i)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (pattern_worklist[i]);
	  tree *new_op;

	  while (seq)
	    {
	      for (unsigned j = 1; j < gimple_num_ops (seq); ++j)
		{
		  tree op = gimple_op (seq, j);
		  if ((new_op = mapping.get(op)))
		    gimple_set_op (seq, j, *new_op);
		  else
		    {
		      op = unshare_expr (op);
		      replace_ops (op, mapping);
		      gimple_set_op (seq, j, op);
		    }
		}
	      seq = seq->next;
	    }
	}
+
+      /* Just like the PATTERN_DEF_SEQ's the RELATED_STMT's also need to be
	 updated.  */
+      for (unsigned i = 0; i < related_worklist.length (); ++i)
	{
	  tree *new_t;
	  gimple * stmt = STMT_VINFO_STMT (related_worklist[i]);
	  for (unsigned j = 1; j < gimple_num_ops (stmt); ++j)
	    if ((new_t = mapping.get(gimple_op (stmt, j))))
	      gimple_set_op (stmt, j, *new_t);
	}
+
+      tree new_op;
+      for (unsigned i = 0; i < gather_scatter_drs.length (); ++i)
	{
	  dr_vec_info *dr_info = gather_scatter_drs[i];
	  data_reference *dr = dr_info->dr;
	  gcc_assert (dr);
	  DR_REF (dr) = unshare_expr (DR_REF (dr));
	  new_op = replace_ops (DR_REF (dr), mapping);
	  if (new_op)
	    DR_STMT (dr_info->dr) = SSA_NAME_DEF_STMT (new_op);
	}
 
-      /* We may need to if-convert epilogue to vectorize it.  */
-      if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
-	tree_if_conversion (epilogue);
+      epilogue_vinfo->shared->datarefs_copy.release ();
+      epilogue_vinfo->shared->save_datarefs ();
     }
 
   return epilogue;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 1456cde4c2c2dec7244c504d2c496248894a4f1e..9788c02535999e2e08cb03d1f20ddd80ff448d51 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -564,6 +564,8 @@ public:
      this points to the original vectorized loop.  Otherwise NULL.  */
   _loop_vec_info *orig_loop_info;
 
+  vec<_loop_vec_info *> epilogue_vinfos;
+
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -1480,13 +1482,15 @@ extern void vect_set_loop_condition (class loop *, loop_vec_info,
 extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge);
 class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *,
						    class loop *, edge);
-class loop *vect_loop_versioning (loop_vec_info, unsigned int, bool,
-				  poly_uint64);
+class loop *vect_loop_versioning (loop_vec_info);
 extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
-				    tree *, tree *, tree *, int, bool, bool);
+				    tree *, tree *, tree *, int, bool, bool,
+				    tree *);
 extern void vect_prepare_for_masked_peels (loop_vec_info);
 extern dump_user_location_t find_loop_location (class loop *);
 extern bool vect_can_advance_ivs_p (loop_vec_info);
+extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
+
 
 /* In tree-vect-stmts.c.  */
 extern poly_uint64 current_vector_size;
@@ -1600,6 +1604,8 @@ extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *,
						  tree, tree = NULL_TREE);
 
 /* In tree-vect-loop.c.  */
+/* Used in tree-vect-loop-manip.c.  */
+extern void determine_peel_for_niter (loop_vec_info);
 /* FORNOW: Used in tree-parloops.c.  */
 extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info,
						  bool *, bool);
@@ -1610,7 +1616,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
 
 /* Drive for loop analysis stage.  */
 extern opt_loop_vec_info vect_analyze_loop (class loop *, loop_vec_info,
-					    vec_info_shared *);
+					    vec_info_shared *,
+					    vector_sizes);
 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
					 tree *, bool);
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 173e6b51652fd023893b38da786ff28f827553b5..71bbf4fdf8dc7588c45a0e8feef9272b52c0c04c 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -875,6 +875,10 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
   vec_info_shared shared;
   auto_purge_vect_location sentinel;
   vect_location = find_loop_location (loop);
+  auto_vector_sizes auto_vector_sizes;
+  vector_sizes vector_sizes;
+  bool assert_versioning = false;
+
   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
       && dump_enabled_p ())
     dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
		 "\nAnalyzing loop at %s:%d\n",
		 LOCATION_FILE (vect_location.get_location_t ()),
		 LOCATION_LINE (vect_location.get_location_t ()));
 
@@ -882,10 +886,35 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
+  /* If this is an epilogue, we already know what vector sizes we will use
+     for vectorization as the analysis was part of the main vectorized
+     loop.  Use these instead of going through all vector sizes again.  */
+  if (orig_loop_vinfo
+      && !LOOP_VINFO_LOOP (orig_loop_vinfo)->epilogue_vsizes.is_empty ())
+    {
+      vector_sizes = LOOP_VINFO_LOOP (orig_loop_vinfo)->epilogue_vsizes;
+      assert_versioning = LOOP_REQUIRES_VERSIONING (orig_loop_vinfo);
+      current_vector_size = vector_sizes[0];
+    }
+  else
+    {
+      /* Autodetect first vector size we try.  */
+      current_vector_size = 0;
+
+      targetm.vectorize.autovectorize_vector_sizes (&auto_vector_sizes,
						    loop->simdlen != 0);
+      vector_sizes = auto_vector_sizes;
+    }
+
   /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
-  opt_loop_vec_info loop_vinfo
-    = vect_analyze_loop (loop, orig_loop_vinfo, &shared);
-  loop->aux = loop_vinfo;
+  opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL);
+  if (loop_vec_info_for_loop (loop))
+    loop_vinfo = opt_loop_vec_info::success (loop_vec_info_for_loop (loop));
+  else
+    {
+      loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo, &shared, vector_sizes);
+      loop->aux = loop_vinfo;
+    }
 
   if (!loop_vinfo)
     if (dump_enabled_p ())
@@ -898,6 +927,10 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
 
   if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
     {
+      /* If this loop requires versioning, make sure the analysis done on
	 the epilogue loops succeeds.  */
+      gcc_assert (!assert_versioning);
+
       /* Free existing information if loop is analyzed with some
	 assumptions.  */
       if (loop_constraint_set_p (loop, LOOP_C_FINITE))
@@ -1013,8 +1046,13 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
 
   /* Epilogue of vectorized loop must be vectorized too.  */
   if (new_loop)
-    ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops,
-				 new_loop, loop_vinfo, NULL, NULL);
+    {
+      /* Don't include vectorized epilogues in the "vectorized loops"
	 count.  */
+      unsigned dont_count = *num_vectorized_loops;
+      ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count,
				   new_loop, loop_vinfo, NULL, NULL);
+    }
 
   return ret;
 }
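
Editorial note, not part of the patch: the iteration-count arithmetic that vect_do_peeling builds as gimple trees is easier to sanity-check with concrete numbers. The standalone C sketch below models it under stated assumptions: VF is a power of two, N is the original scalar iteration count, and n_main is N minus the prolog peeling. All input values are hypothetical examples and none of the names below exist in GCC.

    /* Standalone model of the epilogue iteration-count arithmetic:
       eiters, niters_epilogue = N - ((N - G) & ~(VF - 1)), and the
       data-reference advance.  Hypothetical example values only.  */
    #include <assert.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned n = 23;      /* original scalar iteration count (NITERS) */
      unsigned vf = 8;      /* main-loop vectorization factor, power of two */
      unsigned gaps = 0;    /* 1 if the main loop peels for gaps, else 0 */
      unsigned prolog = 3;  /* iterations peeled for alignment */

      /* NITERS as seen by the main loop, after prolog peeling.  */
      unsigned n_main = n - prolog;

      /* Constant upper bound recorded on the epilogue loop:
         eiters = (NITERS - gaps - prolog) % vf + gaps.  */
      unsigned eiters = (n - gaps - prolog) % vf + gaps;

      /* Symbolic epilogue iteration count: N - ((N - G) & ~(VF - 1)).  */
      unsigned vf_mask = ~(vf - 1);
      unsigned epi_niters = n_main - ((n_main - gaps) & vf_mask);

      /* Advance applied to the epilogue's data references, i.e. the
         iterations consumed by the main vector loop: -(epi_niters - N).  */
      unsigned advance = n_main - epi_niters;

      /* Invariants the patch relies on: the constant and symbolic counts
         agree, and the advance is a whole number of vector iterations.  */
      assert (eiters == epi_niters);
      assert (advance % vf == 0);
      printf ("eiters=%u epi_niters=%u advance=%u\n",
              eiters, epi_niters, advance);
      return 0;
    }

With n = 23, vf = 8, prolog = 3 and gaps = 0 this prints eiters=4 epi_niters=4 advance=16: the main loop consumes 16 of the 20 post-prolog iterations and the epilogue, vectorized with a smaller size from loop->epilogue_vsizes or left scalar, handles the remaining 4. This is also why vect_update_inits_of_drs can simply shift every data reference by ADVANCE, and why the skip_vector guard replaces ADVANCE with -niters_prolog when the main vector loop is bypassed.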