diff --git a/gcc/gengtype.c b/gcc/gengtype.c index 53317337cf8c8e8caefd6b819d28b3bba301e755..56ffa08a7dee54837441f0c743f8c0faa285c74b 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -5197,6 +5197,7 @@ main (int argc, char **argv) POS_HERE (do_scalar_typedef ("widest_int", &pos)); POS_HERE (do_scalar_typedef ("int64_t", &pos)); POS_HERE (do_scalar_typedef ("poly_int64", &pos)); + POS_HERE (do_scalar_typedef ("poly_uint64", &pos)); POS_HERE (do_scalar_typedef ("uint64_t", &pos)); POS_HERE (do_scalar_typedef ("uint8", &pos)); POS_HERE (do_scalar_typedef ("uintptr_t", &pos)); diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 5c25441c70a271f04730486e513437fffa75b7e3..3b5f14c45b5b9b601120c6776734bbafefe1e178 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -2401,7 +2401,8 @@ class loop * vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, tree *niters_vector, tree *step_vector, tree *niters_vector_mult_vf_var, int th, - bool check_profitability, bool niters_no_overflow) + bool check_profitability, bool niters_no_overflow, + bool vect_epilogues_nomask) { edge e, guard_e; tree type = TREE_TYPE (niters), guard_cond; @@ -2474,7 +2475,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), bound_prolog + bound_epilog) - : !LOOP_REQUIRES_VERSIONING (loop_vinfo)); + : (!LOOP_REQUIRES_VERSIONING (loop_vinfo) + || vect_epilogues_nomask)); /* Epilog loop must be executed if the number of iterations for epilog loop is known at compile time, otherwise we need to add a check at the end of vector loop and skip to the end of epilog loop. */ @@ -2966,9 +2968,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr) *COND_EXPR_STMT_LIST. */ class loop * -vect_loop_versioning (loop_vec_info loop_vinfo, - unsigned int th, bool check_profitability, - poly_uint64 versioning_threshold) +vect_loop_versioning (loop_vec_info loop_vinfo) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop; class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); @@ -2988,10 +2988,15 @@ vect_loop_versioning (loop_vec_info loop_vinfo, bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); + poly_uint64 versioning_threshold + = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); tree version_simd_if_cond = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo); + unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); - if (check_profitability) + if (th >= vect_vf_for_cost (loop_vinfo) + && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && !ordered_p (th, versioning_threshold)) cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, build_int_cst (TREE_TYPE (scalar_loop_iters), th - 1)); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index b0cbbac0cb5ba1ffce706715d3dbb9139063803d..305ee2b06eabde9091049da829e6fc93161aa13f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1858,7 +1858,8 @@ vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) for it. The different analyses will record information in the loop_vec_info struct. */ static opt_result -vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) +vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts, + bool *vect_epilogues_nomask) { opt_result ok = opt_result::success (); int res; @@ -2179,6 +2180,11 @@ start_over: } } + /* Disable epilogue vectorization if versioning is required because of the + iteration count. TODO: Needs investigation as to whether it is possible + to vectorize epilogues in this case. */ + *vect_epilogues_nomask &= !LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); + /* During peeling, we need to check if number of loop iterations is enough for both peeled prolog loop and vector loop. This check can be merged along with threshold check of loop versioning, so @@ -2186,6 +2192,7 @@ start_over: if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) { poly_uint64 niters_th = 0; + unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) { @@ -2206,6 +2213,14 @@ start_over: /* One additional iteration because of peeling for gap. */ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) niters_th += 1; + + /* Use the same condition as vect_transform_loop to decide when to use + the cost to determine a versioning threshold. */ + if (th >= vect_vf_for_cost (loop_vinfo) + && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && ordered_p (th, niters_th)) + niters_th = ordered_max (poly_uint64 (th), niters_th); + LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; } @@ -2329,7 +2344,7 @@ again: be vectorized. */ opt_loop_vec_info vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo, - vec_info_shared *shared) + vec_info_shared *shared, bool *vect_epilogues_nomask) { auto_vector_sizes vector_sizes; @@ -2357,6 +2372,7 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo, poly_uint64 autodetected_vector_size = 0; opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); poly_uint64 first_vector_size = 0; + unsigned vectorized_loops = 0; while (1) { /* Check the CFG characteristics of the loop (nesting, entry/exit). */ @@ -2376,14 +2392,17 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo, if (orig_loop_vinfo) LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo; - opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); + opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts, + vect_epilogues_nomask); if (res) { LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; + vectorized_loops++; - if (loop->simdlen - && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo), - (unsigned HOST_WIDE_INT) loop->simdlen)) + if ((loop->simdlen + && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo), + (unsigned HOST_WIDE_INT) loop->simdlen)) + || *vect_epilogues_nomask) { if (first_loop_vinfo == NULL) { @@ -2392,7 +2411,13 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo, loop->aux = NULL; } else - delete loop_vinfo; + { + /* Set versioning threshold of the original LOOP_VINFO based + on the last vectorization of the epilog. */ + LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) + = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); + delete loop_vinfo; + } } else { @@ -2401,7 +2426,12 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo, } } else - delete loop_vinfo; + { + /* Disable epilog vectorization if we can't determine the epilogs can + be vectorized. */ + *vect_epilogues_nomask &= vectorized_loops > 1; + delete loop_vinfo; + } if (next_size == 0) autodetected_vector_size = current_vector_size; @@ -8468,7 +8498,7 @@ vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, Returns scalar epilogue loop if any. */ class loop * -vect_transform_loop (loop_vec_info loop_vinfo) +vect_transform_loop (loop_vec_info loop_vinfo, bool vect_epilogues_nomask) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); class loop *epilogue = NULL; @@ -8497,11 +8527,11 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (th >= vect_vf_for_cost (loop_vinfo) && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Profitability threshold is %d loop iterations.\n", - th); - check_profitability = true; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Profitability threshold is %d loop iterations.\n", + th); + check_profitability = true; } /* Make sure there exists a single-predecessor exit bb. Do this before @@ -8519,18 +8549,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) { - poly_uint64 versioning_threshold - = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); - if (check_profitability - && ordered_p (poly_uint64 (th), versioning_threshold)) - { - versioning_threshold = ordered_max (poly_uint64 (th), - versioning_threshold); - check_profitability = false; - } class loop *sloop - = vect_loop_versioning (loop_vinfo, th, check_profitability, - versioning_threshold); + = vect_loop_versioning (loop_vinfo); sloop->force_vectorize = false; check_profitability = false; } @@ -8557,7 +8577,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, &step_vector, &niters_vector_mult_vf, th, - check_profitability, niters_no_overflow); + check_profitability, niters_no_overflow, + vect_epilogues_nomask); if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo) && LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo).initialized_p ()) scale_loop_frequencies (LOOP_VINFO_SCALAR_LOOP (loop_vinfo), @@ -8818,7 +8839,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) epilogue = NULL; - if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) + if (!vect_epilogues_nomask) epilogue = NULL; if (epilogue) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 1456cde4c2c2dec7244c504d2c496248894a4f1e..e87170c592036a6f3f5330e1ebf5d125441861a6 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1480,10 +1480,10 @@ extern void vect_set_loop_condition (class loop *, loop_vec_info, extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge); class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, class loop *, edge); -class loop *vect_loop_versioning (loop_vec_info, unsigned int, bool, - poly_uint64); +class loop *vect_loop_versioning (loop_vec_info); extern class loop *vect_do_peeling (loop_vec_info, tree, tree, - tree *, tree *, tree *, int, bool, bool); + tree *, tree *, tree *, int, bool, bool, + bool); extern void vect_prepare_for_masked_peels (loop_vec_info); extern dump_user_location_t find_loop_location (class loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); @@ -1610,7 +1610,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, /* Drive for loop analysis stage. */ extern opt_loop_vec_info vect_analyze_loop (class loop *, loop_vec_info, - vec_info_shared *); + vec_info_shared *, + bool *); extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, tree *, bool); @@ -1622,7 +1623,7 @@ extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *, unsigned int, tree, unsigned int); /* Drive for loop transformation stage. */ -extern class loop *vect_transform_loop (loop_vec_info); +extern class loop *vect_transform_loop (loop_vec_info, bool); extern opt_loop_vec_info vect_analyze_loop_form (class loop *, vec_info_shared *); extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *, diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 173e6b51652fd023893b38da786ff28f827553b5..25c3fc8ff55e017ae0b971fa93ce8ce2a07cb94c 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -61,6 +61,7 @@ along with GCC; see the file COPYING3. If not see #include "tree.h" #include "gimple.h" #include "predict.h" +#include "params.h" #include "tree-pass.h" #include "ssa.h" #include "cgraph.h" @@ -875,6 +876,7 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, vec_info_shared shared; auto_purge_vect_location sentinel; vect_location = find_loop_location (loop); + bool vect_epilogues_nomask = PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK); if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION && dump_enabled_p ()) dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS, @@ -884,7 +886,7 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p. */ opt_loop_vec_info loop_vinfo - = vect_analyze_loop (loop, orig_loop_vinfo, &shared); + = vect_analyze_loop (loop, orig_loop_vinfo, &shared, &vect_epilogues_nomask); loop->aux = loop_vinfo; if (!loop_vinfo) @@ -980,7 +982,7 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, "loop vectorized using variable length vectors\n"); } - loop_p new_loop = vect_transform_loop (loop_vinfo); + loop_p new_loop = vect_transform_loop (loop_vinfo, vect_epilogues_nomask); (*num_vectorized_loops)++; /* Now that the loop has been vectorized, allow it to be unrolled etc. */ @@ -1013,8 +1015,13 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, /* Epilogue of vectorized loop must be vectorized too. */ if (new_loop) - ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, - new_loop, loop_vinfo, NULL, NULL); + { + /* Don't include vectorized epilogues in the "vectorized loops" count. + */ + unsigned dont_count = *num_vectorized_loops; + ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count, + new_loop, loop_vinfo, NULL, NULL); + } return ret; }