--- gcc/cfgloop.h | 3 + gcc/tree-ssa-loop-manip.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/tree-ssa-loop-manip.h | 3 +- gcc/tree-ssa-loop.c | 33 ++++++ gcc/tree-ssa-loop.h | 2 + 5 files changed, 292 insertions(+), 2 deletions(-) diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 11378ca..c5bcca7 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -232,6 +232,9 @@ public: Other values means unroll with the given unrolling factor. */ unsigned short unroll; + /* Like unroll field above, but it's estimated in middle-end. */ + unsigned short estimated_unroll; + /* If this loop was inlined the main clique of the callee which does not need remapping when copying the loop body. */ unsigned short owned_clique; diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index 120b35b..72ac335 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "backend.h" +#include "target.h" #include "tree.h" #include "gimple.h" #include "cfghooks.h" @@ -42,6 +43,7 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "tree-scalar-evolution.h" #include "tree-inline.h" +#include "wide-int.h" /* All bitmaps for rewriting into loop-closed SSA go on this obstack, so that we can free them all at once. */ @@ -1592,3 +1594,254 @@ canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch) return var_before; } + +/* Try to determine estimated unroll factor for given LOOP with constant number + of iterations, mainly refer to decide_unroll_constant_iterations. + - NITER_DESC holds number of iteration description if it isn't NULL. + - NUNROLL holds a unroll factor value computed with instruction numbers. + - ITER holds estimated or likely max loop iterations. + Return true if it succeeds, also update estimated_unroll. */ + +static bool +decide_uf_const_iter (class loop *loop, const tree_niter_desc *niter_desc, + unsigned nunroll, const widest_int *iter) +{ + /* Skip big loops. */ + if (nunroll <= 1) + return false; + + gcc_assert (niter_desc && niter_desc->assumptions); + + /* Check number of iterations is constant, return false if no. */ + if ((niter_desc->may_be_zero && !integer_zerop (niter_desc->may_be_zero)) + || !tree_fits_uhwi_p (niter_desc->niter)) + return false; + + unsigned HOST_WIDE_INT const_niter = tree_to_uhwi (niter_desc->niter); + + /* If unroll factor is set explicitly, use it as estimated_unroll. */ + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) + { + /* It should have been peeled instead. */ + if (const_niter == 0 || (unsigned) loop->unroll > const_niter - 1) + loop->estimated_unroll = 1; + else + loop->estimated_unroll = loop->unroll; + return true; + } + + /* Check whether the loop rolls enough to consider. */ + if (const_niter < 2 * nunroll || wi::ltu_p (*iter, 2 * nunroll)) + return false; + + /* Success; now compute number of iterations to unroll. */ + unsigned best_unroll = 0, n_copies = 0; + unsigned best_copies = 2 * nunroll + 10; + unsigned i = 2 * nunroll + 2; + + if (i > const_niter - 2) + i = const_niter - 2; + + for (; i >= nunroll - 1; i--) + { + unsigned exit_mod = const_niter % (i + 1); + + if (!empty_block_p (loop->latch)) + n_copies = exit_mod + i + 1; + else if (exit_mod != i) + n_copies = exit_mod + i + 2; + else + n_copies = i + 1; + + if (n_copies < best_copies) + { + best_copies = n_copies; + best_unroll = i; + } + } + + loop->estimated_unroll = best_unroll + 1; + return true; +} + +/* Try to determine estimated unroll factor for given LOOP with countable but + non-constant number of iterations, mainly refer to + decide_unroll_runtime_iterations. + - NITER_DESC holds number of iteration description if it isn't NULL. + - NUNROLL_IN holds a unroll factor value computed with instruction numbers. + - ITER holds estimated or likely max loop iterations. + Return true if it succeeds, also update estimated_unroll. */ + +static bool +decide_uf_runtime_iter (class loop *loop, const tree_niter_desc *niter_desc, + unsigned nunroll_in, const widest_int *iter) +{ + unsigned nunroll = nunroll_in; + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) + nunroll = loop->unroll; + + /* Skip big loops. */ + if (nunroll <= 1) + return false; + + gcc_assert (niter_desc && niter_desc->assumptions); + + /* Skip constant number of iterations. */ + if ((!niter_desc->may_be_zero || !integer_zerop (niter_desc->may_be_zero)) + && tree_fits_uhwi_p (niter_desc->niter)) + return false; + + /* Check whether the loop rolls. */ + if (wi::ltu_p (*iter, 2 * nunroll)) + return false; + + /* Success; now force nunroll to be power of 2. */ + unsigned i; + for (i = 1; 2 * i <= nunroll; i *= 2) + continue; + + loop->estimated_unroll = i; + return true; +} + +/* Try to determine estimated unroll factor for given LOOP with uncountable + number of iterations, mainly refer to decide_unroll_stupid. + - NITER_DESC holds number of iteration description if it isn't NULL. + - NUNROLL_IN holds a unroll factor value computed with instruction numbers. + - ITER holds estimated or likely max loop iterations. + Return true if it succeeds, also update estimated_unroll. */ + +static bool +decide_uf_stupid (class loop *loop, const tree_niter_desc *niter_desc, + unsigned nunroll_in, const widest_int *iter) +{ + if (!flag_unroll_all_loops && !loop->unroll) + return false; + + unsigned nunroll = nunroll_in; + if (loop->unroll > 0 && loop->unroll < USHRT_MAX) + nunroll = loop->unroll; + + /* Skip big loops. */ + if (nunroll <= 1) + return false; + + gcc_assert (!niter_desc || !niter_desc->assumptions); + + /* Skip loop with multiple branches for now. */ + if (num_loop_branches (loop) > 1) + return false; + + /* Check whether the loop rolls. */ + if (wi::ltu_p (*iter, 2 * nunroll)) + return false; + + /* Success; now force nunroll to be power of 2. */ + unsigned i; + for (i = 1; 2 * i <= nunroll; i *= 2) + continue; + + loop->estimated_unroll = i; + return true; +} + +/* Try to estimate whether this given LOOP can be unrolled or not, and compute + its estimated unroll factor if it can. To avoid duplicated computation, you + can pass number of iterations information by DESC. The heuristics mainly + refer to decide_unrolling in loop-unroll.c. */ + +void +estimate_unroll_factor (class loop *loop, tree_niter_desc *desc) +{ + /* Return the existing estimated unroll factor. */ + if (loop->estimated_unroll) + return; + + /* Don't unroll explicitly. */ + if (loop->unroll == 1) + { + loop->estimated_unroll = loop->unroll; + return; + } + + /* Like decide_unrolling, don't unroll if: + 1) the loop is cold. + 2) the loop can't be manipulated. + 3) the loop isn't innermost. */ + if (optimize_loop_for_size_p (loop) || !can_duplicate_loop_p (loop) + || loop->inner != NULL) + { + loop->estimated_unroll = 1; + return; + } + + /* Don't unroll without explicit information. */ + if (!loop->unroll && !flag_unroll_loops && !flag_unroll_all_loops) + { + loop->estimated_unroll = 1; + return; + } + + /* Check for instruction number and average instruction number. */ + loop->ninsns = tree_num_loop_insns (loop, &eni_size_weights); + loop->av_ninsns = tree_average_num_loop_insns (loop, &eni_size_weights); + unsigned nunroll = param_max_unrolled_insns / loop->ninsns; + unsigned nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns; + + if (nunroll > nunroll_by_av) + nunroll = nunroll_by_av; + if (nunroll > (unsigned) param_max_unroll_times) + nunroll = param_max_unroll_times; + + if (targetm.loop_unroll_adjust) + nunroll = targetm.loop_unroll_adjust (nunroll, loop); + + tree_niter_desc *niter_desc = NULL; + bool desc_need_delete = false; + + /* Compute number of iterations if need. */ + if (!desc) + { + /* For now, use single_dom_exit for simplicity. TODO: Support multiple + exits like find_simple_exit if we finds some profitable cases. */ + niter_desc = XNEW (class tree_niter_desc); + gcc_assert (niter_desc); + edge exit = single_dom_exit (loop); + if (!exit || !number_of_iterations_exit (loop, exit, niter_desc, true)) + { + XDELETE (niter_desc); + niter_desc = NULL; + } + else + desc_need_delete = true; + } + else + niter_desc = desc; + + /* For checking the loop rolls enough to consider, also consult loop bounds + and profile. */ + widest_int iterations; + if (!get_estimated_loop_iterations (loop, &iterations) + && !get_likely_max_loop_iterations (loop, &iterations)) + iterations = 0; + + if (niter_desc && niter_desc->assumptions) + { + /* For countable loops. */ + if (!decide_uf_const_iter (loop, niter_desc, nunroll, &iterations) + && !decide_uf_runtime_iter (loop, niter_desc, nunroll, &iterations)) + loop->estimated_unroll = 1; + } + else + { + if (!decide_uf_stupid (loop, niter_desc, nunroll, &iterations)) + loop->estimated_unroll = 1; + } + + if (desc_need_delete) + { + XDELETE (niter_desc); + niter_desc = NULL; + } +} + diff --git a/gcc/tree-ssa-loop-manip.h b/gcc/tree-ssa-loop-manip.h index e789e4f..773a2b3 100644 --- a/gcc/tree-ssa-loop-manip.h +++ b/gcc/tree-ssa-loop-manip.h @@ -55,7 +55,6 @@ extern void tree_transform_and_unroll_loop (class loop *, unsigned, extern void tree_unroll_loop (class loop *, unsigned, edge, class tree_niter_desc *); extern tree canonicalize_loop_ivs (class loop *, tree *, bool); - - +extern void estimate_unroll_factor (class loop *, tree_niter_desc *); #endif /* GCC_TREE_SSA_LOOP_MANIP_H */ diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 5e8365d..25320fb 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "stringpool.h" #include "attribs.h" +#include "sreal.h" /* A pass making sure loops are fixed up. */ @@ -790,5 +791,37 @@ tree_num_loop_insns (class loop *loop, eni_weights *weights) return size; } +/* Computes an estimated number of insns on average per iteration in LOOP, + weighted by WEIGHTS. Refer to function average_num_loop_insns. */ +unsigned +tree_average_num_loop_insns (class loop *loop, eni_weights *weights) +{ + basic_block *body = get_loop_body (loop); + gimple_stmt_iterator gsi; + unsigned bb_size, i; + sreal nsize = 0; + + for (i = 0; i < loop->num_nodes; i++) + { + bb_size = 0; + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + bb_size += estimate_num_insns (gsi_stmt (gsi), weights); + nsize += (sreal) bb_size + * body[i]->count.to_sreal_scale (loop->header->count); + /* Avoid overflows. */ + if (nsize > 1000000) + { + free (body); + return 1000000; + } + } + free (body); + + unsigned ret = nsize.to_int (); + if (!ret) + ret = 1; /* To avoid division by zero. */ + + return ret; +} diff --git a/gcc/tree-ssa-loop.h b/gcc/tree-ssa-loop.h index 9e35125..af36177 100644 --- a/gcc/tree-ssa-loop.h +++ b/gcc/tree-ssa-loop.h @@ -67,6 +67,8 @@ public: extern bool for_each_index (tree *, bool (*) (tree, tree *, void *), void *); extern char *get_lsm_tmp_name (tree ref, unsigned n, const char *suffix = NULL); extern unsigned tree_num_loop_insns (class loop *, struct eni_weights *); +extern unsigned tree_average_num_loop_insns (class loop *, + struct eni_weights *); /* Returns the loop of the statement STMT. */ -- 2.7.4