public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Jan Hubicka <hubicka@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/kubaneko/heads/histogram)] Add histogram based loop versioning. Date: Mon, 1 May 2023 19:52:45 +0000 (GMT) [thread overview] Message-ID: <20230501195245.17E883858D28@sourceware.org> (raw) https://gcc.gnu.org/g:349581e9d5319442b0ccfa913717b934dd61f8c2 commit 349581e9d5319442b0ccfa913717b934dd61f8c2 Author: Honza <jh@ryzen3.suse.cz> Date: Mon May 1 21:52:20 2023 +0200 Add histogram based loop versioning. Diff: --- gcc/Makefile.in | 1 + gcc/common.opt | 4 + gcc/gimple-loop-versioning-histograms.cc | 264 +++++++++++++++++++++++++++++++ gcc/gimple-loop-versioning.cc | 3 + gcc/opts.cc | 1 + gcc/passes.def | 2 + gcc/tree-pass.h | 1 + 7 files changed, 276 insertions(+) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 6001c9e3b55..d15d3205d10 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1421,6 +1421,7 @@ OBJS = \ gimple-loop-interchange.o \ gimple-loop-jam.o \ gimple-loop-versioning.o \ + gimple-loop-versioning-histograms.o \ gimple-low.o \ gimple-predicate-analysis.o \ gimple-pretty-print.o \ diff --git a/gcc/common.opt b/gcc/common.opt index e4ef0685484..5b1633cb5ac 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -3103,6 +3103,10 @@ fversion-loops-for-strides Common Var(flag_version_loops_for_strides) Optimization Version loops based on whether indices have a stride of one. +fversion-loops-using-histograms +Common Var(flag_version_loops_using_histograms) Optimization +Version loops based on iteration count histograms + funwind-tables Common Var(flag_unwind_tables) Optimization Just generate unwind tables for exception handling. diff --git a/gcc/gimple-loop-versioning-histograms.cc b/gcc/gimple-loop-versioning-histograms.cc new file mode 100644 index 00000000000..c9e2dc98634 --- /dev/null +++ b/gcc/gimple-loop-versioning-histograms.cc @@ -0,0 +1,264 @@ +/* Loop versioning using histograms. + Copyright (C) 2023 Free Software Foundation, Inc. + Contributed by Ondrej Kubanek and Jan Hubicka + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "tree-pass.h" +#include "gimplify-me.h" +#include "cfgloop.h" +#include "tree-ssa-loop.h" +#include "ssa.h" +#include "tree-scalar-evolution.h" +#include "tree-ssa-loop-ivopts.h" +#include "fold-const.h" +#include "tree-ssa-propagate.h" +#include "tree-inline.h" +#include "domwalk.h" +#include "tree-vectorizer.h" +#include "omp-general.h" +#include "predict.h" +#include "tree-into-ssa.h" +#include "gimple-range.h" +#include "tree-cfg.h" +#include "tree-ssa-loop-niter.h" +#include "tree-pretty-print.h" + +namespace { + +/* Histogram loop versioning pass. + The pass looks for loops with single exit conditioned by a induction + variable test. If histogram indicates that such loop iterates usually + N times, we produce a versined version: + + if (number_of_iterations == N) + for (int i = 0; i < N; i++) + loop_body; + else + original_loop. + After this transformation the specialized version can often be + fully unrolled or vectorized. */ + +const pass_data pass_data_loop_histogram_versioning = +{ + GIMPLE_PASS, /* type */ + "hversion", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_LOOP_VERSIONING, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_loop_histogram_versioning : public gimple_opt_pass +{ +public: + pass_loop_histogram_versioning (gcc::context *ctxt) + : gimple_opt_pass (pass_data_loop_histogram_versioning, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) final override + { + return flag_version_loops_using_histograms; + } + unsigned int execute (function *) final override; +}; + +unsigned int +pass_loop_histogram_versioning::execute (function *fn) +{ + bool changed = false; + if (number_of_loops (fn) <= 1) + return 0; + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); + scev_initialize (); + estimate_numbers_of_iterations (cfun); + + for (auto loop : loops_list (cfun, 0)) + { + /* We version only inner loops optimized for speed with histogram + attachd. */ + if (loop->inner != NULL + || loop->counters == NULL + || !optimize_loop_for_speed_p (loop)) + continue; + int best_iters = - 1; + gcov_type best_val = 0; + + /* Look for dominating iteration count. */ + for (int i = 0; i < param_profile_histogram_size_lin; i++) + { + if ((*(loop->counters->hist))[i] > best_val) + { + best_val = (*(loop->counters->hist))[i]; + best_iters = i; + } + } + if (best_val <= 0 || best_val < loop->counters->sum * 9 / 10) + continue; + if (dump_file) + fprintf (dump_file, "Loop %i has dominating number of iterations %i\n", + loop->num, best_iters); + if (!best_iters) + { + if (dump_file) + fprintf (dump_file, + "Not versioning since normal peeling will do the job\n"); + continue; + } + + /* If loop has no single exit, don't try to version. + In this case we don not know if the loop iteration count corresponds + to the IV test. + TODO: We may be able to also handle loop with single dominating + exit. */ + auto_vec<edge> exits = get_loop_exit_edges (loop); + if (exits.length () != 1) + { + if (dump_file) + fprintf (dump_file, "Wrong number of exits: %i\n", exits.length ()); + continue; + } + + /* Analyze number of iterations. */ + class tree_niter_desc niter; + if (!number_of_iterations_exit (loop, exits[0], + &niter, false, false)) + { + if (dump_file) + fprintf (dump_file, "niter analysis failed\n"); + continue; + } + /* If the loop exit is not simple IV test or the loop already + has known number of iterations, give up. */ + if (!niter.niter + || !integer_onep (niter.assumptions) + || TREE_CODE (niter.niter) == INTEGER_CST) + { + if (dump_file) + { + if (TREE_CODE (niter.niter) == INTEGER_CST) + fprintf (dump_file, "niter is already constant:"); + else + fprintf (dump_file, "niter wrong:"); + print_generic_expr (dump_file, niter.niter); + fprintf (dump_file, "\n"); + } + continue; + } + + /* Sanity check with maximal number of iterations. */ + int maxiter = max_loop_iterations_int (loop); + if (maxiter >= 0 && maxiter < best_iters) + { + if (dump_file) + fprintf (dump_file, "Upper bound on number of iterations is too small\n"); + continue; + } + + /* Compute loop body size. */ + int ninsns = 0; + basic_block *bbs = get_loop_body (loop); + for (unsigned int i = 0; i < loop->num_nodes; i++) + for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); + !gsi_end_p (gsi) && ninsns <= param_loop_versioning_max_inner_insns; + gsi_next (&gsi)) + ninsns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights); + free (bbs); + if (ninsns > param_loop_versioning_max_inner_insns) + { + if (dump_file) + fprintf (dump_file, "--param loop-versioning-max-inner-insns exceeded\n"); + continue; + } + + + /* Do versining. */ + dump_user_location_t locus = last_stmt (exits[0]->src); + basic_block cond_bb; + gimple_seq stmts = NULL; + initialize_original_copy_tables (); + tree op = force_gimple_operand_1 + (fold_build2 (NE_EXPR, boolean_type_node, + niter.niter, + build_int_cst (TREE_TYPE (niter.niter), + best_iters)), + &stmts, is_gimple_condexpr_for_cond, NULL_TREE); + + class loop *optimized_loop + = loop_version (loop, op, &cond_bb, + /* It may be more precise to take probability from + histogram, but it also may lead for + over-specialization for the train run. */ + profile_probability::very_unlikely (), + profile_probability::very_likely (), + profile_probability::very_unlikely (), + profile_probability::very_likely (), true); + free_original_copy_tables (); + + /* Insert IV test. */ + if (stmts) + { + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + } + + /* Update optimized loop. + We force it to have given number of iteraitons and thus + it makes sense to drop histogram counters. */ + record_niter_bound (optimized_loop, best_val, false, true); + optimized_loop->counters = 0; + auto_vec<edge> optimized_exits = get_loop_exit_edges (optimized_loop); + create_canonical_iv (optimized_loop, optimized_exits[0], + build_int_cst (integer_type_node, best_iters)); + /* Update counters of non-optiized loop. + TODO: also recompute estimated number of itraitons + instead of droping it bellow. */ + (*(loop->counters->hist))[best_iters] = 0; + loop->counters->sum -= best_val; + loop->any_estimate = false; + changed = true; + if (dump_enabled_p ()) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, + "loop %i versioned based on histogram " + "for %i iterations (header execution count %d)\n", + loop->num, best_iters, + optimized_loop->header->count.to_gcov_type ()); + } + free_numbers_of_iterations_estimates (cfun); + scev_finalize (); + loop_optimizer_finalize (); + return (changed ? TODO_update_ssa : 0) | TODO_cleanup_cfg; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_loop_histogram_versioning (gcc::context *ctxt) +{ + return new pass_loop_histogram_versioning (ctxt); +} diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc index 640bb28016f..1f3d570c42f 100644 --- a/gcc/gimple-loop-versioning.cc +++ b/gcc/gimple-loop-versioning.cc @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-into-ssa.h" #include "gimple-range.h" #include "tree-cfg.h" +#include "tree-ssa-loop-niter.h" +#include "tree-pretty-print.h" namespace { @@ -1808,3 +1810,4 @@ make_pass_loop_versioning (gcc::context *ctxt) { return new pass_loop_versioning (ctxt); } + diff --git a/gcc/opts.cc b/gcc/opts.cc index a032cd4ce58..1027d656aa5 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -2044,6 +2044,7 @@ enable_fdo_optimizations (struct gcc_options *opts, SET_OPTION_IF_UNSET (opts, opts_set, flag_loop_interchange, value); SET_OPTION_IF_UNSET (opts, opts_set, flag_unroll_jam, value); SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value); + SET_OPTION_IF_UNSET (opts, opts_set, flag_version_loops_using_histograms, value); } /* -f{,no-}sanitize{,-recover}= suboptions. */ diff --git a/gcc/passes.def b/gcc/passes.def index c9a8f19747b..20a916b0cee 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -229,6 +229,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_merge_phi); NEXT_PASS (pass_phiopt, false /* early_p */); NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_loop_histogram_versioning); NEXT_PASS (pass_ch); NEXT_PASS (pass_lower_complex); NEXT_PASS (pass_sra); @@ -275,6 +276,7 @@ along with GCC; see the file COPYING3. If not see /* Before loop_init we rewrite no longer addressed locals into SSA form if possible. */ NEXT_PASS (pass_tree_loop_init); + //NEXT_PASS (pass_loop_histogram_versioning); NEXT_PASS (pass_tree_unswitch); NEXT_PASS (pass_scev_cprop); NEXT_PASS (pass_loop_split); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 6cdaed7d4b2..7141aabb6fa 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -373,6 +373,7 @@ extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt); extern gimple_opt_pass *make_pass_loop_versioning (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_loop_histogram_versioning (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt); extern gimple_opt_pass *make_pass_linterchange (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
reply other threads:[~2023-05-01 19:52 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230501195245.17E883858D28@sourceware.org \ --to=hubicka@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).