public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/kubaneko/heads/histogram)] Add histogram based loop versioning.
@ 2023-05-01 19:52 Jan Hubicka
0 siblings, 0 replies; only message in thread
From: Jan Hubicka @ 2023-05-01 19:52 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:349581e9d5319442b0ccfa913717b934dd61f8c2
commit 349581e9d5319442b0ccfa913717b934dd61f8c2
Author: Honza <jh@ryzen3.suse.cz>
Date: Mon May 1 21:52:20 2023 +0200
Add histogram based loop versioning.
Diff:
---
gcc/Makefile.in | 1 +
gcc/common.opt | 4 +
gcc/gimple-loop-versioning-histograms.cc | 264 +++++++++++++++++++++++++++++++
gcc/gimple-loop-versioning.cc | 3 +
gcc/opts.cc | 1 +
gcc/passes.def | 2 +
gcc/tree-pass.h | 1 +
7 files changed, 276 insertions(+)
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 6001c9e3b55..d15d3205d10 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1421,6 +1421,7 @@ OBJS = \
gimple-loop-interchange.o \
gimple-loop-jam.o \
gimple-loop-versioning.o \
+ gimple-loop-versioning-histograms.o \
gimple-low.o \
gimple-predicate-analysis.o \
gimple-pretty-print.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index e4ef0685484..5b1633cb5ac 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3103,6 +3103,10 @@ fversion-loops-for-strides
Common Var(flag_version_loops_for_strides) Optimization
Version loops based on whether indices have a stride of one.
+fversion-loops-using-histograms
+Common Var(flag_version_loops_using_histograms) Optimization
+Version loops based on iteration count histograms
+
funwind-tables
Common Var(flag_unwind_tables) Optimization
Just generate unwind tables for exception handling.
diff --git a/gcc/gimple-loop-versioning-histograms.cc b/gcc/gimple-loop-versioning-histograms.cc
new file mode 100644
index 00000000000..c9e2dc98634
--- /dev/null
+++ b/gcc/gimple-loop-versioning-histograms.cc
@@ -0,0 +1,264 @@
+/* Loop versioning using histograms.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ Contributed by Ondrej Kubanek and Jan Hubicka
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "tree-pass.h"
+#include "gimplify-me.h"
+#include "cfgloop.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-scalar-evolution.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "fold-const.h"
+#include "tree-ssa-propagate.h"
+#include "tree-inline.h"
+#include "domwalk.h"
+#include "tree-vectorizer.h"
+#include "omp-general.h"
+#include "predict.h"
+#include "tree-into-ssa.h"
+#include "gimple-range.h"
+#include "tree-cfg.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-pretty-print.h"
+
+namespace {
+
+/* Histogram loop versioning pass.
+ The pass looks for loops with single exit conditioned by a induction
+ variable test. If histogram indicates that such loop iterates usually
+ N times, we produce a versined version:
+
+ if (number_of_iterations == N)
+ for (int i = 0; i < N; i++)
+ loop_body;
+ else
+ original_loop.
+ After this transformation the specialized version can often be
+ fully unrolled or vectorized. */
+
+const pass_data pass_data_loop_histogram_versioning =
+{
+ GIMPLE_PASS, /* type */
+ "hversion", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ TV_LOOP_VERSIONING, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_loop_histogram_versioning : public gimple_opt_pass
+{
+public:
+ pass_loop_histogram_versioning (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_loop_histogram_versioning, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *) final override
+ {
+ return flag_version_loops_using_histograms;
+ }
+ unsigned int execute (function *) final override;
+};
+
+unsigned int
+pass_loop_histogram_versioning::execute (function *fn)
+{
+ bool changed = false;
+ if (number_of_loops (fn) <= 1)
+ return 0;
+ loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
+ scev_initialize ();
+ estimate_numbers_of_iterations (cfun);
+
+ for (auto loop : loops_list (cfun, 0))
+ {
+ /* We version only inner loops optimized for speed with histogram
+ attachd. */
+ if (loop->inner != NULL
+ || loop->counters == NULL
+ || !optimize_loop_for_speed_p (loop))
+ continue;
+ int best_iters = - 1;
+ gcov_type best_val = 0;
+
+ /* Look for dominating iteration count. */
+ for (int i = 0; i < param_profile_histogram_size_lin; i++)
+ {
+ if ((*(loop->counters->hist))[i] > best_val)
+ {
+ best_val = (*(loop->counters->hist))[i];
+ best_iters = i;
+ }
+ }
+ if (best_val <= 0 || best_val < loop->counters->sum * 9 / 10)
+ continue;
+ if (dump_file)
+ fprintf (dump_file, "Loop %i has dominating number of iterations %i\n",
+ loop->num, best_iters);
+ if (!best_iters)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "Not versioning since normal peeling will do the job\n");
+ continue;
+ }
+
+ /* If loop has no single exit, don't try to version.
+ In this case we don not know if the loop iteration count corresponds
+ to the IV test.
+ TODO: We may be able to also handle loop with single dominating
+ exit. */
+ auto_vec<edge> exits = get_loop_exit_edges (loop);
+ if (exits.length () != 1)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Wrong number of exits: %i\n", exits.length ());
+ continue;
+ }
+
+ /* Analyze number of iterations. */
+ class tree_niter_desc niter;
+ if (!number_of_iterations_exit (loop, exits[0],
+ &niter, false, false))
+ {
+ if (dump_file)
+ fprintf (dump_file, "niter analysis failed\n");
+ continue;
+ }
+ /* If the loop exit is not simple IV test or the loop already
+ has known number of iterations, give up. */
+ if (!niter.niter
+ || !integer_onep (niter.assumptions)
+ || TREE_CODE (niter.niter) == INTEGER_CST)
+ {
+ if (dump_file)
+ {
+ if (TREE_CODE (niter.niter) == INTEGER_CST)
+ fprintf (dump_file, "niter is already constant:");
+ else
+ fprintf (dump_file, "niter wrong:");
+ print_generic_expr (dump_file, niter.niter);
+ fprintf (dump_file, "\n");
+ }
+ continue;
+ }
+
+ /* Sanity check with maximal number of iterations. */
+ int maxiter = max_loop_iterations_int (loop);
+ if (maxiter >= 0 && maxiter < best_iters)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Upper bound on number of iterations is too small\n");
+ continue;
+ }
+
+ /* Compute loop body size. */
+ int ninsns = 0;
+ basic_block *bbs = get_loop_body (loop);
+ for (unsigned int i = 0; i < loop->num_nodes; i++)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
+ !gsi_end_p (gsi) && ninsns <= param_loop_versioning_max_inner_insns;
+ gsi_next (&gsi))
+ ninsns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+ free (bbs);
+ if (ninsns > param_loop_versioning_max_inner_insns)
+ {
+ if (dump_file)
+ fprintf (dump_file, "--param loop-versioning-max-inner-insns exceeded\n");
+ continue;
+ }
+
+
+ /* Do versining. */
+ dump_user_location_t locus = last_stmt (exits[0]->src);
+ basic_block cond_bb;
+ gimple_seq stmts = NULL;
+ initialize_original_copy_tables ();
+ tree op = force_gimple_operand_1
+ (fold_build2 (NE_EXPR, boolean_type_node,
+ niter.niter,
+ build_int_cst (TREE_TYPE (niter.niter),
+ best_iters)),
+ &stmts, is_gimple_condexpr_for_cond, NULL_TREE);
+
+ class loop *optimized_loop
+ = loop_version (loop, op, &cond_bb,
+ /* It may be more precise to take probability from
+ histogram, but it also may lead for
+ over-specialization for the train run. */
+ profile_probability::very_unlikely (),
+ profile_probability::very_likely (),
+ profile_probability::very_unlikely (),
+ profile_probability::very_likely (), true);
+ free_original_copy_tables ();
+
+ /* Insert IV test. */
+ if (stmts)
+ {
+ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+ gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+ }
+
+ /* Update optimized loop.
+ We force it to have given number of iteraitons and thus
+ it makes sense to drop histogram counters. */
+ record_niter_bound (optimized_loop, best_val, false, true);
+ optimized_loop->counters = 0;
+ auto_vec<edge> optimized_exits = get_loop_exit_edges (optimized_loop);
+ create_canonical_iv (optimized_loop, optimized_exits[0],
+ build_int_cst (integer_type_node, best_iters));
+ /* Update counters of non-optiized loop.
+ TODO: also recompute estimated number of itraitons
+ instead of droping it bellow. */
+ (*(loop->counters->hist))[best_iters] = 0;
+ loop->counters->sum -= best_val;
+ loop->any_estimate = false;
+ changed = true;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
+ "loop %i versioned based on histogram "
+ "for %i iterations (header execution count %d)\n",
+ loop->num, best_iters,
+ optimized_loop->header->count.to_gcov_type ());
+ }
+ free_numbers_of_iterations_estimates (cfun);
+ scev_finalize ();
+ loop_optimizer_finalize ();
+ return (changed ? TODO_update_ssa : 0) | TODO_cleanup_cfg;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_loop_histogram_versioning (gcc::context *ctxt)
+{
+ return new pass_loop_histogram_versioning (ctxt);
+}
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 640bb28016f..1f3d570c42f 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-into-ssa.h"
#include "gimple-range.h"
#include "tree-cfg.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-pretty-print.h"
namespace {
@@ -1808,3 +1810,4 @@ make_pass_loop_versioning (gcc::context *ctxt)
{
return new pass_loop_versioning (ctxt);
}
+
diff --git a/gcc/opts.cc b/gcc/opts.cc
index a032cd4ce58..1027d656aa5 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -2044,6 +2044,7 @@ enable_fdo_optimizations (struct gcc_options *opts,
SET_OPTION_IF_UNSET (opts, opts_set, flag_loop_interchange, value);
SET_OPTION_IF_UNSET (opts, opts_set, flag_unroll_jam, value);
SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value);
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_version_loops_using_histograms, value);
}
/* -f{,no-}sanitize{,-recover}= suboptions. */
diff --git a/gcc/passes.def b/gcc/passes.def
index c9a8f19747b..20a916b0cee 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -229,6 +229,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_merge_phi);
NEXT_PASS (pass_phiopt, false /* early_p */);
NEXT_PASS (pass_tail_recursion);
+ NEXT_PASS (pass_loop_histogram_versioning);
NEXT_PASS (pass_ch);
NEXT_PASS (pass_lower_complex);
NEXT_PASS (pass_sra);
@@ -275,6 +276,7 @@ along with GCC; see the file COPYING3. If not see
/* Before loop_init we rewrite no longer addressed locals into SSA
form if possible. */
NEXT_PASS (pass_tree_loop_init);
+ //NEXT_PASS (pass_loop_histogram_versioning);
NEXT_PASS (pass_tree_unswitch);
NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_loop_split);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 6cdaed7d4b2..7141aabb6fa 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -373,6 +373,7 @@ extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_versioning (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_loop_histogram_versioning (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_linterchange (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-05-01 19:52 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-01 19:52 [gcc(refs/users/kubaneko/heads/histogram)] Add histogram based loop versioning Jan Hubicka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).