* Make ipa-inline-analysis to compute expected runtime without specialization
@ 2017-04-30 17:59 Jan Hubicka
0 siblings, 0 replies; only message in thread
From: Jan Hubicka @ 2017-04-30 17:59 UTC (permalink / raw)
To: gcc-patches
Hi,
this patch fixes the underlying issue of PR 79224. Both inliner and ipa-cp is trying
to maximize performance benefit at a given code size expense. The problem is that
perfomrance benefit is computed by comparing estimated time of offline function to
inline/specialized time. This is however not realistic, becaue offline function time
is computed with all parameters being unknown. For example if body has
if (param)
do_something_difficult
the offline copy will not do the difficult work and the benefits of specialization
for param==0 is only elimination of param test and possibly cheaper prologue/
less of register pressure.
This patch makes ipa-analysis to be able to compute epxected runtime of unspecialized
function and turns inliner heuristics to use it as a base for its metrics. This is
quite important change so re-tunning of inliner parameters will need to follow.
I would be thus interested in any humanly analyzable testcases which regress because
of this ;)
Bootstrapped/regtested x86_64-linux, comitted.
Honza
PR ipa/79224
* ipa-inline-analysis.c (dump_predicate): Add optional parameter NL.
(account_size_time): Use two predicates - exec_pred and
nonconst_pred_ptr.
(evaluate_conditions_for_known_args): Compute both clause and
nonspec_clause.
(evaluate_properties_for_edge): Evaulate both clause and nonspec_clause.
(inline_summary_t::duplicate): Update.
(estimate_function_body_sizes): Caluculate exec and nonconst predicates
separately.
(compute_inline_parameters): Likewise.
(estimate_edge_size_and_time): Update caluclation of time.
(estimate_node_size_and_time): Compute both time and nonspecialized
time.
(estimate_ipcp_clone_size_and_time): Update.
(inline_merge_summary): Update.
(do_estimate_edge_time): Update.
(do_estimate_edge_size): Update.
(do_estimate_edge_hints): Update.
(inline_read_section, inline_write_summary): Stream both new predicates.
* ipa-inline.c (compute_uninlined_call_time): Take uninlined_call_time
as argument.
(compute_inlined_call_time): Cleanup.
(big_speedup_p): Update.
(edge_badness): Update.
* ipa-inline.h (INLINE_TIME_SCALE): Remove.
(size_time_entry): Replace predicate by exec_predicate and
nonconst_predicate.
(edge_growth_cache_entry): Cache both time nad nonspecialized time.
(estimate_edge_time): Return also nonspec_time.
(reset_edge_growth_cache): Update.
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c (revision 247380)
+++ ipa-inline-analysis.c (working copy)
@@ -585,10 +585,12 @@ dump_clause (FILE *f, conditions conds,
}
-/* Dump predicate PREDICATE. */
+/* Dump PREDICATE to F. CONDS a vector of conditions used when evauating
+ predicats. When NL is true new line is output at the end of dump. */
static void
-dump_predicate (FILE *f, conditions conds, struct predicate *pred)
+dump_predicate (FILE *f, conditions conds, struct predicate *pred,
+ bool nl = true)
{
int i;
if (true_predicate_p (pred))
@@ -600,7 +602,8 @@ dump_predicate (FILE *f, conditions cond
fprintf (f, " && ");
dump_clause (f, conds, pred->clause[i]);
}
- fprintf (f, "\n");
+ if (nl)
+ fprintf (f, "\n");
}
@@ -660,17 +663,27 @@ dump_inline_hints (FILE *f, inline_hints
}
-/* Record SIZE and TIME under condition PRED into the inline summary. */
+/* Record SIZE and TIME to SUMMARY.
+ The accounted code will be executed when EXEC_PRED is true.
+ When NONCONST_PRED is false the code will evaulate to constant and
+ will get optimized out in specialized clones of the function. */
static void
account_size_time (struct inline_summary *summary, int size, sreal time,
- struct predicate *pred)
+ struct predicate *exec_pred,
+ struct predicate *nonconst_pred_ptr)
{
size_time_entry *e;
bool found = false;
int i;
+ struct predicate nonconst_pred;
- if (false_predicate_p (pred))
+ if (false_predicate_p (exec_pred))
+ return;
+
+ nonconst_pred = and_predicates (summary->conds, nonconst_pred_ptr, exec_pred);
+
+ if (false_predicate_p (&nonconst_pred))
return;
/* We need to create initial empty unconitional clause, but otherwie
@@ -681,7 +694,8 @@ account_size_time (struct inline_summary
gcc_assert (time >= 0);
for (i = 0; vec_safe_iterate (summary->entry, i, &e); i++)
- if (predicates_equal_p (&e->predicate, pred))
+ if (predicates_equal_p (&e->exec_predicate, exec_pred)
+ && predicates_equal_p (&e->nonconst_predicate, &nonconst_pred))
{
found = true;
break;
@@ -691,7 +705,7 @@ account_size_time (struct inline_summary
i = 0;
found = true;
e = &(*summary->entry)[0];
- gcc_assert (!e->predicate.clause[0]);
+ gcc_assert (!e->exec_predicate.clause[0]);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"\t\tReached limit on number of entries, "
@@ -700,17 +714,25 @@ account_size_time (struct inline_summary
if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
{
fprintf (dump_file,
- "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate:",
+ "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
((double) size) / INLINE_SIZE_SCALE,
- (time.to_double ()) / INLINE_TIME_SCALE, found ? "" : "new ");
- dump_predicate (dump_file, summary->conds, pred);
+ (time.to_double ()), found ? "" : "new ");
+ dump_predicate (dump_file, summary->conds, exec_pred, 0);
+ if (!predicates_equal_p (exec_pred, &nonconst_pred))
+ {
+ fprintf (dump_file, " nonconst:");
+ dump_predicate (dump_file, summary->conds, &nonconst_pred);
+ }
+ else
+ fprintf (dump_file, "\n");
}
if (!found)
{
struct size_time_entry new_entry;
new_entry.size = size;
new_entry.time = time;
- new_entry.predicate = *pred;
+ new_entry.exec_predicate = *exec_pred;
+ new_entry.nonconst_predicate = nonconst_pred;
vec_safe_push (summary->entry, new_entry);
}
else
@@ -795,21 +817,33 @@ set_hint_predicate (struct predicate **p
}
-/* KNOWN_VALS is partial mapping of parameters of NODE to constant values.
+/* Compute what conditions may or may not hold given invormation about
+ parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
+ whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
+ copy when called in a given context. It is a bitmask of conditions. Bit
+ 0 means that condition is known to be false, while bit 1 means that condition
+ may or may not be true. These differs - for example NOT_INLINED condition
+ is always false in the second and also builtin_constant_p tests can not use
+ the fact that parameter is indeed a constant.
+
+ KNOWN_VALS is partial mapping of parameters of NODE to constant values.
KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
Return clause of possible truths. When INLINE_P is true, assume that we are
inlining.
ERROR_MARK means compile time invariant. */
-static clause_t
+static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
bool inline_p,
vec<tree> known_vals,
vec<ipa_agg_jump_function_p>
- known_aggs)
+ known_aggs,
+ clause_t *ret_clause,
+ clause_t *ret_nonspec_clause)
{
clause_t clause = inline_p ? 0 : 1 << predicate_not_inlined_condition;
+ clause_t nonspec_clause = 1 << predicate_not_inlined_condition;
struct inline_summary *info = inline_summaries->get (node);
int i;
struct condition *c;
@@ -828,6 +862,7 @@ evaluate_conditions_for_known_args (stru
if (c->operand_num >= (int) known_vals.length ())
{
clause |= 1 << (i + predicate_first_dynamic_condition);
+ nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
@@ -859,18 +894,26 @@ evaluate_conditions_for_known_args (stru
if (!val)
{
clause |= 1 << (i + predicate_first_dynamic_condition);
+ nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
if (c->code == CHANGED)
- continue;
+ {
+ nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
+ continue;
+ }
if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
{
clause |= 1 << (i + predicate_first_dynamic_condition);
+ nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
if (c->code == IS_NOT_CONSTANT)
- continue;
+ {
+ nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
+ continue;
+ }
val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
res = val
@@ -881,8 +924,11 @@ evaluate_conditions_for_known_args (stru
continue;
clause |= 1 << (i + predicate_first_dynamic_condition);
+ nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
}
- return clause;
+ *ret_clause = clause;
+ if (ret_nonspec_clause)
+ *ret_nonspec_clause = nonspec_clause;
}
@@ -890,7 +936,7 @@ evaluate_conditions_for_known_args (stru
static void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
- clause_t *clause_ptr,
+ clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
vec<tree> *known_vals_ptr,
vec<ipa_polymorphic_call_context>
*known_contexts_ptr,
@@ -976,9 +1022,9 @@ evaluate_properties_for_edge (struct cgr
}
}
- if (clause_ptr)
- *clause_ptr = evaluate_conditions_for_known_args (callee, inline_p,
- known_vals, known_aggs);
+ evaluate_conditions_for_known_args (callee, inline_p,
+ known_vals, known_aggs, clause_ptr,
+ nonspec_clause_ptr);
if (known_vals_ptr)
*known_vals_ptr = known_vals;
@@ -1172,12 +1218,16 @@ inline_summary_t::duplicate (cgraph_node
}
}
}
- possible_truths = evaluate_conditions_for_known_args (dst, false,
- known_vals,
- vNULL);
+ evaluate_conditions_for_known_args (dst, false,
+ known_vals,
+ vNULL,
+ &possible_truths,
+ /* We are going to specialize,
+ so ignore nonspec truths. */
+ NULL);
known_vals.release ();
- account_size_time (info, 0, 0, &true_pred);
+ account_size_time (info, 0, 0, &true_pred, &true_pred);
/* Remap size_time vectors.
Simplify the predicate by prunning out alternatives that are known
@@ -1186,14 +1236,21 @@ inline_summary_t::duplicate (cgraph_node
to be true. */
for (i = 0; vec_safe_iterate (entry, i, &e); i++)
{
- struct predicate new_predicate;
- new_predicate = remap_predicate_after_duplication (&e->predicate,
+ struct predicate new_exec_pred;
+ struct predicate new_nonconst_pred;
+ new_exec_pred = remap_predicate_after_duplication (&e->exec_predicate,
possible_truths,
info);
- if (false_predicate_p (&new_predicate))
+ new_nonconst_pred
+ = remap_predicate_after_duplication (&e->nonconst_predicate,
+ possible_truths,
+ info);
+ if (false_predicate_p (&new_exec_pred)
+ || false_predicate_p (&new_nonconst_pred))
optimized_out_size += e->size;
else
- account_size_time (info, e->size, e->time, &new_predicate);
+ account_size_time (info, e->size, e->time, &new_exec_pred,
+ &new_nonconst_pred);
}
/* Remap edge predicates with the same simplification as above.
@@ -1439,10 +1496,21 @@ dump_inline_summary (FILE *f, struct cgr
fprintf (f, " In SCC: %i\n", (int) s->scc_no);
for (i = 0; vec_safe_iterate (s->entry, i, &e); i++)
{
- fprintf (f, " size:%f, time:%f, predicate:",
+ fprintf (f, " size:%f, time:%f",
(double) e->size / INLINE_SIZE_SCALE,
- e->time.to_double () / INLINE_TIME_SCALE);
- dump_predicate (f, s->conds, &e->predicate);
+ e->time.to_double ());
+ if (!true_predicate_p (&e->exec_predicate))
+ {
+ fprintf (f, ", executed if:");
+ dump_predicate (f, s->conds, &e->exec_predicate, 0);
+ }
+ if (!predicates_equal_p (&e->exec_predicate,
+ &e->nonconst_predicate))
+ {
+ fprintf (f, ", nonconst if:");
+ dump_predicate (f, s->conds, &e->nonconst_predicate, 0);
+ }
+ fprintf (f, "\n");
}
if (s->loop_iterations)
{
@@ -2585,10 +2653,11 @@ estimate_function_body_sizes (struct cgr
/* When we run into maximal number of entries, we assign everything to the
constant truth case. Be sure to have it in list. */
bb_predicate = true_predicate ();
- account_size_time (info, 0, 0, &bb_predicate);
+ account_size_time (info, 0, 0, &bb_predicate, &bb_predicate);
bb_predicate = not_inlined_predicate ();
- account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate);
+ account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate,
+ &bb_predicate);
if (fbi.info)
compute_bb_predicates (&fbi, node, info);
@@ -2746,10 +2815,10 @@ estimate_function_body_sizes (struct cgr
will_be_nonconstant
= will_be_nonconstant_predicate (&fbi, info,
stmt, nonconstant_names);
+ else
+ will_be_nonconstant = true_predicate ();
if (this_time || this_size)
{
- struct predicate p;
-
this_time *= freq;
prob = eliminated_by_inlining_prob (stmt);
@@ -2759,15 +2828,15 @@ estimate_function_body_sizes (struct cgr
if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
- if (fbi.info)
- p = and_predicates (info->conds, &bb_predicate,
- &will_be_nonconstant);
- else
- p = true_predicate ();
-
- if (!false_predicate_p (&p)
- || (is_gimple_call (stmt)
- && !false_predicate_p (&bb_predicate)))
+ struct predicate p = and_predicates (info->conds, &bb_predicate,
+ &will_be_nonconstant);
+
+ /* We can ignore statement when we proved it is never going
+ to happen, but we can not do that for call statements
+ because edges are accounted specially. */
+
+ if (!false_predicate_p (is_gimple_call (stmt)
+ ? &bb_predicate : &p))
{
time += this_time;
size += this_size;
@@ -2781,13 +2850,18 @@ estimate_function_body_sizes (struct cgr
if (prob)
{
struct predicate ip = not_inlined_predicate ();
- ip = and_predicates (info->conds, &ip, &p);
+ ip = and_predicates (info->conds, &ip, &bb_predicate);
account_size_time (info, this_size * prob,
- this_time * prob, &ip);
+ (sreal)(this_time * prob)
+ / (CGRAPH_FREQ_BASE * 2), &ip,
+ &p);
}
if (prob != 2)
account_size_time (info, this_size * (2 - prob),
- this_time * (2 - prob), &p);
+ (sreal)(this_time * (2 - prob))
+ / (CGRAPH_FREQ_BASE * 2),
+ &bb_predicate,
+ &p);
}
if (!info->fp_expressions && fp_expression_p (stmt))
@@ -2969,9 +3043,9 @@ compute_inline_parameters (struct cgraph
es->call_stmt_size = eni_size_weights.call_cost;
es->call_stmt_time = eni_time_weights.call_cost;
account_size_time (info, INLINE_SIZE_SCALE * 2,
- INLINE_TIME_SCALE * 2, &t);
+ 2, &t, &t);
t = not_inlined_predicate ();
- account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t);
+ account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t, &t);
inline_update_overall_summary (node);
info->self_size = info->size;
info->self_time = info->time;
@@ -3048,7 +3122,7 @@ compute_inline_parameters (struct cgraph
if (flag_checking)
{
inline_update_overall_summary (node);
- gcc_assert (info->time == info->self_time
+ gcc_assert (!(info->time - info->self_time).to_int ()
&& info->size == info->self_size);
}
}
@@ -3174,8 +3248,11 @@ estimate_edge_size_and_time (struct cgra
*size += cur_size;
if (min_size)
*min_size += cur_size;
- *time += call_time * prob / REG_BR_PROB_BASE
- * e->frequency * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE);
+ if (prob == REG_BR_PROB_BASE)
+ *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
+ else
+ *time += ((sreal)call_time) * (prob * e->frequency)
+ / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
}
@@ -3257,10 +3334,13 @@ estimate_calls_size_and_time (struct cgr
static void
estimate_node_size_and_time (struct cgraph_node *node,
clause_t possible_truths,
+ clause_t nonspec_possible_truths,
vec<tree> known_vals,
vec<ipa_polymorphic_call_context> known_contexts,
vec<ipa_agg_jump_function_p> known_aggs,
- int *ret_size, int *ret_min_size, sreal *ret_time,
+ int *ret_size, int *ret_min_size,
+ sreal *ret_time,
+ sreal *ret_nonspecialized_time,
inline_hints *ret_hints,
vec<inline_param_summary>
inline_param_summary)
@@ -3292,31 +3372,57 @@ estimate_node_size_and_time (struct cgra
}
}
+ estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
+ known_vals, known_contexts, known_aggs);
+ sreal nonspecialized_time = time;
+
for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
- if (evaluate_predicate (&e->predicate, possible_truths))
- {
- size += e->size;
- gcc_checking_assert (e->time >= 0);
- gcc_checking_assert (time >= 0);
- if (!inline_param_summary.exists ())
- time += e->time;
- else
- {
- int prob = predicate_probability (info->conds,
- &e->predicate,
- possible_truths,
- inline_param_summary);
- gcc_checking_assert (prob >= 0);
- gcc_checking_assert (prob <= REG_BR_PROB_BASE);
- time += e->time * prob / REG_BR_PROB_BASE;
- }
- gcc_checking_assert (time >= 0);
+ {
+ bool nonconst = evaluate_predicate (&e->nonconst_predicate,
+ possible_truths);
+ bool exec = evaluate_predicate (&e->exec_predicate,
+ nonspec_possible_truths);
+ gcc_assert (!nonconst || exec);
+ if (exec)
+ {
+ gcc_checking_assert (e->time >= 0);
+ gcc_checking_assert (time >= 0);
- }
- gcc_checking_assert (true_predicate_p (&(*info->entry)[0].predicate));
+ /* We compute specialized size only because size of nonspecialized
+ copy is context independent.
+
+ The difference between nonspecialized execution and specialized is
+ that nonspecialized is not going to have optimized out computations
+ known to be constant in a specialized setting. */
+ if (nonconst)
+ size += e->size;
+ nonspecialized_time += e->time;
+ if (!nonconst)
+ ;
+ else if (!inline_param_summary.exists ())
+ {
+ if (nonconst)
+ time += e->time;
+ }
+ else
+ {
+ int prob = predicate_probability (info->conds,
+ &e->nonconst_predicate,
+ possible_truths,
+ inline_param_summary);
+ gcc_checking_assert (prob >= 0);
+ gcc_checking_assert (prob <= REG_BR_PROB_BASE);
+ time += e->time * prob / REG_BR_PROB_BASE;
+ }
+ gcc_checking_assert (time >= 0);
+ }
+ }
+ gcc_checking_assert (true_predicate_p (&(*info->entry)[0].exec_predicate));
+ gcc_checking_assert (true_predicate_p (&(*info->entry)[0].nonconst_predicate));
min_size = (*info->entry)[0].size;
gcc_checking_assert (size >= 0);
gcc_checking_assert (time >= 0);
+ gcc_checking_assert (nonspecialized_time >= time);
if (info->loop_iterations
&& !evaluate_predicate (info->loop_iterations, possible_truths))
@@ -3332,18 +3438,16 @@ estimate_node_size_and_time (struct cgra
if (DECL_DECLARED_INLINE_P (node->decl))
hints |= INLINE_HINT_declared_inline;
- estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
- known_vals, known_contexts, known_aggs);
- gcc_checking_assert (size >= 0);
- gcc_checking_assert (time >= 0);
- time = time / INLINE_TIME_SCALE;
size = RDIV (size, INLINE_SIZE_SCALE);
min_size = RDIV (min_size, INLINE_SIZE_SCALE);
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "\n size:%i time:%f\n", (int) size, time.to_double ());
+ fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
+ time.to_double (), nonspecialized_time.to_double ());
if (ret_time)
*ret_time = time;
+ if (ret_nonspecialized_time)
+ *ret_nonspecialized_time = nonspecialized_time;
if (ret_size)
*ret_size = size;
if (ret_min_size)
@@ -3368,12 +3472,15 @@ estimate_ipcp_clone_size_and_time (struc
int *ret_size, sreal *ret_time,
inline_hints *hints)
{
- clause_t clause;
+ clause_t clause, nonspec_clause;
+ sreal nonspec_time;
- clause = evaluate_conditions_for_known_args (node, false, known_vals,
- known_aggs);
- estimate_node_size_and_time (node, clause, known_vals, known_contexts,
- known_aggs, ret_size, NULL, ret_time, hints, vNULL);
+ evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
+ &clause, &nonspec_clause);
+ estimate_node_size_and_time (node, clause, nonspec_clause,
+ known_vals, known_contexts,
+ known_aggs, ret_size, NULL, ret_time,
+ &nonspec_time, hints, vNULL);
}
/* Translate all conditions from callee representation into caller
@@ -3645,7 +3752,7 @@ inline_merge_summary (struct cgraph_edge
struct cgraph_node *to = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to : edge->caller);
struct inline_summary *info = inline_summaries->get (to);
- clause_t clause = 0; /* not_inline is known to be false. */
+ clause_t clause = 0; /* not_inline is known to be false. */
size_time_entry *e;
vec<int> operand_map = vNULL;
vec<int> offset_map = vNULL;
@@ -3662,7 +3769,7 @@ inline_merge_summary (struct cgraph_edge
info->fp_expressions |= callee_info->fp_expressions;
if (callee_info->conds)
- evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL);
+ evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
if (ipa_node_params_sum && callee_info->conds)
{
struct ipa_edge_args *args = IPA_EDGE_REF (edge);
@@ -3705,14 +3812,19 @@ inline_merge_summary (struct cgraph_edge
for (i = 0; vec_safe_iterate (callee_info->entry, i, &e); i++)
{
struct predicate p = remap_predicate (info, callee_info,
- &e->predicate, operand_map,
+ &e->exec_predicate, operand_map,
offset_map, clause,
&toplev_predicate);
- if (!false_predicate_p (&p))
+ struct predicate nonconstp
+ = remap_predicate (info, callee_info,
+ &e->nonconst_predicate, operand_map,
+ offset_map, clause,
+ &toplev_predicate);
+ if (!false_predicate_p (&p) && !false_predicate_p (&nonconstp))
{
- sreal add_time = e->time * edge->frequency / CGRAPH_FREQ_BASE;
+ sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
int prob = predicate_probability (callee_info->conds,
- &e->predicate,
+ &e->nonconst_predicate,
clause, es->param);
add_time = add_time * prob / REG_BR_PROB_BASE;
if (prob != REG_BR_PROB_BASE
@@ -3721,7 +3833,7 @@ inline_merge_summary (struct cgraph_edge
fprintf (dump_file, "\t\tScaling time by probability:%f\n",
(double) prob / REG_BR_PROB_BASE);
}
- account_size_time (info, e->size, add_time, &p);
+ account_size_time (info, e->size, add_time, &p, &nonconstp);
}
}
remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
@@ -3761,13 +3873,13 @@ inline_update_overall_summary (struct cg
info->time = 0;
for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
{
- info->size += e->size, info->time += e->time;
+ info->size += e->size;
+ info->time += e->time;
}
estimate_calls_size_and_time (node, &info->size, &info->min_size,
&info->time, NULL,
~(clause_t) (1 << predicate_false_condition),
vNULL, vNULL, vNULL);
- info->time = info->time / INLINE_TIME_SCALE;
info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
}
@@ -3803,11 +3915,11 @@ simple_edge_hints (struct cgraph_edge *e
sreal
do_estimate_edge_time (struct cgraph_edge *edge)
{
- sreal time;
+ sreal time, nonspec_time;
int size;
inline_hints hints;
struct cgraph_node *callee;
- clause_t clause;
+ clause_t clause, nonspec_clause;
vec<tree> known_vals;
vec<ipa_polymorphic_call_context> known_contexts;
vec<ipa_agg_jump_function_p> known_aggs;
@@ -3818,10 +3930,11 @@ do_estimate_edge_time (struct cgraph_edg
gcc_checking_assert (edge->inline_failed);
evaluate_properties_for_edge (edge, true,
- &clause, &known_vals, &known_contexts,
- &known_aggs);
- estimate_node_size_and_time (callee, clause, known_vals, known_contexts,
- known_aggs, &size, &min_size, &time, &hints, es->param);
+ &clause, &nonspec_clause, &known_vals,
+ &known_contexts, &known_aggs);
+ estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
+ known_contexts, known_aggs, &size, &min_size,
+ &time, &nonspec_time, &hints, es->param);
/* When we have profile feedback, we can quite safely identify hot
edges and for those we disable size limits. Don't do that when
@@ -3846,6 +3959,7 @@ do_estimate_edge_time (struct cgraph_edg
if ((int) edge_growth_cache.length () <= edge->uid)
edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
edge_growth_cache[edge->uid].time = time;
+ edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
edge_growth_cache[edge->uid].size = size + (size >= 0);
hints |= simple_edge_hints (edge);
@@ -3863,7 +3977,7 @@ do_estimate_edge_size (struct cgraph_edg
{
int size;
struct cgraph_node *callee;
- clause_t clause;
+ clause_t clause, nonspec_clause;
vec<tree> known_vals;
vec<ipa_polymorphic_call_context> known_contexts;
vec<ipa_agg_jump_function_p> known_aggs;
@@ -3883,10 +3997,12 @@ do_estimate_edge_size (struct cgraph_edg
/* Early inliner runs without caching, go ahead and do the dirty work. */
gcc_checking_assert (edge->inline_failed);
evaluate_properties_for_edge (edge, true,
- &clause, &known_vals, &known_contexts,
+ &clause, &nonspec_clause,
+ &known_vals, &known_contexts,
&known_aggs);
- estimate_node_size_and_time (callee, clause, known_vals, known_contexts,
- known_aggs, &size, NULL, NULL, NULL, vNULL);
+ estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
+ known_contexts, known_aggs, &size, NULL, NULL,
+ NULL, NULL, vNULL);
known_vals.release ();
known_contexts.release ();
known_aggs.release ();
@@ -3902,7 +4018,7 @@ do_estimate_edge_hints (struct cgraph_ed
{
inline_hints hints;
struct cgraph_node *callee;
- clause_t clause;
+ clause_t clause, nonspec_clause;
vec<tree> known_vals;
vec<ipa_polymorphic_call_context> known_contexts;
vec<ipa_agg_jump_function_p> known_aggs;
@@ -3922,10 +4038,12 @@ do_estimate_edge_hints (struct cgraph_ed
/* Early inliner runs without caching, go ahead and do the dirty work. */
gcc_checking_assert (edge->inline_failed);
evaluate_properties_for_edge (edge, true,
- &clause, &known_vals, &known_contexts,
+ &clause, &nonspec_clause,
+ &known_vals, &known_contexts,
&known_aggs);
- estimate_node_size_and_time (callee, clause, known_vals, known_contexts,
- known_aggs, NULL, NULL, NULL, &hints, vNULL);
+ estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
+ known_contexts, known_aggs, NULL, NULL,
+ NULL, NULL, &hints, vNULL);
known_vals.release ();
known_contexts.release ();
known_aggs.release ();
@@ -4304,7 +4422,8 @@ inline_read_section (struct lto_file_dec
e.size = streamer_read_uhwi (&ib);
e.time = sreal::stream_in (&ib);
- e.predicate = read_predicate (&ib);
+ e.exec_predicate = read_predicate (&ib);
+ e.nonconst_predicate = read_predicate (&ib);
vec_safe_push (info->entry, e);
}
@@ -4463,7 +4582,8 @@ inline_write_summary (void)
{
streamer_write_uhwi (ob, e->size);
e->time.stream_out (ob);
- write_predicate (ob, &e->predicate);
+ write_predicate (ob, &e->exec_predicate);
+ write_predicate (ob, &e->nonconst_predicate);
}
write_predicate (ob, info->loop_iterations);
write_predicate (ob, info->loop_stride);
Index: ipa-inline.c
===================================================================
--- ipa-inline.c (revision 247380)
+++ ipa-inline.c (working copy)
@@ -639,10 +639,9 @@ want_early_inline_function_p (struct cgr
does not happen. */
inline sreal
-compute_uninlined_call_time (struct inline_summary *callee_info,
- struct cgraph_edge *edge)
+compute_uninlined_call_time (struct cgraph_edge *edge,
+ sreal uninlined_call_time)
{
- sreal uninlined_call_time = (sreal)callee_info->time;
cgraph_node *caller = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller);
@@ -677,12 +676,10 @@ compute_inlined_call_time (struct cgraph
else
time = time >> 11;
- /* This calculation should match one in ipa-inline-analysis.
- FIXME: Once ipa-inline-analysis is converted to sreal this can be
- simplified. */
- time -= (sreal) ((gcov_type) edge->frequency
- * inline_edge_summary (edge)->call_stmt_time
- * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
+ /* This calculation should match one in ipa-inline-analysis.c
+ (estimate_edge_size_and_time). */
+ time -= (sreal) edge->frequency
+ * inline_edge_summary (edge)->call_stmt_time / CGRAPH_FREQ_BASE;
time += caller_time;
if (time <= 0)
time = ((sreal) 1) >> 8;
@@ -696,12 +693,13 @@ compute_inlined_call_time (struct cgraph
static bool
big_speedup_p (struct cgraph_edge *e)
{
- sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
- e);
- sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
+ sreal unspec_time;
+ sreal spec_time = estimate_edge_time (e, &unspec_time);
+ sreal time = compute_uninlined_call_time (e, unspec_time);
+ sreal inlined_time = compute_inlined_call_time (e, spec_time);
if (time - inlined_time
- > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
+ > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))
* percent_rec)
return true;
return false;
@@ -1011,7 +1009,7 @@ edge_badness (struct cgraph_edge *edge,
{
sreal badness;
int growth;
- sreal edge_time;
+ sreal edge_time, unspec_edge_time;
struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
struct inline_summary *callee_info = inline_summaries->get (callee);
inline_hints hints;
@@ -1020,12 +1018,11 @@ edge_badness (struct cgraph_edge *edge,
: edge->caller);
growth = estimate_edge_growth (edge);
- edge_time = estimate_edge_time (edge);
+ edge_time = estimate_edge_time (edge, &unspec_edge_time);
hints = estimate_edge_hints (edge);
gcc_checking_assert (edge_time >= 0);
- /* FIXME: -1 to care of rounding issues should go away once cache is migrated.
- to sreals. */
- gcc_checking_assert (edge_time <= callee_info->time);
+ /* Check that inlined time is better, but tolerate some roundoff issues. */
+ gcc_checking_assert ((edge_time - callee_info->time).to_int () <= 0);
gcc_checking_assert (growth <= callee_info->size);
if (dump)
@@ -1035,9 +1032,10 @@ edge_badness (struct cgraph_edge *edge,
edge->caller->order,
xstrdup_for_dump (callee->name ()),
edge->callee->order);
- fprintf (dump_file, " size growth %i, time %f ",
+ fprintf (dump_file, " size growth %i, time %f unspec %f ",
growth,
- edge_time.to_double ());
+ edge_time.to_double (),
+ unspec_edge_time.to_double ());
dump_inline_hints (dump_file, hints);
if (big_speedup_p (edge))
fprintf (dump_file, " big_speedup");
@@ -1076,7 +1074,7 @@ edge_badness (struct cgraph_edge *edge,
sreal numerator, denominator;
int overall_growth;
- numerator = (compute_uninlined_call_time (callee_info, edge)
+ numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
- compute_inlined_call_time (edge, edge_time));
if (numerator == 0)
numerator = ((sreal) 1 >> 8);
@@ -1162,13 +1160,14 @@ edge_badness (struct cgraph_edge *edge,
fprintf (dump_file,
" %f: guessed profile. frequency %f, count %" PRId64
" caller count %" PRId64
- " time w/o inlining %f, time w/ inlining %f"
+ " time w/o inlining %f, time with inlining %f"
" overall growth %i (current) %i (original)"
" %i (compensated)\n",
badness.to_double (),
(double)edge->frequency / CGRAPH_FREQ_BASE,
edge->count, caller->count,
- compute_uninlined_call_time (callee_info, edge).to_double (),
+ compute_uninlined_call_time (edge,
+ unspec_edge_time).to_double (),
compute_inlined_call_time (edge, edge_time).to_double (),
estimate_growth (callee),
callee_info->growth, overall_growth);
@@ -2056,8 +2055,9 @@ inline_small_functions (void)
if (dump_file)
{
fprintf (dump_file,
- " Inlined into %s which now has time %f and size %i, "
+ " Inlined %s into %s which now has time %f and size %i, "
"net change of %+i.\n",
+ edge->callee->name (),
edge->caller->name (),
inline_summaries->get (edge->caller)->time.to_double (),
inline_summaries->get (edge->caller)->size,
Index: ipa-inline.h
===================================================================
--- ipa-inline.h (revision 247380)
+++ ipa-inline.h (working copy)
@@ -103,13 +103,16 @@ struct GTY(()) predicate
context. We keep simple array of record, every containing of predicate
and time/size to account.
- We keep values scaled up, so fractional sizes and times can be
- accounted. */
+ We keep values scaled up, so fractional sizes can be accounted. */
#define INLINE_SIZE_SCALE 2
-#define INLINE_TIME_SCALE (CGRAPH_FREQ_BASE * 2)
struct GTY(()) size_time_entry
{
- struct predicate predicate;
+ /* Predicate for code to be executed. */
+ struct predicate exec_predicate;
+ /* Predicate for value to be constant and optimized out in a specialized copy.
+ When deciding on specialization this makes it possible to see how much
+ the executed code paths will simplify. */
+ struct predicate nonconst_predicate;
int size;
sreal GTY((skip)) time;
};
@@ -230,9 +233,11 @@ struct inline_edge_summary
typedef struct inline_edge_summary inline_edge_summary_t;
extern vec<inline_edge_summary_t> inline_edge_summary_vec;
+/* Data we cache about callgraph edges during inlining to avoid expensive
+ re-computations during the greedy algorithm. */
struct edge_growth_cache_entry
{
- sreal time;
+ sreal time, nonspec_time;
int size;
inline_hints hints;
};
@@ -315,12 +320,14 @@ estimate_edge_growth (struct cgraph_edge
EDGE. */
static inline sreal
-estimate_edge_time (struct cgraph_edge *edge)
+estimate_edge_time (struct cgraph_edge *edge, sreal *nonspec_time = NULL)
{
sreal ret;
if ((int)edge_growth_cache.length () <= edge->uid
|| !edge_growth_cache[edge->uid].size)
return do_estimate_edge_time (edge);
+ if (nonspec_time)
+ *nonspec_time = edge_growth_cache[edge->uid].nonspec_time;
return edge_growth_cache[edge->uid].time;
}
@@ -345,7 +352,7 @@ reset_edge_growth_cache (struct cgraph_e
{
if ((int)edge_growth_cache.length () > edge->uid)
{
- struct edge_growth_cache_entry zero = {0, 0, 0};
+ struct edge_growth_cache_entry zero = {0, 0, 0, 0};
edge_growth_cache[edge->uid] = zero;
}
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2017-04-30 15:45 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-30 17:59 Make ipa-inline-analysis to compute expected runtime without specialization Jan Hubicka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).