Index: gcc/cgraphbuild.c
===================================================================
--- gcc/cgraphbuild.c (revision 191813)
+++ gcc/cgraphbuild.c (working copy)
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see
#include "except.h"
#include "l-ipo.h"
#include "ipa-inline.h"
+#include "auto-profile.h"
/* Context of record_reference. */
struct record_reference_ctx
@@ -497,6 +498,9 @@ build_cgraph_edges (void)
tree decl;
unsigned ix;
+ if (flag_auto_profile)
+ afdo_set_current_function_count ();
+
/* Create the callgraph edges and record the nodes referenced by the function.
body. */
FOR_EACH_BB (bb)
@@ -607,8 +611,9 @@ rebuild_cgraph_edges (void)
cgraph_node_remove_callees (node);
ipa_remove_all_references (&node->ref_list);
- node->count = ENTRY_BLOCK_PTR->count;
- node->max_bb_count = 0;
+ if (!flag_auto_profile)
+ node->count = ENTRY_BLOCK_PTR->count;
+ node->max_bb_count = node->count;
FOR_EACH_BB (bb)
{
Index: gcc/cgraph.c
===================================================================
--- gcc/cgraph.c (revision 191813)
+++ gcc/cgraph.c (working copy)
@@ -101,6 +101,7 @@ The callgraph:
#include "l-ipo.h"
#include "ipa-inline.h"
#include "opts.h"
+#include "auto-profile.h"
const char * const ld_plugin_symbol_resolution_names[]=
{
@@ -2183,10 +2184,17 @@ cgraph_clone_node (struct cgraph_node *n, tree dec
new_node->count = count;
new_node->max_bb_count = count;
if (n->count)
- new_node->max_bb_count = ((n->max_bb_count + n->count / 2)
- / n->count) * count;
+ {
+ new_node->max_bb_count = ((n->max_bb_count + n->count / 2)
+ / n->count) * count;
+ new_node->total_count = ((n->total_count + n->count / 2)
+ / n->count) * count;
+ }
new_node->is_versioned_clone = n->is_versioned_clone;
new_node->frequency = n->frequency;
+ if (flag_auto_profile && count > 0
+ && new_node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+ new_node->frequency = NODE_FREQUENCY_NORMAL;
new_node->clone = n->clone;
new_node->clone.tree_map = 0;
if (n->count)
@@ -2268,6 +2276,9 @@ clone_function_name (tree decl, const char *suffix
prefix[len] = '_';
#endif
ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++);
+ if (flag_auto_profile)
+ afdo_add_bfd_name_mapping (xstrdup (tmp_name),
+ xstrdup (lang_hooks.dwarf_name (decl, 0)));
return get_identifier (tmp_name);
}
Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h (revision 191813)
+++ gcc/cgraph.h (working copy)
@@ -198,6 +198,8 @@ struct GTY((chain_next ("%h.next"), chain_prev ("%
/* Expected number of executions: calculated in profile.c. */
gcov_type count;
+ /* Total sampled count of the function. */
+ gcov_type total_count;
/* Maximum count of any basic block in the function. */
gcov_type max_bb_count;
/* How to scale counts at materialization time; used to merge
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h (revision 191813)
+++ gcc/tree-pass.h (working copy)
@@ -465,6 +465,7 @@ extern struct gimple_opt_pass pass_tree_convert_bu
extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
extern struct simple_ipa_opt_pass pass_ipa_tree_profile;
+extern struct simple_ipa_opt_pass pass_ipa_auto_profile;
extern struct simple_ipa_opt_pass pass_early_local_passes;
Index: gcc/cfghooks.c
===================================================================
--- gcc/cfghooks.c (revision 191813)
+++ gcc/cfghooks.c (working copy)
@@ -775,6 +775,19 @@ make_forwarder_block (basic_block bb, bool (*redir
}
}
+ if (flag_auto_profile)
+ {
+ dummy->frequency = 0;
+ dummy->count = 0;
+ for (ei = ei_start (dummy->preds); (e = ei_safe_edge (ei)); ei_next (&ei))
+ {
+ dummy->frequency += EDGE_FREQUENCY (e);
+ dummy->count += e->count;
+ }
+ if (dummy->frequency > REG_BR_PROB_BASE)
+ dummy->frequency = REG_BR_PROB_BASE;
+ }
+
if (dom_info_available_p (CDI_DOMINATORS))
{
VEC (basic_block, heap) *doms_to_fix = VEC_alloc (basic_block, heap, 2);
Index: gcc/ipa-inline-transform.c
===================================================================
--- gcc/ipa-inline-transform.c (revision 191813)
+++ gcc/ipa-inline-transform.c (working copy)
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-pass.h"
#include "l-ipo.h"
+#include "auto-profile.h"
#include "diagnostic-core.h"
int ncalls_inlined;
@@ -165,6 +166,16 @@ clone_inlined_nodes (struct cgraph_edge *e, bool d
*overall_size -= inline_summary (e->callee)->size;
nfunctions_inlined++;
}
+ if (flag_auto_profile)
+ {
+ e->callee->total_count = afdo_get_callsite_count (e);
+ /* For functions that does not exist in the profile-collection
+ build, we need to add the scale factor to the copy_scale
+ table. Because the cloned callee only has as scaled portion
+ of the counts from the actual callee. */
+ if (e->callee->total_count == 0)
+ afdo_add_copy_scale (e);
+ }
duplicate = false;
e->callee->local.externally_visible = false;
update_noncloned_frequencies (e->callee, e->frequency);
@@ -172,11 +183,46 @@ clone_inlined_nodes (struct cgraph_edge *e, bool d
else
{
struct cgraph_node *n;
+ gcov_type count = e->count;
+ gcov_type callsite_total_count = 0;
+ if (flag_auto_profile)
+ {
+ callsite_total_count = afdo_get_callsite_count (e);
+ /* If the callsite is inlined in the profile-collection build,
+ i.e. the cloned callee has its separate profile, we will use
+ this separate profile to annotate the callee, and the real
+ callee body will not be affected. Thus here we need to disable
+ update_original. */
+ if (callsite_total_count > 0)
+ update_original = false;
+ }
n = cgraph_clone_node (e->callee, e->callee->decl,
- e->count, e->frequency,
+ count, e->frequency,
update_original, NULL, true);
+ if (flag_auto_profile)
+ {
+ n->total_count = callsite_total_count;
+ if (callsite_total_count == 0)
+ afdo_add_copy_scale (e);
+ }
cgraph_redirect_edge_callee (e, n);
}
+ if (flag_auto_profile && e->callee->total_count > 0)
+ {
+ /* The callee's total count will be non-zero if the callsite
+ was inlined in the profile-collection build, In this case,
+ the original callee may be label unlikely_executed, which
+ may prevent its callees being inlined. Thus we need to reset
+ its frequency to normal. */
+ if (e->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
+ e->callee->frequency = NODE_FREQUENCY_NORMAL;
+ /* we do not have enough information to calculate the node count
+ and max_bb_count. Thus we set them to the same value to make
+ other optimizations aware that they are from cloned inline
+ instances. */
+ e->callee->count = e->callee->total_count;
+ e->callee->max_bb_count = e->callee->total_count;
+ }
}
if (e->caller->global.inlined_to)
Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c (revision 191813)
+++ gcc/toplev.c (working copy)
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-alias.h"
#include "plugin.h"
#include "tree-threadsafe-analyze.h"
+#include "auto-profile.h"
#if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO)
#include "dwarf2out.h"
@@ -700,6 +701,10 @@ compile_file (void)
}
#endif
+ /* Auto profile finalization. */
+ if (flag_auto_profile)
+ end_auto_profile ();
+
/* Invoke registered plugin callbacks. */
invoke_plugin_callbacks (PLUGIN_FINISH_UNIT, NULL);
@@ -1494,6 +1499,9 @@ process_options (void)
error ("target system does not support the \"%s\" debug format",
debug_type_names[write_symbols]);
+ if (flag_auto_profile && debug_info_level == DINFO_LEVEL_NONE)
+ debug_hooks = &auto_profile_debug_hooks;
+
/* We know which debug output will be used so we can set flag_var_tracking
and flag_var_tracking_uninit if the user has not specified them. */
if (debug_info_level < DINFO_LEVEL_NORMAL
Index: gcc/debug.h
===================================================================
--- gcc/debug.h (revision 191813)
+++ gcc/debug.h (working copy)
@@ -171,6 +171,7 @@ extern const struct gcc_debug_hooks sdb_debug_hook
extern const struct gcc_debug_hooks xcoff_debug_hooks;
extern const struct gcc_debug_hooks dwarf2_debug_hooks;
extern const struct gcc_debug_hooks vmsdbg_debug_hooks;
+extern const struct gcc_debug_hooks auto_profile_debug_hooks;
/* Dwarf2 frame information. */
Index: gcc/cgraphunit.c
===================================================================
--- gcc/cgraphunit.c (revision 191813)
+++ gcc/cgraphunit.c (working copy)
@@ -143,6 +143,7 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-utils.h"
#include "lto-streamer.h"
#include "l-ipo.h"
+#include "auto-profile.h"
static void cgraph_expand_all_functions (void);
static void cgraph_mark_functions_to_output (void);
@@ -1346,6 +1347,11 @@ cgraph_finalize_compilation_unit (void)
backend_entered_p = true;
+ /* Before compilation, auto profile will process the profile to build the
+ hash tables for later optimizations. */
+ if (flag_auto_profile)
+ process_auto_profile ();
+
/* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */
if (flag_lto)
lto_streamer_hooks_init ();
@@ -2517,6 +2523,7 @@ cgraph_copy_node_for_versioning (struct cgraph_nod
new_version->rtl = old_version->rtl;
new_version->reachable = true;
new_version->count = old_version->count;
+ new_version->total_count = old_version->total_count;
new_version->max_bb_count = old_version->max_bb_count;
new_version->is_versioned_clone = true;
Index: gcc/regs.h
===================================================================
--- gcc/regs.h (revision 191813)
+++ gcc/regs.h (working copy)
@@ -136,6 +136,7 @@ extern size_t reg_info_p_size;
frequency. */
#define REG_FREQ_FROM_BB(bb) (optimize_size \
|| (flag_branch_probabilities \
+ && !flag_auto_profile \
&& !ENTRY_BLOCK_PTR->count) \
? REG_FREQ_MAX \
: ((bb)->frequency * REG_FREQ_MAX / BB_FREQ_MAX)\
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h (revision 191813)
+++ gcc/gcov-io.h (working copy)
@@ -456,6 +456,9 @@ typedef unsigned HOST_WIDEST_INT gcov_type_unsigne
#define GCOV_TAG_PMU_BRANCH_MISPREDICT_LENGTH (8)
#define GCOV_TAG_PMU_TOOL_HEADER ((gcov_unsigned_t)0xa9000000)
#define GCOV_TAG_MODULE_INFO ((gcov_unsigned_t)0xab000000)
+#define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000)
+#define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000)
+#define GCOV_TAG_AFDO_MODULE_GROUPING ((gcov_unsigned_t)0xae000000)
#define GCOV_TAG_PMU_STRING_TABLE_ENTRY ((gcov_unsigned_t)0xad000000)
#define GCOV_TAG_PMU_STRING_TABLE_ENTRY_LENGTH(filename) \
(gcov_string_length (filename) + 1)
Index: gcc/ira-int.h
===================================================================
--- gcc/ira-int.h (revision 191813)
+++ gcc/ira-int.h (working copy)
@@ -44,7 +44,8 @@ along with GCC; see the file COPYING3. If not see
executed, frequency is always equivalent. Otherwise rescale the
edge frequency. */
#define REG_FREQ_FROM_EDGE_FREQ(freq) \
- (optimize_size || (flag_branch_probabilities && !ENTRY_BLOCK_PTR->count) \
+ (optimize_size || (flag_branch_probabilities \
+ && !flag_auto_profile && !ENTRY_BLOCK_PTR->count) \
? REG_FREQ_MAX : (freq * REG_FREQ_MAX / BB_FREQ_MAX) \
? (freq * REG_FREQ_MAX / BB_FREQ_MAX) : 1)
Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c (revision 191813)
+++ gcc/ipa-inline.c (working copy)
@@ -119,10 +119,12 @@ along with GCC; see the file COPYING3. If not see
#include "target.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
+#include "auto-profile.h"
/* Statistics we collect about inlining algorithm. */
static int overall_size;
static gcov_type max_count;
+static gcov_type max_total_count;
/* Global variable to denote if it is in ipa-inline pass. */
bool is_in_ipa_inline = false;
@@ -478,6 +480,9 @@ edge_hot_enough_p (struct cgraph_edge *edge)
{
if (cgraph_maybe_hot_edge_p (edge))
return true;
+ if (flag_auto_profile
+ && maybe_hot_count_p (afdo_get_callsite_count (edge)))
+ return true;
if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER)
&& maybe_hot_count_p (edge->caller->max_bb_count))
return true;
@@ -860,6 +865,16 @@ edge_badness (struct cgraph_edge *edge, bool dump)
((int)
((double) edge->count * INT_MIN / 2 / max_count / 512) *
relative_time_benefit (callee_info, edge, time_growth)) / growth;
+ if (flag_auto_profile && max_total_count > 0)
+ {
+ gcov_type afdo_badness =
+ ((int)
+ ((double) afdo_get_callsite_count (edge) * INT_MIN / 2 /
+ max_total_count / 512) *
+ relative_time_benefit (callee_info, edge, time_growth)) / growth;
+ if (afdo_badness < badness)
+ badness = afdo_badness;
+ }
/* Be sure that insanity of the profile won't lead to increasing counts
in the scalling and thus to overflow in the computation above. */
@@ -1445,6 +1460,7 @@ inline_small_functions (void)
metrics. */
max_count = 0;
+ max_total_count = 0;
initialize_growth_caches ();
FOR_EACH_DEFINED_FUNCTION (node)
@@ -1459,6 +1475,9 @@ inline_small_functions (void)
initial_size += info->size;
}
+ if (max_total_count < node->total_count)
+ max_total_count = node->total_count;
+
for (edge = node->callers; edge; edge = edge->next_caller)
if (max_count < edge->count)
max_count = edge->count;
@@ -1493,6 +1512,7 @@ inline_small_functions (void)
}
gcc_assert (in_lto_p
+ || flag_auto_profile
|| !max_count
|| (profile_info && flag_branch_probabilities));
@@ -1526,7 +1546,7 @@ inline_small_functions (void)
of date value on it, we re-insert it now. */
current_badness = edge_badness (edge, false);
gcc_assert (cached_badness == current_badness);
- gcc_assert (current_badness >= badness);
+ gcc_assert (flag_auto_profile || current_badness >= badness);
if (current_badness != badness)
{
edge->aux = fibheap_insert (heap, current_badness, edge);
@@ -1646,7 +1666,7 @@ inline_small_functions (void)
is propagated from caller. We don't track when this happen and
thus we need to recompute everything all the time. Once this is
solved, "|| 1" should go away. */
- if (callee->global.inlined_to || 1)
+ if (callee->global.inlined_to)// || 1)
update_all_callee_keys (heap, callee, updated_nodes);
else
update_callee_keys (heap, edge->callee, updated_nodes);
Index: gcc/dwarf2out.c
===================================================================
--- gcc/dwarf2out.c (revision 191813)
+++ gcc/dwarf2out.c (working copy)
@@ -2722,6 +2722,41 @@ const struct gcc_debug_hooks dwarf2_debug_hooks =
1, /* start_end_main_source_file */
TYPE_SYMTAB_IS_DIE /* tree_type_symtab_field */
};
+
+const struct gcc_debug_hooks auto_profile_debug_hooks =
+{
+ debug_nothing_charstar,
+ debug_nothing_charstar,
+ debug_nothing_void,
+ debug_nothing_int_charstar,
+ debug_nothing_int_charstar,
+ debug_nothing_int_charstar,
+ debug_nothing_int,
+ debug_nothing_int_int, /* begin_block */
+ debug_nothing_int_int, /* end_block */
+ dwarf2out_ignore_block, /* ignore_block */
+ debug_nothing_int_charstar_int_bool, /* source_line */
+ debug_nothing_int_charstar, /* begin_prologue */
+ debug_nothing_int_charstar, /* end_prologue */
+ debug_nothing_int_charstar, /* begin_epilogue */
+ debug_nothing_int_charstar, /* end_epilogue */
+ debug_nothing_tree, /* begin_function */
+ debug_nothing_int, /* end_function */
+ debug_nothing_tree, /* function_decl */
+ debug_nothing_tree, /* global_decl */
+ debug_nothing_tree_int, /* type_decl */
+ debug_nothing_tree_tree_tree_bool, /* imported_module_or_decl */
+ debug_nothing_tree, /* deferred_inline_function */
+ debug_nothing_tree, /* outlining_inline_function */
+ debug_nothing_rtx, /* label */
+ debug_nothing_int, /* handle_pch */
+ debug_nothing_rtx, /* var_location */
+ debug_nothing_void, /* switch_text_section */
+ debug_nothing_tree_tree, /* set_name */
+ 0, /* start_end_main_source_file */
+ TYPE_SYMTAB_IS_ADDRESS /* tree_type_symtab_field */
+};
+
/* NOTE: In the comments in this file, many references are made to
"Debugging Information Entries". This term is abbreviated as `DIE'
Index: gcc/opts.c
===================================================================
--- gcc/opts.c (revision 191813)
+++ gcc/opts.c (working copy)
@@ -1653,6 +1653,33 @@ common_handle_option (struct gcc_options *opts,
opts->x_flag_gcse_after_reload = value;
break;
+ case OPT_fauto_profile_:
+ auto_profile_file = xstrdup (arg);
+ opts->x_flag_auto_profile = true;
+ value = true;
+ /* No break here - do -fauto-profile processing. */
+ case OPT_fauto_profile:
+ if (!opts_set->x_flag_branch_probabilities)
+ opts->x_flag_branch_probabilities = value;
+ if (!opts_set->x_flag_unroll_loops)
+ opts->x_flag_unroll_loops = value;
+ if (!opts_set->x_flag_peel_loops)
+ opts->x_flag_peel_loops = value;
+ if (!opts_set->x_flag_inline_functions)
+ opts->x_flag_inline_functions = value;
+ if (!opts_set->x_flag_ipa_cp)
+ opts->x_flag_ipa_cp = value;
+ if (!opts_set->x_flag_ipa_cp_clone
+ && value && opts->x_flag_ipa_cp)
+ opts->x_flag_ipa_cp_clone = value;
+ if (!opts_set->x_flag_predictive_commoning)
+ opts->x_flag_predictive_commoning = value;
+ if (!opts_set->x_flag_unswitch_loops)
+ opts->x_flag_unswitch_loops = value;
+ if (!opts_set->x_flag_gcse_after_reload)
+ opts->x_flag_gcse_after_reload = value;
+ break;
+
case OPT_fprofile_generate_:
opts->x_profile_data_prefix = xstrdup (arg);
value = true;
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def (revision 191813)
+++ gcc/timevar.def (working copy)
@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltra
DEFTIMEVAR (TV_WHOPR_WPA_LTRANS_EXEC , "whopr wpa->ltrans")
DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference")
DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile")
+DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile")
DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const")
DEFTIMEVAR (TV_IPA_PTA , "ipa points-to")
DEFTIMEVAR (TV_IPA_SRA , "ipa SRA")
Index: gcc/predict.c
===================================================================
--- gcc/predict.c (revision 191813)
+++ gcc/predict.c (working copy)
@@ -60,6 +60,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "pointer-set.h"
+#include "auto-profile.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
@@ -2749,7 +2750,8 @@ compute_function_frequency (void)
if (DECL_STATIC_DESTRUCTOR (current_function_decl))
node->only_called_at_exit = true;
- if (!profile_info || !flag_branch_probabilities)
+ if (!profile_info || !flag_branch_probabilities
+ || (flag_auto_profile && profile_status == PROFILE_GUESSED))
{
int flags = flags_from_decl_or_type (current_function_decl);
if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
@@ -2851,16 +2853,30 @@ rebuild_frequencies (void)
timevar_push (TV_REBUILD_FREQUENCIES);
if (profile_status == PROFILE_GUESSED)
{
- loop_optimizer_init (0);
- add_noreturn_fake_exit_edges ();
- mark_irreducible_loops ();
- connect_infinite_loops_to_exit ();
- estimate_bb_frequencies ();
- remove_fake_exit_edges ();
- loop_optimizer_finalize ();
+ if (flag_auto_profile && counts_to_freqs ())
+ {
+ afdo_calculate_branch_prob ();
+ counts_to_freqs();
+ profile_status = PROFILE_READ;
+ compute_function_frequency ();
+ }
+ else
+ {
+ loop_optimizer_init (0);
+ add_noreturn_fake_exit_edges ();
+ mark_irreducible_loops ();
+ connect_infinite_loops_to_exit ();
+ estimate_bb_frequencies ();
+ remove_fake_exit_edges ();
+ loop_optimizer_finalize ();
+ }
}
else if (profile_status == PROFILE_READ)
- counts_to_freqs ();
+ {
+ if (flag_auto_profile)
+ afdo_calculate_branch_prob ();
+ counts_to_freqs ();
+ }
else
gcc_unreachable ();
timevar_pop (TV_REBUILD_FREQUENCIES);
Index: gcc/auto-profile.c
===================================================================
--- gcc/auto-profile.c (revision 0)
+++ gcc/auto-profile.c (revision 0)
@@ -0,0 +1,1580 @@
+/* Calculate branch probabilities, and basic block execution counts.
+ Copyright (C) 2011. Free Software Foundation, Inc.
+ Contributed by Dehao Chen (dehao@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
+/* Read and annotate call graph profile from the auto profile data
+ file. */
+
+#include
+
+#include "config.h"
+#include "system.h"
+#include "flags.h" /* for auto_profile_file. */
+#include "basic-block.h" /* for gcov_type. */
+#include "diagnostic-core.h" /* for inform(). */
+#include "gcov-io.h" /* for gcov_read_unsigned(). */
+#include "input.h" /* for expanded_location. */
+#include "profile.h" /* for profile_info. */
+#include "langhooks.h" /* for langhooks. */
+#include "opts.h" /* for in_fnames. */
+#include "tree-pass.h" /* for ipa pass. */
+#include "cfgloop.h" /* for loop_optimizer_init. */
+#include "gimple.h"
+#include "cgraph.h"
+#include "tree-flow.h"
+#include "auto-profile.h"
+
+/* The following routines implements AutoFDO optimization.
+
+ This optimization uses sampling profiles to annotate basic block counts
+ and uses heuristics to estimate branch probabilities.
+
+ There are three phases in AutoFDO:
+
+ Phase 1: Read profile from the profile data file.
+ The following info is read from the profile datafile:
+ * Function names and file names.
+ * Source level profile, which is a mapping from inline stack to
+ its sample counts.
+ * Module profile: Module to aux-modules mapping
+ Phase 1 just reads in data without processing it. It is invoked
+ before tree parsing because LIPO needs module profile before tree
+ parsing. (read_aux_modules)
+
+ Phase 2: Process profile to build internal data structure (hashmap).
+ This is done after tree parsing, because the processing requires the map
+ from function name to its debug name (bfd_name). The following hashmaps
+ is used to store profile.
+ * function_htab: map from function_name to its entry_bb count
+ * stack_htab: map from inline stack to its sample count
+ * bfd_name_htab: map from function name to its debug name (bfd_name)
+ * module_htab: map from module name to its aux-module names
+
+ Phase 3: Annotate control flow graph.
+ AutoFDO invokes a separate pass over the control flow graph to:
+ * Annotate basic block count
+ * Estimate branch probability
+
+ After the above 3 phases, all profile is readily annotated on the GCC IR.
+ AutoFDO tries to reuse all FDO infrastructure as much as possible to make
+ use of the profile. E.g. it uses existing mechanism to calculate the basic
+ block/edge frequency, as well as the cgraph node/edge count.
+
+ However, AutoFDO still differs from FDO in the following aspects:
+
+ * Profile is not accurate, because AutoFDO uses sampling to collect
+ profile, and uses debug info to represent the profile. As a result,
+ some hot basic blocks may have zero sample count. Because of this,
+ some optimization needs to be adjusted (e.g. loop peeling/unrolling).
+ * Each cloned context has its own profile, but these contexts may
+ not even exist when doing annotation. This provides more context-
+ sensitive profiles, but at the same time, adds complexity to the
+ implementation. Because of this, additional profile annotation is
+ needed for each function after the inline pass, and count scaling
+ is tricky in the second annotation.
+*/
+
+#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
+#define SP_HTAB_INIT_SIZE 2000
+
+/* GCOV data structures to represent profile stored in the .afdo file. */
+
+struct gcov_callsite_pos
+{
+ const char *file;
+ const char *func;
+ gcov_unsigned_t line;
+ gcov_unsigned_t discr;
+};
+
+struct gcov_callsite
+{
+ struct gcov_callsite_pos pos;
+ const char *func;
+ gcov_type count;
+};
+
+struct gcov_stack
+{
+ const char *func_name;
+ const char *callee_name;
+ struct gcov_callsite_pos *stack;
+ gcov_unsigned_t size;
+ gcov_type num_inst;
+ gcov_type count;
+};
+
+struct gcov_function
+{
+ const char *name;
+ const char *file;
+ gcov_type total_count;
+ gcov_type entry_count;
+ gcov_type max_count;
+ /* Number of call stacks in the function. */
+ gcov_unsigned_t stack_num;
+ /* All the call stacks in the function. */
+ struct gcov_stack *stacks;
+};
+
+struct afdo_bfd_name
+{
+ const char *assembler_name;
+ /* bfd_name is the name that debugger used for function name matching.
+ Different assembler names could map to the same bfd_name. */
+ const char *bfd_name;
+};
+
+struct afdo_module
+{
+ const char *name;
+ int ident;
+ unsigned exported;
+ unsigned has_asm;
+ unsigned num_aux_modules;
+ unsigned num_quote_paths;
+ unsigned num_bracket_paths;
+ unsigned num_cpp_defines;
+ unsigned num_cpp_includes;
+ unsigned num_cl_args;
+ const char **strings;
+};
+
+/* Store the file name strings read from the profile data file. */
+static const char **file_names;
+
+/* gcov_ctr_summary structure to store the profile_info. */
+static struct gcov_ctr_summary *afdo_profile_info;
+
+/* Hash table to hold function information. */
+static htab_t function_htab;
+
+/* Hash table to hold stack information. */
+static htab_t stack_htab;
+
+/* Hash table to hold inline scale information. */
+static htab_t stack_scale_htab;
+
+/* Hash table to hold assembler name to bfd name mapping. */
+static htab_t bfd_name_htab;
+
+/* Hash table to hold module informaition. */
+static htab_t module_htab;
+
+/* Store the module hash table contents. */
+static struct afdo_module *modules;
+
+/* File static variables, which is used to pass information between
+ init_auto_profile and process_auto_profile. */
+static gcov_unsigned_t function_num;
+static gcov_unsigned_t total_module_num;
+static struct gcov_function *gcov_functions;
+
+/* Check if PATH_NAME is absolute path, if yes, strip the directory part
+ of the PATH_NAME, return the file name. */
+
+static const char *
+afdo_get_filename (const char *path_name)
+{
+ const char* last;
+ return path_name;
+ if (path_name == NULL)
+ return NULL;
+ last = strrchr(path_name, '/');
+ return ((last == 0) ? path_name : last + 1);
+}
+
+/* Given an assembler function NAME, return its original name. strip the
+ suffix at the end of the function name, added by optimizations such as
+ constant propagation etc. */
+
+static gcov_unsigned_t
+afdo_get_original_name_size (const char *name)
+{
+ const char *ret;
+ if (!name)
+ return 0;
+ ret = strchr (name, '.');
+ if (!ret)
+ return strlen(name);
+ else
+ return ret - name;
+}
+
+/* Given an asssembler function NAME, return its corresponding bfd name.
+ If the mapping cannot be found, it means that the assembler function
+ name is not used/emitted in the current module(s). */
+
+static const char *
+afdo_get_bfd_name (const char *name)
+{
+ struct afdo_bfd_name bfd, *bfd_entry;
+ gcov_unsigned_t size = afdo_get_original_name_size (name);
+ /* If the function name is cloned, we want to find its original name. */
+ char *buf = (char *) alloca (size + 1);
+ strncpy (buf, name, size);
+ buf[size] = 0;
+ bfd.assembler_name = buf;
+ bfd_entry = (struct afdo_bfd_name *) htab_find (bfd_name_htab, &bfd);
+ if (!bfd_entry)
+ return name;
+ return bfd_entry->bfd_name;
+}
+
+/* Traverse the cgraph, add each function's name to to bfd_name mapping. */
+
+static void
+afdo_read_bfd_names (void)
+{
+ struct cgraph_node *node;
+
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ const char *bfd_name;
+ if (lang_hooks.dwarf_name (node->decl, 0) == NULL)
+ continue;
+ bfd_name = xstrdup (lang_hooks.dwarf_name (node->decl, 0));
+ afdo_add_bfd_name_mapping
+ (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)), bfd_name);
+ }
+}
+
+/* Hash function for struct afdo_stack. */
+
+static hashval_t
+afdo_stack_hash (const void *stack)
+{
+ gcov_unsigned_t i;
+ /* An arbitrary initial value borrowed from hashtab.c. */
+ hashval_t h = 0x9e3779b9;
+ const struct gcov_stack *s = (const struct gcov_stack *) stack;
+ if (s->callee_name)
+ h = iterative_hash (afdo_get_bfd_name (s->callee_name),
+ strlen (afdo_get_bfd_name (s->callee_name)), h);
+ if (s->func_name)
+ h = iterative_hash (s->func_name,
+ afdo_get_original_name_size (s->func_name), h);
+ for (i = 0; i < s->size; i++) {
+ const struct gcov_callsite_pos *p = s->stack + i;
+ const char *file = afdo_get_filename (p->file);
+ h = iterative_hash (file, strlen (file), h);
+ h = iterative_hash (&p->line, sizeof (p->line), h);
+ if (i == 0)
+ h = iterative_hash (&p->discr, sizeof (p->discr), h);
+ }
+ return h;
+}
+
+/* Check if two afdo_stack P and Q are identical. */
+
+static int
+afdo_stack_eq (const void *p, const void *q)
+{
+ const struct gcov_stack *s1 = (const struct gcov_stack *) p;
+ const struct gcov_stack *s2 = (const struct gcov_stack *) q;
+
+ gcov_unsigned_t i;
+ if (s1->func_name == NULL || s2->func_name == NULL)
+ return 0;
+
+ if (s1->callee_name == NULL)
+ {
+ if (s2->callee_name != NULL)
+ return 0;
+ }
+ else if (s2->callee_name != NULL
+ && strcmp (afdo_get_bfd_name (s1->callee_name),
+ afdo_get_bfd_name (s2->callee_name)))
+ return 0;
+
+ i = afdo_get_original_name_size (s1->func_name);
+ if (i != afdo_get_original_name_size (s2->func_name))
+ return 0;
+
+ if (strncmp (s1->func_name, s2->func_name, i))
+ return 0;
+
+ if (s1->size != s2->size)
+ return 0;
+ for (i = 0; i < s1->size; i++)
+ {
+ const struct gcov_callsite_pos *p1 = s1->stack + i;
+ const struct gcov_callsite_pos *p2 = s2->stack + i;
+ if (strcmp (afdo_get_filename(p1->file), afdo_get_filename(p2->file))
+ || p1->line != p2->line || (i== 0 && p1->discr != p2->discr))
+ return 0;
+ }
+ return 1;
+}
+
+/* Hash function for struct afdo_function. */
+
+static hashval_t
+afdo_function_hash (const void *func)
+{
+ /* An arbitrary initial value borrowed from hashtab.c. */
+ hashval_t h = 0x9e3779b9;
+ const struct gcov_function *f = (const struct gcov_function *) func;
+
+ if (f->name)
+ h = iterative_hash (f->name, afdo_get_original_name_size (f->name), h);
+ return h;
+}
+
+/* Check if two afdo_function P and Q are identical. */
+
+static int
+afdo_function_eq (const void *p, const void *q)
+{
+ const struct gcov_function *f1 = (const struct gcov_function *) p;
+ const struct gcov_function *f2 = (const struct gcov_function *) q;
+ gcov_unsigned_t i;
+
+ if (f1->name == NULL || f2->name == NULL)
+ return 0;
+
+ i = afdo_get_original_name_size (f1->name);
+ if (i != afdo_get_original_name_size (f2->name))
+ return 0;
+
+ return !strncmp (f1->name, f2->name, i);
+}
+
+/* Hash function for struct afdo_bfd_name. */
+
+static hashval_t
+afdo_bfd_name_hash (const void *func)
+{
+ hashval_t h = 0x9e3779b9;
+ const struct afdo_bfd_name *f = (const struct afdo_bfd_name *) func;
+
+ if (f->assembler_name)
+ h = iterative_hash (f->assembler_name, strlen (f->assembler_name), h);
+ return h;
+}
+
+/* Check if two struct afdo_bfd_name P and Q are identical. */
+
+static int
+afdo_bfd_name_eq (const void *p, const void *q)
+{
+ const struct afdo_bfd_name *b1 = (const struct afdo_bfd_name *) p;
+ const struct afdo_bfd_name *b2 = (const struct afdo_bfd_name *) q;
+
+ if (b1->assembler_name == NULL || b2->assembler_name == NULL)
+ return 0;
+
+ return !strcmp (b1->assembler_name, b2->assembler_name);
+}
+
+/* Free the hash table entry P. */
+
+static void
+afdo_bfd_name_del (void *p)
+{
+ free (p);
+}
+
+/* Hash Function for struct afdo_module. */
+
+static hashval_t
+afdo_module_hash (const void *module)
+{
+ hashval_t h = 0x9e3779b9;
+ const struct afdo_module *m = (const struct afdo_module *)module;
+
+ if (m->name)
+ h = iterative_hash (m->name, strlen (m->name), h);
+
+ return h;
+}
+
+/* Check if two struct afdo_module P and Q are identical. */
+
+static int
+afdo_module_eq (const void *p, const void *q)
+{
+ const struct afdo_module *m1 = (const struct afdo_module *)p;
+ const struct afdo_module *m2 = (const struct afdo_module *)q;
+
+ if (m1->name == NULL || m2->name == NULL)
+ return 0;
+
+ return !strcmp (m1->name, m2->name);
+}
+
+/* Return the total number of emitted string for MODULE. */
+
+static unsigned long long
+afdo_module_num_strings (const struct afdo_module *module)
+{
+ return module->num_quote_paths +
+ module->num_bracket_paths +
+ module->num_cpp_defines +
+ module->num_cpp_includes +
+ module->num_cl_args;
+}
+
+/* Add a module (specified in MODULE) into gcov_module_info format in
+ MODULE_INFO, which is used by LIPO to import auxiliary modules.
+ Set the is_primary flag if IS_PRIMARY is set. */
+
+static void
+afdo_add_module (struct gcov_module_info **module_info,
+ const struct afdo_module *module,
+ gcov_unsigned_t is_primary)
+{
+ unsigned i;
+ size_t info_sz;
+
+ info_sz = sizeof (struct gcov_module_info) +
+ sizeof (void *) * afdo_module_num_strings (module);
+ *module_info = XCNEWVAR (struct gcov_module_info, info_sz);
+ (*module_info)->ident = module->ident;
+ (*module_info)->is_primary = is_primary;
+ (*module_info)->is_exported = is_primary ? module->exported : 1;
+ (*module_info)->source_filename = (char *) module->name;
+ (*module_info)->num_quote_paths = module->num_quote_paths;
+ (*module_info)->num_bracket_paths = module->num_bracket_paths;
+ (*module_info)->num_cpp_defines = module->num_cpp_defines;
+ (*module_info)->num_cpp_includes = module->num_cpp_includes;
+ (*module_info)->num_cl_args = module->num_cl_args;
+ for (i = 0; i < afdo_module_num_strings (module); i++)
+ (*module_info)->string_array[i] = (char *)
+ module->strings[module->num_aux_modules + i];
+}
+
+/* Read in the auxiliary modules for the current primary module. */
+
+static void
+read_aux_modules (void)
+{
+ unsigned i, curr_module = 1;
+ struct afdo_module module, *entry;
+
+ module.name = in_fnames[0];
+ entry = (struct afdo_module *) htab_find (module_htab, &module);
+ if (!entry)
+ {
+ inform (0, "primary module %s cannot be found.", in_fnames[0]);
+ return;
+ }
+ module_infos = XCNEWVEC (struct gcov_module_info *,
+ entry->num_aux_modules + 1);
+ afdo_add_module (module_infos, entry, true);
+ primary_module_id = entry->ident;
+ for (i = 0; i < entry->num_aux_modules; i++)
+ {
+ struct afdo_module *aux_entry;
+ module.name = entry->strings[i];
+ if (!strcmp (module.name, in_fnames[0]))
+ continue;
+ aux_entry = (struct afdo_module *) htab_find (module_htab, &module);
+ if (!aux_entry)
+ {
+ inform (0, "aux module %s cannot be found.", module.name);
+ continue;
+ }
+ afdo_add_module (&module_infos[curr_module++], aux_entry, false);
+ add_input_filename (module.name);
+ }
+}
+
+/* Return the size of the inline stack of the STMT. */
+
+static int
+get_inline_stack_size_by_stmt (gimple stmt)
+{
+ tree block;
+ int size = 1;
+
+ if (!stmt)
+ return 0;
+ block = gimple_block (stmt);
+ if (!block || TREE_CODE (block) != BLOCK || !gimple_location (stmt))
+ return 0;
+
+ for ( block = BLOCK_SUPERCONTEXT (block);
+ block && (TREE_CODE (block) == BLOCK);
+ block = BLOCK_SUPERCONTEXT (block)) {
+ /* Traverse the nesting blocks. If the block contains the source
+ location info, save the source location info to the inline stack. */
+ if (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION)
+ continue;
+ size ++;
+ }
+ return size;
+}
+
+/* Return the size of the inline stack of the EDGE. All inlined callsites
+ along he inline chain are recorded. */
+
+static int
+get_inline_stack_size_by_edge (struct cgraph_edge *edge)
+{
+ struct cgraph_edge *e;
+ int size = 0;
+ for (e= edge; e; e = e->caller->callers)
+ {
+ gimple stmt = e->call_stmt;
+ if (!stmt)
+ break;
+ size += get_inline_stack_size_by_stmt (stmt);
+ if (!e->caller->global.inlined_to)
+ break;
+ }
+ return size;
+}
+
+/* Return the function name of a given lexical BLOCK. */
+
+static const char *
+get_function_name_from_block (tree block)
+{
+ tree decl;
+ for (decl = BLOCK_ABSTRACT_ORIGIN (block);
+ decl && (TREE_CODE (decl) == BLOCK);
+ decl = BLOCK_ABSTRACT_ORIGIN (decl))
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ break;
+ return decl ? IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) : NULL;
+}
+
+/* Store the inline stack of STMT to POS_STACK, return the size of the
+ stack. Set the discriminator of the inline stack if DISCR is TRUE. */
+
+static int
+get_inline_stack_by_stmt (gimple stmt, tree decl,
+ struct gcov_callsite_pos *pos_stack, bool discr)
+{
+ tree block;
+ int idx = 0;
+ source_location loc;
+
+ if (!stmt)
+ return 0;
+ block = gimple_block (stmt);
+ if (!block || TREE_CODE (block) != BLOCK || !gimple_location (stmt))
+ return 0;
+
+ loc = gimple_location (stmt);
+ pos_stack[idx].file = expand_location(loc).file;
+ pos_stack[idx].line = expand_location(loc).line;
+ if (discr)
+ pos_stack[idx].discr = get_discriminator_from_locus (loc);
+ else
+ pos_stack[idx].discr = 0;
+ idx++;
+ for (block = BLOCK_SUPERCONTEXT (block);
+ block && (TREE_CODE (block) == BLOCK);
+ block = BLOCK_SUPERCONTEXT (block))
+ {
+ if (! BLOCK_SOURCE_LOCATION (block) > 0)
+ continue;
+ loc = BLOCK_SOURCE_LOCATION (block);
+ pos_stack[idx].file = expand_location (loc).file;
+ pos_stack[idx].line = expand_location (loc).line;
+ pos_stack[idx - 1].func = get_function_name_from_block (block);
+ pos_stack[idx++].discr = 0;
+ }
+ if (decl)
+ pos_stack[idx - 1].func = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ return idx;
+}
+
+/* Store the inline stack of EDGE to POS_STACK, return the size of the
+ stack. All inlined callsites along the inline stack are recorded. */
+
+static int
+get_inline_stack_by_edge (struct cgraph_edge *edge,
+ struct gcov_callsite_pos *pos_stack)
+{
+ struct cgraph_edge *e;
+ int size = 0;
+
+ for (e = edge; e; e = e->caller->callers)
+ {
+ gimple stmt = e->call_stmt;
+ if (!stmt)
+ break;
+ size += get_inline_stack_by_stmt (stmt, e->caller->decl,
+ pos_stack + size, false);
+ if (!e->caller->global.inlined_to)
+ break;
+ }
+ return size;
+}
+
+/* Read sample count info of the function with DECL, and save them
+ to ENTRY_COUNT and TOTAL_COUNT respectively. */
+
+static void
+afdo_get_function_count (tree decl,
+ gcov_type *entry_count,
+ gcov_type *total_count)
+{
+ struct gcov_function func;
+ const struct gcov_function *func_entry;
+
+ *entry_count = 0;
+ *total_count = 0;
+ func.name =
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ func.file = DECL_SOURCE_FILE (decl);
+ func_entry = (const struct gcov_function *)
+ htab_find (function_htab, &func);
+ if (func_entry)
+ {
+ (*total_count) += func_entry->total_count;
+ (*entry_count) += func_entry->entry_count;
+ return;
+ }
+ func.name = afdo_get_bfd_name (func.name);
+ func_entry = (const struct gcov_function *)
+ htab_find (function_htab, &func);
+ if (func_entry)
+ {
+ (*total_count) += func_entry->total_count;
+ (*entry_count) += func_entry->entry_count;
+ }
+}
+
+/* Set the node count of the current function, and update the entry_bb
+ count. */
+
+void
+afdo_set_current_function_count (void)
+{
+ gcov_type entry_count;
+ gcov_type total_count;
+ struct cgraph_node *node = cgraph_get_create_node (current_function_decl);
+
+ afdo_get_function_count (current_function_decl, &entry_count, &total_count);
+ node->total_count += total_count;
+ node->count += entry_count;
+ ENTRY_BLOCK_PTR->count = node->count;
+}
+
+/* Add the AS_NAME->BFD_NAME to the assembler_name to bfd_name mapping. */
+
+void
+afdo_add_bfd_name_mapping (const char *as_name, const char *bfd_name)
+{
+ struct afdo_bfd_name **slot;
+ struct afdo_bfd_name *entry = (struct afdo_bfd_name *)
+ xmalloc (sizeof (struct afdo_bfd_name));
+
+ entry->assembler_name = as_name;
+ entry->bfd_name = bfd_name;
+ slot = (struct afdo_bfd_name **)
+ htab_find_slot (bfd_name_htab, entry, INSERT);
+ if (!*slot)
+ *slot = entry;
+ else
+ free (entry);
+}
+
+/* For an inlined EDGE, the scale (i.e. edge->count / edge->callee->count)
+ is recorded in a hash map. */
+
+void
+afdo_add_copy_scale (struct cgraph_edge *edge)
+{
+ struct gcov_stack *stack;
+ struct gcov_stack **stack_slot;
+ int scale = edge->caller->count ?
+ (double) REG_BR_PROB_BASE * edge->count / edge->caller->count
+ : REG_BR_PROB_BASE;
+ int size = get_inline_stack_size_by_edge (edge);
+
+ if (size == 0)
+ return;
+ stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
+ stack->func_name
+ = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
+ edge->caller->global.inlined_to ?
+ edge->caller->global.inlined_to->decl : edge->caller->decl));
+ stack->callee_name
+ = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
+ stack->size = size;
+ stack->stack = (struct gcov_callsite_pos *)
+ xmalloc (sizeof (struct gcov_callsite_pos) * size);
+ stack->count = scale;
+
+ get_inline_stack_by_edge (edge, stack->stack);
+
+ stack_slot = (struct gcov_stack **)
+ htab_find_slot (stack_scale_htab, stack, INSERT);
+ if (!*stack_slot)
+ *stack_slot = stack;
+ else
+ (*stack_slot)->count = MAX ((*stack_slot)->count, stack->count);
+}
+
+/* For a given POS_STACK with SIZE, get the COUNT/NUM_INST info for the
+ inline stack. If CALLEE_NAME is non-null, the COUNT represents the
+ total count in the inline stack. Otherwise, the COUNT represents the
+ count of an ordinary statement. Return FALSE if profile is not found
+ for the given POS_STACK. */
+
+static bool
+get_stack_count (struct gcov_callsite_pos *pos_stack,
+ const char *callee_name,
+ int size,
+ gcov_type *count, gcov_type *num_inst)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ {
+ struct gcov_stack stack, *entry;
+ stack.func_name = pos_stack[size - i - 1].func;
+ stack.callee_name = callee_name;
+ stack.stack = pos_stack;
+ stack.size = size - i;
+ entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
+ if (entry)
+ {
+ if (i == 0)
+ {
+ *count = entry->count;
+ *num_inst = entry->num_inst;
+ return true;
+ }
+ else
+ {
+ struct gcov_stack scale_stack, *scale_entry;
+ scale_stack.stack = pos_stack + size - i;
+ scale_stack.size = i;
+ scale_stack.func_name = pos_stack[size - 1].func;
+ scale_stack.callee_name = stack.func_name;
+ scale_entry = (struct gcov_stack *)
+ htab_find (stack_scale_htab, &scale_stack);
+ if (scale_entry)
+ {
+ *count = entry->count * scale_entry->count
+ / REG_BR_PROB_BASE;
+ *num_inst = entry->num_inst;
+ return true;
+ }
+ }
+ }
+ }
+ *count = 0;
+ *num_inst = 0;
+ return false;
+}
+
+/* For a given STMT, get the COUNT and NUM_INST from its profile.
+ Return FALSE if profile is not found for STMT. */
+
+static bool
+get_stmt_count (gimple stmt, gcov_type *count, gcov_type *num_inst)
+{
+ struct gcov_callsite_pos *pos_stack;
+ int size;
+
+ if (!stmt)
+ return false;
+ size = get_inline_stack_size_by_stmt (stmt);
+ if (size == 0)
+ return false;
+
+ pos_stack = (struct gcov_callsite_pos *)
+ alloca (sizeof (struct gcov_callsite_pos) * size);
+
+ get_inline_stack_by_stmt (stmt, current_function_decl, pos_stack, true);
+
+ return get_stack_count (pos_stack, NULL, size, count, num_inst);
+}
+
+/* For a given EDGE, return the total count of the inlined callsite. */
+
+gcov_type
+afdo_get_callsite_count (struct cgraph_edge *edge)
+{
+ struct gcov_callsite_pos *pos_stack;
+ gcov_type count, num_inst;
+ const char *callee_name =
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
+ int size = get_inline_stack_size_by_edge (edge);
+
+ if (size == 0)
+ return 0;
+ pos_stack = (struct gcov_callsite_pos *)
+ alloca (sizeof (struct gcov_callsite_pos) * size);
+
+ get_inline_stack_by_edge (edge, pos_stack);
+
+ if (get_stack_count (pos_stack, callee_name, size, &count, &num_inst))
+ return count;
+ else
+ return 0;
+}
+
+/* For a given BB, return its execution count. */
+
+gcov_type
+afdo_get_bb_count (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ gcov_type max_count = 0;
+ bool has_annotated = false;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gcov_type count, num_inst;
+ gimple stmt = gsi_stmt (gsi);
+ if (get_stmt_count (stmt, &count, &num_inst))
+ {
+ if (count > max_count)
+ max_count = count;
+ has_annotated = true;
+ }
+ }
+ if (has_annotated)
+ {
+ bb->flags |= BB_ANNOTATED;
+ return max_count;
+ }
+ else
+ return 0;
+}
+
+/* Annotate auto profile to the control flow graph. */
+
+static void
+afdo_annotate_cfg (void)
+{
+ basic_block bb;
+ gcov_type max_count = 0;
+
+ FOR_EACH_BB (bb)
+ {
+ bb->count = afdo_get_bb_count (bb);
+ if (bb->count > max_count)
+ max_count = bb->count;
+ }
+ if (ENTRY_BLOCK_PTR->count > ENTRY_BLOCK_PTR->next_bb->count)
+ ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+ if (max_count > 0)
+ {
+ counts_to_freqs ();
+ afdo_calculate_branch_prob ();
+ profile_status = PROFILE_READ;
+ }
+}
+
+/* Read profile from profile data file. Write to the module hashmap. */
+
+static void
+read_profile (void)
+{
+ gcov_unsigned_t i, j, k, file_name_num;
+
+ if (gcov_open (auto_profile_file, 1) == -1)
+ {
+ inform (0, "Cannot open profile file %s.", auto_profile_file);
+ flag_auto_profile = 0;
+ return;
+ }
+
+ if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
+ {
+ inform (0, "Magic number does not mathch.");
+ flag_auto_profile = 0;
+ return;
+ }
+
+ if (gcov_read_unsigned () != GCOV_VERSION)
+ {
+/* inform (0, "Vesion number does not mathch.");
+ flag_auto_profile = 0;
+ return;*/;
+ }
+
+ /* Skip the empty integer. */
+ gcov_read_unsigned ();
+ gcc_assert (gcov_read_unsigned () == GCOV_TAG_AFDO_FILE_NAMES);
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+
+ /* Read in the file name table. */
+ file_name_num = gcov_read_unsigned ();
+ file_names = (const char **)
+ xmalloc (sizeof (const char *) * file_name_num);
+ for (i = 0; i < file_name_num; i++)
+ file_names[i] = xstrdup (gcov_read_string ());
+
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
+ {
+ inform (0, "Not expected TAG.");
+ return;
+ }
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+
+ /* Read in the function/callsite profile, and store it in local
+ data structure. */
+ function_num = gcov_read_unsigned ();
+ gcov_functions = (struct gcov_function *)
+ xmalloc (function_num * sizeof (struct gcov_function));
+ for (i = 0; i < function_num; i++)
+ {
+ gcov_functions[i].name = xstrdup (gcov_read_string ());
+ gcov_functions[i].file = file_names[gcov_read_unsigned ()];
+ gcov_functions[i].total_count = gcov_read_counter ();
+ gcov_functions[i].entry_count = gcov_read_counter ();
+ gcov_functions[i].max_count = 0;
+ gcov_functions[i].stack_num = gcov_read_unsigned ();
+ gcov_functions[i].stacks = (struct gcov_stack *)
+ xmalloc (gcov_functions[i].stack_num * sizeof (struct gcov_stack));
+ for (j = 0; j < gcov_functions[i].stack_num; j++)
+ {
+ gcov_functions[i].stacks[j].func_name = gcov_functions[i].name;
+ gcov_functions[i].stacks[j].callee_name = NULL;
+ gcov_functions[i].stacks[j].size = gcov_read_unsigned ();
+ gcov_functions[i].stacks[j].stack = (struct gcov_callsite_pos *)
+ xmalloc (gcov_functions[i].stacks[j].size
+ * sizeof (struct gcov_callsite_pos));
+ for (k = 0; k < gcov_functions[i].stacks[j].size; k++)
+ {
+ gcov_functions[i].stacks[j].stack[k].func =
+ file_names[gcov_read_unsigned ()];
+ gcov_functions[i].stacks[j].stack[k].file =
+ file_names[gcov_read_unsigned ()];
+ gcov_functions[i].stacks[j].stack[k].line =
+ gcov_read_unsigned ();
+ gcov_functions[i].stacks[j].stack[k].discr =
+ gcov_read_unsigned ();
+ }
+ gcov_functions[i].stacks[j].count = gcov_read_counter ();
+ gcov_functions[i].stacks[j].num_inst = gcov_read_counter ();
+ }
+ }
+
+ /* Read in the module info. */
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_MODULE_GROUPING)
+ {
+ inform (0, "Not expected TAG.");
+ return;
+ }
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+
+ /* Read in the file name table. */
+ total_module_num = gcov_read_unsigned ();
+ modules = (struct afdo_module *)
+ xmalloc (total_module_num * sizeof (struct afdo_module));
+ for (i = 0; i < total_module_num; i++)
+ {
+ unsigned num_strings;
+ struct afdo_module **slot;
+ modules[i].name = xstrdup (gcov_read_string());
+ modules[i].ident = i + 1;
+ /* exported flag. */
+ modules[i].exported = gcov_read_unsigned();
+ /* has_asm flag. */
+ modules[i].has_asm = gcov_read_unsigned();
+ /* aux_module and 5 options. */
+ modules[i].num_aux_modules = gcov_read_unsigned();
+ modules[i].num_quote_paths = gcov_read_unsigned();
+ modules[i].num_bracket_paths = gcov_read_unsigned();
+ modules[i].num_cpp_defines = gcov_read_unsigned();
+ modules[i].num_cpp_includes = gcov_read_unsigned();
+ modules[i].num_cl_args = gcov_read_unsigned();
+ num_strings = modules[i].num_aux_modules
+ + modules[i].num_quote_paths
+ + modules[i].num_bracket_paths
+ + modules[i].num_cpp_defines
+ + modules[i].num_cpp_includes
+ + modules[i].num_cl_args;
+ modules[i].strings = (const char **)
+ xmalloc (num_strings * sizeof (char *));
+ for (j = 0; j < num_strings; j++)
+ modules[i].strings[j] = xstrdup (gcov_read_string());
+ slot = (struct afdo_module **)
+ htab_find_slot (module_htab, &modules[i], INSERT);
+ if (!*slot)
+ *slot = &modules[i];
+ else
+ gcc_unreachable ();
+ }
+}
+
+/* Process the profile data and build the function/callsite/callstack
+ hash maps. */
+
+void
+process_auto_profile (void)
+{
+ unsigned i;
+
+ afdo_read_bfd_names ();
+ for (i = 0; i < function_num; i++)
+ {
+ struct gcov_function **func_slot = (struct gcov_function **)
+ htab_find_slot (function_htab, gcov_functions + i, INSERT);
+ if (!*func_slot)
+ *func_slot = gcov_functions + i;
+ }
+
+ for (i = 0; i < function_num; i++)
+ {
+ unsigned j;
+ struct gcov_function *func = gcov_functions + i;
+ for (j = 0; j < func->stack_num; j++)
+ {
+ unsigned k;
+ unsigned stack_size = func->stacks[j].size;
+ gcov_type count = func->stacks[j].count;
+ struct gcov_stack **stack_slot = (struct gcov_stack **)
+ htab_find_slot (stack_htab, func->stacks + j, INSERT);
+ if (func->stacks[j].num_inst && count > afdo_profile_info->sum_max)
+ afdo_profile_info->sum_max = count / func->stacks[j].num_inst;
+ if (*stack_slot)
+ {
+ (*stack_slot)->count += count;
+ if ((*stack_slot)->num_inst < func->stacks[j].num_inst)
+ (*stack_slot)->num_inst = func->stacks[j].num_inst;
+ }
+ else
+ *stack_slot = func->stacks + j;
+ for (k = 1; k < stack_size; k++)
+ {
+ struct gcov_stack *new_stack = (struct gcov_stack *)
+ xmalloc (sizeof (struct gcov_stack));
+ new_stack->func_name = func->stacks[j].func_name;
+ new_stack->callee_name =
+ func->stacks[j].stack[stack_size - k - 1].func;
+ new_stack->stack = func->stacks[j].stack + stack_size - k;
+ new_stack->size = k;
+ new_stack->num_inst = 0;
+ new_stack->count = 0;
+ stack_slot = (struct gcov_stack **)
+ htab_find_slot (stack_htab, new_stack, INSERT);
+ if (*stack_slot)
+ {
+ (*stack_slot)->count += count;
+ free (new_stack);
+ }
+ else
+ *stack_slot = new_stack;
+ }
+ }
+ }
+}
+
+/* Create the hash tables, and read the profile from the profile data
+ file. */
+
+void
+init_auto_profile (void)
+{
+ if (auto_profile_file == NULL)
+ auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
+
+ /* Initialize the function hash table. */
+ function_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+ afdo_function_hash,
+ afdo_function_eq,
+ 0,
+ xcalloc,
+ free);
+ /* Initialize the stack hash table. */
+ stack_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+ afdo_stack_hash,
+ afdo_stack_eq,
+ 0,
+ xcalloc,
+ free);
+ /* Initialize the stack scale hash table. */
+ stack_scale_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+ afdo_stack_hash,
+ afdo_stack_eq,
+ 0,
+ xcalloc,
+ free);
+ /* Initialize the bfd name mapping table. */
+ bfd_name_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+ afdo_bfd_name_hash,
+ afdo_bfd_name_eq,
+ afdo_bfd_name_del,
+ xcalloc,
+ free);
+ /* Initialize the module hash table. */
+ module_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
+ afdo_module_hash,
+ afdo_module_eq,
+ 0,
+ xcalloc,
+ free);
+
+ afdo_profile_info = (struct gcov_ctr_summary *)
+ xcalloc (1, sizeof (struct gcov_ctr_summary));
+ afdo_profile_info->runs = 1;
+ afdo_profile_info->sum_max = 0;
+
+ /* Read the profile from the profile file. */
+ read_profile ();
+
+ if (flag_dyn_ipa)
+ read_aux_modules();
+}
+
+/* Free the resources. */
+
+void
+end_auto_profile (void)
+{
+ unsigned i, j;
+
+ for (i = 0; i < function_num; i++)
+ {
+ for (j = 0; j < gcov_functions[i].stack_num; ++j)
+ free (gcov_functions[i].stacks[j].stack);
+ free (gcov_functions[i].stacks);
+ }
+ free (gcov_functions);
+
+ for (i = 0; i < total_module_num; i++)
+ free (modules[i].strings);
+ free (modules);
+ free (afdo_profile_info);
+ free (file_names);
+ htab_delete (function_htab);
+ htab_delete (stack_htab);
+ htab_delete (stack_scale_htab);
+ htab_delete (bfd_name_htab);
+ htab_delete (module_htab);
+ profile_info = NULL;
+}
+
+/* BB1 and BB2 are in an equivalent class iff:
+ 1. BB1 dominates BB2.
+ 2. BB2 post-dominates BB1.
+ 3. BB1 and BB2 are in the same loop nest.
+ This function finds the equivalent class for each basic block, and
+ stores a pointer to the first BB in its equivalent class. Meanwhile,
+ set bb counts for the same equivalent class to be idenical. */
+
+static void
+afdo_find_equiv_class (void)
+{
+ basic_block bb;
+
+ FOR_ALL_BB (bb)
+ bb->aux = NULL;
+
+ FOR_ALL_BB (bb)
+ {
+ VEC (basic_block, heap) *dom_bbs;
+ basic_block bb1;
+ int i;
+
+ if (bb->aux != NULL)
+ continue;
+ bb->aux = bb;
+ dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
+ FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb1)
+ if (bb1->aux == NULL
+ && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
+ && bb1->loop_father == bb->loop_father)
+ {
+ bb1->aux = bb;
+ if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0)
+ {
+ bb->count = MAX (bb->count, bb1->count);
+ bb->flags |= BB_ANNOTATED;
+ }
+ }
+ dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
+ FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb1)
+ if (bb1->aux == NULL
+ && dominated_by_p (CDI_DOMINATORS, bb, bb1)
+ && bb1->loop_father == bb->loop_father)
+ {
+ bb1->aux = bb;
+ if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0)
+ {
+ bb->count = MAX (bb->count, bb1->count);
+ bb->flags |= BB_ANNOTATED;
+ }
+ }
+ }
+}
+
+/* If a baisk block only has one in/out edge, then the bb count and he
+ edge count should be the same.
+ IS_SUCC is true if the out edge of the basic block is examined.
+ Return TRUE if any basic block/edge count is changed. */
+
+static bool
+afdo_propagate_single_edge (bool is_succ)
+{
+ basic_block bb;
+ bool changed = false;
+
+ FOR_EACH_BB (bb)
+ if (is_succ ? single_succ_p (bb) : single_pred_p (bb))
+ {
+ edge e = is_succ ? single_succ_edge (bb) : single_pred_edge (bb);
+ if (((e->flags & EDGE_ANNOTATED) == 0)
+ && ((bb->flags & BB_ANNOTATED) != 0))
+ {
+ e->count = bb->count;
+ e->flags |= EDGE_ANNOTATED;
+ changed = true;
+ }
+ else if (((e->flags & EDGE_ANNOTATED) != 0)
+ && ((bb->flags & BB_ANNOTATED) == 0))
+ {
+ bb->count = e->count;
+ bb->flags |= BB_ANNOTATED;
+ changed = true;
+ }
+ else if (bb->count != e->count)
+ {
+ e->count = bb->count = MAX (bb->count, e->count);
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+/* If a basic block's count is known, and only one of its in/out edges' count
+ is unknown, its count can be calculated.
+ Meanwhile, if all of the in/out edges' counts are known, then the basic
+ block's unknown count can also be calculated.
+ IS_SUCC is true if out edges of a basic blocks are examined.
+ Return TRUE if any basic block/edge count is changed. */
+
+static bool
+afdo_propagate_multi_edge (bool is_succ)
+{
+ basic_block bb;
+ bool changed = false;
+
+ FOR_EACH_BB (bb)
+ {
+ edge e, unknown_edge = NULL;
+ edge_iterator ei;
+ int num_unknown_edge = 0;
+ gcov_type total_known_count = 0;
+
+ if (is_succ)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if ((e->flags & EDGE_ANNOTATED) == 0)
+ num_unknown_edge ++, unknown_edge = e;
+ else
+ total_known_count += e->count;
+ }
+ else
+ {
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if ((e->flags & EDGE_ANNOTATED) == 0)
+ num_unknown_edge ++, unknown_edge = e;
+ else
+ total_known_count += e->count;
+ }
+
+ if (num_unknown_edge == 0)
+ {
+ if (total_known_count > bb->count)
+ {
+ bb->count = total_known_count;
+ changed = true;
+ }
+ if ((bb->flags & BB_ANNOTATED) == 0)
+ {
+ bb->flags |= BB_ANNOTATED;
+ changed = true;
+ }
+ }
+ else if (num_unknown_edge == 1
+ && (bb->flags & BB_ANNOTATED) != 0)
+ {
+ if (bb->count >= total_known_count)
+ {
+ unknown_edge->count = bb->count - total_known_count;
+ unknown_edge->flags |= EDGE_ANNOTATED;
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+/* Special propagation for circuit expressions. Because GCC translates
+ control flow into data flow for circuit expressions. E.g.
+ BB1:
+ if (a && b)
+ BB2
+ else
+ BB3
+
+ will be translated into:
+
+ BB1:
+ if (a)
+ goto BB.t1
+ else
+ goto BB.t3
+ BB.t1:
+ if (b)
+ goto BB.t2
+ else
+ goto BB.t3
+ BB.t2:
+ goto BB.t3
+ BB.t3:
+ tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
+ if (tmp)
+ goto BB2
+ else
+ goto BB3
+
+ In this case, we need to propagate through PHI to determine the edge
+ count of BB1->BB.t1, BB.t1->BB.t2. */
+
+static void
+afdo_propagate_circuit (void)
+{
+ basic_block bb;
+ FOR_ALL_BB (bb)
+ {
+ gimple phi_stmt;
+ tree cmp_rhs, cmp_lhs;
+ gimple cmp_stmt = last_stmt (bb);
+ edge e;
+ edge_iterator ei;
+
+ if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
+ continue;
+ cmp_rhs = gimple_cond_rhs (cmp_stmt);
+ cmp_lhs = gimple_cond_lhs (cmp_stmt);
+ if (!TREE_CONSTANT (cmp_rhs)
+ || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
+ continue;
+ if (TREE_CODE (cmp_lhs) != SSA_NAME)
+ continue;
+ if ((bb->flags & BB_ANNOTATED) == 0)
+ continue;
+ phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
+ while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN
+ && gimple_assign_single_p (phi_stmt)
+ && TREE_CODE(gimple_assign_rhs1 (phi_stmt)) == SSA_NAME)
+ phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt));
+ if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
+ continue;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ unsigned i, total = 0;
+ edge only_one;
+ bool check_value_one = (((integer_onep (cmp_rhs))
+ ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
+ ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
+ if ((e->flags & EDGE_ANNOTATED) == 0)
+ continue;
+ for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
+ {
+ tree val = gimple_phi_arg_def (phi_stmt, i);
+ edge ep = gimple_phi_arg_edge (phi_stmt, i);
+
+ if (!TREE_CONSTANT (val) || !(integer_zerop (val)
+ || integer_onep (val)))
+ continue;
+ if (check_value_one ^ integer_onep (val))
+ continue;
+ total++;
+ only_one = ep;
+ if (e->probability == 0 && (e->flags & EDGE_ANNOTATED) == 0)
+ {
+ ep->probability = 0;
+ ep->count = 0;
+ ep->flags |= EDGE_ANNOTATED;
+ }
+ }
+ if (total == 1 && (only_one->flags & EDGE_ANNOTATED) == 0)
+ {
+ only_one->probability = e->probability;
+ only_one->count = e->count;
+ only_one->flags |= EDGE_ANNOTATED;
+ }
+ }
+ }
+}
+
+/* Propagate the basic block count and edge count on the control flow
+ graph. We do the propagation iteratively until stablize. */
+
+static void
+afdo_propagate (void)
+{
+ basic_block bb;
+ bool changed = true;
+
+ FOR_ALL_BB (bb)
+ {
+ bb->count = ((basic_block) bb->aux)->count;
+ if ((((basic_block) bb->aux)->flags & BB_ANNOTATED) != 0)
+ bb->flags |= BB_ANNOTATED;
+ }
+
+ while (changed)
+ {
+ changed = false;
+
+ if (afdo_propagate_single_edge (true))
+ changed = true;
+ if (afdo_propagate_single_edge (false))
+ changed = true;
+ if (afdo_propagate_multi_edge (true))
+ changed = true;
+ if (afdo_propagate_multi_edge (false))
+ changed = true;
+ afdo_propagate_circuit ();
+ }
+}
+
+/* Propagate counts on control flow graph and calculate branch
+ probabilities. */
+
+void
+afdo_calculate_branch_prob (void)
+{
+ basic_block bb;
+ bool has_sample = false;
+
+ FOR_EACH_BB (bb)
+ if (bb->count > 0)
+ has_sample = true;
+
+ if (!has_sample)
+ return;
+
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ calculate_dominance_info (CDI_DOMINATORS);
+ loop_optimizer_init (0);
+
+ afdo_find_equiv_class ();
+ afdo_propagate ();
+
+ FOR_EACH_BB (bb)
+ {
+ edge e;
+ edge_iterator ei;
+ int num_unknown_succ = 0;
+ gcov_type total_count = 0;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if ((e->flags & EDGE_ANNOTATED) == 0)
+ num_unknown_succ ++;
+ else
+ total_count += e->count;
+ }
+ if (num_unknown_succ == 0 && total_count > 0)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->probability =
+ (double) e->count * REG_BR_PROB_BASE / total_count;
+ }
+ }
+ FOR_ALL_BB (bb)
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->count =
+ (double) bb->count * e->probability / REG_BR_PROB_BASE;
+ bb->aux = NULL;
+ }
+
+ loop_optimizer_finalize ();
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+}
+
+/* Use AutoFDO profile to annoate the control flow graph. */
+
+static unsigned int
+auto_profile (void)
+{
+ struct cgraph_node *node;
+
+ if (cgraph_state == CGRAPH_STATE_FINISHED)
+ return 0;
+
+ init_node_map();
+ profile_info = afdo_profile_info;
+
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ if (!node->analyzed
+ || !gimple_has_body_p (node->decl)
+ || !(!node->clone_of || node->decl != node->clone_of->decl))
+ continue;
+
+ /* Don't profile functions produced for builtin stuff. */
+ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION
+ || DECL_STRUCT_FUNCTION (node->decl)->after_tree_profile)
+ continue;
+
+ push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+ current_function_decl = node->decl;
+
+ afdo_annotate_cfg ();
+ compute_function_frequency ();
+
+ current_function_decl = NULL;
+ pop_cfun ();
+ }
+
+ return TODO_rebuild_cgraph_edges;
+}
+
+static bool
+gate_auto_profile_ipa (void)
+{
+ return flag_auto_profile;
+}
+
+struct simple_ipa_opt_pass pass_ipa_auto_profile =
+{
+ {
+ SIMPLE_IPA_PASS,
+ "afdo", /* name */
+ gate_auto_profile_ipa, /* gate */
+ auto_profile, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_IPA_AUTOFDO, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func /* todo_flags_finish */
+ }
+};
Index: gcc/auto-profile.h
===================================================================
--- gcc/auto-profile.h (revision 0)
+++ gcc/auto-profile.h (revision 0)
@@ -0,0 +1,46 @@
+/* coverage.h - Defines data exported from coverage.c
+ Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
+#ifndef AUTO_PROFILE_H
+#define AUTO_PROFILE_H
+
+/* Read, process, finalize AutoFDO data structures. */
+extern void init_auto_profile (void);
+extern void end_auto_profile (void);
+extern void process_auto_profile (void);
+
+/* Annotate function's count and total count. */
+extern void afdo_set_current_function_count (void);
+
+/* Add the assembly_name to bfd_name mapping. */
+extern void afdo_add_bfd_name_mapping (const char *, const char *);
+
+/* Add copy scale for an inlined edge to stack_scale_map. */
+extern void afdo_add_copy_scale (struct cgraph_edge *);
+
+/* Calculate branch probability in both AutoFDO pass and after inlining. */
+extern void afdo_calculate_branch_prob (void);
+
+/* Calculate total sample count of an inlined callsite. */
+extern gcov_type afdo_get_callsite_count (struct cgraph_edge *);
+
+/* Calculate basic block count. */
+extern gcov_type afdo_get_bb_count (basic_block);
+#endif /* AUTO_PROFILE_H */
Index: gcc/profile.c
===================================================================
--- gcc/profile.c (revision 191813)
+++ gcc/profile.c (working copy)
@@ -662,6 +662,7 @@ compute_branch_probabilities (unsigned cfg_checksu
/* Very simple sanity checks so we catch bugs in our profiling code. */
if (!profile_info)
return;
+
if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
{
error ("corrupted profile info: run_max * runs < sum_max");
@@ -1447,7 +1448,7 @@ branch_prob (void)
if (flag_profile_values)
gimple_find_values_to_profile (&values);
- if (flag_branch_probabilities)
+ if (flag_branch_probabilities && !flag_auto_profile)
{
compute_branch_probabilities (cfg_checksum, lineno_checksum);
if (flag_profile_values)
Index: gcc/loop-unroll.c
===================================================================
--- gcc/loop-unroll.c (revision 191813)
+++ gcc/loop-unroll.c (working copy)
@@ -1154,6 +1154,10 @@ decide_unroll_runtime_iterations (struct loop *loo
return;
}
+ if (flag_auto_profile
+ && expected_loop_iterations (loop) > loop->header->count)
+ return;
+
/* Success; now force nunroll to be power of 2, as we are unable to
cope with overflows in computation of number of iterations. */
for (i = 1; 2 * i <= nunroll; i *= 2)
Index: gcc/coverage.c
===================================================================
--- gcc/coverage.c (revision 191813)
+++ gcc/coverage.c (working copy)
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see
#include "filenames.h"
#include "dwarf2asm.h"
#include "target.h"
+#include "auto-profile.h"
#include "gcov-io.h"
#include "gcov-io.c"
@@ -274,7 +275,6 @@ static struct opt_desc force_matching_cg_opts[] =
{
{ "-fexceptions", "-fno-exceptions", true },
{ "-fsized-delete", "-fno-sized-delete", false },
- { "-frtti", "-fno-rtti", true },
{ NULL, NULL, false }
};
@@ -2296,6 +2296,8 @@ coverage_init (const char *filename, const char* s
init_pmu_profiling ();
tree_init_instrumentation_sampling ();
}
+ if (flag_auto_profile)
+ init_auto_profile ();
}
/* Return True if any type of profiling is enabled which requires linking
Index: gcc/tree-ssa-live.c
===================================================================
--- gcc/tree-ssa-live.c (revision 191813)
+++ gcc/tree-ssa-live.c (working copy)
@@ -546,7 +546,7 @@ remove_unused_scope_block_p (tree scope)
else if (!nsubblocks)
;
/* For terse debug info we can eliminate info on unused variables. */
- else if (!generate_debug_line_table
+ else if (!flag_auto_profile && !generate_debug_line_table
&& (debug_info_level == DINFO_LEVEL_NONE
|| debug_info_level == DINFO_LEVEL_TERSE))
{
Index: gcc/common.opt
===================================================================
--- gcc/common.opt (revision 191813)
+++ gcc/common.opt (working copy)
@@ -913,6 +913,16 @@ fauto-inc-dec
Common Report Var(flag_auto_inc_dec) Init(1)
Generate auto-inc/dec instructions
+fauto-profile
+Common Report Var(flag_auto_profile) Optimization
+Use sample profile information for call graph node weights. The default
+profile file is fbdata.afdo in 'pwd'.
+
+fauto-profile=
+Common Joined RejectNegative Var(auto_profile_file)
+Use sample profile information for call graph node weights. The profile
+file is specified in the argument.
+
; -fcheck-bounds causes gcc to generate array bounds checks.
; For C, C++ and ObjC: defaults off.
; For Java: defaults to on.
Index: gcc/tree-inline.c
===================================================================
--- gcc/tree-inline.c (revision 191813)
+++ gcc/tree-inline.c (working copy)
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see
#include "integrate.h"
#include "langhooks.h"
#include "l-ipo.h"
+#include "auto-profile.h"
#include "rtl.h" /* FIXME: For asm_str_count. */
@@ -1824,6 +1825,15 @@ copy_bb (copy_body_data *id, basic_block bb, int f
copy_gsi = gsi_last_bb (copy_basic_block);
}
+ if (flag_auto_profile && profile_info)
+ {
+ gcov_type count = afdo_get_bb_count (copy_basic_block);
+ if (copy_basic_block->flags & BB_ANNOTATED)
+ copy_basic_block->count = count;
+ else if (bb->flags & BB_ANNOTATED)
+ copy_basic_block->flags |= BB_ANNOTATED;
+ }
+
return copy_basic_block;
}
@@ -1918,9 +1928,10 @@ copy_edges_for_bb (basic_block bb, gcov_type count
edge new_edge;
flags = old_edge->flags;
+ flags &= (~EDGE_ANNOTATED);
/* Return edges do get a FALLTHRU flag when the get inlined. */
- if (old_edge->dest->index == EXIT_BLOCK && !old_edge->flags
+ if (old_edge->dest->index == EXIT_BLOCK && !flags
&& old_edge->dest->aux != EXIT_BLOCK_PTR)
flags |= EDGE_FALLTHRU;
new_edge = make_edge (new_bb, (basic_block) old_edge->dest->aux, flags);
@@ -2253,6 +2264,8 @@ copy_cfg_body (copy_body_data * id, gcov_type coun
if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
count_scale = (REG_BR_PROB_BASE * (double)count
/ ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count);
+ else if (flag_auto_profile && count == 0)
+ count_scale = 0;
else
count_scale = REG_BR_PROB_BASE;
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c (revision 191813)
+++ gcc/tree-profile.c (working copy)
@@ -1642,7 +1642,7 @@ direct_call_profiling (void)
static bool
gate_tree_profile_ipa (void)
{
- return (!in_lto_p
+ return (!in_lto_p && !flag_auto_profile
&& (flag_branch_probabilities || flag_test_coverage
|| profile_arc_flag || flag_profile_reusedist
|| flag_optimize_locality));
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in (revision 191813)
+++ gcc/Makefile.in (working copy)
@@ -874,6 +874,7 @@ GIMPLE_H = gimple.h gimple.def gsstruct.def pointe
TRANS_MEM_H = trans-mem.h
GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h
COVERAGE_H = coverage.h $(GCOV_IO_H)
+AUTO_PROFILE_H = auto-profile.h
DEMANGLE_H = $(srcdir)/../include/demangle.h
RECOG_H = recog.h
ALIAS_H = alias.h coretypes.h
@@ -1161,6 +1162,7 @@ OBJS = \
alias.o \
alloc-pool.o \
auto-inc-dec.o \
+ auto-profile.o \
bb-reorder.o \
bitmap.o \
bt-load.o \
@@ -3034,6 +3036,10 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
$(HASHTAB_H) tree-iterator.h $(CGRAPH_H) $(TREE_PASS_H) gcov-io.c $(TM_P_H) \
opts.h $(TREE_FLOW_H) $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h l-ipo.h dwarf2asm.h \
$(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H)
+auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
+ $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \
+ $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) \
+ $(AUTO_PROFILE_H)
cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
$(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) output.h $(FUNCTION_H) $(TREE_PASS_H) \
Index: gcc/basic-block.h
===================================================================
--- gcc/basic-block.h (revision 191813)
+++ gcc/basic-block.h (working copy)
@@ -89,8 +89,9 @@ DEF_VEC_ALLOC_P(edge,heap);
and cold sections, when we
do partitioning. */
#define EDGE_PRESERVE 0x4000 /* Never merge blocks via this edge. */
-#define EDGE_ALL_FLAGS 0x7fff
-
+#define EDGE_ANNOTATED 0x8000 /* Edge has been annotated by AutoFDO
+ profile. */
+#define EDGE_ALL_FLAGS 0xffff
#define EDGE_COMPLEX \
(EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_EH | EDGE_PRESERVE)
@@ -268,7 +269,11 @@ enum bb_flags
/* Set on blocks that are in a transaction. This is calculated on
demand, and is available after calling
compute_transaction_bits(). */
- BB_IN_TRANSACTION = 1 << 13
+ BB_IN_TRANSACTION = 1 << 13,
+
+ /* Set on blocks that has been annotated by AutoFDO profile. This is
+ calculated while propagating profiles on control flow graph. */
+ BB_ANNOTATED = 1 << 14
};
/* Dummy flag for convenience in the hot/cold partitioning code. */
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c (revision 191813)
+++ gcc/tree-cfg.c (working copy)
@@ -1806,6 +1806,14 @@ gimple_merge_blocks (basic_block a, basic_block b)
}
}
+ /* When merging two BBs, if their counts are different, the larger
+ count is selected as the new bb count. */
+ if (flag_auto_profile && a->count != b->count)
+ {
+ a->count = MAX (a->count, b->count);
+ a->frequency = MAX (a->frequency, b->frequency);
+ }
+
/* Merge the sequences. */
last = gsi_last_bb (a);
gsi_insert_seq_after (&last, bb_seq (b), GSI_NEW_STMT);
Index: gcc/passes.c
===================================================================
--- gcc/passes.c (revision 191813)
+++ gcc/passes.c (working copy)
@@ -1251,6 +1251,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_rebuild_cgraph_edges);
NEXT_PASS (pass_inline_parameters);
}
+ NEXT_PASS (pass_ipa_auto_profile);
NEXT_PASS (pass_ipa_tree_profile);
{
struct opt_pass **p = &pass_ipa_tree_profile.pass.sub;