Date: Thu, 08 Jun 2017 12:52:00 -0000
From: Jan Hubicka
To: gcc-patches@gcc.gnu.org
Subject: Statically propagate basic blocks which are likely executed 0 times
Message-ID: <20170608125249.GB65161@kam.mff.cuni.cz>

Hi,
this patch adds static analysis to detect basic blocks with a 0 execution
count.  Those are basic blocks that are either reached only by EH edges or
that lead to a call of a function decorated with the cold attribute.  The
noreturn attribute alone is not sufficient, because exit(0) is a call that
is executed in most program executions.

Note that internally we have both cold and unlikely functions, where the
first is optimized for size while the second is known to be unlikely
executed by the program and is offloaded to a special unlikely section.
Perhaps we want to expose this to the user and also distinguish between
cold and unlikely function attributes.  I guess this can, however, be done
incrementally.

As a followup I will decorate trap/abort and friends with cold attributes.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

Honza

	* predict.c (maybe_hot_bb_p): Do not check profile status.
	(maybe_hot_edge_p): Likewise.
	(probably_never_executed): Check for zero counts even if profile
	is not read.
	(unlikely_executed_edge_p): New function.
	(unlikely_executed_stmt_p): New function.
	(unlikely_executed_bb_p): New function.
	(set_even_probabilities): Use unlikely predicates.
	(combine_predictions_for_bb): Likewise.
	(predict_paths_for_bb): Likewise.
	(predict_paths_leading_to_edge): Likewise.
	(determine_unlikely_bbs): New function.
	(estimate_bb_frequencies): Use it.
	(compute_function_frequency): Use zero counts even if profile
	is not read.
	* profile-count.h: Fix typo.

	* g++.dg/tree-ssa/counts-1.C: New testcase.
	* gcc.dg/tree-ssa/counts-1.c: New testcase.
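To make the cold-vs-noreturn distinction concrete, here is a small example
(not part of the patch; my_fatal_error is a made-up cold function, not a
GCC API):

extern void my_fatal_error (const char *) __attribute__ ((cold, noreturn));

int
process (int status)
{
  if (status < 0)
    /* Call to a cold function: this block is statically given count 0
       and the path leading to it is treated as never executed.  */
    my_fatal_error ("negative status");
  if (status == 0)
    /* exit() is noreturn but runs on almost every execution of many
       programs, so noreturn alone must not zero out this block.  */
    __builtin_exit (0);
  return status * 2;
}

With the patch, the my_fatal_error path gets a zero count while the exit
path keeps a normal estimate.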
Index: predict.c
===================================================================
--- predict.c	(revision 249009)
+++ predict.c	(working copy)
@@ -189,8 +189,8 @@ bool
 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 {
   gcc_checking_assert (fun);
-  if (profile_status_for_fn (fun) == PROFILE_READ)
-    return maybe_hot_count_p (fun, bb->count);
+  if (!maybe_hot_count_p (fun, bb->count))
+    return false;
   return maybe_hot_frequency_p (fun, bb->frequency);
 }
@@ -200,8 +200,8 @@ maybe_hot_bb_p (struct function *fun, co
 bool
 maybe_hot_edge_p (edge e)
 {
-  if (profile_status_for_fn (cfun) == PROFILE_READ)
-    return maybe_hot_count_p (cfun, e->count);
+  if (!maybe_hot_count_p (cfun, e->count))
+    return false;
   return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
 }
@@ -213,11 +213,10 @@ probably_never_executed (struct function
 			 profile_count count, int)
 {
   gcc_checking_assert (fun);
+  if (count == profile_count::zero ())
+    return true;
   if (count.initialized_p () && profile_status_for_fn (fun) == PROFILE_READ)
     {
-      if (count == profile_count::zero ())
-	return true;
-
       int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
       if (count.apply_scale (unlikely_count_fraction, 1) >= profile_info->runs)
 	return false;
@@ -763,6 +762,69 @@ dump_prediction (FILE *file, enum br_pre
   fprintf (file, "\n");
 }

+/* Return true if E is unlikely executed.  */
+
+static bool
+unlikely_executed_edge_p (edge e)
+{
+  return e->count == profile_count::zero ()
+	 || (e->flags & (EDGE_EH | EDGE_FAKE));
+}
+
+/* Return true if STMT is known to be unlikely executed.  */
+
+static bool
+unlikely_executed_stmt_p (gimple *stmt)
+{
+  if (!is_gimple_call (stmt))
+    return false;
+  /* The NORETURN attribute alone is not strong enough: exit() may be quite
+     likely executed once during a program run.  */
+  if (gimple_call_fntype (stmt)
+      && lookup_attribute ("cold",
+			   TYPE_ATTRIBUTES (gimple_call_fntype (stmt)))
+      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
+    return true;
+  tree decl = gimple_call_fndecl (stmt);
+  if (!decl)
+    return false;
+  if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))
+      && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
+    return true;
+
+  cgraph_node *n = cgraph_node::get (decl);
+  if (!n)
+    return false;
+  enum availability avail;
+  n = n->ultimate_alias_target (&avail);
+  if (avail < AVAIL_AVAILABLE)
+    return false;
+  if (!n->analyzed
+      || n->decl == current_function_decl)
+    return false;
+  return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED;
+}
+
+/* Return true if BB is unlikely executed.  */
+
+static bool
+unlikely_executed_bb_p (basic_block bb)
+{
+  if (bb->count == profile_count::zero ())
+    return true;
+  if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
+    return false;
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      if (unlikely_executed_stmt_p (gsi_stmt (gsi)))
+	return true;
+      if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
+	return false;
+    }
+  return false;
+}
+
 /* We can not predict the probabilities of outgoing edges of bb.  Set them
    evenly and hope for the best.  If UNLIKELY_EDGES is not null, distribute
    even probability for all edges not mentioned in the set.  These edges
@@ -777,7 +839,7 @@ set_even_probabilities (basic_block bb,
   edge_iterator ei;

   FOR_EACH_EDGE (e, ei, bb->succs)
-    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
+    if (!unlikely_executed_edge_p (e))
       nedges ++;

   /* Make the distribution even if all edges are unlikely.
    */
@@ -791,7 +853,7 @@ set_even_probabilities (basic_block bb,
       unsigned c = nedges - unlikely_count;

       FOR_EACH_EDGE (e, ei, bb->succs)
-	if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
+	if (!unlikely_executed_edge_p (e))
	  {
	    if (unlikely_edges != NULL && unlikely_edges->contains (e))
	      e->probability = PROB_VERY_UNLIKELY;
@@ -1056,7 +1118,7 @@ combine_predictions_for_bb (basic_block
       edge_iterator ei;
       FOR_EACH_EDGE (e, ei, bb->succs)
-	if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
+	if (!unlikely_executed_edge_p (e))
	  {
	    nedges ++;
	    if (first && !second)
@@ -1107,7 +1169,7 @@ combine_predictions_for_bb (basic_block
		 "%i edges in bb %i predicted with some unlikely edges\n",
		 nedges, bb->index);
       FOR_EACH_EDGE (e, ei, bb->succs)
-	if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
+	if (!unlikely_executed_edge_p (e))
	  dump_prediction (dump_file, PRED_COMBINED, e->probability,
			   bb, REASON_NONE, e);
     }
@@ -1758,7 +1820,7 @@ predict_loops (void)
       exits = get_loop_exit_edges (loop);
       FOR_EACH_VEC_ELT (exits, j, ex)
-	if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE)))
+	if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL))
	  n_exits ++;
       if (!n_exits)
	{
@@ -1792,7 +1854,8 @@ predict_loops (void)
	  enum br_predictor predictor;
	  widest_int nit;

-	  if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL | EDGE_FAKE))
+	  if (unlikely_executed_edge_p (ex)
+	      || (ex->flags & EDGE_ABNORMAL_CALL))
	    continue;
	  /* Loop heuristics do not expect exit conditional to be inside
	     inner loop.  We predict from innermost to outermost loop.  */
@@ -2609,7 +2672,7 @@ tree_bb_level_predictions (void)
   edge_iterator ei;

   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
-    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
+    if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL))
      {
	has_return_edges = true;
	break;
      }
@@ -2863,7 +2926,7 @@ predict_paths_for_bb (basic_block cur, b
       bool found = false;

       /* Ignore fake edges and eh, we predict them as not taken anyway.  */
-      if (e->flags & (EDGE_EH | EDGE_FAKE))
+      if (unlikely_executed_edge_p (e))
	continue;
       gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
@@ -2871,7 +2934,7 @@ predict_paths_for_bb (basic_block cur, b
	 and does not lead to BB and does not exit the loop.  */
       FOR_EACH_EDGE (e2, ei2, e->src->succs)
	if (e2 != e
-	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
+	    && !unlikely_executed_edge_p (e2)
	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
	    && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
	  {
@@ -2923,7 +2986,7 @@ predict_paths_leading_to_edge (edge e, e
   basic_block bb = e->src;
   FOR_EACH_EDGE (e2, ei, bb->succs)
     if (e2->dest != e->src && e2->dest != e->dest
-	&& !(e->flags & (EDGE_EH | EDGE_FAKE))
+	&& !unlikely_executed_edge_p (e)
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
       {
	 has_nonloop_edge = true;
@@ -3341,6 +3404,142 @@ expensive_function_p (int threshold)
   return false;
 }

+/* Determine basic blocks/edges that are known to be unlikely executed and set
+   their counters to zero.
+   This is done by first identifying obviously unlikely BBs/edges and then
+   propagating in both directions.
+   */
+
+static void
+determine_unlikely_bbs ()
+{
+  basic_block bb;
+  auto_vec<basic_block, 64> worklist;
+  edge_iterator ei;
+  edge e;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      if (!(bb->count == profile_count::zero ())
+	  && unlikely_executed_bb_p (bb))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Basic block %i is locally unlikely\n",
+		     bb->index);
+	  bb->count = profile_count::zero ();
+	}
+
+      if (bb->count == profile_count::zero ())
+	{
+	  bb->frequency = 0;
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    e->count = profile_count::zero ();
+	}
+
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	if (!(e->count == profile_count::zero ())
+	    && unlikely_executed_edge_p (e))
+	  {
+	    if (dump_file && (dump_flags & TDF_DETAILS))
+	      fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
+		       bb->index, e->dest->index);
+	    e->count = profile_count::zero ();
+	  }
+
+      gcc_checking_assert (!bb->aux);
+    }
+
+  if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
+    {
+      ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
+      worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+      while (worklist.length () > 0)
+	{
+	  bb = worklist.pop ();
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    if (!(e->count == profile_count::zero ())
+		&& !(e->dest->count == profile_count::zero ())
+		&& !e->dest->aux)
+	      {
+		e->dest->aux = (void *)(size_t) 1;
+		worklist.safe_push (e->dest);
+	      }
+	}
+    }
+
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      if (!bb->aux)
+	{
+	  if (!(bb->count == profile_count::zero ())
+	      && (dump_file && (dump_flags & TDF_DETAILS)))
+	    fprintf (dump_file,
+		     "Basic block %i is marked unlikely by forward prop\n",
+		     bb->index);
+	  bb->count = profile_count::zero ();
+	  bb->frequency = 0;
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    e->count = profile_count::zero ();
+	}
+      else
+	bb->aux = NULL;
+    }
+
+  auto_vec<int, 64> nsuccs;
+  nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun));
+  FOR_ALL_BB_FN (bb, cfun)
+    if (!(bb->count == profile_count::zero ())
+	&& bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
+      {
+	nsuccs[bb->index] = 0;
+	FOR_EACH_EDGE (e, ei, bb->succs)
+	  if (!(e->count == profile_count::zero ()))
+	    nsuccs[bb->index]++;
+	if (!nsuccs[bb->index])
+	  worklist.safe_push (bb);
+      }
+  while (worklist.length () > 0)
+    {
+      bb = worklist.pop ();
+      if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
+	{
+	  bool found = false;
+	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+	       !gsi_end_p (gsi); gsi_next (&gsi))
+	    if (stmt_can_terminate_bb_p (gsi_stmt (gsi))
+		/* stmt_can_terminate_bb_p special cases noreturns because it
+		   assumes that fake edges are created.  We want to know that
+		   noreturn alone does not imply BB to be unlikely.  */
+		|| (is_gimple_call (gsi_stmt (gsi))
+		    && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN)))
+	      {
+		found = true;
+		break;
+	      }
+	  if (found)
+	    continue;
+	}
+      if (!(bb->count == profile_count::zero ())
+	  && (dump_file && (dump_flags & TDF_DETAILS)))
+	fprintf (dump_file,
+		 "Basic block %i is marked unlikely by backward prop\n",
+		 bb->index);
+      bb->count = profile_count::zero ();
+      bb->frequency = 0;
+      FOR_EACH_EDGE (e, ei, bb->preds)
+	if (!(e->count == profile_count::zero ()))
+	  {
+	    e->count = profile_count::zero ();
+	    if (!(e->src->count == profile_count::zero ()))
+	      {
+		nsuccs[e->src->index]--;
+		if (!nsuccs[e->src->index])
+		  worklist.safe_push (e->src);
+	      }
+	  }
+    }
+}
+
 /* Estimate and propagate basic block frequencies using the given branch
    probabilities.  If FORCE is true, the frequencies are used to estimate
    the counts even when there are already non-zero profile counts.
    */
@@ -3351,7 +3550,10 @@ estimate_bb_frequencies (bool force)
   basic_block bb;
   sreal freq_max;

-  if (force || profile_status_for_fn (cfun) != PROFILE_READ || !counts_to_freqs ())
+  determine_unlikely_bbs ();
+
+  if (force || profile_status_for_fn (cfun) != PROFILE_READ
+      || !counts_to_freqs ())
     {
       static int real_values_initialized = 0;
@@ -3423,8 +3625,9 @@ compute_function_frequency (void)
   if (profile_status_for_fn (cfun) != PROFILE_READ)
     {
       int flags = flags_from_decl_or_type (current_function_decl);
-      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
-	  != NULL)
+      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()
+	  || lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
+	     != NULL)
         node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
       else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
Index: profile-count.h
===================================================================
--- profile-count.h	(revision 249009)
+++ profile-count.h	(working copy)
@@ -36,7 +36,7 @@ along with GCC; see the file COPYING3.
    profile counts known while other do not - for example when LTO
    optimizing partly profiled program or when profile was lost due to COMDAT
    merging.

-   For this information profile_count tracks more information than
+   For this reason profile_count tracks more information than
    just unsigned integer and it is also ready for profile mismatches.
    The API of this data type represent operations that are natural
    on profile counts - sum, difference and operation with scales and
Index: testsuite/g++.dg/tree-ssa/counts-1.C
===================================================================
--- testsuite/g++.dg/tree-ssa/counts-1.C	(revision 0)
+++ testsuite/g++.dg/tree-ssa/counts-1.C	(working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+void foo();
+extern void abort (void);
+
+static __attribute__ ((noinline))
+void mark_me_unlikely ()
+{
+  foo();
+  foo();
+  foo();
+  foo();
+}
+
+void i_am_not_unlikely()
+{
+  try { foo(); }
+  catch (int) {mark_me_unlikely ();}
+}
+/* { dg-final { scan-tree-dump "mark_me_unlikely \[^\r\n\]*(unlikely executed)" "optimized"} } */
+/* { dg-final { scan-tree-dump-not "i_am_not_unlikely \[^\r\n\]*(unlikely executed)" "optimized"} } */
Index: testsuite/gcc.dg/tree-ssa/counts-1.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/counts-1.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/counts-1.c	(working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+void unlikely () __attribute__ ((cold));
+void unlikely2 () __attribute__ ((cold));
+
+__attribute__ ((noinline)) void
+i_am_also_unlikely (int a)
+{
+  if (a)
+    unlikely ();
+  else
+    unlikely2 ();
+}
+
+
+void
+i_am_also_unlikely2 (int a,int b)
+{
+  if (b)
+    i_am_also_unlikely (a);
+  else
+    unlikely ();
+}
+
+void
+i_am_not_unlikely (int a,int b,int c)
+{
+  if (c)
+    __builtin_exit (0);
+  i_am_also_unlikely2 (a,b);
+}
+/* Detect i_am_also_unlikely and i_am_also_unlikely2 as unlikely.  */
+/* { dg-final { scan-tree-dump "i_am_also_unlikely \[^\r\n\]*(unlikely executed)" "optimized"} } */
+/* { dg-final { scan-tree-dump "i_am_also_unlikely2 \[^\r\n\]*(unlikely executed)" "optimized"} } */
+/* { dg-final { scan-tree-dump-not "i_am_not_unlikely \[^\r\n\]*(unlikely executed)" "optimized"} } */
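P.S. For readers who want to play with the propagation idea outside of GCC,
below is a simplified, self-contained sketch of the forward pass of
determine_unlikely_bbs over a toy adjacency-matrix CFG.  Everything here
(the succ array, the block numbering) is illustrative only; the real pass
walks GCC's basic_block/edge structures, uses bb->aux as the visited mark,
and additionally runs the backward pass shown in the patch (a block whose
every outgoing edge already has count zero, and which cannot terminate the
program, gets count zero as well).

#include <stdbool.h>
#include <stdio.h>

#define N_BLOCKS 5

/* succ[a][b] is true when there is an edge a->b whose count is not already
   known to be zero (i.e. not EH/fake and not locally unlikely).  Block 3 is
   only reachable via an EH edge, so it has no entry here.  */
static bool succ[N_BLOCKS][N_BLOCKS] = {
  [0] = { [1] = true },		/* entry -> 1 */
  [1] = { [2] = true },		/* 1 -> 2 */
  [3] = { [4] = true },		/* 3 -> 4, but nothing reaches 3 */
};

int
main (void)
{
  bool reached[N_BLOCKS] = { [0] = true };	/* block 0 is the entry */
  int worklist[N_BLOCKS], top = 0;
  worklist[top++] = 0;

  /* Mark everything reachable from the entry through non-zero edges.  */
  while (top > 0)
    {
      int a = worklist[--top];
      for (int b = 0; b < N_BLOCKS; b++)
	if (succ[a][b] && !reached[b])
	  {
	    reached[b] = true;
	    worklist[top++] = b;
	  }
    }

  /* Whatever was not reached gets count zero, as in the real pass.  */
  for (int b = 0; b < N_BLOCKS; b++)
    if (!reached[b])
      printf ("Basic block %i is marked unlikely by forward prop\n", b);
  return 0;
}

Compiling and running this prints blocks 3 and 4 as unlikely, mirroring the
TDF_DETAILS dump messages the real pass emits.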