From b015708a4a3a3cdadb184012174f813c11bcdec2 Mon Sep 17 00:00:00 2001 From: Xiong Hu Luo Date: Mon, 5 Jul 2021 03:57:11 -0500 Subject: [PATCH 2/2] Don't move cold code out of loop by checking bb count There was a patch trying to avoid move cold block out of loop: https://gcc.gnu.org/pipermail/gcc/2014-November/215551.html Richard suggested to "never hoist anything from a bb with lower execution frequency to a bb with higher one in LIM invariantness_dom_walker before_dom_children". This patch does this profile count check in both gimple LIM move_computations_worker and RTL loop-invariant.c find_invariants_bb, if the loop bb is colder than loop preheader, don't hoist it out of loop. SPEC2017 performance evaluation shows 1% performance improvement for intrate GEOMEAN and no obvious regression for others. Especially, 500.perlbench_r +7.52% (Perf shows function S_regtry of perlbench is largely improved.), and 548.exchange2_r+1.98%, 526.blender_r +1.00% on P8LE. Regression and bootstrap tested pass on P8LE, any comments? Thanks. gcc/ChangeLog: * loop-invariant.c (find_invariants_bb): Check profile count before motion. (find_invariants_body): Add argument. * tree-ssa-loop-im.c (move_computations_worker): Check profile count before motion. (execute_sm): Likewise. (execute_sm_exit): Check pointer validness. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/recip-3.c: Adjust. --- gcc/loop-invariant.c | 10 +- gcc/testsuite/gcc.dg/tree-ssa/recip-3.c | 2 +- gcc/tree-ssa-loop-im.c | 154 +++++++++++++++++++++++- 3 files changed, 157 insertions(+), 9 deletions(-) diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c index fca0c2b24be..5c3be7bf0eb 100644 --- a/gcc/loop-invariant.c +++ b/gcc/loop-invariant.c @@ -1183,9 +1183,14 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed) call. */ static void -find_invariants_bb (basic_block bb, bool always_reached, bool always_executed) +find_invariants_bb (class loop *loop, basic_block bb, bool always_reached, + bool always_executed) { rtx_insn *insn; + basic_block preheader = loop_preheader_edge (loop)->src; + + if (preheader->count > bb->count) + return; FOR_BB_INSNS (bb, insn) { @@ -1214,8 +1219,7 @@ find_invariants_body (class loop *loop, basic_block *body, unsigned i; for (i = 0; i < loop->num_nodes; i++) - find_invariants_bb (body[i], - bitmap_bit_p (always_reached, i), + find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i), bitmap_bit_p (always_executed, i)); } diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c index 638bf38db8c..641c91e719e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c @@ -23,4 +23,4 @@ float h () F[0] += E / d; } -/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */ +/* { dg-final { scan-tree-dump-times " / " 5 "recip" } } */ diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index d9f75d5025e..80d61ddf531 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -1154,6 +1154,58 @@ move_computations_worker (basic_block bb) continue; } + edge e = loop_preheader_edge (level); + if (e->src->count > bb->count) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "PHI node NOT moved to %d from %d:\n", + e->src->index, bb->index); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost, + level->num); + } + gsi_next (&bsi); + continue; + } + else + { + unsigned i; + bool skip_phi_move = false; + for (i = 0; i < gimple_phi_num_args (stmt); i++) + { + tree def = PHI_ARG_DEF (stmt, i); + + if (TREE_CODE (def) != SSA_NAME) + continue; + + gimple *def_stmt = SSA_NAME_DEF_STMT (def); + + if (!gimple_bb (def_stmt)) + continue; + + if (!dominated_by_p (CDI_DOMINATORS, e->src, + gimple_bb (def_stmt))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "PHI node NOT moved to %d from %d\n", + e->src->index, bb->index); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost, + level->num); + } + skip_phi_move = true; + break; + } + } + if (skip_phi_move) + { + gsi_next (&bsi); + continue; + } + } + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Moving PHI node\n"); @@ -1191,14 +1243,13 @@ move_computations_worker (basic_block bb) tree lhs = gimple_assign_lhs (new_stmt); SSA_NAME_RANGE_INFO (lhs) = NULL; } - gsi_insert_on_edge (loop_preheader_edge (level), new_stmt); + gsi_insert_on_edge (e, new_stmt); remove_phi_node (&bsi, false); } for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); ) { edge e; - gimple *stmt = gsi_stmt (bsi); lim_data = get_lim_data (stmt); @@ -1221,7 +1272,83 @@ move_computations_worker (basic_block bb) /* We do not really want to move conditionals out of the loop; we just placed it here to force its operands to be moved if necessary. */ if (gimple_code (stmt) == GIMPLE_COND) - continue; + { + gsi_next (&bsi); + continue; + } + + e = loop_preheader_edge (level); + if (e->src->count > bb->count) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt: Statement NOT moved to %d from %d \n", + e->src->index, bb->index); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost, + level->num); + } + gsi_next (&bsi); + continue; + } + else + { + if (is_gimple_assign (stmt)) + { + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + if (TREE_CODE (rhs1) == MEM_REF) + { + rhs2 = TREE_OPERAND (rhs1, 1); + rhs1 = TREE_OPERAND (rhs1, 0); + } + gimple *stmt1 = NULL, *stmt2 = NULL; + basic_block def_bb; + if (rhs1 && TREE_CODE (rhs1) == SSA_NAME) + { + stmt1 = SSA_NAME_DEF_STMT (rhs1); + def_bb = gimple_bb (stmt1); + if (stmt1 + && def_bb + && (def_bb == bb + || !dominated_by_p (CDI_DOMINATORS, e->src, def_bb))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + "stmt1: Statement NOT moved to %d from %d\n", + e->src->index, bb->index); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "(cost %u) out of loop %d.\n\n", + cost, level->num); + } + gsi_next (&bsi); + continue; + } + } + if (rhs2 && TREE_CODE (rhs2) == SSA_NAME) + { + stmt2 = SSA_NAME_DEF_STMT (rhs2); + def_bb = gimple_bb (stmt2); + if (stmt2 && def_bb + && (def_bb == bb + || !dominated_by_p (CDI_DOMINATORS, e->src, def_bb))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + "stmt2: Statement NOT moved to %d from %d\n", + e->src->index, bb->index); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "(cost %u) out of loop %d.\n\n", + cost, level->num); + } + gsi_next (&bsi); + continue; + } + } + } + } if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1231,7 +1358,6 @@ move_computations_worker (basic_block bb) cost, level->num); } - e = loop_preheader_edge (level); gcc_assert (!gimple_vdef (stmt)); if (gimple_vuse (stmt)) { @@ -2133,6 +2259,19 @@ execute_sm (class loop *loop, im_mem_ref *ref, bool multi_threaded_model_p = false; gimple_stmt_iterator gsi; sm_aux *aux = new sm_aux; + basic_block bb = gimple_bb (first_mem_ref_loc (loop, ref)->stmt); + + edge e = loop_preheader_edge (loop); + if (e->src->count > bb->count) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Don't execute store motion of "); + print_generic_expr (dump_file, ref->mem.ref); + fprintf (dump_file, " from loop %d\n", loop->num); + } + return; + } if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -2241,7 +2380,12 @@ execute_sm_exit (class loop *loop, edge ex, vec &seq, } else { - sm_aux *aux = *aux_map.get (ref); + sm_aux **paux = aux_map.get (ref); + sm_aux *aux; + if (paux) + aux = *paux; + else + continue; if (!aux->store_flag || kind == sm_ord) { gassign *store; -- 2.27.0.90.geebb51ba8c