From: Richard Biener
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-5394] tree-optimization/102436 - restore loop store motion
Date: Fri, 19 Nov 2021 08:35:46 +0000 (GMT)
X-Act-Checkin: gcc
X-Git-Author: Richard Biener
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 09d462146b3107c665265b11ad925c61a91c6efb
X-Git-Newrev: 0fc859f5efcb4624a8b4ffdbf34d63972af179a8

https://gcc.gnu.org/g:0fc859f5efcb4624a8b4ffdbf34d63972af179a8

commit r12-5394-g0fc859f5efcb4624a8b4ffdbf34d63972af179a8
Author: Richard Biener
Date:   Thu Nov 18 13:40:32 2021 +0100

    tree-optimization/102436 - restore loop store motion

    This restores a case of conditional store motion that we failed to
    handle after the store-motion rewrite.  We can recognize the special
    case of all stores in a loop happening in a single conditionally
    executed block: that guarantees the stores are not re-ordered by
    executing them in different loop iterations.  Separating out this
    case avoids complicating the already complex main path.

    2021-11-18  Richard Biener

    	PR tree-optimization/102436
    	* tree-ssa-loop-im.c (execute_sm_if_changed): Add mode to just
    	create the if structure and return the then block.
    	(execute_sm): Add flag to indicate the var will re-use another
    	flag var.
    	(hoist_memory_references): Support a single conditional block
    	with all stores as special case.

    	* gcc.dg/torture/20211118-1.c: New testcase.
    	* gcc.dg/tree-ssa/ssa-lim-18.c: Likewise.
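For illustration, a hand-written C sketch of the transform this re-enables,
applied to the loop from the new ssa-lim-18.c testcase: the store to p is
moved out of the loop and replayed conditionally on the exit, guarded by a
flag so no store is introduced on paths that never stored.  The p_lsm and
p_flag names are invented for exposition; the actual transformation happens
on GIMPLE, not on C source.

    unsigned p;

    void foo_after_store_motion (float *q)
    {
      unsigned p_lsm = p;        /* load of p hoisted into the preheader */
      _Bool p_flag = 0;          /* records whether the loop stored to p */
      for (int i = 0; i < 256; ++i)
        if (p_lsm)
          {
            *q++ = 1.f;          /* address varies; this store stays put */
            p_lsm = p_lsm + 1;   /* store to p becomes a register update */
            p_flag = 1;
          }
      if (p_flag)                /* conditional materialization on exit;  */
        p = p_lsm;               /* avoids introducing a store data race  */
    }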
Diff:
---
 gcc/testsuite/gcc.dg/torture/20211118-1.c  |  27 +++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c |  19 ++++
 gcc/tree-ssa-loop-im.c                     | 162 +++++++++++++++++++++++++++--
 3 files changed, 199 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/torture/20211118-1.c b/gcc/testsuite/gcc.dg/torture/20211118-1.c
new file mode 100644
index 00000000000..67ef68420df
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/20211118-1.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+unsigned p[3];
+void __attribute__((noipa))
+bar (float *q, int n, int m)
+{
+  for (int i = 0; i < m; ++i)
+    {
+      if (i == n)
+        {
+          unsigned a = p[1];
+          p[1] = a + 1;
+          *q = 1.;
+        }
+      q++;
+    }
+}
+
+int main()
+{
+#if __SIZEOF_FLOAT__ == __SIZEOF_INT__
+  bar ((float *)p, 1, 3);
+  if (((float *)p)[1] != 1.)
+    __builtin_abort ();
+#endif
+  return 0;
+}

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c
new file mode 100644
index 00000000000..da19806b712
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-18.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fstrict-aliasing -fdump-tree-lim2-details" } */
+
+unsigned p;
+
+void foo (float *q)
+{
+  for (int i = 0; i < 256; ++i)
+    {
+      if (p)
+        {
+          unsigned a = p;
+          *(q++) = 1.;
+          p = a + 1;
+        }
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Executing store motion" 1 "lim2" } } */
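Why the run test checks a float value: with the call bar ((float *)p, 1, 3),
the i == 1 iteration stores p[1] = a + 1 and then *q = 1. into the same four
bytes, so the materialized stores must preserve that order and the word must
end up holding the bit pattern of the float 1.0.  A stand-alone sketch of
that ordering requirement follows; it assumes sizeof (float) == sizeof
(unsigned) and should be compiled with -fno-strict-aliasing, since it
type-puns directly instead of hiding the aliasing behind noipa as the
testcase does.

    #include <stdio.h>

    unsigned p[3];

    int main (void)
    {
      float *q = (float *) &p[1];
      p[1] = p[1] + 1;  /* integer store first ...  */
      *q = 1.f;         /* ... float store second overwrites the word */
      /* Prints 1.000000; with the two stores re-ordered the word
         would hold the integer bit pattern 1 instead.  */
      printf ("%f\n", ((float *) p)[1]);
      return 0;
    }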
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 8a81acae115..682406d26c1 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -1911,10 +1911,13 @@ first_mem_ref_loc (class loop *loop, im_mem_ref *ref)
       }
     }
     if (lsm_flag)	<--
-      MEM = lsm;	<--
+      MEM = lsm;	<-- (X)
+
+   In case MEM and TMP_VAR are NULL the function will return the then
+   block so the caller can insert (X) and other related stmts.
 */
 
-static void
+static basic_block
 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
 		       edge preheader, hash_set <basic_block> *flag_bbs,
 		       edge &append_cond_position, edge &last_cond_fallthru)
@@ -2009,10 +2012,13 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
 			    NULL_TREE, NULL_TREE);
   gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
-  gsi = gsi_start_bb (then_bb);
   /* Insert actual store.  */
-  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
-  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+  if (mem)
+    {
+      gsi = gsi_start_bb (then_bb);
+      stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+    }
 
   edge e1 = single_succ_edge (new_bb);
   edge e2 = make_edge (new_bb, then_bb,
@@ -2060,6 +2066,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag,
 	  update_stmt (phi);
 	}
     }
+
+  return then_bb;
 }
 
 /* When REF is set on the location, set flag indicating the store.  */
@@ -2117,7 +2125,8 @@ struct sm_aux
 
 static void
 execute_sm (class loop *loop, im_mem_ref *ref,
-	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt)
+	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
+	    bool use_other_flag_var)
 {
   gassign *load;
   struct fmt_data fmt_data;
@@ -2146,7 +2155,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
 	  || (! flag_store_data_races && ! always_stored)))
     multi_threaded_model_p = true;
 
-  if (multi_threaded_model_p)
+  if (multi_threaded_model_p && !use_other_flag_var)
     aux->store_flag
       = execute_sm_if_changed_flag_set (loop, ref, &aux->flag_bbs);
   else
@@ -2182,7 +2191,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
   lim_data->tgt_loop = loop;
   gsi_insert_before (&gsi, load, GSI_SAME_STMT);
 
-  if (multi_threaded_model_p)
+  if (aux->store_flag)
     {
       load = gimple_build_assign (aux->store_flag, boolean_false_node);
       lim_data = init_lim_data (load);
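The special-case code in hoist_memory_references below is the consumer of
that new mode: passing NULL_TREE for MEM and TMP_VAR makes
execute_sm_if_changed build only the if (flag) skeleton, and the returned
then block tells the caller where to put the whole ordered store sequence.
Condensed from the hunk that follows (not a separate API, just the call
pattern pulled out for readability):

    /* Build if (flag) { } on exit edge E and take the edge into the
       then block; the ordered stores are then emitted on that edge.  */
    edge insert_e
      = single_pred_edge (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
                                                 aux->store_flag,
                                                 loop_preheader_edge (loop),
                                                 &aux->flag_bbs,
                                                 append_cond_position,
                                                 last_cond_fallthru));
    execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
                     append_cond_position, last_cond_fallthru);
    gsi_commit_one_edge_insert (insert_e, NULL);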
@@ -2555,6 +2564,140 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   unsigned i;
   bitmap_iterator bi;
 
+  /* There's a special case we can use ordered re-materialization for
+     conditionally executed stores, which is when all stores in the loop
+     happen in the same basic-block.  In that case we know we'll reach
+     all stores and thus can simply process that BB and emit a single
+     conditional block of ordered materializations.  See PR102436.  */
+  basic_block single_store_bb = NULL;
+  EXECUTE_IF_SET_IN_BITMAP (&memory_accesses.all_refs_stored_in_loop[loop->num],
+			    0, i, bi)
+    {
+      bool fail = false;
+      ref = memory_accesses.refs_list[i];
+      for (auto loc : ref->accesses_in_loop)
+	if (!gimple_vdef (loc.stmt))
+	  ;
+	else if (!single_store_bb)
+	  {
+	    single_store_bb = gimple_bb (loc.stmt);
+	    bool conditional = false;
+	    for (edge e : exits)
+	      if (!dominated_by_p (CDI_DOMINATORS, e->src, single_store_bb))
+		{
+		  /* Conditional as seen from e.  */
+		  conditional = true;
+		  break;
+		}
+	    if (!conditional)
+	      {
+		fail = true;
+		break;
+	      }
+	  }
+	else if (single_store_bb != gimple_bb (loc.stmt))
+	  {
+	    fail = true;
+	    break;
+	  }
+      if (fail)
+	{
+	  single_store_bb = NULL;
+	  break;
+	}
+    }
+  if (single_store_bb)
+    {
+      /* Analyze the single block with stores.  */
+      auto_bitmap fully_visited;
+      auto_bitmap refs_not_supported;
+      auto_bitmap refs_not_in_seq;
+      auto_vec<seq_entry> seq;
+      bitmap_copy (refs_not_in_seq, mem_refs);
+      int res = sm_seq_valid_bb (loop, single_store_bb, NULL_TREE,
+				 seq, refs_not_in_seq, refs_not_supported,
+				 false, fully_visited);
+      if (res != 1)
+	{
+	  /* Unhandled refs can still fail this.  */
+	  bitmap_clear (mem_refs);
+	  return;
+	}
+
+      /* We cannot handle sm_other since we neither remember the
+	 stored location nor the value at the point we execute them.  */
+      for (unsigned i = 0; i < seq.length (); ++i)
+	{
+	  unsigned new_i;
+	  if (seq[i].second == sm_other
+	      && seq[i].from != NULL_TREE)
+	    seq[i].from = NULL_TREE;
+	  else if ((seq[i].second == sm_ord
+		    || (seq[i].second == sm_other
+			&& seq[i].from != NULL_TREE))
+		   && !sm_seq_push_down (seq, i, &new_i))
+	    {
+	      bitmap_set_bit (refs_not_supported, seq[new_i].first);
+	      seq[new_i].second = sm_other;
+	      seq[new_i].from = NULL_TREE;
+	    }
+	}
+      bitmap_and_compl_into (mem_refs, refs_not_supported);
+      if (bitmap_empty_p (mem_refs))
+	return;
+
+      /* Prune seq.  */
+      while (seq.last ().second == sm_other
+	     && seq.last ().from == NULL_TREE)
+	seq.pop ();
+
+      hash_map<im_mem_ref *, sm_aux *> aux_map;
+
+      /* Execute SM but delay the store materialization for ordered
+	 sequences on exit.  */
+      bool first_p = true;
+      EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
+	{
+	  ref = memory_accesses.refs_list[i];
+	  execute_sm (loop, ref, aux_map, true, !first_p);
+	  first_p = false;
+	}
+
+      /* Get at the single flag variable we eventually produced.  */
+      im_mem_ref *ref
+	= memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
+      sm_aux *aux = *aux_map.get (ref);
+
+      /* Materialize ordered store sequences on exits.  */
+      edge e;
+      FOR_EACH_VEC_ELT (exits, i, e)
+	{
+	  edge append_cond_position = NULL;
+	  edge last_cond_fallthru = NULL;
+	  edge insert_e = e;
+	  /* Construct the single flag variable control flow and insert
+	     the ordered seq of stores in the then block.  With
+	     -fallow-store-data-races we can do the stores unconditionally.  */
+	  if (aux->store_flag)
+	    insert_e
+	      = single_pred_edge
+		  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
					  aux->store_flag,
					  loop_preheader_edge (loop),
					  &aux->flag_bbs, append_cond_position,
					  last_cond_fallthru));
+	  execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
+			   append_cond_position, last_cond_fallthru);
+	  gsi_commit_one_edge_insert (insert_e, NULL);
+	}
+
+      for (hash_map<im_mem_ref *, sm_aux *>::iterator iter = aux_map.begin ();
+	   iter != aux_map.end (); ++iter)
+	delete (*iter).second;
+
+      return;
+    }
+
   /* To address PR57359 before actually applying store-motion check
      the candidates found for validity with regards to reordering
      relative to other stores which we until here disambiguated using
@@ -2693,7 +2836,8 @@ hoist_memory_references (class loop *loop, bitmap mem_refs,
   EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
     {
       ref = memory_accesses.refs_list[i];
-      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i));
+      execute_sm (loop, ref, aux_map, bitmap_bit_p (refs_not_supported, i),
+		  false);
     }
 
   /* Materialize ordered store sequences on exits.  */