From mboxrd@z Thu Jan  1 00:00:00 1970
Received: by sourceware.org (Postfix, from userid 1666)
	id D0CFC3951C78; Wed,  7 Jul 2021 11:49:54 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D0CFC3951C78
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Richard Biener
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-2097] tree-optimization/99728 - improve LIM for loops with aggregate copies
X-Act-Checkin: gcc
X-Git-Author: Richard Biener
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 98bfd845e93937d92ca844d7fa7e853ad51c6193
X-Git-Newrev: 9f34b780b0461ec7b2b2defe96e44ab616ea2aa3
Message-Id: <20210707114954.D0CFC3951C78@sourceware.org>
Date: Wed,  7 Jul 2021 11:49:54 +0000 (GMT)
X-BeenThere: gcc-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-cvs mailing list
X-List-Received-Date: Wed, 07 Jul 2021 11:49:54 -0000

https://gcc.gnu.org/g:9f34b780b0461ec7b2b2defe96e44ab616ea2aa3

commit r12-2097-g9f34b780b0461ec7b2b2defe96e44ab616ea2aa3
Author: Richard Biener
Date:   Wed Jul 7 11:41:03 2021 +0200

    tree-optimization/99728 - improve LIM for loops with aggregate copies
    
    This improves LIM by recording aggregate copies for disambiguation
    purposes instead of as UNANALYZABLE_MEM, which would prevent any
    invariant or store motion across it.  This allows four of the six
    references in the loop of the testcase to be promoted.
    
    2021-07-07  Richard Biener
    
            PR tree-optimization/99728
            * tree-ssa-loop-im.c (gather_mem_refs_stmt): Record aggregate
            copies.
            (mem_refs_may_alias_p): Add assert we handled aggregate copies
            elsewhere.
            (sm_seq_valid_bb): Give up when running into aggregate copies.
            (ref_indep_loop_p): Handle aggregate copies as never being
            invariant themselves but allow other refs to be disambiguated
            against them.
            (can_sm_ref_p): Do not try to apply store-motion to aggregate
            copies.
    
            * g++.dg/opt/pr99728.C: New testcase.
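For illustration only (this snippet is not part of the commit or the testcase; the struct, field, and function names are made up): a reduced sketch of the pattern the change targets. The loop accumulates into some members of a struct and also performs an aggregate copy between two other, non-overlapping members. Before this change the aggregate copy was recorded as the shared UNANALYZABLE_MEM reference and conflicted with everything, blocking store motion of the accumulating members; with the change the copy gets its own reference used only for disambiguation, so the accumulations can still be promoted.

    // Not from the commit: hypothetical reduced example of an aggregate
    // copy inside a loop that no longer blocks store motion.
    struct vec2  { double x, y; };
    struct state { vec2 acc, prev, cur; };

    void
    step (state &__restrict__ s, const double *__restrict__ in,
          unsigned long n)
    {
      for (unsigned long i = 0; i < n; ++i)
        {
          // Accumulations into s.acc.* are candidates for store motion.
          s.acc.x += in[2 * i];
          s.acc.y += in[2 * i + 1];
          // Aggregate copy; it does not overlap s.acc, so it can be
          // disambiguated against the accumulations instead of being
          // treated as an unanalyzable barrier.
          s.prev = s.cur;
          s.cur.x = in[2 * i];
          s.cur.y = in[2 * i + 1];
        }
    }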
Diff:
---
 gcc/testsuite/g++.dg/opt/pr99728.C | 50 ++++++++++++++++++++++++++++++++
 gcc/tree-ssa-loop-im.c             | 59 +++++++++++++++++++++++++++++++++-----
 2 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/g++.dg/opt/pr99728.C b/gcc/testsuite/g++.dg/opt/pr99728.C
new file mode 100644
index 00000000000..d4393231b4c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr99728.C
@@ -0,0 +1,50 @@
+// PR/99728
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-lim2-details -w -Wno-psabi" }
+
+typedef double __m256d __attribute__((vector_size(sizeof (double) * 4)));
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pd (double __A)
+{
+  return __extension__ (__m256d){ __A, __A, __A, __A };
+}
+
+// simple OO wrapper around __m256d
+struct Tvsimple
+  {
+  __m256d v;
+  Tvsimple &operator+=(const Tvsimple &other) {v+=other.v; return *this;}
+  Tvsimple operator*(double val) const { Tvsimple res; res.v = v*_mm256_set1_pd(val); return res;}
+  Tvsimple operator*(Tvsimple val) const { Tvsimple res; res.v = v*val.v; return res; }
+  Tvsimple operator+(Tvsimple val) const { Tvsimple res; res.v = v+val.v; return res; }
+  Tvsimple operator+(double val) const { Tvsimple res; res.v = v+_mm256_set1_pd(val); return res;}
+  };
+
+template<typename vtype> struct s0data_s
+  { vtype sth, corfac, scale, lam1, lam2, csq, p1r, p1i, p2r, p2i; };
+
+template<typename vtype> void foo(s0data_s<vtype> & __restrict__ d,
+  const double * __restrict__ coef, const double * __restrict__ alm,
+  unsigned long l, unsigned long il, unsigned long lmax)
+  {
+// critical loop
+  while (l<=lmax)
+    {
+    d.p1r += d.lam2*alm[2*l];
+    d.p1i += d.lam2*alm[2*l+1];
+    d.p2r += d.lam2*alm[2*l+2];
+    d.p2i += d.lam2*alm[2*l+3];
+    Tvsimple tmp = d.lam2*(d.csq*coef[2*il] + coef[2*il+1]) + d.lam1;
+    d.lam1 = d.lam2;
+    d.lam2 = tmp;
+    ++il; l+=2;
+    }
+  }
+
+// this version has dead stores at the end of the loop
+template void foo<>(s0data_s<Tvsimple> & __restrict__ d,
+  const double * __restrict__ coef, const double * __restrict__ alm,
+  unsigned long l, unsigned long il, unsigned long lmax);
+
+// The aggregate copy in the IL should not prevent all store-motion
+// { dg-final { scan-tree-dump-times "Executing store motion" 4 "lim2" } }
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 9ac390b9a4b..81b4ec21d6e 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -122,7 +122,9 @@ public:
   hashval_t hash;		/* Its hash value.  */
 
   /* The memory access itself and associated caching of alias-oracle
-     query meta-data.  */
+     query meta-data.  We are using mem.ref == error_mark_node for the
+     case the reference is represented by its single access stmt
+     in accesses_in_loop[0].  */
   ao_ref mem;
 
   bitmap stored;		/* The set of loops in that this memory location
@@ -130,8 +132,7 @@ public:
   bitmap loaded;		/* The set of loops in that this memory location
				   is loaded from.  */
   vec<mem_ref_loc>		accesses_in_loop;
-				/* The locations of the accesses.  Vector
-				   indexed by the loop number.  */
+				/* The locations of the accesses.  */
 
   /* The following set is computed on demand.  */
   bitmap_head dep_loop;		/* The set of loops in that the memory
@@ -1465,7 +1466,22 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
     return;
 
   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
-  if (!mem)
+  if (!mem && is_gimple_assign (stmt))
+    {
+      /* For aggregate copies record distinct references but use them
+	 only for disambiguation purposes.  */
+      id = memory_accesses.refs_list.length ();
+      ref = mem_ref_alloc (NULL, 0, id);
+      memory_accesses.refs_list.safe_push (ref);
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "Unhandled memory reference %u: ", id);
+	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+	}
+      record_mem_ref_loc (ref, stmt, mem);
+      is_stored = gimple_vdef (stmt);
+    }
+  else if (!mem)
     {
       /* We use the shared mem_ref for all unanalyzable refs.  */
       id = UNANALYZABLE_MEM_ID;
@@ -1595,7 +1611,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
	  mark_ref_stored (ref, loop);
	}
       /* A not simple memory op is also a read when it is a write.  */
-      if (!is_stored || id == UNANALYZABLE_MEM_ID)
+      if (!is_stored || id == UNANALYZABLE_MEM_ID
+	  || ref->mem.ref == error_mark_node)
	{
	  bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
	  mark_ref_loaded (ref, loop);
@@ -1714,6 +1731,9 @@ mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
		      hash_map<tree, name_expansion *> **ttae_cache,
		      bool tbaa_p)
 {
+  gcc_checking_assert (mem1->mem.ref != error_mark_node
+		       && mem2->mem.ref != error_mark_node);
+
   /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
      object and their offset differ in such a way that the locations cannot
      overlap, then they cannot alias.  */
@@ -2490,6 +2510,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
	  gcc_assert (data);
	  if (data->ref == UNANALYZABLE_MEM_ID)
	    return -1;
+	  /* Stop at memory references which we can't move.  */
+	  else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
+	    {
+	      /* Mark refs_not_in_seq as unsupported.  */
+	      bitmap_ior_into (refs_not_supported, refs_not_in_seq);
+	      return 1;
+	    }
	  /* One of the stores we want to apply SM to and we've not yet seen.  */
	  else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
	    {
@@ -2798,7 +2825,8 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
   else
     refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
 
-  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
+  if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
+      || ref->mem.ref == error_mark_node)
     indep_p = false;
   else
     {
@@ -2825,7 +2853,20 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
	  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
	    {
	      im_mem_ref *aref = memory_accesses.refs_list[i];
-	      if (!refs_independent_p (ref, aref, kind != sm_waw))
+	      if (aref->mem.ref == error_mark_node)
+		{
+		  gimple *stmt = aref->accesses_in_loop[0].stmt;
+		  if ((kind == sm_war
+		       && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
+						    kind != sm_waw))
+		      || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
+						   kind != sm_waw))
+		    {
+		      indep_p = false;
+		      break;
+		    }
+		}
+	      else if (!refs_independent_p (ref, aref, kind != sm_waw))
		{
		  indep_p = false;
		  break;
@@ -2858,6 +2899,10 @@ can_sm_ref_p (class loop *loop, im_mem_ref *ref)
   if (!MEM_ANALYZABLE (ref))
     return false;
 
+  /* Can't hoist/sink aggregate copies.  */
+  if (ref->mem.ref == error_mark_node)
+    return false;
+
   /* It should be movable.  */
   if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
       || TREE_THIS_VOLATILE (ref->mem.ref)