From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id E457D385801F; Tue, 23 Nov 2021 00:26:17 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E457D385801F From: "hubicka at kam dot mff.cuni.cz" To: gcc-bugs@gcc.gnu.org Subject: [Bug tree-optimization/103168] Value numbering for PRE of pure functions can be improved Date: Tue, 23 Nov 2021 00:26:17 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: tree-optimization X-Bugzilla-Version: 12.0 X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: normal X-Bugzilla-Who: hubicka at kam dot mff.cuni.cz X-Bugzilla-Status: ASSIGNED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: rguenth at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 23 Nov 2021 00:26:18 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D103168 --- Comment #14 from hubicka at kam dot mff.cuni.cz --- This is bit modified patch I am testing. I added pre-computation of the number of accesses, enabled the path for const functions (in case they have memory operand), initialized alias sets and clarified the logic around every_* and global_memory_accesses PR tree-optimization/103168 (modref_summary::finalize): Initialize load_accesses. * ipa-modref.h (struct modref_summary): Add load_accesses. * tree-ssa-sccvn.c (visit_reference_op_call): Use modref info to walk the virtual use->def chain to CSE pure function calls. * g++.dg/tree-ssa/pr103168.C: New testcase. diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 4f9323165ea..595eb6e0d8f 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -725,6 +727,23 @@ modref_summary::finalize (tree fun) break; } } + if (loads->every_base) + load_accesses =3D 1; + else + { + load_accesses =3D 0; + for (auto base_node : loads->bases) + { + if (base_node->every_ref) + load_accesses++; + else + for (auto ref_node : base_node->refs) + if (ref_node->every_access) + load_accesses++; + else + load_accesses +=3D ref_node->accesses->length (); + } + } } /* Get function summary for FUNC if it exists, return NULL otherwise. */ diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h index f868eb6de07..a7937d74945 100644 --- a/gcc/ipa-modref.h +++ b/gcc/ipa-modref.h @@ -53,6 +53,8 @@ struct GTY(()) modref_summary /* Flags coputed by finalize method. */ + /* Total number of accesses in loads tree. */ + unsigned int load_accesses; /* global_memory_read is not set for functions calling functions with !binds_to_current_def which, after interposition, may read global memory but do nothing useful with it (except for crashing if some diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103168.C b/gcc/testsuite/g++.dg/tree-ssa/pr103168.C new file mode 100644 index 00000000000..82924a3e3ce --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103168.C @@ -0,0 +1,24 @@ +// { dg-do compile } +// { dg-options "-O2 -fdump-tree-fre1-details" } + +struct a +{ + int a; + static __attribute__ ((noinline)) + int ret (int v) {return v;} + + __attribute__ ((noinline)) + int inca () {return a++;} +}; + +int +test() +{ + struct a av; + av.a=3D1; + int val =3D av.ret (0) + av.inca(); + av.a=3D2; + return val + av.ret(0) + av.inca(); +} + +/* { dg-final { scan-tree-dump-times "Replaced a::ret" 1 "fre1" } } */ diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index 149674e6a16..719f5184654 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -71,6 +71,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop-niter.h" #include "builtins.h" #include "fold-const-call.h" +#include "ipa-modref-tree.h" +#include "ipa-modref.h" #include "tree-ssa-sccvn.h" /* This algorithm is based on the SCC algorithm presented by Keith @@ -5084,12 +5086,118 @@ visit_reference_op_call (tree lhs, gcall *stmt) struct vn_reference_s vr1; vn_reference_t vnresult =3D NULL; tree vdef =3D gimple_vdef (stmt); + modref_summary *summary; /* Non-ssa lhs is handled in copy_reference_ops_from_call. */ if (lhs && TREE_CODE (lhs) !=3D SSA_NAME) lhs =3D NULL_TREE; vn_reference_lookup_call (stmt, &vnresult, &vr1); + + /* If the lookup did not succeed for pure functions try to use + modref info to find a candidate to CSE to. */ + const int accesses_limit =3D 8; + if (!vnresult + && !vdef + && lhs + && gimple_vuse (stmt) + && (((summary =3D get_modref_function_summary (stmt, NULL)) + && !summary->global_memory_read + && summary->load_accesses < accesses_limit) + || gimple_call_flags (stmt) & ECF_CONST)) + { + /* First search if we can do someting useful and build a + vector of all loads we have to check. */ + bool unknown_memory_access =3D false; + auto_vec accesses; + + if (summary) + { + for (auto base_node : summary->loads->bases) + if (unknown_memory_access) + break; + else for (auto ref_node : base_node->refs) + if (unknown_memory_access) + break; + else for (auto access_node : ref_node->accesses) + { + accesses.quick_grow (accesses.length () + 1); + if (!access_node.get_ao_ref (stmt, &accesses.last ())) + { + /* We could use get_call_arg (...) and initialize + a ref based on the argument and unknown offset in + some cases, but we have to get a ao_ref to + disambiguate against other stmts. */ + unknown_memory_access =3D true; + break; + } + else + { + accesses.last ().base_alias_set =3D base_node->base; + accesses.last ().ref_alias_set =3D ref_node->ref; + } + } + } + if (!unknown_memory_access) + for (unsigned i =3D 0; i < gimple_call_num_args (stmt); ++i) + { + tree arg =3D gimple_call_arg (stmt, i); + if (TREE_CODE (arg) !=3D SSA_NAME + && !is_gimple_min_invariant (arg)) + { + if (accesses.length () >=3D accesses_limit) + { + unknown_memory_access =3D true; + break; + } + accesses.quick_grow (accesses.length () + 1); + ao_ref_init (&accesses.last (), arg); + } + } + + /* Walk the VUSE->VDEF chain optimistically trying to find an entry + for the call in the hashtable. */ + unsigned limit =3D (unknown_memory_access + ? 0 + : (param_sccvn_max_alias_queries_per_access + / (accesses.length () + 1))); + while (limit > 0 && !vnresult && !SSA_NAME_IS_DEFAULT_DEF (vr1.vuse)) + { + vr1.hashcode =3D vr1.hashcode - SSA_NAME_VERSION (vr1.vuse); + gimple *def =3D SSA_NAME_DEF_STMT (vr1.vuse); + /* ??? We could use fancy stuff like in walk_non_aliased_vuses, = but + do not bother for now. */ + if (is_a (def)) + break; + vr1.vuse =3D vuse_ssa_val (gimple_vuse (def)); + vr1.hashcode =3D vr1.hashcode + SSA_NAME_VERSION (vr1.vuse); + vn_reference_lookup_1 (&vr1, &vnresult); + } + + /* If we found a candidate to CSE to verify it is valid. */ + if (vnresult && !accesses.is_empty ()) + { + tree vuse =3D vuse_ssa_val (gimple_vuse (stmt)); + while (vnresult && vuse !=3D vr1.vuse) + { + gimple *def =3D SSA_NAME_DEF_STMT (vuse); + for (auto &ref : accesses) + { + /* ??? stmt_may_clobber_ref_p_1 does per stmt constant + analysis overhead that we might be able to cache. */ + if (stmt_may_clobber_ref_p_1 (def, &ref, true)) + { + vnresult =3D NULL; + break; + } + } + vuse =3D vuse_ssa_val (gimple_vuse (def)); + } + } + if (vnresult) + statistics_counter_event (cfun, "Pure functions modref eliminated",= 1); + } + if (vnresult) { if (vnresult->result_vdef && vdef) @@ -5130,7 +5238,8 @@ visit_reference_op_call (tree lhs, gcall *stmt) if (lhs) changed |=3D set_ssa_val_to (lhs, lhs); vr2 =3D XOBNEW (&vn_tables_obstack, vn_reference_s); - vr2->vuse =3D vr1.vuse; + tree vuse =3D gimple_vuse (stmt); + vr2->vuse =3D vuse ? SSA_VAL (vuse) : NULL_TREE; /* As we are not walking the virtual operand chain we know the shared_lookup_references are still original so we can re-use them here. */ @@ -5139,7 +5248,7 @@ visit_reference_op_call (tree lhs, gcall *stmt) vr2->punned =3D vr1.punned; vr2->set =3D vr1.set; vr2->base_set =3D vr1.base_set; - vr2->hashcode =3D vr1.hashcode; + vr2->hashcode =3D vn_reference_compute_hash (vr2); vr2->result =3D lhs; vr2->result_vdef =3D vdef_val; vr2->value_id =3D 0;=