From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from nikam.ms.mff.cuni.cz (nikam.ms.mff.cuni.cz [195.113.20.16]) by sourceware.org (Postfix) with ESMTPS id F3AD1385842B; Tue, 23 Nov 2021 00:26:13 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org F3AD1385842B Received: by nikam.ms.mff.cuni.cz (Postfix, from userid 16202) id 8DFA828270C; Tue, 23 Nov 2021 01:26:12 +0100 (CET) Date: Tue, 23 Nov 2021 01:26:12 +0100 From: Jan Hubicka To: "hubicka at gcc dot gnu.org" Cc: gcc-bugs@gcc.gnu.org Subject: Re: [Bug tree-optimization/103168] Value numbering for PRE of pure functions can be improved Message-ID: <20211123002612.GA74665@kam.mff.cuni.cz> References: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: User-Agent: Mutt/1.10.1 (2018-07-13) X-Spam-Status: No, score=-11.6 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, GIT_PATCH_0, RCVD_IN_MSPIKE_H3, RCVD_IN_MSPIKE_WL, SPF_HELO_NONE, SPF_NONE, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 23 Nov 2021 00:26:16 -0000 This is bit modified patch I am testing. I added pre-computation of the number of accesses, enabled the path for const functions (in case they have memory operand), initialized alias sets and clarified the logic around every_* and global_memory_accesses PR tree-optimization/103168 (modref_summary::finalize): Initialize load_accesses. * ipa-modref.h (struct modref_summary): Add load_accesses. * tree-ssa-sccvn.c (visit_reference_op_call): Use modref info to walk the virtual use->def chain to CSE pure function calls. * g++.dg/tree-ssa/pr103168.C: New testcase. diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 4f9323165ea..595eb6e0d8f 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -725,6 +727,23 @@ modref_summary::finalize (tree fun) break; } } + if (loads->every_base) + load_accesses = 1; + else + { + load_accesses = 0; + for (auto base_node : loads->bases) + { + if (base_node->every_ref) + load_accesses++; + else + for (auto ref_node : base_node->refs) + if (ref_node->every_access) + load_accesses++; + else + load_accesses += ref_node->accesses->length (); + } + } } /* Get function summary for FUNC if it exists, return NULL otherwise. */ diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h index f868eb6de07..a7937d74945 100644 --- a/gcc/ipa-modref.h +++ b/gcc/ipa-modref.h @@ -53,6 +53,8 @@ struct GTY(()) modref_summary /* Flags coputed by finalize method. */ + /* Total number of accesses in loads tree. */ + unsigned int load_accesses; /* global_memory_read is not set for functions calling functions with !binds_to_current_def which, after interposition, may read global memory but do nothing useful with it (except for crashing if some diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103168.C b/gcc/testsuite/g++.dg/tree-ssa/pr103168.C new file mode 100644 index 00000000000..82924a3e3ce --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103168.C @@ -0,0 +1,24 @@ +// { dg-do compile } +// { dg-options "-O2 -fdump-tree-fre1-details" } + +struct a +{ + int a; + static __attribute__ ((noinline)) + int ret (int v) {return v;} + + __attribute__ ((noinline)) + int inca () {return a++;} +}; + +int +test() +{ + struct a av; + av.a=1; + int val = av.ret (0) + av.inca(); + av.a=2; + return val + av.ret(0) + av.inca(); +} + +/* { dg-final { scan-tree-dump-times "Replaced a::ret" 1 "fre1" } } */ diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index 149674e6a16..719f5184654 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -71,6 +71,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-loop-niter.h" #include "builtins.h" #include "fold-const-call.h" +#include "ipa-modref-tree.h" +#include "ipa-modref.h" #include "tree-ssa-sccvn.h" /* This algorithm is based on the SCC algorithm presented by Keith @@ -5084,12 +5086,118 @@ visit_reference_op_call (tree lhs, gcall *stmt) struct vn_reference_s vr1; vn_reference_t vnresult = NULL; tree vdef = gimple_vdef (stmt); + modref_summary *summary; /* Non-ssa lhs is handled in copy_reference_ops_from_call. */ if (lhs && TREE_CODE (lhs) != SSA_NAME) lhs = NULL_TREE; vn_reference_lookup_call (stmt, &vnresult, &vr1); + + /* If the lookup did not succeed for pure functions try to use + modref info to find a candidate to CSE to. */ + const int accesses_limit = 8; + if (!vnresult + && !vdef + && lhs + && gimple_vuse (stmt) + && (((summary = get_modref_function_summary (stmt, NULL)) + && !summary->global_memory_read + && summary->load_accesses < accesses_limit) + || gimple_call_flags (stmt) & ECF_CONST)) + { + /* First search if we can do someting useful and build a + vector of all loads we have to check. */ + bool unknown_memory_access = false; + auto_vec accesses; + + if (summary) + { + for (auto base_node : summary->loads->bases) + if (unknown_memory_access) + break; + else for (auto ref_node : base_node->refs) + if (unknown_memory_access) + break; + else for (auto access_node : ref_node->accesses) + { + accesses.quick_grow (accesses.length () + 1); + if (!access_node.get_ao_ref (stmt, &accesses.last ())) + { + /* We could use get_call_arg (...) and initialize + a ref based on the argument and unknown offset in + some cases, but we have to get a ao_ref to + disambiguate against other stmts. */ + unknown_memory_access = true; + break; + } + else + { + accesses.last ().base_alias_set = base_node->base; + accesses.last ().ref_alias_set = ref_node->ref; + } + } + } + if (!unknown_memory_access) + for (unsigned i = 0; i < gimple_call_num_args (stmt); ++i) + { + tree arg = gimple_call_arg (stmt, i); + if (TREE_CODE (arg) != SSA_NAME + && !is_gimple_min_invariant (arg)) + { + if (accesses.length () >= accesses_limit) + { + unknown_memory_access = true; + break; + } + accesses.quick_grow (accesses.length () + 1); + ao_ref_init (&accesses.last (), arg); + } + } + + /* Walk the VUSE->VDEF chain optimistically trying to find an entry + for the call in the hashtable. */ + unsigned limit = (unknown_memory_access + ? 0 + : (param_sccvn_max_alias_queries_per_access + / (accesses.length () + 1))); + while (limit > 0 && !vnresult && !SSA_NAME_IS_DEFAULT_DEF (vr1.vuse)) + { + vr1.hashcode = vr1.hashcode - SSA_NAME_VERSION (vr1.vuse); + gimple *def = SSA_NAME_DEF_STMT (vr1.vuse); + /* ??? We could use fancy stuff like in walk_non_aliased_vuses, but + do not bother for now. */ + if (is_a (def)) + break; + vr1.vuse = vuse_ssa_val (gimple_vuse (def)); + vr1.hashcode = vr1.hashcode + SSA_NAME_VERSION (vr1.vuse); + vn_reference_lookup_1 (&vr1, &vnresult); + } + + /* If we found a candidate to CSE to verify it is valid. */ + if (vnresult && !accesses.is_empty ()) + { + tree vuse = vuse_ssa_val (gimple_vuse (stmt)); + while (vnresult && vuse != vr1.vuse) + { + gimple *def = SSA_NAME_DEF_STMT (vuse); + for (auto &ref : accesses) + { + /* ??? stmt_may_clobber_ref_p_1 does per stmt constant + analysis overhead that we might be able to cache. */ + if (stmt_may_clobber_ref_p_1 (def, &ref, true)) + { + vnresult = NULL; + break; + } + } + vuse = vuse_ssa_val (gimple_vuse (def)); + } + } + if (vnresult) + statistics_counter_event (cfun, "Pure functions modref eliminated", 1); + } + if (vnresult) { if (vnresult->result_vdef && vdef) @@ -5130,7 +5238,8 @@ visit_reference_op_call (tree lhs, gcall *stmt) if (lhs) changed |= set_ssa_val_to (lhs, lhs); vr2 = XOBNEW (&vn_tables_obstack, vn_reference_s); - vr2->vuse = vr1.vuse; + tree vuse = gimple_vuse (stmt); + vr2->vuse = vuse ? SSA_VAL (vuse) : NULL_TREE; /* As we are not walking the virtual operand chain we know the shared_lookup_references are still original so we can re-use them here. */ @@ -5139,7 +5248,7 @@ visit_reference_op_call (tree lhs, gcall *stmt) vr2->punned = vr1.punned; vr2->set = vr1.set; vr2->base_set = vr1.base_set; - vr2->hashcode = vr1.hashcode; + vr2->hashcode = vn_reference_compute_hash (vr2); vr2->result = lhs; vr2->result_vdef = vdef_val; vr2->value_id = 0;