From 51e79933f5264ec35276c3058ed2b5aeb2f28128 Mon Sep 17 00:00:00 2001 From: Feng Xue Date: Tue, 24 Sep 2019 11:48:26 +0800 Subject: [PATCH] cost --- gcc/ipa-cp.c | 171 ++++++++++++++++++++++--- gcc/params.def | 5 + gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c | 47 +++++++ 3 files changed, 204 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index b3e928052dc..dff9eadb64b 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -229,7 +229,9 @@ public: inline bool set_contains_variable (); bool add_value (valtype newval, cgraph_edge *cs, ipcp_value *src_val = NULL, - int src_idx = 0, HOST_WIDE_INT offset = -1); + int src_idx = 0, HOST_WIDE_INT offset = -1, + ipcp_value **val_pos_p = NULL, + bool unlimited = false); void print (FILE * f, bool dump_sources, bool dump_benefits); }; @@ -1733,22 +1735,37 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source) /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS, SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same meaning. OFFSET -1 means the source is scalar and not a part of an - aggregate. */ + aggregate. If non-NULL, VAL_POS_P specifies position in value list, + after which newly created ipcp_value will be inserted, and it is also + used to record address of the added ipcp_value before function returns. + UNLIMITED means whether value count should not exceed the limit given + by PARAM_IPA_CP_VALUE_LIST_SIZE. */ template bool ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, ipcp_value *src_val, - int src_idx, HOST_WIDE_INT offset) + int src_idx, HOST_WIDE_INT offset, + ipcp_value **val_pos_p, + bool unlimited) { ipcp_value *val; + if (val_pos_p) + { + for (val = values; val && val != *val_pos_p; val = val->next); + gcc_checking_assert (val); + } + if (bottom) return false; for (val = values; val; val = val->next) if (values_equal_for_ipcp_p (val->value, newval)) { + if (val_pos_p) + *val_pos_p = val; + if (ipa_edge_within_scc (cs)) { ipcp_value_source *s; @@ -1763,7 +1780,8 @@ ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, return false; } - if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) + if (!unlimited + && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) { /* We can only free sources, not the values themselves, because sources of other values in this SCC might point to them. */ @@ -1777,6 +1795,9 @@ ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, } } + if (val_pos_p) + *val_pos_p = NULL; + values = NULL; return set_to_bottom (); } @@ -1784,11 +1805,74 @@ ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, values_count++; val = allocate_and_init_ipcp_value (newval); val->add_source (cs, src_val, src_idx, offset); - val->next = values; - values = val; + if (val_pos_p) + { + val->next = (*val_pos_p)->next; + (*val_pos_p)->next = val; + *val_pos_p = val; + } + else + { + val->next = values; + values = val; + } + + return true; +} + +/* Return true, if a ipcp_value VAL is orginated from parameter value of + self-feeding recursive function by applying non-passthrough arithmetic + transformation. */ + +static bool +self_recursively_generated_p (ipcp_value *val) +{ + class ipa_node_params *info = NULL; + + for (ipcp_value_source *src = val->sources; src; src = src->next) + { + cgraph_edge *cs = src->cs; + + if (!src->val || cs->caller != cs->callee->function_symbol () + || src->val == val) + return false; + + if (!info) + info = IPA_NODE_REF (cs->caller); + + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, + src->index); + ipcp_lattice *src_lat = src->offset == -1 ? &plats->itself + : plats->aggs; + ipcp_value *src_val; + + for (src_val = src_lat->values; src_val && src_val != val; + src_val = src_val->next); + + if (!src_val) + return false; + } + return true; } +static tree +get_val_across_arith_op (enum tree_code opcode, + tree opnd1_type, + tree opnd2, + ipcp_value *src_val, + tree res_type) +{ + tree opnd1 = src_val->value; + + /* Skip source values that is incompatible with specified type. */ + if (opnd1_type + && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) + return NULL_TREE; + + return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); +} + /* Propagate values through an arithmetic transformation described by a jump function associated with edge CS, taking values from SRC_LAT and putting them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT. @@ -1812,24 +1896,70 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, ipcp_value *src_val; bool ret = false; - /* Do not create new values when propagating within an SCC because if there - are arithmetic functions with circular dependencies, there is infinite - number of them and we would just make lattices bottom. If this condition - is ever relaxed we have to detect self-feeding recursive calls in - cgraph_edge_brings_value_p in a smarter way. */ + /* Due to circular dependencies, propagating within an SCC through arithmetic + transformation would create infinite number of values. But for + self-feeding recursive function, we could allow propagation in a limited + count, and this can enable a simple kind of recursive function versioning. + For other scenario, we would just make lattices bottom. */ if (opcode != NOP_EXPR && ipa_edge_within_scc (cs)) - ret = dest_lat->set_contains_variable (); + { + if (src_lat != dest_lat) + return dest_lat->set_contains_variable (); + + auto_vec *, 8> val_seeds; + + for (src_val = src_lat->values; src_val; src_val = src_val->next) + { + /* Now we do not use self-recursively generated value as propagation + source, this is absolutely conservative, but could avoid explosion + of lattice's value space, especially when one recursive function + calls another recursive. */ + if (self_recursively_generated_p (src_val)) + { + /* If the lattice has already been propagated for the call site, + no need to do that again. */ + for (ipcp_value_source *s = src_val->sources; s; + s = s->next) + if (s->cs == cs) + return dest_lat->set_contains_variable (); + } + else + val_seeds.safe_push (src_val); + } + + int clone_copies = PARAM_VALUE (PARAM_IPA_CP_MAX_RECURSION_COPIES); + int i; + + /* Recursively generate lattice values with a limited count. */ + FOR_EACH_VEC_ELT (val_seeds, i, src_val) + { + for (int j = 1; j < clone_copies; j++) + { + tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, + src_val, res_type); + if (!cstval) + break; + + /* Try to place the new lattice value after its source, which + can decrease iterations of propagate stage. */ + ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, + src_offset, &src_val, true); + gcc_checking_assert (src_val); + } + } + ret |= dest_lat->set_contains_variable (); + } else for (src_val = src_lat->values; src_val; src_val = src_val->next) { - tree opnd1 = src_val->value; - tree cstval = NULL_TREE; - - /* Skip source values that is incompatible with specified type. */ - if (!opnd1_type - || useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) - cstval = ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); + /* Now we do not use self-recursively generated value as propagation + source, otherwise it is easy to make value space of normal lattice + overflow. */ + if (self_recursively_generated_p (src_val)) + continue; + tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, + src_val, res_type); if (cstval) ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, src_offset); @@ -3851,6 +3981,9 @@ get_info_about_necessary_edges (ipcp_value *val, cgraph_node *dest, hot |= cs->maybe_hot_p (); if (cs->caller != dest) non_self_recursive = true; + else if (src->val) + gcc_assert (values_equal_for_ipcp_p (src->val->value, + val->value)); } cs = get_next_cgraph_edge_clone (cs); } diff --git a/gcc/params.def b/gcc/params.def index 322c37f8b96..3e242e09f01 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -1135,6 +1135,11 @@ DEFPARAM (PARAM_IPA_CP_RECURSION_PENALTY, "are evaluated for cloning.", 40, 0, 100) +DEFPARAM (PARAM_IPA_CP_MAX_RECURSION_COPIES, + "ipa-cp-max-recursion-copies", + "Maximum function versioning copies for a self-recursive function.", + 8, 0, 0) + DEFPARAM (PARAM_IPA_CP_SINGLE_CALL_PENALTY, "ipa-cp-single-call-penalty", "Percentage penalty functions containing a single call to another " diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c new file mode 100644 index 00000000000..f70fb110c67 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursion-copies=8" } */ + +int fn(); + +int data[100]; + +int recur_fn (int i) +{ + int j; + + if (i == 6) + { + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + return 10; + } + + data[i] = i; + + for (j = 0; j < 100; j++) + recur_fn (i + 1); + + return i; +} + +int main () +{ + int i; + + for (i = 0; i < 100; i++) + recur_fn (1) + recur_fn (-5); + + return 1; +} + +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */ -- 2.17.1