This patch is in some ways still in the RFC shape because it certainly depends very much on how the previous ones will look like in the end and I'd like to discuss where this functionality should be placed. On the other hand, putting it into the cgraph rebuilding pass seems the best solution so far to me. It certainly looks better then when I tried to cram it into ipa-prop analysis phase, if not for any other reasons then because cfg cleanup might be necessary. Another reason is that I still want to try some more test-cases. However, I did not want to wait with the whole series any longer and so I am sending it out with the rest now. This patch compensates for not devirtualizing when folding. It looks for OBJ_TYPE_REF calls, and tries to deduce their type by watching out for dynamic type changes. If the type is known and the call can be converted to a direct one with that knowledge, it is done so. This patch is necessary for (modified variants of) testcases g++.dg/otr-fold-[12].C, g++.dg/tree-ssa/pr43411.C and g++.dg/tree-ssa/pr45605.C to pass again. Without it, early inlining can be detrimental to devirtualization by making it a purely intraprocedural issue and slow down simple testcases three-fold. Despite being an RFC patch, it also passes bootstrap and testing on x86_64-linux and make check-c++ on i686. I'm looking forward to all comments and suggestions. Thanks, Martin 2010-12-01 Martin Jambor * ipa-prop.c (get_ancestor_base_and_offset): New function. (compute_complex_assign_jump_func): Use it, check that op2 is NULL in the ancestor case. (compute_complex_ancestor_jump_func): Likewise. (ipa_try_devirtualize_immediately): New fuction. * cgraphbuild.c (rebuild_cgraph_edges): Renamed to rebuild_cgraph_edges_1, new parameter devirtualize, call ipa_try_devirtualize_immediately if it is true. (rebuild_cgraph_edges): New function. (rebuild_cgraph_edges_devirtualize): Likewise. (pass_rebuild_cgraph_edges_and_devirt): New variable. * passes.c (init_optimization_passes): Use it. * tree-pass.h (pass_rebuild_cgraph_edges_and_devirt): Declare. * ipa-prop.h (ipa_try_devirtualize_immediately): Declare. * testsuite/g++.dg/otr-fold-1.C: Moved to... * testsuite/g++.dg/ipa/imm-devirt-1.C: ...here, compiling with -O2. * testsuite/g++.dg/otr-fold-2.C: Moved to... * testsuite/g++.dg/ipa/imm-devirt-2.C: ...here, compiling with -O2. * testsuite/g++.dg/tree-ssa/pr45605.C: Compile with O2, scan optimized dump. Index: icln/gcc/ipa-prop.c =================================================================== --- icln.orig/gcc/ipa-prop.c +++ icln/gcc/ipa-prop.c @@ -539,6 +539,24 @@ detect_type_change (tree arg, gimple cal return detect_type_change_anc (arg, call, jfunc, 0); } +/* !!! Doc */ + +static tree +get_ancestor_base_and_offset (tree op, HOST_WIDE_INT *offset) +{ + HOST_WIDE_INT size, max_size; + tree base; + + base = get_ref_base_and_extent (op, offset, &size, &max_size); + if (TREE_CODE (base) != MEM_REF + /* If this is a varying address, punt. */ + || max_size == -1 + || max_size != size) + return NULL_TREE; + *offset += mem_ref_offset (base).low * BITS_PER_UNIT; + return TREE_OPERAND (base, 0); +} + /* Given that an actual argument is an SSA_NAME (given in NAME) and is a result of an assignment statement STMT, try to find out whether NAME can be @@ -551,7 +569,7 @@ compute_complex_assign_jump_func (struct struct ipa_jump_func *jfunc, gimple call, gimple stmt, tree name) { - HOST_WIDE_INT offset, size, max_size; + HOST_WIDE_INT offset; tree op1, op2, base; int index; @@ -588,20 +606,14 @@ compute_complex_assign_jump_func (struct return; } - if (TREE_CODE (op1) != ADDR_EXPR) + if (op2 || TREE_CODE (op1) != ADDR_EXPR) return; op1 = TREE_OPERAND (op1, 0); if (TREE_CODE (TREE_TYPE (op1)) != RECORD_TYPE) return; - base = get_ref_base_and_extent (op1, &offset, &size, &max_size); - if (TREE_CODE (base) != MEM_REF - /* If this is a varying address, punt. */ - || max_size == -1 - || max_size != size) - return; - offset += mem_ref_offset (base).low * BITS_PER_UNIT; - base = TREE_OPERAND (base, 0); - if (TREE_CODE (base) != SSA_NAME + base = get_ancestor_base_and_offset (op1, &offset); + if (!base + || TREE_CODE (base) != SSA_NAME || !SSA_NAME_IS_DEFAULT_DEF (base) || offset < 0) return; @@ -645,10 +657,10 @@ compute_complex_ancestor_jump_func (stru struct ipa_jump_func *jfunc, gimple call, gimple phi) { - HOST_WIDE_INT offset, size, max_size; + HOST_WIDE_INT offset; gimple assign, cond; basic_block phi_bb, assign_bb, cond_bb; - tree tmp, parm, expr, obj; + tree tmp, parm, expr; int index, i; if (gimple_phi_num_args (phi) != 2) @@ -676,17 +688,9 @@ compute_complex_ancestor_jump_func (stru if (TREE_CODE (expr) != ADDR_EXPR) return; expr = TREE_OPERAND (expr, 0); - obj = expr; - expr = get_ref_base_and_extent (expr, &offset, &size, &max_size); - - if (TREE_CODE (expr) != MEM_REF - /* If this is a varying address, punt. */ - || max_size == -1 - || max_size != size) - return; - offset += mem_ref_offset (expr).low * BITS_PER_UNIT; - parm = TREE_OPERAND (expr, 0); - if (TREE_CODE (parm) != SSA_NAME + parm = get_ancestor_base_and_offset (expr, &offset); + if (!parm + || TREE_CODE (parm) != SSA_NAME || !SSA_NAME_IS_DEFAULT_DEF (parm) || offset < 0) return; @@ -712,12 +716,12 @@ compute_complex_ancestor_jump_func (stru return; } - if (!detect_type_change_anc (obj, call, jfunc, offset)) + if (!detect_type_change_anc (expr, call, jfunc, offset)) { jfunc->type = IPA_JF_ANCESTOR; jfunc->value.ancestor.formal_id = index; jfunc->value.ancestor.offset = offset; - jfunc->value.ancestor.type = TREE_TYPE (obj);; + jfunc->value.ancestor.type = TREE_TYPE (expr);; } } @@ -1407,6 +1411,77 @@ ipa_analyze_indirect_call_uses (struct c return; } +/* If a call to an OBJ_TYPE_REF given by GSI can be turned into a direct one + according to the type of its object right away and if so, do it and return + true. If CFG is changed in the process, *CFG_CHANGED is set to true. */ + +tree +ipa_try_devirtualize_immediately (gimple_stmt_iterator *gsi, bool *cfg_changed) +{ + struct ipa_jump_func jfunc; + tree obj, fndecl, delta, token; + gimple call = gsi_stmt (*gsi); + tree target = gimple_call_fn (call); + + if (TREE_CODE (target) != OBJ_TYPE_REF) + return NULL_TREE; + jfunc.type = IPA_JF_UNKNOWN; + obj = OBJ_TYPE_REF_OBJECT (target); + if (TREE_CODE (obj) == SSA_NAME) + { + HOST_WIDE_INT offset; + tree op, base; + gimple stmt = SSA_NAME_DEF_STMT (obj); + if (!stmt || !is_gimple_assign (stmt) || gimple_assign_rhs2 (stmt)) + return NULL_TREE; + op = gimple_assign_rhs1 (stmt); + if (TREE_CODE (op) != ADDR_EXPR) + return NULL_TREE; + op = TREE_OPERAND (op, 0); + if (TREE_CODE (TREE_TYPE (op)) != RECORD_TYPE) + return NULL_TREE; + base = get_ancestor_base_and_offset (op, &offset); + if (!base || offset < 0) + return NULL_TREE; + detect_type_change_anc (op, call, &jfunc, offset); + } + else + compute_known_type_jump_func (obj, &jfunc, call); + if (jfunc.type != IPA_JF_KNOWN_TYPE) + return NULL_TREE; + + token = OBJ_TYPE_REF_TOKEN (target); + fndecl = gimple_get_virt_mehtod_for_binfo (tree_low_cst (token, 1), + jfunc.value.base_binfo, + &delta, true); + if (!fndecl) + return NULL_TREE; + if (dump_file) + { + fprintf (dump_file, "ipa-prop: Immediately devirtualizing call "); + print_gimple_expr (dump_file, call, 0, 0); + } + + if (integer_nonzerop (delta)) + gimple_adjust_this_by_delta (gsi, delta); + gimple_call_set_fndecl (call, fndecl); + update_stmt (call); + if (maybe_clean_eh_stmt (call) + && gimple_purge_dead_eh_edges (gimple_bb (call))) + *cfg_changed = true; + + if (dump_file) + { + fprintf (dump_file, "\n into "); + print_gimple_expr (dump_file, call, 0, 0); + fprintf (dump_file, "\n with delta "); + print_generic_expr (dump_file, delta, 0); + fprintf (dump_file, "\n"); + } + + return fndecl; +} + /* Analyze a CALL to an OBJ_TYPE_REF which is passed in TARGET and if the object referenced in the expression is a formal parameter of the caller (described by INFO), create a call note for the statement. */ Index: icln/gcc/testsuite/g++.dg/ipa/imm-devirt-1.C =================================================================== --- /dev/null +++ icln/gcc/testsuite/g++.dg/ipa/imm-devirt-1.C @@ -0,0 +1,76 @@ +/* Verify that virtual calls are folded even when a typecast to an + ancestor is involved along the way. */ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-optimized-slim" } */ + +extern "C" void abort (void); + +class Distraction +{ +public: + float f; + double d; + Distraction () + { + f = 8.3; + d = 10.2; + } + virtual float bar (float z); +}; + +class A +{ +public: + int data; + virtual int foo (int i); +}; + + +class B : public Distraction, public A +{ +public: + virtual int foo (int i); +}; + +float Distraction::bar (float z) +{ + f += z; + return f/2; +} + +int __attribute__ ((noinline)) A::foo (int i) +{ + return i + 1; +} + +int __attribute__ ((noinline)) B::foo (int i) +{ + return i + 2; +} + +int __attribute__ ((noinline,noclone)) get_input(void) +{ + return 1; +} + +static inline int middleman_1 (class A *obj, int i) +{ + return obj->foo (i); +} + +static inline int middleman_2 (class B *obj, int i) +{ + return middleman_1 (obj, i); +} + +int main (int argc, char *argv[]) +{ + class B b; + + if (middleman_2 (&b, get_input ()) != 3) + abort (); + return 0; +} + +/* { dg-final { scan-tree-dump "= B::.*foo" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: icln/gcc/testsuite/g++.dg/ipa/imm-devirt-2.C =================================================================== --- /dev/null +++ icln/gcc/testsuite/g++.dg/ipa/imm-devirt-2.C @@ -0,0 +1,88 @@ +/* Verify that virtual calls are folded even when a typecast to an + ancestor is involved along the way. */ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-optimized-slim" } */ + +extern "C" void abort (void); + +class Distraction +{ +public: + float f; + double d; + Distraction () + { + f = 8.3; + d = 10.2; + } + virtual float bar (float z); +}; + +class A +{ +public: + int data; + virtual int foo (int i); +}; + +class A_2 : public A +{ +public: + int data_2; + virtual int baz (int i); +}; + + +class B : public Distraction, public A_2 +{ +public: + virtual int foo (int i); +}; + +float __attribute__ ((noinline)) Distraction::bar (float z) +{ + f += z; + return f/2; +} + +int __attribute__ ((noinline)) A::foo (int i) +{ + return i + 1; +} + +int __attribute__ ((noinline)) A_2::baz (int i) +{ + return i * 15; +} + +int __attribute__ ((noinline)) B::foo (int i) +{ + return i + 2; +} + +int __attribute__ ((noinline,noclone)) get_input(void) +{ + return 1; +} + +static inline int middleman_1 (class A *obj, int i) +{ + return obj->foo (i); +} + +static inline int middleman_2 (class A *obj, int i) +{ + return middleman_1 (obj, i); +} + +int main (int argc, char *argv[]) +{ + class B b; + + if (middleman_2 (&b, get_input ()) != 3) + abort (); + return 0; +} + +/* { dg-final { scan-tree-dump "= B::.*foo" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: icln/gcc/testsuite/g++.dg/otr-fold-1.C =================================================================== --- icln.orig/gcc/testsuite/g++.dg/otr-fold-1.C +++ /dev/null @@ -1,76 +0,0 @@ -/* Verify that virtual calls are folded even when a typecast to an - ancestor is involved along the way. */ -/* { dg-do run } */ -/* { dg-options "-O -fdump-tree-optimized-slim" } */ - -extern "C" void abort (void); - -class Distraction -{ -public: - float f; - double d; - Distraction () - { - f = 8.3; - d = 10.2; - } - virtual float bar (float z); -}; - -class A -{ -public: - int data; - virtual int foo (int i); -}; - - -class B : public Distraction, public A -{ -public: - virtual int foo (int i); -}; - -float Distraction::bar (float z) -{ - f += z; - return f/2; -} - -int A::foo (int i) -{ - return i + 1; -} - -int B::foo (int i) -{ - return i + 2; -} - -int __attribute__ ((noinline,noclone)) get_input(void) -{ - return 1; -} - -static inline int middleman_1 (class A *obj, int i) -{ - return obj->foo (i); -} - -static inline int middleman_2 (class B *obj, int i) -{ - return middleman_1 (obj, i); -} - -int main (int argc, char *argv[]) -{ - class B b; - - if (middleman_2 (&b, get_input ()) != 3) - abort (); - return 0; -} - -/* { dg-final { scan-tree-dump "= B::.*foo" "optimized" } } */ -/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: icln/gcc/testsuite/g++.dg/otr-fold-2.C =================================================================== --- icln.orig/gcc/testsuite/g++.dg/otr-fold-2.C +++ /dev/null @@ -1,88 +0,0 @@ -/* Verify that virtual calls are folded even when a typecast to an - ancestor is involved along the way. */ -/* { dg-do run } */ -/* { dg-options "-O -fdump-tree-optimized-slim" } */ - -extern "C" void abort (void); - -class Distraction -{ -public: - float f; - double d; - Distraction () - { - f = 8.3; - d = 10.2; - } - virtual float bar (float z); -}; - -class A -{ -public: - int data; - virtual int foo (int i); -}; - -class A_2 : public A -{ -public: - int data_2; - virtual int baz (int i); -}; - - -class B : public Distraction, public A_2 -{ -public: - virtual int foo (int i); -}; - -float Distraction::bar (float z) -{ - f += z; - return f/2; -} - -int A::foo (int i) -{ - return i + 1; -} - -int A_2::baz (int i) -{ - return i * 15; -} - -int B::foo (int i) -{ - return i + 2; -} - -int __attribute__ ((noinline,noclone)) get_input(void) -{ - return 1; -} - -static inline int middleman_1 (class A *obj, int i) -{ - return obj->foo (i); -} - -static inline int middleman_2 (class A *obj, int i) -{ - return middleman_1 (obj, i); -} - -int main (int argc, char *argv[]) -{ - class B b; - - if (middleman_2 (&b, get_input ()) != 3) - abort (); - return 0; -} - -/* { dg-final { scan-tree-dump "= B::.*foo" "optimized" } } */ -/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: icln/gcc/testsuite/g++.dg/tree-ssa/pr45605.C =================================================================== --- icln.orig/gcc/testsuite/g++.dg/tree-ssa/pr45605.C +++ icln/gcc/testsuite/g++.dg/tree-ssa/pr45605.C @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O1 -fdump-tree-ssa" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ extern "C" void abort(); bool destructor_called = false; @@ -33,5 +33,5 @@ int main() { /* We should devirtualize call to D::Run */ -/* { dg-final { scan-tree-dump-times "D::Run \\(" 1 "ssa"} } */ -/* { dg-final { cleanup-tree-dump "ssa" } } */ +/* { dg-final { scan-tree-dump-times "D::Run \\(" 1 "optimized"} } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: icln/gcc/cgraphbuild.c =================================================================== --- icln.orig/gcc/cgraphbuild.c +++ icln/gcc/cgraphbuild.c @@ -33,6 +33,9 @@ along with GCC; see the file COPYING3. #include "tree-pass.h" #include "ipa-utils.h" #include "except.h" +#include "ipa-prop.h" + +/* !!! Add ipa-prop.h to dependencies in Makefile.in */ /* Context of record_reference. */ struct record_reference_ctx @@ -429,12 +432,13 @@ record_references_in_initializer (tree d /* Rebuild cgraph edges for current function node. This needs to be run after passes that don't update the cgraph. */ -unsigned int -rebuild_cgraph_edges (void) +static unsigned int +rebuild_cgraph_edges_1 (bool devirtualize) { basic_block bb; struct cgraph_node *node = cgraph_node (current_function_decl); gimple_stmt_iterator gsi; + bool cfg_changed = false; cgraph_node_remove_callees (node); ipa_remove_all_references (&node->ref_list); @@ -453,6 +457,8 @@ rebuild_cgraph_edges (void) int freq = compute_call_stmt_bb_frequency (current_function_decl, bb); decl = gimple_call_fndecl (stmt); + if (!decl && devirtualize) + decl = ipa_try_devirtualize_immediately (&gsi, &cfg_changed); if (decl) cgraph_create_edge (node, cgraph_node (decl), stmt, bb->count, freq, @@ -474,7 +480,26 @@ rebuild_cgraph_edges (void) record_eh_tables (node, cfun); gcc_assert (!node->global.inlined_to); - return 0; + if (cfg_changed) + return TODO_cleanup_cfg; + else + return 0; +} + +/* !!!Doc */ + +unsigned int +rebuild_cgraph_edges (void) +{ + return rebuild_cgraph_edges_1 (false); +} + +/* !!!Doc */ + +static unsigned int +rebuild_cgraph_edges_devirtualize (void) +{ + return rebuild_cgraph_edges_1 (true); } /* Rebuild cgraph edges for current function node. This needs to be run after @@ -518,6 +543,25 @@ struct gimple_opt_pass pass_rebuild_cgra NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ + TV_CGRAPH, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ + } +}; + +struct gimple_opt_pass pass_rebuild_cgraph_edges_and_devirt = +{ + { + GIMPLE_PASS, + "*rebuild_cgraph_edges_d", /* name */ + NULL, /* gate */ + rebuild_cgraph_edges_devirtualize, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ TV_CGRAPH, /* tv_id */ PROP_cfg, /* properties_required */ 0, /* properties_provided */ Index: icln/gcc/ipa-prop.h =================================================================== --- icln.orig/gcc/ipa-prop.h +++ icln/gcc/ipa-prop.h @@ -429,6 +429,9 @@ void ipa_initialize_node_params (struct bool ipa_propagate_indirect_call_infos (struct cgraph_edge *cs, VEC (cgraph_edge_p, heap) **new_edges); +/* Intraprocedural devirtualization. */ +tree ipa_try_devirtualize_immediately (gimple_stmt_iterator *, bool *); + /* Indirect edge and binfo processing. */ struct cgraph_edge *ipa_make_edge_direct_to_target (struct cgraph_edge *, tree, tree); Index: icln/gcc/passes.c =================================================================== --- icln.orig/gcc/passes.c +++ icln/gcc/passes.c @@ -791,7 +791,7 @@ init_optimization_passes (void) NEXT_PASS (pass_split_functions); } NEXT_PASS (pass_release_ssa_names); - NEXT_PASS (pass_rebuild_cgraph_edges); + NEXT_PASS (pass_rebuild_cgraph_edges_and_devirt); NEXT_PASS (pass_inline_parameters); } NEXT_PASS (pass_ipa_tree_profile); Index: icln/gcc/tree-pass.h =================================================================== --- icln.orig/gcc/tree-pass.h +++ icln/gcc/tree-pass.h @@ -436,6 +436,7 @@ extern struct gimple_opt_pass pass_uncpr extern struct gimple_opt_pass pass_return_slot; extern struct gimple_opt_pass pass_reassoc; extern struct gimple_opt_pass pass_rebuild_cgraph_edges; +extern struct gimple_opt_pass pass_rebuild_cgraph_edges_and_devirt; extern struct gimple_opt_pass pass_remove_cgraph_callee_edges; extern struct gimple_opt_pass pass_build_cgraph_edges; extern struct gimple_opt_pass pass_local_pure_const;