diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index fd7f24da818..6af0d15ed87 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -13795,6 +13795,18 @@ rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) return rs6000_builtin_decls[code]; } +/* Return true if the builtin with CODE has any mask bits set + which are specified by MASK. */ + +bool +rs6000_fn_has_any_of_these_mask_bits (enum rs6000_builtins code, + HOST_WIDE_INT mask) +{ + gcc_assert (code < RS6000_BUILTIN_COUNT); + HOST_WIDE_INT fnmask = rs6000_builtin_info[code].mask; + return (fnmask & mask) != 0; +} + static void altivec_init_builtins (void) { diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h index 88cf9bd5692..a504f197424 100644 --- a/gcc/config/rs6000/rs6000-internal.h +++ b/gcc/config/rs6000/rs6000-internal.h @@ -180,6 +180,9 @@ extern tree rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED, tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED); +extern bool rs6000_fn_has_any_of_these_mask_bits (enum rs6000_builtins code, + HOST_WIDE_INT mask); + #if TARGET_ELF extern bool rs6000_passes_ieee128; #endif diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index c5d8e59effc..c83744a06c1 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -71,6 +71,9 @@ #include "tree-vector-builder.h" #include "context.h" #include "tree-pass.h" +#include "symbol-summary.h" +#include "ipa-prop.h" +#include "ipa-fnsummary.h" #include "except.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ @@ -1786,6 +1789,12 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_INVALID_CONVERSION #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion + +#undef TARGET_NEED_IPA_FN_TARGET_INFO +#define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info + +#undef TARGET_UPDATE_IPA_FN_TARGET_INFO +#define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info /* Processor table. */ @@ -25026,7 +25035,72 @@ rs6000_generate_version_dispatcher_body (void *node_p) return resolver; } - +/* Hook to decide if we need to scan function gimples to collect + target specific information for inlining, push the initial flag + into INFO if we want to make use of target info. */ + +static bool +rs6000_need_ipa_fn_target_info (const_tree decl, vec &info) +{ + tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl); + if (!target) + target = target_option_default_node; + struct cl_target_option *opts = TREE_TARGET_OPTION (target); + + /* See PR102059, we only handle HTM for now, so will only do + the consequent scannings when HTM feature enabled. */ + if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM) + { + info.safe_push (0); + return true; + } + + /* The given function disables HTM explicitly, it's safe to be + inlined from HTM feature's perspective and don't need any + further checkings. */ + if (opts->x_rs6000_isa_flags_explicit & OPTION_MASK_HTM) + { + info.safe_push (0); + return false; + } + + return false; +} + +/* Hook to update target specific information INFO for inlining by + scanning the given STMT. Return false if we don't need to scan + any more. */ + +static bool +rs6000_update_ipa_fn_target_info (vec &info, const gimple *stmt) +{ + /* Assume inline asm can use any instruction features. */ + if (gimple_code (stmt) == GIMPLE_ASM) + { + /* Should set any bits we concerned, for now OPTION_MASK_HTM is + the only bit we care about. */ + info[0] |= OPTION_MASK_HTM; + return false; + } + else if (gimple_code (stmt) == GIMPLE_CALL) + { + tree fndecl = gimple_call_fndecl (stmt); + if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)) + { + enum rs6000_builtins fcode = + (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl); + /* HTM bifs definitely exploit HTM insns. */ + if (rs6000_fn_has_any_of_these_mask_bits (fcode, RS6000_BTM_HTM)) + { + info[0] |= OPTION_MASK_HTM; + return false; + } + } + } + + return true; +} + /* Hook to determine if one function can safely inline another. */ static bool @@ -25059,10 +25133,20 @@ rs6000_can_inline_p (tree caller, tree callee) | OPTION_MASK_P10_FUSION_ADDLOG | OPTION_MASK_P10_FUSION_2ADD | OPTION_MASK_PCREL_OPT); - if (always_inline) { - caller_isa &= ~always_inline_safe_mask; - callee_isa &= ~always_inline_safe_mask; - } + /* Some features are totally safe for inlining (or always inlines), + let's exclude them from the following checkings. */ + HOST_WIDE_INT safe_mask = always_inline ? always_inline_safe_mask : 0; + cgraph_node *callee_node = cgraph_node::get (callee); + if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL) + { + const vec &info = + ipa_fn_summaries->get (callee_node)->target_info; + if (!info.is_empty ()) + safe_mask |= ~info[0] & OPTION_MASK_HTM; + } + + /* To adjust callee is enough since we just check for subset. */ + callee_isa &= ~safe_mask; /* The callee's options must be a subset of the caller's options, i.e. a vsx function may inline an altivec function, but a no-vsx function @@ -25082,8 +25166,7 @@ rs6000_can_inline_p (tree caller, tree callee) then we must enforce that the callee's and caller's options match exactly; see PR70010. */ HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit; - if (always_inline) - explicit_isa &= ~always_inline_safe_mask; + explicit_isa &= ~safe_mask; if ((caller_isa & explicit_isa) != (callee_isa & explicit_isa)) { if (TARGET_DEBUG_TARGET) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index f68f42638a1..fb9fde062e7 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -10752,6 +10752,37 @@ default, inlining is not allowed if the callee function has function specific target options and the caller does not use the same options. @end deftypefn +@deftypefn {Target Hook} bool TARGET_UPDATE_IPA_FN_TARGET_INFO (vec& @var{vec}, const gimple* @var{stmt}) +Allow target to analyze all gimple statements for the given function to +record and update some target specific information for inlining. A typical +example is that a caller with one isa feature disabled is normally not +allowed to inline a callee with that same isa feature enabled even which is +attributed by always_inline, but with the conservative analysis on all +statements of the callee if we are able to guarantee the callee does not +exploit any instructions from the mismatch isa feature, it would be safe to +allow the caller to inline the callee. +@var{vec} is one @code{HOST_WIDE_INT} element vector to record information +in which one set bit indicates one corresponding feature is detected in the +analysis, @var{stmt} is the statement being analyzed. Return true if target +still need to analyze the subsequent statements, otherwise return false to +stop subsequent analysis. +The default version of this hook returns false. +@end deftypefn + +@deftypefn {Target Hook} bool TARGET_NEED_IPA_FN_TARGET_INFO (const_tree @var{decl}, vec& @var{vec}) +Allow target to check early whether it is necessary to analyze all gimple +statements in the given function to update target specific information for +inlining. See hook @code{update_ipa_fn_target_info} for usage example of +target specific information. This hook is expected to be invoked ahead of +the iterating with hook @code{update_ipa_fn_target_info}. +@var{decl} is the function being analyzed, @var{vec} is the same as what +in hook @code{update_ipa_fn_target_info}, target can do one time update +into @var{vec} without iterating for some case. Return true if target +decides to analyze all gimple statements to collect information, otherwise +return false. +The default version of this hook returns false. +@end deftypefn + @deftypefn {Target Hook} void TARGET_RELAYOUT_FUNCTION (tree @var{fndecl}) This target hook fixes function @var{fndecl} after attributes are processed. Default does nothing. On ARM, the default function's alignment is updated diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index fdf16b901c5..81b1b458b70 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -7198,6 +7198,10 @@ on this implementation detail. @hook TARGET_CAN_INLINE_P +@hook TARGET_UPDATE_IPA_FN_TARGET_INFO + +@hook TARGET_NEED_IPA_FN_TARGET_INFO + @hook TARGET_RELAYOUT_FUNCTION @node Emulated TLS diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 2470937460f..72091b6193f 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "backend.h" +#include "target.h" #include "tree.h" #include "gimple.h" #include "alloc-pool.h" @@ -1146,6 +1147,14 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) fprintf (f, " calls:\n"); dump_ipa_call_summary (f, 4, node, s); fprintf (f, "\n"); + HOST_WIDE_INT flags; + for (int i = 0; s->target_info.iterate (i, &flags); i++) + { + if (i == 0) + fprintf (f, " target_info flags: "); + fprintf (f, " [%d]:" HOST_WIDE_INT_PRINT_HEX " ", i, flags); + } + fprintf (f, "\n"); } else fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ()); @@ -2608,6 +2617,7 @@ analyze_function_body (struct cgraph_node *node, bool early) info->conds = NULL; info->size_time_table.release (); info->call_size_time_table.release (); + info->target_info.release(); /* When optimizing and analyzing for IPA inliner, initialize loop optimizer so we can produce proper inline hints. @@ -2659,6 +2669,12 @@ analyze_function_body (struct cgraph_node *node, bool early) bb_predicate, bb_predicate); + /* Only look for target information for inlinable functions. */ + bool scan_for_target_info = + info->inlinable + && targetm.target_option.need_ipa_fn_target_info (node->decl, + info->target_info); + if (fbi.info) compute_bb_predicates (&fbi, node, info, params_summary); const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; @@ -2876,6 +2892,10 @@ analyze_function_body (struct cgraph_node *node, bool early) if (dump_file) fprintf (dump_file, " fp_expression set\n"); } + if (scan_for_target_info) + scan_for_target_info = + targetm.target_option.update_ipa_fn_target_info + (info->target_info, stmt); } /* Account cost of address calculations in the statements. */ @@ -4073,6 +4093,21 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) info->fp_expressions |= callee_info->fp_expressions; + if (callee_info->target_info.is_empty ()) + { + if (!info->target_info.is_empty ()) + /* There is no target info from the inlined callee, discard + the current one to ensure it's safe. */ + info->target_info.truncate (0); + } + else if (!info->target_info.is_empty ()) + { + unsigned len = info->target_info.length (); + gcc_assert (len == callee_info->target_info.length ()); + for (unsigned i = 0; i < len; i++) + info->target_info[i] |= callee_info->target_info[i]; + } + if (callee_info->conds) { ipa_auto_call_arg_values avals; @@ -4534,6 +4569,18 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, if (info) info->builtin_constant_p_parms.quick_push (parm); } + if (!lto_stream_offload_p) + { + count2 = streamer_read_uhwi (&ib); + if (info && count2) + info->target_info.reserve_exact (count2); + for (j = 0; j < count2; j++) + { + HOST_WIDE_INT flag = streamer_read_hwi (&ib); + if (info) + info->target_info.quick_push (flag); + } + } for (e = node->callees; e; e = e->next_callee) read_ipa_call_summary (&ib, e, info != NULL); for (e = node->indirect_calls; e; e = e->next_callee) @@ -4713,6 +4760,13 @@ ipa_fn_summary_write (void) for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip); i++) streamer_write_uhwi (ob, ip); + if (!lto_stream_offload_p) + { + streamer_write_uhwi (ob, info->target_info.length ()); + HOST_WIDE_INT hwi; + for (i = 0; info->target_info.iterate (i, &hwi); i++) + streamer_write_hwi (ob, hwi); + } for (edge = cnode->callees; edge; edge = edge->next_callee) write_ipa_call_summary (ob, edge); for (edge = cnode->indirect_calls; edge; edge = edge->next_callee) diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index 78399b0b9bb..300b8da4507 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -131,7 +131,7 @@ public: time (0), conds (NULL), size_time_table (), call_size_time_table (vNULL), loop_iterations (NULL), loop_strides (NULL), - builtin_constant_p_parms (vNULL), + builtin_constant_p_parms (vNULL), target_info (), growth (0), scc_no (0) { } @@ -146,6 +146,7 @@ public: call_size_time_table (vNULL), loop_iterations (s.loop_iterations), loop_strides (s.loop_strides), builtin_constant_p_parms (s.builtin_constant_p_parms), + target_info (s.target_info.copy()), growth (s.growth), scc_no (s.scc_no) {} @@ -193,6 +194,9 @@ public: vec *loop_strides; /* Parameters tested by builtin_constant_p. */ vec GTY((skip)) builtin_constant_p_parms; + /* Like fp_expressions, but it's to hold some target specific information, + such as some target specific isa flags. */ + auto_vec GTY((skip)) target_info; /* Estimated growth for inlining all copies of the function before start of small functions inlining. This value will get out of date as the callers are duplicated, but diff --git a/gcc/testsuite/gcc.dg/lto/pr102059-1_0.c b/gcc/testsuite/gcc.dg/lto/pr102059-1_0.c new file mode 100644 index 00000000000..f164d96637e --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr102059-1_0.c @@ -0,0 +1,12 @@ +/* { dg-lto-do link } */ +/* { dg-skip-if "power only" { ! { powerpc*-*-* } } } */ +/* -Wno-attributes suppresses always_inline warnings. */ +/* { dg-lto-options { "-O2 -mdejagnu-cpu=power8 -flto -Wno-attributes" } } */ + +int __attribute__ ((always_inline)) +foo1 (int *b) +{ + *b += 100; + return *b; +} + diff --git a/gcc/testsuite/gcc.dg/lto/pr102059-1_1.c b/gcc/testsuite/gcc.dg/lto/pr102059-1_1.c new file mode 100644 index 00000000000..7e31fc7fbd9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr102059-1_1.c @@ -0,0 +1,9 @@ +extern int foo1 (int *b); + +int __attribute__ ((always_inline)) foo2 (int *b) +{ + int res = foo1 (b); + *b += res; + return *b; +} + diff --git a/gcc/testsuite/gcc.dg/lto/pr102059-1_2.c b/gcc/testsuite/gcc.dg/lto/pr102059-1_2.c new file mode 100644 index 00000000000..bfa1e62fb7a --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr102059-1_2.c @@ -0,0 +1,11 @@ +extern int foo2 (int *b); + +#pragma GCC target "cpu=power10" +__attribute__ ((always_inline)) +int +main (int *a) +{ + *a = foo2 (a); + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/lto/pr102059-2_0.c b/gcc/testsuite/gcc.dg/lto/pr102059-2_0.c new file mode 100644 index 00000000000..4bd97fcd104 --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr102059-2_0.c @@ -0,0 +1,12 @@ +/* { dg-lto-do link } */ +/* { dg-skip-if "power only" { ! { powerpc*-*-* } } } */ +/* { dg-lto-options { "-O2 -mdejagnu-cpu=power8 -mno-power8-fusion -flto -fdump-ipa-inline" } } */ + +int +foo1 (int *b) +{ + *b += 100; + return *b; +} + +/* { dg-final { scan-wpa-ipa-dump-not "target specific option mismatch" "inline" } } */ diff --git a/gcc/testsuite/gcc.dg/lto/pr102059-2_1.c b/gcc/testsuite/gcc.dg/lto/pr102059-2_1.c new file mode 100644 index 00000000000..2e5570d127e --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr102059-2_1.c @@ -0,0 +1,9 @@ +extern int foo1 (int *b); + +int foo2 (int *b) +{ + int res = foo1 (b); + *b += res; + return *b; +} + diff --git a/gcc/testsuite/gcc.dg/lto/pr102059-2_2.c b/gcc/testsuite/gcc.dg/lto/pr102059-2_2.c new file mode 100644 index 00000000000..31951701816 --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/pr102059-2_2.c @@ -0,0 +1,10 @@ +extern int foo2 (int *b); + +#pragma GCC target "cpu=power10" +int +main (int *a) +{ + *a = foo2 (a); + return 0; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr102059-5.c b/gcc/testsuite/gcc.target/powerpc/pr102059-5.c new file mode 100644 index 00000000000..b969c4c4750 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr102059-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* -Wno-attributes suppresses always_inline warnings. */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -Wno-attributes" } */ + +/* Verify the reduced case from PR102059 won't fail. */ + +__attribute__ ((always_inline)) int +foo (int *b) +{ + *b += 10; + return *b; +} + +#pragma GCC target "cpu=power10" +int +bar (int *a) +{ + *a = foo (a); + return 0; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr102059-6.c b/gcc/testsuite/gcc.target/powerpc/pr102059-6.c new file mode 100644 index 00000000000..52e009289f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr102059-6.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-htm" } */ + +/* Verify target info for inlining still works even if callee + disables HTM explicitly while caller enables it. */ + +static inline int __attribute__ ((always_inline)) +foo (int *b) +{ + *b += 10; + return *b; +} + +#pragma GCC target "htm" +int +bar (int *a) +{ + *a = foo (a); + return 0; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr102059-7.c b/gcc/testsuite/gcc.target/powerpc/pr102059-7.c new file mode 100644 index 00000000000..90e8332f649 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr102059-7.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mno-power8-fusion -fdump-tree-einline-optimized" } */ + +/* Like pr102059-5.c, to verify the inlining still happens + even without always_inline attribute. */ + +int foo (int *b) +{ + *b += 10; + return *b; +} + +#pragma GCC target "cpu=power10" +int +bar (int *a) +{ + *a = foo (a); + return 0; +} + +/* { dg-final { scan-tree-dump-times "Inlining foo/\[0-9\]* " 1 "einline"} } */ diff --git a/gcc/target.def b/gcc/target.def index 28a34f1d51b..f9053331466 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -6600,6 +6600,41 @@ specific target options and the caller does not use the same options.", bool, (tree caller, tree callee), default_target_can_inline_p) +DEFHOOK +(update_ipa_fn_target_info, + "Allow target to analyze all gimple statements for the given function to\n\ +record and update some target specific information for inlining. A typical\n\ +example is that a caller with one isa feature disabled is normally not\n\ +allowed to inline a callee with that same isa feature enabled even which is\n\ +attributed by always_inline, but with the conservative analysis on all\n\ +statements of the callee if we are able to guarantee the callee does not\n\ +exploit any instructions from the mismatch isa feature, it would be safe to\n\ +allow the caller to inline the callee.\n\ +@var{vec} is one @code{HOST_WIDE_INT} element vector to record information\n\ +in which one set bit indicates one corresponding feature is detected in the\n\ +analysis, @var{stmt} is the statement being analyzed. Return true if target\n\ +still need to analyze the subsequent statements, otherwise return false to\n\ +stop subsequent analysis.\n\ +The default version of this hook returns false.", + bool, (vec& vec, const gimple* stmt), + default_update_ipa_fn_target_info) + +DEFHOOK +(need_ipa_fn_target_info, + "Allow target to check early whether it is necessary to analyze all gimple\n\ +statements in the given function to update target specific information for\n\ +inlining. See hook @code{update_ipa_fn_target_info} for usage example of\n\ +target specific information. This hook is expected to be invoked ahead of\n\ +the iterating with hook @code{update_ipa_fn_target_info}.\n\ +@var{decl} is the function being analyzed, @var{vec} is the same as what\n\ +in hook @code{update_ipa_fn_target_info}, target can do one time update\n\ +into @var{vec} without iterating for some case. Return true if target\n\ +decides to analyze all gimple statements to collect information, otherwise\n\ +return false.\n\ +The default version of this hook returns false.", + bool, (const_tree decl, vec& vec), + default_need_ipa_fn_target_info) + DEFHOOK (relayout_function, "This target hook fixes function @var{fndecl} after attributes are processed.\n\ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index c9b5208853d..7b980a1607c 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1765,6 +1765,22 @@ default_target_can_inline_p (tree caller, tree callee) return callee_opts == caller_opts; } +/* By default, return false to not need to collect any target information + for inlining. Target maintainer should re-define the hook if the + target want to take advantage of it. */ + +bool +default_need_ipa_fn_target_info (const_tree, vec &) +{ + return false; +} + +bool +default_update_ipa_fn_target_info (vec &, const gimple *) +{ + return false; +} + /* If the machine does not have a case insn that compares the bounds, this means extra overhead for dispatch tables, which raises the threshold for using them. */ diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 92d51992e62..8e2da5d5940 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -194,6 +194,9 @@ extern bool default_new_address_profitable_p (rtx, rtx_insn *, rtx); extern bool default_target_option_valid_attribute_p (tree, tree, tree, int); extern bool default_target_option_pragma_parse (tree, tree); extern bool default_target_can_inline_p (tree, tree); +extern bool default_update_ipa_fn_target_info (vec &, + const gimple *); +extern bool default_need_ipa_fn_target_info (const_tree, vec &); extern bool default_valid_pointer_mode (scalar_int_mode); extern bool default_ref_may_alias_errno (class ao_ref *); extern scalar_int_mode default_addr_space_pointer_mode (addr_space_t);