public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Avoid global optimize flag checks in LTO
@ 2017-07-07 13:32 Jan Hubicka
  2017-07-07 17:53 ` Bernhard Reutner-Fischer
  0 siblings, 1 reply; 3+ messages in thread
From: Jan Hubicka @ 2017-07-07 13:32 UTC (permalink / raw)
  To: gcc-patches

Hi,
this patch fixes some places where we check the global optimize flag rather than
doing it per-function. This makes the optimization attribute work closer to
what one gets when passing the same flag on the command line.
This requires running IPA passes even with !optimize, but with a fast path
that does mostly nothing except when it sees functions with optimize attributes
set.

Bootstrapped/regtested x86_64-linux, committed.
	
	* ipa-comdats.c: Remove optimize check from gate.
	* ipa-fnsummary.c (ipa_fn_summary_generate): Do not generate summary
	for functions not optimized.
	(ipa_fn_summary_read): Skip optimize check.
	(ipa_fn_summary_write): Likewise.
	* ipa-inline-analysis.c (do_estimate_growth_1): Check that caller
	is optimized.
	* ipa-inline.c (can_inline_edge_p): Not optimized functions are
	uninlinable.
	(can_inline_edge_p): Check flag_pcc_struct_return for match.
	(check_callers): Give up on caller which is not optimized.
	(inline_small_functions): Likewise.
	(ipa_inline): Do not give up when not optimizing.
	* ipa-visibility.c (function_and_variable_visibility): Do not optimize
	away unoptimized cdtors.
	(whole_program_function_and_variable_visibility): Do
	ipa_discover_readonly_nonaddressable_vars in LTO mode.
	* ipa.c (process_references): Do not check optimize.
	(symbol_table::remove_unreachable_nodes): Update optimize check.
	(set_writeonly_bit): Update optimize check.
	(pass_ipa_cdtor_merge::gate): Do not check optimize.
	(pass_ipa_single_use::gate): Remove.
Index: ipa-comdats.c
===================================================================
--- ipa-comdats.c	(revision 250021)
+++ ipa-comdats.c	(working copy)
@@ -416,7 +416,7 @@ public:
 bool
 pass_ipa_comdats::gate (function *)
 {
-  return HAVE_COMDAT_GROUP && optimize;
+  return HAVE_COMDAT_GROUP;
 }
 
 } // anon namespace
Index: ipa-fnsummary.c
===================================================================
--- ipa-fnsummary.c	(revision 250021)
+++ ipa-fnsummary.c	(working copy)
@@ -3174,22 +3174,20 @@ ipa_fn_summary_generate (void)
 
   FOR_EACH_DEFINED_FUNCTION (node)
     if (DECL_STRUCT_FUNCTION (node->decl))
-      node->local.versionable = tree_versionable_function_p (node->decl);
-
-  /* When not optimizing, do not bother to analyze.  Inlining is still done
-     because edge redirection needs to happen there.  */
-  if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa)
-    return;
+      node->local.versionable = 
+	(opt_for_fn (node->decl, optimize)
+	&& tree_versionable_function_p (node->decl));
 
   ipa_fn_summary_alloc ();
 
   ipa_fn_summaries->enable_insertion_hook ();
 
   ipa_register_cgraph_hooks ();
-  ipa_free_fn_summary ();
 
   FOR_EACH_DEFINED_FUNCTION (node)
-    if (!node->alias)
+    if (!node->alias
+	&& (flag_generate_lto || flag_generate_offload|| flag_wpa
+	    || opt_for_fn (node->decl, optimize)))
       inline_analyze_function (node);
 }
 
@@ -3342,12 +3340,9 @@ ipa_fn_summary_read (void)
 	fatal_error (input_location,
 		     "ipa inline summary is missing in input file");
     }
-  if (optimize)
-    {
-      ipa_register_cgraph_hooks ();
-      if (!flag_ipa_cp)
-	ipa_prop_read_jump_functions ();
-    }
+  ipa_register_cgraph_hooks ();
+  if (!flag_ipa_cp)
+    ipa_prop_read_jump_functions ();
 
   gcc_assert (ipa_fn_summaries);
   ipa_fn_summaries->enable_insertion_hook ();
@@ -3462,7 +3457,7 @@ ipa_fn_summary_write (void)
   produce_asm (ob, NULL);
   destroy_output_block (ob);
 
-  if (optimize && !flag_ipa_cp)
+  if (!flag_ipa_cp)
     ipa_prop_write_jump_functions ();
 }
 
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c	(revision 250021)
+++ ipa-inline-analysis.c	(working copy)
@@ -326,7 +326,8 @@ do_estimate_growth_1 (struct cgraph_node
     {
       gcc_checking_assert (e->inline_failed);
 
-      if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
+      if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR
+	  || !opt_for_fn (e->caller->decl, optimize))
 	{
 	  d->uninlinable = true;
           continue;
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 250021)
+++ ipa-inline.c	(working copy)
@@ -322,6 +322,11 @@ can_inline_edge_p (struct cgraph_edge *e
       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
       inlinable = false;
     }
+  if (!early && !opt_for_fn (callee->decl, optimize))
+    {
+      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
+      inlinable = false;
+    }
   else if (callee->calls_comdat_local)
     {
       e->inline_failed = CIF_USES_COMDAT_LOCAL;
@@ -402,6 +407,7 @@ can_inline_edge_p (struct cgraph_edge *e
 	 Not even for always_inline declared functions.  */
      else if (check_match (flag_wrapv)
 	      || check_match (flag_trapv)
+	      || check_match (flag_pcc_struct_return)
 	      /* When caller or callee does FP math, be sure FP codegen flags
 		 compatible.  */
 	      || ((caller_info->fp_expressions && callee_info->fp_expressions)
@@ -939,7 +945,8 @@ check_callers (struct cgraph_node *node,
   struct cgraph_edge *e;
    for (e = node->callers; e; e = e->next_caller)
      {
-       if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once))
+       if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
+	   || !opt_for_fn (e->caller->decl, optimize))
 	 return true;
        if (!can_inline_edge_p (e, true))
          return true;
@@ -1746,7 +1753,8 @@ inline_small_functions (void)
     if (!node->global.inlined_to)
       {
 	if (!node->alias && node->analyzed
-	    && (node->has_gimple_body_p () || node->thunk.thunk_p))
+	    && (node->has_gimple_body_p () || node->thunk.thunk_p)
+	    && opt_for_fn (node->decl, optimize))
 	  {
 	    struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
 	    struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
@@ -1768,12 +1776,13 @@ inline_small_functions (void)
 		int id = dfs->scc_no + 1;
 		for (n2 = node; n2;
 		     n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
-		  {
-		    struct ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
-		    if (info2->scc_no)
-		      break;
-		    info2->scc_no = id;
-		  }
+		  if (opt_for_fn (n2->decl, optimize))
+		    {
+		      struct ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
+		      if (info2->scc_no)
+			break;
+		      info2->scc_no = id;
+		    }
 	      }
 	  }
 
@@ -1801,6 +1810,9 @@ inline_small_functions (void)
       struct cgraph_edge *next = NULL;
       bool has_speculative = false;
 
+      if (!opt_for_fn (node->decl, optimize))
+	continue;
+
       if (dump_file)
 	fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
 
@@ -2369,9 +2381,6 @@ ipa_inline (void)
   int cold;
   bool remove_functions = false;
 
-  if (!optimize)
-    return 0;
-
   cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
   percent_rec = (sreal) 1 / (sreal) 100;
 
@@ -2467,6 +2476,10 @@ ipa_inline (void)
 	  struct cgraph_edge *edge, *next;
 	  bool update=false;
 
+	  if (!opt_for_fn (node->decl, optimize)
+	      || !opt_for_fn (node->decl, flag_inline_functions_called_once))
+	    continue;
+
 	  for (edge = node->callees; edge; edge = next)
 	    {
 	      next = edge->next_callee;
@@ -2499,8 +2512,7 @@ ipa_inline (void)
     }
 
   /* Free ipa-prop structures if they are no longer needed.  */
-  if (optimize)
-    ipa_free_all_structures_after_iinln ();
+  ipa_free_all_structures_after_iinln ();
 
   if (dump_file)
     {
Index: ipa-visibility.c
===================================================================
--- ipa-visibility.c	(revision 250021)
+++ ipa-visibility.c	(working copy)
@@ -622,9 +622,12 @@ function_and_variable_visibility (bool w
       int flags = flags_from_decl_or_type (node->decl);
 
       /* Optimize away PURE and CONST constructors and destructors.  */
-      if (optimize
+      if (node->analyzed
+	  && (DECL_STATIC_CONSTRUCTOR (node->decl)
+	      || DECL_STATIC_CONSTRUCTOR (node->decl))
 	  && (flags & (ECF_CONST | ECF_PURE))
-	  && !(flags & ECF_LOOPING_CONST_OR_PURE))
+	  && !(flags & ECF_LOOPING_CONST_OR_PURE)
+	  && opt_for_fn (node->decl, optimize))
 	{
 	  DECL_STATIC_CONSTRUCTOR (node->decl) = 0;
 	  DECL_STATIC_DESTRUCTOR (node->decl) = 0;
@@ -876,7 +879,7 @@ static unsigned int
 whole_program_function_and_variable_visibility (void)
 {
   function_and_variable_visibility (flag_whole_program);
-  if (optimize)
+  if (optimize || in_lto_p)
     ipa_discover_readonly_nonaddressable_vars ();
   return 0;
 }
Index: ipa.c
===================================================================
--- ipa.c	(revision 250021)
+++ ipa.c	(working copy)
@@ -118,8 +118,7 @@ process_references (symtab_node *snode,
       if (node->definition && !node->in_other_partition
 	  && ((!DECL_EXTERNAL (node->decl) || node->alias)
 	      || (((before_inlining_p
-		    && ((TREE_CODE (node->decl) != FUNCTION_DECL
-			 && optimize)
+		    && (TREE_CODE (node->decl) != FUNCTION_DECL
 			|| (TREE_CODE (node->decl) == FUNCTION_DECL
 			    && opt_for_fn (body->decl, optimize))
 		        || (symtab->state < IPA_SSA
@@ -312,7 +311,7 @@ symbol_table::remove_unreachable_nodes (
   hash_set<symtab_node *> reachable;
   hash_set<tree> body_needed_for_clonning;
   hash_set<void *> reachable_call_targets;
-  bool before_inlining_p = symtab->state < (!optimize ? IPA_SSA
+  bool before_inlining_p = symtab->state < (!optimize && !in_lto_p ? IPA_SSA
 					    : IPA_SSA_AFTER_INLINING);
 
   timevar_push (TV_IPA_UNREACHABLE);
@@ -696,7 +695,7 @@ symbol_table::remove_unreachable_nodes (
   symtab_node::checking_verify_symtab_nodes ();
 
   /* If we removed something, perhaps profile could be improved.  */
-  if (changed && optimize && ipa_call_summaries)
+  if (changed && (optimize || in_lto_p) && ipa_call_summaries)
     FOR_EACH_DEFINED_FUNCTION (node)
       ipa_propagate_frequency (node);
 
@@ -757,7 +756,7 @@ bool
 set_writeonly_bit (varpool_node *vnode, void *data)
 {
   vnode->writeonly = true;
-  if (optimize)
+  if (optimize || in_lto_p)
     {
       DECL_INITIAL (vnode->decl) = NULL;
       if (!vnode->alias)
@@ -1175,7 +1174,7 @@ pass_ipa_cdtor_merge::gate (function *)
   /* Perform the pass when we have no ctors/dtors support
      or at LTO time to merge multiple constructors into single
      function.  */
-  return !targetm.have_ctors_dtors || (optimize && in_lto_p);
+  return !targetm.have_ctors_dtors || in_lto_p;
 }
 
 } // anon namespace
@@ -1387,17 +1386,10 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *);
   virtual unsigned int execute (function *) { return ipa_single_use (); }
 
 }; // class pass_ipa_single_use
 
-bool
-pass_ipa_single_use::gate (function *)
-{
-  return optimize;
-}
-
 } // anon namespace
 
 ipa_opt_pass_d *

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Avoid global optimize flag checks in LTO
  2017-07-07 13:32 Avoid global optimize flag checks in LTO Jan Hubicka
@ 2017-07-07 17:53 ` Bernhard Reutner-Fischer
  2017-07-07 18:33   ` Jan Hubicka
  0 siblings, 1 reply; 3+ messages in thread
From: Bernhard Reutner-Fischer @ 2017-07-07 17:53 UTC (permalink / raw)
  To: gcc-patches, Jan Hubicka

On 7 July 2017 15:31:55 CEST, Jan Hubicka <hubicka@ucw.cz> wrote:
>Hi,
>this patch fixes some places where we check global optimize flag rather
>than
>doing it per-function. This makes optimization attribute work closer to
>what one gets when passing the same flag at command line.
>This requires to run IPA passes even with !optimize, but having fast
>way through
>which does mostly nothing except when it sees functions with optimize
>attributes
>set.

Sounds gross.
>
>Bootstrapped/regtested x86_64-linux, comitted.
>	

>Index: ipa-visibility.c
>===================================================================
>--- ipa-visibility.c	(revision 250021)
>+++ ipa-visibility.c	(working copy)
>@@ -622,9 +622,12 @@ function_and_variable_visibility (bool w
>       int flags = flags_from_decl_or_type (node->decl);
> 
>      /* Optimize away PURE and CONST constructors and destructors.  */
>-      if (optimize
>+      if (node->analyzed
>+	  && (DECL_STATIC_CONSTRUCTOR (node->decl)
>+	      || DECL_STATIC_CONSTRUCTOR (node->decl))

Typo DECL_STATIC_DESTRUCTOR

thanks,

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Avoid global optimize flag checks in LTO
  2017-07-07 17:53 ` Bernhard Reutner-Fischer
@ 2017-07-07 18:33   ` Jan Hubicka
  0 siblings, 0 replies; 3+ messages in thread
From: Jan Hubicka @ 2017-07-07 18:33 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer; +Cc: gcc-patches

> On 7 July 2017 15:31:55 CEST, Jan Hubicka <hubicka@ucw.cz> wrote:
> >Hi,
> >this patch fixes some places where we check global optimize flag rather
> >than
> >doing it per-function. This makes optimization attribute work closer to
> >what one gets when passing the same flag at command line.
> >This requires to run IPA passes even with !optimize, but having fast
> >way through
> >which does mostly nothing except when it sees functions with optimize
> >attributes
> >set.
> 
> Sounds gross.

Yep, supporting units compiled with different optimization flags is not
the prettiest. But with LTO they are a sad reality.
> >
> >Bootstrapped/regtested x86_64-linux, comitted.
> >	
> 
> >Index: ipa-visibility.c
> >===================================================================
> >--- ipa-visibility.c	(revision 250021)
> >+++ ipa-visibility.c	(working copy)
> >@@ -622,9 +622,12 @@ function_and_variable_visibility (bool w
> >       int flags = flags_from_decl_or_type (node->decl);
> > 
> >      /* Optimize away PURE and CONST constructors and destructors.  */
> >-      if (optimize
> >+      if (node->analyzed
> >+	  && (DECL_STATIC_CONSTRUCTOR (node->decl)
> >+	      || DECL_STATIC_CONSTRUCTOR (node->decl))
> 
> Typo DECL_STATIC_DESTRUCTOR

Oops, thanks! I will fix it shortly.

Honza
> 
> thanks,

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-07-07 18:33 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-07 13:32 Avoid global optimize flag checks in LTO Jan Hubicka
2017-07-07 17:53 ` Bernhard Reutner-Fischer
2017-07-07 18:33   ` Jan Hubicka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).