public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Fix Eh delivery in partitioned functions
@ 2017-07-18  7:26 Jan Hubicka
  2017-07-18  7:34 ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Hubicka @ 2017-07-18  7:26 UTC (permalink / raw)
  To: gcc-patches, rguenther

Hi,
this patch fixes wrong code issue with BB partitioning where sometimes EH
is not delivered.  This is very old issue that affect all release branches
with -fprofile-use, but recently surfaced as libstdc++ testsuite regression
because we now partition functions based on static profile prediction.

The problem is that EH tables are stored as offsets from start of functions.
In the record however value 0 is special and means that there is no landing
pad for a given region.  Normally this is safe because landing pads never
appear as very first label in function.  This is however no longer true with
partitining where cold partition is actually quite likely to start by landing
pad.

The change in except.c adds sanity check that no EH landing pads are very first
in the insn stream.  The change in bb-reorder makes reorder to chose
non-landing-pad BB as first trace for the cold partition. Such BB always exists
because landing pads must be in same partition as the instruction throwing them
and we never make BB both landing pad and reachable by normal control folow.
However I am not thrilled by the fix as it is bit fragile in case some
optimization happends after bb partitioning and code is moved away.  Also the
logic can be confused by asm statement which may result in no code (again
however the BB reachable from outside world should contain something that
produce EH that is a real instruction).

Ideas for better fix would be welcome then.  If the assert I added triggers
for valid reasons, we may just end up adding a NOP in the rare case we do
not suceed arranging cold partition to not start with landing pad.

Bootstrapped/regtested x86_64-linux, looks sane?

Honza

	PR middle-end/81331 
	* except.c (first_in_partition): New function.
	(dw2_output_call_site_table): Sanity check that landing pads are not
	very first in the partition.
	* bb-reorder.c (ok_to_be_first): New function.
	(connect_traces): Avoid traces that are !ok_to_be_first to start
	partitions.
Index: except.c
===================================================================
--- except.c	(revision 250226)
+++ except.c	(working copy)
@@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
   return size;
 }
 
+/* Return true if L will appear as very first in its partition.  */
+
+bool
+first_in_partition (rtx_insn *l)
+{
+  while (l != NULL_RTX)
+    {
+      if (active_insn_p (l))
+	return false;
+      else if (GET_CODE (l) == NOTE
+	       && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+	return true;
+      l = PREV_INSN (l);
+    }
+  return true;
+}
+
 static void
 dw2_output_call_site_table (int cs_format, int section)
 {
@@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
       ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
 
       if (cs->landing_pad)
-	ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
-				     CODE_LABEL_NUMBER (cs->landing_pad));
+	{
+	  ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
+				       CODE_LABEL_NUMBER (cs->landing_pad));
+	  /* Be sure that the offset will not be 0 as that would make EH
+	     delivery code to think that there is no landing pad.  */
+	  gcc_checking_assert (!first_in_partition
+				   (as_a <rtx_insn *> (cs->landing_pad)));
+	}
 
       /* ??? Perhaps use insn length scaling if the assembler supports
 	 generic arithmetic.  */
Index: bb-reorder.c
===================================================================
--- bb-reorder.c	(revision 250226)
+++ bb-reorder.c	(working copy)
@@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
   return is_better_edge;
 }
 
+/* If we place EH landing pad as very first BB in the partition, its offset
+   from start of function is 0 which is special cased by the eh table to mean
+   no landing pad.  For this reason such BBs can not appear as very first in
+   the partition.  */
+static bool
+ok_to_be_first (struct trace *t)
+{
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, t->first->preds)
+    if (e->flags & EDGE_EH)
+      return false;
+  return true;
+}
+
 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
 
 static void
@@ -1080,6 +1095,7 @@ connect_traces (int n_traces, struct tra
   int freq_threshold;
   gcov_type count_threshold;
   bool for_size = optimize_function_for_size_p (cfun);
+  bool first_in_partition;
 
   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
   if (max_entry_count.to_gcov_type () < INT_MAX / 1000)
@@ -1092,6 +1108,7 @@ connect_traces (int n_traces, struct tra
   current_pass = 1;
   current_partition = BB_PARTITION (traces[0].first);
   two_passes = false;
+  first_in_partition = true;
 
   if (crtl->has_bb_partition)
     for (i = 0; i < n_traces && !two_passes; i++)
@@ -1116,6 +1133,7 @@ connect_traces (int n_traces, struct tra
 	    current_partition = BB_COLD_PARTITION;
 	  else
 	    current_partition = BB_HOT_PARTITION;
+	  first_in_partition = true;
 	}
 
       if (connected[t])
@@ -1125,6 +1143,9 @@ connect_traces (int n_traces, struct tra
 	  && BB_PARTITION (traces[t].first) != current_partition)
 	continue;
 
+      if (first_in_partition && !ok_to_be_first (&traces[t]))
+	continue;
+
       connected[t] = true;
 
       /* Find the predecessor traces.  */
@@ -1143,7 +1164,9 @@ connect_traces (int n_traces, struct tra
 		  && bbd[si].end_of_trace >= 0
 		  && !connected[bbd[si].end_of_trace]
 		  && (BB_PARTITION (e->src) == current_partition)
-		  && connect_better_edge_p (e, true, best_len, best, traces))
+		  && connect_better_edge_p (e, true, best_len, best, traces)
+		  && (!first_in_partition
+		      || ok_to_be_first (&traces[bbd[si].end_of_trace])))
 		{
 		  best = e;
 		  best_len = traces[bbd[si].end_of_trace].length;
@@ -1151,6 +1174,8 @@ connect_traces (int n_traces, struct tra
 	    }
 	  if (best)
 	    {
+	      gcc_checking_assert (BB_PARTITION (best->src)
+				   == BB_PARTITION (best->dest));
 	      best->src->aux = best->dest;
 	      t2 = bbd[best->src->index].end_of_trace;
 	      connected[t2] = true;
@@ -1166,8 +1191,13 @@ connect_traces (int n_traces, struct tra
 	}
 
       if (last_trace >= 0)
-	traces[last_trace].last->aux = traces[t2].first;
+	{
+	  gcc_checking_assert (!first_in_partition
+			       || ok_to_be_first (&traces[t2]));
+	  traces[last_trace].last->aux = traces[t2].first;
+	}
       last_trace = t;
+      first_in_partition = false;
 
       /* Find the successor traces.  */
       while (1)
@@ -1226,6 +1256,8 @@ connect_traces (int n_traces, struct tra
 		fprintf (dump_file, "Connection: %d %d\n",
 			 best->src->index, best->dest->index);
 
+	      gcc_checking_assert (BB_PARTITION (best->dest)
+				   == BB_PARTITION (best->src));
 	      t = bbd[best->dest->index].start_of_trace;
 	      traces[last_trace].last->aux = traces[t].first;
 	      connected[t] = true;
@@ -1238,6 +1270,8 @@ connect_traces (int n_traces, struct tra
 		  fprintf (dump_file, "Connection: %d %d\n",
 			   best->src->index, best->dest->index);
 		}
+	      gcc_checking_assert (BB_PARTITION (best->dest)
+				   == BB_PARTITION (best->src));
 	      t = bbd[best->dest->index].start_of_trace;
 	      traces[last_trace].last->aux = traces[t].first;
 	      connected[t] = true;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-18  7:26 Fix Eh delivery in partitioned functions Jan Hubicka
@ 2017-07-18  7:34 ` Richard Biener
  2017-07-18  7:44   ` Jan Hubicka
  0 siblings, 1 reply; 10+ messages in thread
From: Richard Biener @ 2017-07-18  7:34 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches

On Tue, 18 Jul 2017, Jan Hubicka wrote:

> Hi,
> this patch fixes wrong code issue with BB partitioning where sometimes EH
> is not delivered.  This is very old issue that affect all release branches
> with -fprofile-use, but recently surfaced as libstdc++ testsuite regression
> because we now partition functions based on static profile prediction.
> 
> The problem is that EH tables are stored as offsets from start of functions.
> In the record however value 0 is special and means that there is no landing
> pad for a given region.  Normally this is safe because landing pads never
> appear as very first label in function.  This is however no longer true with
> partitining where cold partition is actually quite likely to start by landing
> pad.
> 
> The change in except.c adds sanity check that no EH landing pads are very first
> in the insn stream.  The change in bb-reorder makes reorder to chose
> non-landing-pad BB as first trace for the cold partition. Such BB always exists
> because landing pads must be in same partition as the instruction throwing them
> and we never make BB both landing pad and reachable by normal control folow.
> However I am not thrilled by the fix as it is bit fragile in case some
> optimization happends after bb partitioning and code is moved away.  Also the
> logic can be confused by asm statement which may result in no code (again
> however the BB reachable from outside world should contain something that
> produce EH that is a real instruction).
> 
> Ideas for better fix would be welcome then.  If the assert I added triggers
> for valid reasons, we may just end up adding a NOP in the rare case we do
> not suceed arranging cold partition to not start with landing pad.

Yeah, I'd rather pad the function start with a nop if it starts with a
landing pad.  How difficult would it be to arrange for this?  I suppose
we'd need to touch each and every target to accomplish this?  Or end up
using gen_nop in generic code?

Richard.

> Bootstrapped/regtested x86_64-linux, looks sane?
> 
> Honza
> 
> 	PR middle-end/81331 
> 	* except.c (first_in_partition): New function.
> 	(dw2_output_call_site_table): Sanity check that landing pads are not
> 	very first in the partition.
> 	* bb-reorder.c (ok_to_be_first): New function.
> 	(connect_traces): Avoid traces that are !ok_to_be_first to start
> 	partitions.
> Index: except.c
> ===================================================================
> --- except.c	(revision 250226)
> +++ except.c	(working copy)
> @@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
>    return size;
>  }
>  
> +/* Return true if L will appear as very first in its partition.  */
> +
> +bool
> +first_in_partition (rtx_insn *l)
> +{
> +  while (l != NULL_RTX)
> +    {
> +      if (active_insn_p (l))
> +	return false;
> +      else if (GET_CODE (l) == NOTE
> +	       && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> +	return true;
> +      l = PREV_INSN (l);
> +    }
> +  return true;
> +}
> +
>  static void
>  dw2_output_call_site_table (int cs_format, int section)
>  {
> @@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
>        ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
>  
>        if (cs->landing_pad)
> -	ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> -				     CODE_LABEL_NUMBER (cs->landing_pad));
> +	{
> +	  ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> +				       CODE_LABEL_NUMBER (cs->landing_pad));
> +	  /* Be sure that the offset will not be 0 as that would make EH
> +	     delivery code to think that there is no landing pad.  */
> +	  gcc_checking_assert (!first_in_partition
> +				   (as_a <rtx_insn *> (cs->landing_pad)));
> +	}
>  
>        /* ??? Perhaps use insn length scaling if the assembler supports
>  	 generic arithmetic.  */
> Index: bb-reorder.c
> ===================================================================
> --- bb-reorder.c	(revision 250226)
> +++ bb-reorder.c	(working copy)
> @@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
>    return is_better_edge;
>  }
>  
> +/* If we place EH landing pad as very first BB in the partition, its offset
> +   from start of function is 0 which is special cased by the eh table to mean
> +   no landing pad.  For this reason such BBs can not appear as very first in
> +   the partition.  */
> +static bool
> +ok_to_be_first (struct trace *t)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  FOR_EACH_EDGE (e, ei, t->first->preds)
> +    if (e->flags & EDGE_EH)
> +      return false;
> +  return true;
> +}
> +
>  /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
>  
>  static void
> @@ -1080,6 +1095,7 @@ connect_traces (int n_traces, struct tra
>    int freq_threshold;
>    gcov_type count_threshold;
>    bool for_size = optimize_function_for_size_p (cfun);
> +  bool first_in_partition;
>  
>    freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
>    if (max_entry_count.to_gcov_type () < INT_MAX / 1000)
> @@ -1092,6 +1108,7 @@ connect_traces (int n_traces, struct tra
>    current_pass = 1;
>    current_partition = BB_PARTITION (traces[0].first);
>    two_passes = false;
> +  first_in_partition = true;
>  
>    if (crtl->has_bb_partition)
>      for (i = 0; i < n_traces && !two_passes; i++)
> @@ -1116,6 +1133,7 @@ connect_traces (int n_traces, struct tra
>  	    current_partition = BB_COLD_PARTITION;
>  	  else
>  	    current_partition = BB_HOT_PARTITION;
> +	  first_in_partition = true;
>  	}
>  
>        if (connected[t])
> @@ -1125,6 +1143,9 @@ connect_traces (int n_traces, struct tra
>  	  && BB_PARTITION (traces[t].first) != current_partition)
>  	continue;
>  
> +      if (first_in_partition && !ok_to_be_first (&traces[t]))
> +	continue;
> +
>        connected[t] = true;
>  
>        /* Find the predecessor traces.  */
> @@ -1143,7 +1164,9 @@ connect_traces (int n_traces, struct tra
>  		  && bbd[si].end_of_trace >= 0
>  		  && !connected[bbd[si].end_of_trace]
>  		  && (BB_PARTITION (e->src) == current_partition)
> -		  && connect_better_edge_p (e, true, best_len, best, traces))
> +		  && connect_better_edge_p (e, true, best_len, best, traces)
> +		  && (!first_in_partition
> +		      || ok_to_be_first (&traces[bbd[si].end_of_trace])))
>  		{
>  		  best = e;
>  		  best_len = traces[bbd[si].end_of_trace].length;
> @@ -1151,6 +1174,8 @@ connect_traces (int n_traces, struct tra
>  	    }
>  	  if (best)
>  	    {
> +	      gcc_checking_assert (BB_PARTITION (best->src)
> +				   == BB_PARTITION (best->dest));
>  	      best->src->aux = best->dest;
>  	      t2 = bbd[best->src->index].end_of_trace;
>  	      connected[t2] = true;
> @@ -1166,8 +1191,13 @@ connect_traces (int n_traces, struct tra
>  	}
>  
>        if (last_trace >= 0)
> -	traces[last_trace].last->aux = traces[t2].first;
> +	{
> +	  gcc_checking_assert (!first_in_partition
> +			       || ok_to_be_first (&traces[t2]));
> +	  traces[last_trace].last->aux = traces[t2].first;
> +	}
>        last_trace = t;
> +      first_in_partition = false;
>  
>        /* Find the successor traces.  */
>        while (1)
> @@ -1226,6 +1256,8 @@ connect_traces (int n_traces, struct tra
>  		fprintf (dump_file, "Connection: %d %d\n",
>  			 best->src->index, best->dest->index);
>  
> +	      gcc_checking_assert (BB_PARTITION (best->dest)
> +				   == BB_PARTITION (best->src));
>  	      t = bbd[best->dest->index].start_of_trace;
>  	      traces[last_trace].last->aux = traces[t].first;
>  	      connected[t] = true;
> @@ -1238,6 +1270,8 @@ connect_traces (int n_traces, struct tra
>  		  fprintf (dump_file, "Connection: %d %d\n",
>  			   best->src->index, best->dest->index);
>  		}
> +	      gcc_checking_assert (BB_PARTITION (best->dest)
> +				   == BB_PARTITION (best->src));
>  	      t = bbd[best->dest->index].start_of_trace;
>  	      traces[last_trace].last->aux = traces[t].first;
>  	      connected[t] = true;
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-18  7:34 ` Richard Biener
@ 2017-07-18  7:44   ` Jan Hubicka
  2017-07-18  7:46     ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Hubicka @ 2017-07-18  7:44 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

> On Tue, 18 Jul 2017, Jan Hubicka wrote:
> 
> > Hi,
> > this patch fixes wrong code issue with BB partitioning where sometimes EH
> > is not delivered.  This is very old issue that affect all release branches
> > with -fprofile-use, but recently surfaced as libstdc++ testsuite regression
> > because we now partition functions based on static profile prediction.
> > 
> > The problem is that EH tables are stored as offsets from start of functions.
> > In the record however value 0 is special and means that there is no landing
> > pad for a given region.  Normally this is safe because landing pads never
> > appear as very first label in function.  This is however no longer true with
> > partitining where cold partition is actually quite likely to start by landing
> > pad.
> > 
> > The change in except.c adds sanity check that no EH landing pads are very first
> > in the insn stream.  The change in bb-reorder makes reorder to chose
> > non-landing-pad BB as first trace for the cold partition. Such BB always exists
> > because landing pads must be in same partition as the instruction throwing them
> > and we never make BB both landing pad and reachable by normal control folow.
> > However I am not thrilled by the fix as it is bit fragile in case some
> > optimization happends after bb partitioning and code is moved away.  Also the
> > logic can be confused by asm statement which may result in no code (again
> > however the BB reachable from outside world should contain something that
> > produce EH that is a real instruction).
> > 
> > Ideas for better fix would be welcome then.  If the assert I added triggers
> > for valid reasons, we may just end up adding a NOP in the rare case we do
> > not suceed arranging cold partition to not start with landing pad.
> 
> Yeah, I'd rather pad the function start with a nop if it starts with a
> landing pad.  How difficult would it be to arrange for this?  I suppose
> we'd need to touch each and every target to accomplish this?  Or end up
> using gen_nop in generic code?

I think we could just output from generic code - I think it can be done by
final_scan_insn. I don't know however if we have a way to tell if the section
starts with a landing pad?

Honza
> 
> Richard.
> 
> > Bootstrapped/regtested x86_64-linux, looks sane?
> > 
> > Honza
> > 
> > 	PR middle-end/81331 
> > 	* except.c (first_in_partition): New function.
> > 	(dw2_output_call_site_table): Sanity check that landing pads are not
> > 	very first in the partition.
> > 	* bb-reorder.c (ok_to_be_first): New function.
> > 	(connect_traces): Avoid traces that are !ok_to_be_first to start
> > 	partitions.
> > Index: except.c
> > ===================================================================
> > --- except.c	(revision 250226)
> > +++ except.c	(working copy)
> > @@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
> >    return size;
> >  }
> >  
> > +/* Return true if L will appear as very first in its partition.  */
> > +
> > +bool
> > +first_in_partition (rtx_insn *l)
> > +{
> > +  while (l != NULL_RTX)
> > +    {
> > +      if (active_insn_p (l))
> > +	return false;
> > +      else if (GET_CODE (l) == NOTE
> > +	       && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> > +	return true;
> > +      l = PREV_INSN (l);
> > +    }
> > +  return true;
> > +}
> > +
> >  static void
> >  dw2_output_call_site_table (int cs_format, int section)
> >  {
> > @@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
> >        ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
> >  
> >        if (cs->landing_pad)
> > -	ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > -				     CODE_LABEL_NUMBER (cs->landing_pad));
> > +	{
> > +	  ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > +				       CODE_LABEL_NUMBER (cs->landing_pad));
> > +	  /* Be sure that the offset will not be 0 as that would make EH
> > +	     delivery code to think that there is no landing pad.  */
> > +	  gcc_checking_assert (!first_in_partition
> > +				   (as_a <rtx_insn *> (cs->landing_pad)));
> > +	}
> >  
> >        /* ??? Perhaps use insn length scaling if the assembler supports
> >  	 generic arithmetic.  */
> > Index: bb-reorder.c
> > ===================================================================
> > --- bb-reorder.c	(revision 250226)
> > +++ bb-reorder.c	(working copy)
> > @@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
> >    return is_better_edge;
> >  }
> >  
> > +/* If we place EH landing pad as very first BB in the partition, its offset
> > +   from start of function is 0 which is special cased by the eh table to mean
> > +   no landing pad.  For this reason such BBs can not appear as very first in
> > +   the partition.  */
> > +static bool
> > +ok_to_be_first (struct trace *t)
> > +{
> > +  edge e;
> > +  edge_iterator ei;
> > +  FOR_EACH_EDGE (e, ei, t->first->preds)
> > +    if (e->flags & EDGE_EH)
> > +      return false;
> > +  return true;
> > +}
> > +
> >  /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
> >  
> >  static void
> > @@ -1080,6 +1095,7 @@ connect_traces (int n_traces, struct tra
> >    int freq_threshold;
> >    gcov_type count_threshold;
> >    bool for_size = optimize_function_for_size_p (cfun);
> > +  bool first_in_partition;
> >  
> >    freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
> >    if (max_entry_count.to_gcov_type () < INT_MAX / 1000)
> > @@ -1092,6 +1108,7 @@ connect_traces (int n_traces, struct tra
> >    current_pass = 1;
> >    current_partition = BB_PARTITION (traces[0].first);
> >    two_passes = false;
> > +  first_in_partition = true;
> >  
> >    if (crtl->has_bb_partition)
> >      for (i = 0; i < n_traces && !two_passes; i++)
> > @@ -1116,6 +1133,7 @@ connect_traces (int n_traces, struct tra
> >  	    current_partition = BB_COLD_PARTITION;
> >  	  else
> >  	    current_partition = BB_HOT_PARTITION;
> > +	  first_in_partition = true;
> >  	}
> >  
> >        if (connected[t])
> > @@ -1125,6 +1143,9 @@ connect_traces (int n_traces, struct tra
> >  	  && BB_PARTITION (traces[t].first) != current_partition)
> >  	continue;
> >  
> > +      if (first_in_partition && !ok_to_be_first (&traces[t]))
> > +	continue;
> > +
> >        connected[t] = true;
> >  
> >        /* Find the predecessor traces.  */
> > @@ -1143,7 +1164,9 @@ connect_traces (int n_traces, struct tra
> >  		  && bbd[si].end_of_trace >= 0
> >  		  && !connected[bbd[si].end_of_trace]
> >  		  && (BB_PARTITION (e->src) == current_partition)
> > -		  && connect_better_edge_p (e, true, best_len, best, traces))
> > +		  && connect_better_edge_p (e, true, best_len, best, traces)
> > +		  && (!first_in_partition
> > +		      || ok_to_be_first (&traces[bbd[si].end_of_trace])))
> >  		{
> >  		  best = e;
> >  		  best_len = traces[bbd[si].end_of_trace].length;
> > @@ -1151,6 +1174,8 @@ connect_traces (int n_traces, struct tra
> >  	    }
> >  	  if (best)
> >  	    {
> > +	      gcc_checking_assert (BB_PARTITION (best->src)
> > +				   == BB_PARTITION (best->dest));
> >  	      best->src->aux = best->dest;
> >  	      t2 = bbd[best->src->index].end_of_trace;
> >  	      connected[t2] = true;
> > @@ -1166,8 +1191,13 @@ connect_traces (int n_traces, struct tra
> >  	}
> >  
> >        if (last_trace >= 0)
> > -	traces[last_trace].last->aux = traces[t2].first;
> > +	{
> > +	  gcc_checking_assert (!first_in_partition
> > +			       || ok_to_be_first (&traces[t2]));
> > +	  traces[last_trace].last->aux = traces[t2].first;
> > +	}
> >        last_trace = t;
> > +      first_in_partition = false;
> >  
> >        /* Find the successor traces.  */
> >        while (1)
> > @@ -1226,6 +1256,8 @@ connect_traces (int n_traces, struct tra
> >  		fprintf (dump_file, "Connection: %d %d\n",
> >  			 best->src->index, best->dest->index);
> >  
> > +	      gcc_checking_assert (BB_PARTITION (best->dest)
> > +				   == BB_PARTITION (best->src));
> >  	      t = bbd[best->dest->index].start_of_trace;
> >  	      traces[last_trace].last->aux = traces[t].first;
> >  	      connected[t] = true;
> > @@ -1238,6 +1270,8 @@ connect_traces (int n_traces, struct tra
> >  		  fprintf (dump_file, "Connection: %d %d\n",
> >  			   best->src->index, best->dest->index);
> >  		}
> > +	      gcc_checking_assert (BB_PARTITION (best->dest)
> > +				   == BB_PARTITION (best->src));
> >  	      t = bbd[best->dest->index].start_of_trace;
> >  	      traces[last_trace].last->aux = traces[t].first;
> >  	      connected[t] = true;
> > 
> > 
> 
> -- 
> Richard Biener <rguenther@suse.de>
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-18  7:44   ` Jan Hubicka
@ 2017-07-18  7:46     ` Richard Biener
  2017-07-19 12:52       ` Jan Hubicka
  0 siblings, 1 reply; 10+ messages in thread
From: Richard Biener @ 2017-07-18  7:46 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches

On Tue, 18 Jul 2017, Jan Hubicka wrote:

> > On Tue, 18 Jul 2017, Jan Hubicka wrote:
> > 
> > > Hi,
> > > this patch fixes wrong code issue with BB partitioning where sometimes EH
> > > is not delivered.  This is very old issue that affect all release branches
> > > with -fprofile-use, but recently surfaced as libstdc++ testsuite regression
> > > because we now partition functions based on static profile prediction.
> > > 
> > > The problem is that EH tables are stored as offsets from start of functions.
> > > In the record however value 0 is special and means that there is no landing
> > > pad for a given region.  Normally this is safe because landing pads never
> > > appear as very first label in function.  This is however no longer true with
> > > partitining where cold partition is actually quite likely to start by landing
> > > pad.
> > > 
> > > The change in except.c adds sanity check that no EH landing pads are very first
> > > in the insn stream.  The change in bb-reorder makes reorder to chose
> > > non-landing-pad BB as first trace for the cold partition. Such BB always exists
> > > because landing pads must be in same partition as the instruction throwing them
> > > and we never make BB both landing pad and reachable by normal control folow.
> > > However I am not thrilled by the fix as it is bit fragile in case some
> > > optimization happends after bb partitioning and code is moved away.  Also the
> > > logic can be confused by asm statement which may result in no code (again
> > > however the BB reachable from outside world should contain something that
> > > produce EH that is a real instruction).
> > > 
> > > Ideas for better fix would be welcome then.  If the assert I added triggers
> > > for valid reasons, we may just end up adding a NOP in the rare case we do
> > > not suceed arranging cold partition to not start with landing pad.
> > 
> > Yeah, I'd rather pad the function start with a nop if it starts with a
> > landing pad.  How difficult would it be to arrange for this?  I suppose
> > we'd need to touch each and every target to accomplish this?  Or end up
> > using gen_nop in generic code?
> 
> I think we could just output from generic code - I think it can be done by
> final_scan_insn. I don't know however if we have a way to tell if the section
> starts with a landing pad?

Not sure either -- some insn note / bb note?  Some flag on the label?
At least the latter should be easy to add if it's not there already.

Richard.

> Honza
> > 
> > Richard.
> > 
> > > Bootstrapped/regtested x86_64-linux, looks sane?
> > > 
> > > Honza
> > > 
> > > 	PR middle-end/81331 
> > > 	* except.c (first_in_partition): New function.
> > > 	(dw2_output_call_site_table): Sanity check that landing pads are not
> > > 	very first in the partition.
> > > 	* bb-reorder.c (ok_to_be_first): New function.
> > > 	(connect_traces): Avoid traces that are !ok_to_be_first to start
> > > 	partitions.
> > > Index: except.c
> > > ===================================================================
> > > --- except.c	(revision 250226)
> > > +++ except.c	(working copy)
> > > @@ -2724,6 +2724,23 @@ sjlj_size_of_call_site_table (void)
> > >    return size;
> > >  }
> > >  
> > > +/* Return true if L will appear as very first in its partition.  */
> > > +
> > > +bool
> > > +first_in_partition (rtx_insn *l)
> > > +{
> > > +  while (l != NULL_RTX)
> > > +    {
> > > +      if (active_insn_p (l))
> > > +	return false;
> > > +      else if (GET_CODE (l) == NOTE
> > > +	       && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> > > +	return true;
> > > +      l = PREV_INSN (l);
> > > +    }
> > > +  return true;
> > > +}
> > > +
> > >  static void
> > >  dw2_output_call_site_table (int cs_format, int section)
> > >  {
> > > @@ -2749,8 +2766,14 @@ dw2_output_call_site_table (int cs_forma
> > >        ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
> > >  
> > >        if (cs->landing_pad)
> > > -	ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > > -				     CODE_LABEL_NUMBER (cs->landing_pad));
> > > +	{
> > > +	  ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> > > +				       CODE_LABEL_NUMBER (cs->landing_pad));
> > > +	  /* Be sure that the offset will not be 0 as that would make EH
> > > +	     delivery code to think that there is no landing pad.  */
> > > +	  gcc_checking_assert (!first_in_partition
> > > +				   (as_a <rtx_insn *> (cs->landing_pad)));
> > > +	}
> > >  
> > >        /* ??? Perhaps use insn length scaling if the assembler supports
> > >  	 generic arithmetic.  */
> > > Index: bb-reorder.c
> > > ===================================================================
> > > --- bb-reorder.c	(revision 250226)
> > > +++ bb-reorder.c	(working copy)
> > > @@ -1066,6 +1066,21 @@ connect_better_edge_p (const_edge e, boo
> > >    return is_better_edge;
> > >  }
> > >  
> > > +/* If we place EH landing pad as very first BB in the partition, its offset
> > > +   from start of function is 0 which is special cased by the eh table to mean
> > > +   no landing pad.  For this reason such BBs can not appear as very first in
> > > +   the partition.  */
> > > +static bool
> > > +ok_to_be_first (struct trace *t)
> > > +{
> > > +  edge e;
> > > +  edge_iterator ei;
> > > +  FOR_EACH_EDGE (e, ei, t->first->preds)
> > > +    if (e->flags & EDGE_EH)
> > > +      return false;
> > > +  return true;
> > > +}
> > > +
> > >  /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
> > >  
> > >  static void
> > > @@ -1080,6 +1095,7 @@ connect_traces (int n_traces, struct tra
> > >    int freq_threshold;
> > >    gcov_type count_threshold;
> > >    bool for_size = optimize_function_for_size_p (cfun);
> > > +  bool first_in_partition;
> > >  
> > >    freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
> > >    if (max_entry_count.to_gcov_type () < INT_MAX / 1000)
> > > @@ -1092,6 +1108,7 @@ connect_traces (int n_traces, struct tra
> > >    current_pass = 1;
> > >    current_partition = BB_PARTITION (traces[0].first);
> > >    two_passes = false;
> > > +  first_in_partition = true;
> > >  
> > >    if (crtl->has_bb_partition)
> > >      for (i = 0; i < n_traces && !two_passes; i++)
> > > @@ -1116,6 +1133,7 @@ connect_traces (int n_traces, struct tra
> > >  	    current_partition = BB_COLD_PARTITION;
> > >  	  else
> > >  	    current_partition = BB_HOT_PARTITION;
> > > +	  first_in_partition = true;
> > >  	}
> > >  
> > >        if (connected[t])
> > > @@ -1125,6 +1143,9 @@ connect_traces (int n_traces, struct tra
> > >  	  && BB_PARTITION (traces[t].first) != current_partition)
> > >  	continue;
> > >  
> > > +      if (first_in_partition && !ok_to_be_first (&traces[t]))
> > > +	continue;
> > > +
> > >        connected[t] = true;
> > >  
> > >        /* Find the predecessor traces.  */
> > > @@ -1143,7 +1164,9 @@ connect_traces (int n_traces, struct tra
> > >  		  && bbd[si].end_of_trace >= 0
> > >  		  && !connected[bbd[si].end_of_trace]
> > >  		  && (BB_PARTITION (e->src) == current_partition)
> > > -		  && connect_better_edge_p (e, true, best_len, best, traces))
> > > +		  && connect_better_edge_p (e, true, best_len, best, traces)
> > > +		  && (!first_in_partition
> > > +		      || ok_to_be_first (&traces[bbd[si].end_of_trace])))
> > >  		{
> > >  		  best = e;
> > >  		  best_len = traces[bbd[si].end_of_trace].length;
> > > @@ -1151,6 +1174,8 @@ connect_traces (int n_traces, struct tra
> > >  	    }
> > >  	  if (best)
> > >  	    {
> > > +	      gcc_checking_assert (BB_PARTITION (best->src)
> > > +				   == BB_PARTITION (best->dest));
> > >  	      best->src->aux = best->dest;
> > >  	      t2 = bbd[best->src->index].end_of_trace;
> > >  	      connected[t2] = true;
> > > @@ -1166,8 +1191,13 @@ connect_traces (int n_traces, struct tra
> > >  	}
> > >  
> > >        if (last_trace >= 0)
> > > -	traces[last_trace].last->aux = traces[t2].first;
> > > +	{
> > > +	  gcc_checking_assert (!first_in_partition
> > > +			       || ok_to_be_first (&traces[t2]));
> > > +	  traces[last_trace].last->aux = traces[t2].first;
> > > +	}
> > >        last_trace = t;
> > > +      first_in_partition = false;
> > >  
> > >        /* Find the successor traces.  */
> > >        while (1)
> > > @@ -1226,6 +1256,8 @@ connect_traces (int n_traces, struct tra
> > >  		fprintf (dump_file, "Connection: %d %d\n",
> > >  			 best->src->index, best->dest->index);
> > >  
> > > +	      gcc_checking_assert (BB_PARTITION (best->dest)
> > > +				   == BB_PARTITION (best->src));
> > >  	      t = bbd[best->dest->index].start_of_trace;
> > >  	      traces[last_trace].last->aux = traces[t].first;
> > >  	      connected[t] = true;
> > > @@ -1238,6 +1270,8 @@ connect_traces (int n_traces, struct tra
> > >  		  fprintf (dump_file, "Connection: %d %d\n",
> > >  			   best->src->index, best->dest->index);
> > >  		}
> > > +	      gcc_checking_assert (BB_PARTITION (best->dest)
> > > +				   == BB_PARTITION (best->src));
> > >  	      t = bbd[best->dest->index].start_of_trace;
> > >  	      traces[last_trace].last->aux = traces[t].first;
> > >  	      connected[t] = true;
> > > 
> > > 
> > 
> > -- 
> > Richard Biener <rguenther@suse.de>
> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-18  7:46     ` Richard Biener
@ 2017-07-19 12:52       ` Jan Hubicka
  2017-07-19 13:57         ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Hubicka @ 2017-07-19 12:52 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

> > I think we could just output from generic code - I think it can be done by
> > final_scan_insn. I don't know however if we have a way to tell if the section
> > starts with a landing pad?
> 
> Not sure either -- some insn note / bb note?  Some flag on the label?
> At least the latter should be easy to add if it's not there already.
> 
> Richard.

Hi,
this is updated patch.  I am now adding NOP_EXPR into the instruction stream.
This is done before shorten branches so alignment tracking works there as
expected. 
Landing pads are having PRESERVE flag set, but that is also true about named
labels etc.  So I think only safe way is to look them up from the EH tables
which is not that hard.  first_in_partition is now called on every landing
pad in the cold section and it walks backward looking if it can be first.  I added
visited set to be sure it runs in linear time.

Boostrapped/regtested x86_64-linux, OK?

Honza

	* except.c (first_in_partition): New function.
	(maybe_add_nop_after_section_switch): New function.
	* except.h (maybe_add_nop_after_section_switch): Declare.
	* final.c (rest_of_handle_shorten_branches): Use it.
Index: except.c
===================================================================
--- except.c	(revision 250312)
+++ except.c	(working copy)
@@ -2724,6 +2724,60 @@ sjlj_size_of_call_site_table (void)
   return size;
 }
 
+/* If L is first in the partition return NOTE_INSN_SWITCH_TEXT_SECTIONS
+   note which starts the section.  */
+rtx_insn *
+first_in_partition (rtx_insn *l, hash_set<rtx_insn *> *visited)
+{
+  while (l != NULL_RTX)
+    {
+      if (visited->add (l))
+	return NULL;
+      if (active_insn_p (l)
+	  && GET_CODE (PATTERN (l)) != ASM_INPUT
+	  && GET_CODE (PATTERN (l)) != ASM_OPERANDS)
+	return NULL; 
+      else if (GET_CODE (l) == NOTE
+	       && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+	return l;
+      l = PREV_INSN (l);
+    }
+  gcc_unreachable ();
+}
+
+/* Return true if NOP needs to be inserted after 
+   NOTE_INSN_SWITCH_TEXT_SECTIONS.  This is the case when section starts with
+   EH landing pad. Otherwise the landing pad offset will end up being 0 which
+   will be interpreted as no landing pad and exceptions will not be delivered.
+ */
+
+bool
+maybe_add_nop_after_section_switch (void)
+{
+  if (!crtl->uses_eh_lsda
+      || !crtl->eh.call_site_record_v[1])
+    return false;
+  int n = vec_safe_length (crtl->eh.call_site_record_v[1]);
+  hash_set<rtx_insn *> visited;
+
+  for (int i = 0; i < n; ++i)
+    {
+      struct call_site_record_d *cs
+	 = (*crtl->eh.call_site_record_v[1])[i];
+      if (cs->landing_pad)
+	{
+	  rtx insn = first_in_partition (as_a <rtx_insn *> (cs->landing_pad),
+					 &visited);
+	  if (insn)
+	    {
+	      emit_insn_after (gen_nop (), insn);
+	      return true;
+	    }
+	}
+    }
+  return false;
+}
+
 static void
 dw2_output_call_site_table (int cs_format, int section)
 {
@@ -2749,8 +2803,10 @@ dw2_output_call_site_table (int cs_forma
       ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
 
       if (cs->landing_pad)
-	ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
-				     CODE_LABEL_NUMBER (cs->landing_pad));
+	{
+	  ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
+				       CODE_LABEL_NUMBER (cs->landing_pad));
+	}
 
       /* ??? Perhaps use insn length scaling if the assembler supports
 	 generic arithmetic.  */
Index: except.h
===================================================================
--- except.h	(revision 250312)
+++ except.h	(working copy)
@@ -283,6 +283,7 @@ extern eh_region get_eh_region_from_rtx
 extern eh_landing_pad get_eh_landing_pad_from_rtx (const_rtx);
 
 extern void finish_eh_generation (void);
+extern bool maybe_add_nop_after_section_switch (void);
 
 struct GTY(()) throw_stmt_node {
   gimple *stmt;
Index: final.c
===================================================================
--- final.c	(revision 250312)
+++ final.c	(working copy)
@@ -4581,6 +4582,7 @@ static unsigned int
 rest_of_handle_shorten_branches (void)
 {
   /* Shorten branches.  */
+  maybe_add_nop_after_section_switch ();
   shorten_branches (get_insns ());
   return 0;
 }

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-19 12:52       ` Jan Hubicka
@ 2017-07-19 13:57         ` Richard Biener
  2017-07-19 14:01           ` Jan Hubicka
  0 siblings, 1 reply; 10+ messages in thread
From: Richard Biener @ 2017-07-19 13:57 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches

On Wed, 19 Jul 2017, Jan Hubicka wrote:

> > > I think we could just output from generic code - I think it can be done by
> > > final_scan_insn. I don't know however if we have a way to tell if the section
> > > starts with a landing pad?
> > 
> > Not sure either -- some insn note / bb note?  Some flag on the label?
> > At least the latter should be easy to add if it's not there already.
> > 
> > Richard.
> 
> Hi,
> this is updated patch.  I am now adding NOP_EXPR into the instruction stream.
> This is done before shorten branches so alignment tracking works there as
> expected. 
> Landing pads are having PRESERVE flag set, but that is also true about named
> labels etc.  So I think only safe way is to look them up from the EH tables
> which is not that hard.  first_in_partition is now called on every landing
> pad in the cold section and it walks backward looking if it can be first.  I added
> visited set to be sure it runs in linear time.
> 
> Boostrapped/regtested x86_64-linux, OK?

It looks sensible.  You leak the hash_set and I wonder if you can hook
it in pass_convert_to_eh_region_ranges instead which runs before
rest_of_handle_shorten_branches which means things can be entirely
contained in except.c?

> Honza
> 
> 	* except.c (first_in_partition): New function.
> 	(maybe_add_nop_after_section_switch): New function.
> 	* except.h (maybe_add_nop_after_section_switch): Declare.
> 	* final.c (rest_of_handle_shorten_branches): Use it.
> Index: except.c
> ===================================================================
> --- except.c	(revision 250312)
> +++ except.c	(working copy)
> @@ -2724,6 +2724,60 @@ sjlj_size_of_call_site_table (void)
>    return size;
>  }
>  
> +/* If L is first in the partition return NOTE_INSN_SWITCH_TEXT_SECTIONS
> +   note which starts the section.  */
> +rtx_insn *
> +first_in_partition (rtx_insn *l, hash_set<rtx_insn *> *visited)

Maybe rename to first_active_in_partition?

> +{
> +  while (l != NULL_RTX)
> +    {
> +      if (visited->add (l))
> +	return NULL;

given the insn stream is linear isn't it enough to add all cs->landing_pad
upfront and simply stop at them?  Or mark them with a flag, clearing after
the work?

> +      if (active_insn_p (l)
> +	  && GET_CODE (PATTERN (l)) != ASM_INPUT
> +	  && GET_CODE (PATTERN (l)) != ASM_OPERANDS)
> +	return NULL; 

a comment why we need to ignore ASMs would be nice.

> +      else if (GET_CODE (l) == NOTE
> +	       && NOTE_KIND (l) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> +	return l;
> +      l = PREV_INSN (l);
> +    }
> +  gcc_unreachable ();
> +}
> +
> +/* Return true if NOP needs to be inserted after 
> +   NOTE_INSN_SWITCH_TEXT_SECTIONS.  This is the case when section starts with
> +   EH landing pad. Otherwise the landing pad offset will end up being 0 which
> +   will be interpreted as no landing pad and exceptions will not be delivered.
> + */
> +
> +bool
> +maybe_add_nop_after_section_switch (void)
> +{
> +  if (!crtl->uses_eh_lsda
> +      || !crtl->eh.call_site_record_v[1])
> +    return false;
> +  int n = vec_safe_length (crtl->eh.call_site_record_v[1]);
> +  hash_set<rtx_insn *> visited;
> +
> +  for (int i = 0; i < n; ++i)
> +    {
> +      struct call_site_record_d *cs
> +	 = (*crtl->eh.call_site_record_v[1])[i];
> +      if (cs->landing_pad)
> +	{
> +	  rtx insn = first_in_partition (as_a <rtx_insn *> (cs->landing_pad),
> +					 &visited);

I wonder if inlining first_in_partition would make this more 
understandable

> +	  if (insn)
> +	    {
> +	      emit_insn_after (gen_nop (), insn);
> +	      return true;
> +	    }
> +	}
> +    }
> +  return false;
> +}
> +
>  static void
>  dw2_output_call_site_table (int cs_format, int section)
>  {
> @@ -2749,8 +2803,10 @@ dw2_output_call_site_table (int cs_forma
>        ASM_GENERATE_INTERNAL_LABEL (reg_end_lab, "LEHE", call_site_base + i);
>  
>        if (cs->landing_pad)
> -	ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> -				     CODE_LABEL_NUMBER (cs->landing_pad));
> +	{
> +	  ASM_GENERATE_INTERNAL_LABEL (landing_pad_lab, "L",
> +				       CODE_LABEL_NUMBER (cs->landing_pad));
> +	}
>  
>        /* ??? Perhaps use insn length scaling if the assembler supports
>  	 generic arithmetic.  */
> Index: except.h
> ===================================================================
> --- except.h	(revision 250312)
> +++ except.h	(working copy)
> @@ -283,6 +283,7 @@ extern eh_region get_eh_region_from_rtx
>  extern eh_landing_pad get_eh_landing_pad_from_rtx (const_rtx);
>  
>  extern void finish_eh_generation (void);
> +extern bool maybe_add_nop_after_section_switch (void);
>  
>  struct GTY(()) throw_stmt_node {
>    gimple *stmt;
> Index: final.c
> ===================================================================
> --- final.c	(revision 250312)
> +++ final.c	(working copy)
> @@ -4581,6 +4582,7 @@ static unsigned int
>  rest_of_handle_shorten_branches (void)
>  {
>    /* Shorten branches.  */
> +  maybe_add_nop_after_section_switch ();
>    shorten_branches (get_insns ());
>    return 0;
>  }
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-19 13:57         ` Richard Biener
@ 2017-07-19 14:01           ` Jan Hubicka
  2017-07-19 14:04             ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Hubicka @ 2017-07-19 14:01 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

> On Wed, 19 Jul 2017, Jan Hubicka wrote:
> 
> > > > I think we could just output from generic code - I think it can be done by
> > > > final_scan_insn. I don't know however if we have a way to tell if the section
> > > > starts with a landing pad?
> > > 
> > > Not sure either -- some insn note / bb note?  Some flag on the label?
> > > At least the latter should be easy to add if it's not there already.
> > > 
> > > Richard.
> > 
> > Hi,
> > this is updated patch.  I am now adding NOP_EXPR into the instruction stream.
> > This is done before shorten branches so alignment tracking works there as
> > expected. 
> > Landing pads are having PRESERVE flag set, but that is also true about named
> > labels etc.  So I think only safe way is to look them up from the EH tables
> > which is not that hard.  first_in_partition is now called on every landing
> > pad in the cold section and it walks backward looking if it can be first.  I added
> > visited set to be sure it runs in linear time.
> > 
> > Boostrapped/regtested x86_64-linux, OK?
> 
> It looks sensible.  You leak the hash_set and I wonder if you can hook

Hmm, isn't the hash_set supposed to destruct itself at the end of scope?

> it in pass_convert_to_eh_region_ranges instead which runs before
> rest_of_handle_shorten_branches which means things can be entirely
> contained in except.c?

Yep, that is a good idea.  I did not look into pass.def, just searched
for convenient place to do it before branch relaxation (it probably ought
to run after mdep reorg because no one knows what those will do with the
nop :)

Will re-test updated patch and commit.  Thanks a lot!

Honza

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-19 14:01           ` Jan Hubicka
@ 2017-07-19 14:04             ` Richard Biener
  2017-07-19 14:18               ` Jan Hubicka
  0 siblings, 1 reply; 10+ messages in thread
From: Richard Biener @ 2017-07-19 14:04 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches

On Wed, 19 Jul 2017, Jan Hubicka wrote:

> > On Wed, 19 Jul 2017, Jan Hubicka wrote:
> > 
> > > > > I think we could just output from generic code - I think it can be done by
> > > > > final_scan_insn. I don't know however if we have a way to tell if the section
> > > > > starts with a landing pad?
> > > > 
> > > > Not sure either -- some insn note / bb note?  Some flag on the label?
> > > > At least the latter should be easy to add if it's not there already.
> > > > 
> > > > Richard.
> > > 
> > > Hi,
> > > this is updated patch.  I am now adding NOP_EXPR into the instruction stream.
> > > This is done before shorten branches so alignment tracking works there as
> > > expected. 
> > > Landing pads are having PRESERVE flag set, but that is also true about named
> > > labels etc.  So I think only safe way is to look them up from the EH tables
> > > which is not that hard.  first_in_partition is now called on every landing
> > > pad in the cold section and it walks backward looking if it can be first.  I added
> > > visited set to be sure it runs in linear time.
> > > 
> > > Boostrapped/regtested x86_64-linux, OK?
> > 
> > It looks sensible.  You leak the hash_set and I wonder if you can hook
> 
> Hmm, isn't the hash_set supposed to destruct itself at the end of scope?

Hmm, you are probably right ;)

> > it in pass_convert_to_eh_region_ranges instead which runs before
> > rest_of_handle_shorten_branches which means things can be entirely
> > contained in except.c?
> 
> Yep, that is a good idea.  I did not look into pass.def, just searched
> for convenient place to do it before branch relaxation (it probably ought
> to run after mdep reorg because no one knows what those will do with the
> nop :)
> 
> Will re-test updated patch and commit.  Thanks a lot!

Did you see the inline comments?

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-19 14:04             ` Richard Biener
@ 2017-07-19 14:18               ` Jan Hubicka
  2017-07-19 14:20                 ` Richard Biener
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Hubicka @ 2017-07-19 14:18 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

> Did you see the inline comments?

I missed them, sorry :)
This is updated patch I am testing.

Index: except.c
===================================================================
--- except.c	(revision 250312)
+++ except.c	(working copy)
@@ -2444,6 +2444,61 @@ emit_note_eh_region_end (rtx_insn *insn)
   return emit_note_after (NOTE_INSN_EH_REGION_END, insn);
 }
 
+/* Add NOP after NOTE_INSN_SWITCH_TEXT_SECTIONS when the cold section starts
+   with landing pad.
+   With landing pad being at offset 0 from the start label of the section
+   we would miss EH delivery because 0 is special and means no landing pad.  */
+
+bool
+maybe_add_nop_after_section_switch (void)
+{
+  if (!crtl->uses_eh_lsda
+      || !crtl->eh.call_site_record_v[1])
+    return false;
+  int n = vec_safe_length (crtl->eh.call_site_record_v[1]);
+  hash_set<rtx_insn *> visited;
+
+  for (int i = 0; i < n; ++i)
+    {
+      struct call_site_record_d *cs
+	 = (*crtl->eh.call_site_record_v[1])[i];
+      if (cs->landing_pad)
+	{
+	  rtx_insn *insn = as_a <rtx_insn *> (cs->landing_pad);
+	  while (true)
+	    {
+	      /* Landing pads have LABEL_PRESERVE_P flag set.  This check make
+		 sure that we do not walk past landing pad visited earlier
+		 which would result in possible quadratic behaviour.  */
+	      if (LABEL_P (insn) && LABEL_PRESERVE_P (insn)
+		  && visited.add (insn))
+		break;
+
+	      /* Conservatively assume that ASM insn may be empty.  We have
+		 now way to tell what they contain.  */
+	      if (active_insn_p (insn)
+		  && GET_CODE (PATTERN (insn)) != ASM_INPUT
+		  && GET_CODE (PATTERN (insn)) != ASM_OPERANDS)
+		break;
+
+	      /* If we reached the start of hot section, then NOP will be
+		 needed.  */
+	      if (GET_CODE (insn) == NOTE
+		  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+		{
+		  emit_insn_after (gen_nop (), insn);
+		  break;
+		}
+
+	      /* We visit only labels from cold section.  We should never hit
+		 begining of the insn stream here.  */
+	      insn = PREV_INSN (insn);
+	    }
+	}
+    }
+  return false;
+}
+
 /* Turn REG_EH_REGION notes back into NOTE_INSN_EH_REGION notes.
    The new note numbers will not refer to region numbers, but
    instead to call site entries.  */
@@ -2466,6 +2521,8 @@ convert_to_eh_region_ranges (void)
   rtx_insn *last_no_action_insn_before_switch = NULL;
   int saved_call_site_base = call_site_base;
 
+  maybe_add_nop_after_section_switch ();
+
   vec_alloc (crtl->eh.action_record_data, 64);
 
   for (iter = get_insns (); iter ; iter = NEXT_INSN (iter))

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fix Eh delivery in partitioned functions
  2017-07-19 14:18               ` Jan Hubicka
@ 2017-07-19 14:20                 ` Richard Biener
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Biener @ 2017-07-19 14:20 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches

On Wed, 19 Jul 2017, Jan Hubicka wrote:

> > Did you see the inline comments?
> 
> I missed them, sorry :)
> This is updated patch I am testing.

Ok with adding

+  maybe_add_nop_after_section_switch ();

in pass_convert_to_eh_region_ranges::execute instead.

Richard.

> Index: except.c
> ===================================================================
> --- except.c	(revision 250312)
> +++ except.c	(working copy)
> @@ -2444,6 +2444,61 @@ emit_note_eh_region_end (rtx_insn *insn)
>    return emit_note_after (NOTE_INSN_EH_REGION_END, insn);
>  }
>  
> +/* Add NOP after NOTE_INSN_SWITCH_TEXT_SECTIONS when the cold section starts
> +   with landing pad.
> +   With landing pad being at offset 0 from the start label of the section
> +   we would miss EH delivery because 0 is special and means no landing pad.  */
> +
> +bool
> +maybe_add_nop_after_section_switch (void)
> +{
> +  if (!crtl->uses_eh_lsda
> +      || !crtl->eh.call_site_record_v[1])
> +    return false;
> +  int n = vec_safe_length (crtl->eh.call_site_record_v[1]);
> +  hash_set<rtx_insn *> visited;
> +
> +  for (int i = 0; i < n; ++i)
> +    {
> +      struct call_site_record_d *cs
> +	 = (*crtl->eh.call_site_record_v[1])[i];
> +      if (cs->landing_pad)
> +	{
> +	  rtx_insn *insn = as_a <rtx_insn *> (cs->landing_pad);
> +	  while (true)
> +	    {
> +	      /* Landing pads have LABEL_PRESERVE_P flag set.  This check make
> +		 sure that we do not walk past landing pad visited earlier
> +		 which would result in possible quadratic behaviour.  */
> +	      if (LABEL_P (insn) && LABEL_PRESERVE_P (insn)
> +		  && visited.add (insn))
> +		break;
> +
> +	      /* Conservatively assume that ASM insn may be empty.  We have
> +		 now way to tell what they contain.  */
> +	      if (active_insn_p (insn)
> +		  && GET_CODE (PATTERN (insn)) != ASM_INPUT
> +		  && GET_CODE (PATTERN (insn)) != ASM_OPERANDS)
> +		break;
> +
> +	      /* If we reached the start of hot section, then NOP will be
> +		 needed.  */
> +	      if (GET_CODE (insn) == NOTE
> +		  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
> +		{
> +		  emit_insn_after (gen_nop (), insn);
> +		  break;
> +		}
> +
> +	      /* We visit only labels from cold section.  We should never hit
> +		 begining of the insn stream here.  */
> +	      insn = PREV_INSN (insn);
> +	    }
> +	}
> +    }
> +  return false;
> +}
> +
>  /* Turn REG_EH_REGION notes back into NOTE_INSN_EH_REGION notes.
>     The new note numbers will not refer to region numbers, but
>     instead to call site entries.  */
> @@ -2466,6 +2521,8 @@ convert_to_eh_region_ranges (void)
>    rtx_insn *last_no_action_insn_before_switch = NULL;
>    int saved_call_site_base = call_site_base;
>  
> +  maybe_add_nop_after_section_switch ();
> +
>    vec_alloc (crtl->eh.action_record_data, 64);
>  
>    for (iter = get_insns (); iter ; iter = NEXT_INSN (iter))
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-07-19 14:20 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-18  7:26 Fix Eh delivery in partitioned functions Jan Hubicka
2017-07-18  7:34 ` Richard Biener
2017-07-18  7:44   ` Jan Hubicka
2017-07-18  7:46     ` Richard Biener
2017-07-19 12:52       ` Jan Hubicka
2017-07-19 13:57         ` Richard Biener
2017-07-19 14:01           ` Jan Hubicka
2017-07-19 14:04             ` Richard Biener
2017-07-19 14:18               ` Jan Hubicka
2017-07-19 14:20                 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).