public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Stream profile summary histogram through LTO files (issue6782131)
@ 2012-11-29  4:11 Teresa Johnson
  2012-11-29 16:17 ` Jan Hubicka
  0 siblings, 1 reply; 6+ messages in thread
From: Teresa Johnson @ 2012-11-29  4:11 UTC (permalink / raw)
  To: reply, hubicka, gcc-patches

This patch ensures that the histograms from the profile summary are streamed
through the LTO files so that the working set can be computed for use in
downstream optimizations.

Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?

Thanks,
Teresa

2012-11-28  Teresa Johnson  <tejohnson@google.com>

	* lto-cgraph.c (output_profile_summary): Stream out sum_all
        and histogram.
	(input_profile_summary): Stream in sum_all and histogram.
	(merge_profile_summaries): Merge sum_all and histogram.
	(input_symtab): Call compute_working_sets after merging
        summaries.
	* gcov-io.c (gcov_histo_index): Make extern for compiler.
	* gcov-io.h (gcov_histo_index): Ditto.
	* profile.c (compute_working_sets): Remove static keyword.
	* profile.h (compute_working_sets): Ditto.

Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 193909)
+++ lto-cgraph.c	(working copy)
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-streamer.h"
 #include "gcov-io.h"
 #include "tree-pass.h"
+#include "profile.h"
 
 static void output_cgraph_opt_summary (void);
 static void input_cgraph_opt_summary (vec<symtab_node>  nodes);
@@ -593,14 +594,49 @@ lto_output_ref (struct lto_simple_output_block *ob
 static void
 output_profile_summary (struct lto_simple_output_block *ob)
 {
+  unsigned h_ix, bv_ix, h_cnt = 0;
+  unsigned histo_bitvector[GCOV_HISTOGRAM_BITVECTOR_SIZE];
+
   if (profile_info)
     {
-      /* We do not output num, sum_all and run_max, they are not used by
-	 GCC profile feedback and they are difficult to merge from multiple
-	 units.  */
+      /* We do not output num and run_max, they are not used by
+         GCC profile feedback and they are difficult to merge from multiple
+         units.  */
       gcc_assert (profile_info->runs);
       streamer_write_uhwi_stream (ob->main_stream, profile_info->runs);
       streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max);
+
+      /* sum_all is needed for computing the working set with the
+         histogram.  */
+      streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all);
+
+      /* Count number of non-zero histogram entries, and fill in a bit
+         vector of non-zero indices (one bit per histogram entry, 32 bits
+         per bitvector word).  */
+      for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
+        histo_bitvector[bv_ix] = 0;
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          if (profile_info->histogram[h_ix].num_counters > 0)
+            {
+              histo_bitvector[h_ix / 32] |= 1 << (h_ix % 32);
+              h_cnt++;
+            }
+        }
+      /* Output the bitvector and the non-zero entries.  */
+      for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
+        streamer_write_uhwi_stream (ob->main_stream, histo_bitvector[bv_ix]);
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          if (!profile_info->histogram[h_ix].num_counters)
+            continue;
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].num_counters);
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].min_value);
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].cum_value);
+        }
     }
   else
     streamer_write_uhwi_stream (ob->main_stream, 0);
@@ -1227,11 +1263,58 @@ static void
 input_profile_summary (struct lto_input_block *ib,
 		       struct lto_file_decl_data *file_data)
 {
+  unsigned h_ix, bv_ix, h_cnt = 0;
+  unsigned histo_bitvector[GCOV_HISTOGRAM_BITVECTOR_SIZE];
+  unsigned cur_bitvector;
   unsigned int runs = streamer_read_uhwi (ib);
   if (runs)
     {
       file_data->profile_info.runs = runs;
       file_data->profile_info.sum_max = streamer_read_uhwi (ib);
+      file_data->profile_info.sum_all = streamer_read_uhwi (ib);
+
+      memset (file_data->profile_info.histogram, 0,
+              sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+      /* Input the bitvector of non-zero histogram indices.  */
+      for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
+        {
+          histo_bitvector[bv_ix] = streamer_read_uhwi (ib);
+          h_cnt += __builtin_popcountll (histo_bitvector[bv_ix]);
+        }
+      bv_ix = 0;
+      h_ix = 0;
+      cur_bitvector = 0;
+      while (h_cnt--)
+        {
+          /* Find the index corresponding to the next entry we will read in.
+             First find the next non-zero bitvector and re-initialize
+             the histogram index accordingly, then right shift and increment
+             the index until we find a set bit.  */
+          while (!cur_bitvector)
+            {
+              h_ix = bv_ix * 32;
+              gcc_assert(bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE);
+              cur_bitvector = histo_bitvector[bv_ix++];
+            }
+          while (!(cur_bitvector & 0x1))
+            {
+              h_ix++;
+              cur_bitvector >>= 1;
+            }
+          gcc_assert(h_ix < GCOV_HISTOGRAM_SIZE);
+
+          file_data->profile_info.histogram[h_ix].num_counters
+              = streamer_read_uhwi (ib);
+          file_data->profile_info.histogram[h_ix].min_value
+              = streamer_read_uhwi (ib);
+          file_data->profile_info.histogram[h_ix].cum_value
+              = streamer_read_uhwi (ib);
+
+          /* Shift off the index we are done with and increment to the
+             corresponding next histogram entry.  */
+          cur_bitvector >>= 1;
+          h_ix++;
+        }
     }
 
 }
@@ -1242,10 +1325,13 @@ static void
 merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
 {
   struct lto_file_decl_data *file_data;
-  unsigned int j;
+  unsigned int j, h_ix;
   gcov_unsigned_t max_runs = 0;
   struct cgraph_node *node;
   struct cgraph_edge *edge;
+  gcov_type saved_sum_all = 0;
+  gcov_ctr_summary *saved_profile_info = 0;
+  int saved_scale = 0;
 
   /* Find unit with maximal number of runs.  If we ever get serious about
      roundoff errors, we might also consider computing smallest common
@@ -1269,6 +1355,8 @@ merge_profile_summaries (struct lto_file_decl_data
   profile_info = &lto_gcov_summary;
   lto_gcov_summary.runs = max_runs;
   lto_gcov_summary.sum_max = 0;
+  memset (lto_gcov_summary.histogram, 0,
+          sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
 
   /* Rescale all units to the maximal number of runs.
      sum_max can not be easily merged, as we have no idea what files come from
@@ -1284,8 +1372,46 @@ merge_profile_summaries (struct lto_file_decl_data
 					 * scale
 					 + REG_BR_PROB_BASE / 2)
 					/ REG_BR_PROB_BASE);
+	lto_gcov_summary.sum_all = MAX (lto_gcov_summary.sum_all,
+					(file_data->profile_info.sum_all
+					 * scale
+					 + REG_BR_PROB_BASE / 2)
+					/ REG_BR_PROB_BASE);
+        /* Save a pointer to the profile_info with the largest
+           scaled sum_all and the scale for use in merging the
+           histogram.  */
+        if (lto_gcov_summary.sum_all > saved_sum_all)
+          {
+            saved_profile_info = &file_data->profile_info;
+            saved_sum_all = lto_gcov_summary.sum_all;
+            saved_scale = scale;
+          }
       }
 
+  gcc_assert (saved_profile_info);
+
+  /* Scale up the histogram from the profile that had the largest
+     scaled sum_all above.  */
+  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+    {
+      /* Scale up the min value as we did the corresponding sum_all
+         above. Use that to find the new histogram index.  */
+      int scaled_min = (saved_profile_info->histogram[h_ix].min_value
+                        * saved_scale + REG_BR_PROB_BASE / 2)
+                       / REG_BR_PROB_BASE;
+      unsigned new_ix = gcov_histo_index (scaled_min);
+      lto_gcov_summary.histogram[new_ix].min_value = scaled_min;
+      /* Some of the scaled counter values would ostensibly need to be placed
+         into different (larger) histogram buckets, but we keep things simple
+         here and place the scaled cumulative counter value in the bucket
+         corresponding to the scaled minimum counter value.  */
+      lto_gcov_summary.histogram[new_ix].cum_value
+          = (saved_profile_info->histogram[h_ix].cum_value
+             * saved_scale + REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+      lto_gcov_summary.histogram[new_ix].num_counters
+          = saved_profile_info->histogram[h_ix].num_counters;
+    }
+
   /* Watch roundoff errors.  */
   if (lto_gcov_summary.sum_max < max_runs)
     lto_gcov_summary.sum_max = max_runs;
@@ -1365,7 +1491,9 @@ input_symtab (void)
     }
 
   merge_profile_summaries (file_data_vec);
+  compute_working_sets ();
 
+
   /* Clear out the aux field that was used to store enough state to
      tell which nodes should be overwritten.  */
   FOR_EACH_FUNCTION (node)
Index: gcov-io.c
===================================================================
--- gcov-io.c	(revision 193909)
+++ gcov-io.c	(working copy)
@@ -622,10 +622,10 @@ gcov_time (void)
 }
 #endif /* IN_GCOV */
 
-#if IN_LIBGCOV || !IN_GCOV
+#if !IN_GCOV
 /* Determine the index into histogram for VALUE. */
 
-static unsigned
+GCOV_LINKAGE unsigned
 gcov_histo_index(gcov_type value)
 {
   gcov_type_unsigned v = (gcov_type_unsigned)value;
@@ -801,4 +801,4 @@ static void gcov_histogram_merge(gcov_bucket_type
   /* Finally, copy the merged histogram into tgt_histo.  */
   memcpy(tgt_histo, tmp_histo, sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
 }
-#endif /* IN_LIBGCOV || !IN_GCOV */
+#endif /* !IN_GCOV */
Index: gcov-io.h
===================================================================
--- gcov-io.h	(revision 193909)
+++ gcov-io.h	(working copy)
@@ -608,6 +608,7 @@ GCOV_LINKAGE void gcov_sync (gcov_position_t /*bas
 #if !IN_GCOV
 /* Available outside gcov */
 GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN;
+GCOV_LINKAGE unsigned gcov_histo_index (gcov_type value);
 #endif
 
 #if !IN_GCOV && !IN_LIBGCOV
Index: profile.c
===================================================================
--- profile.c	(revision 193909)
+++ profile.c	(working copy)
@@ -207,7 +207,7 @@ instrument_values (histogram_values values)
    the number of counters required to cover that working set percentage and
    the minimum counter value in that working set.  */
 
-static void
+void
 compute_working_sets (void)
 {
   gcov_type working_set_cum_values[NUM_GCOV_WORKING_SETS];
Index: profile.h
===================================================================
--- profile.h	(revision 193909)
+++ profile.h	(working copy)
@@ -47,4 +47,6 @@ extern gcov_type sum_edge_counts (vec<edge, va_gc>
 extern void init_node_map (void);
 extern void del_node_map (void);
 
+extern void compute_working_sets (void);
+
 #endif /* PROFILE_H */

--
This patch is available for review at http://codereview.appspot.com/6782131

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Stream profile summary histogram through LTO files (issue6782131)
  2012-11-29  4:11 [PATCH] Stream profile summary histogram through LTO files (issue6782131) Teresa Johnson
@ 2012-11-29 16:17 ` Jan Hubicka
  2012-11-29 16:46   ` Teresa Johnson
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Hubicka @ 2012-11-29 16:17 UTC (permalink / raw)
  To: Teresa Johnson; +Cc: reply, hubicka, gcc-patches

> This patch ensures that the histograms from the profile summary are streamed
> through the LTO files so that the working set can be computed for use in
> downstream optimizations.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?
> 
> Thanks,
> Teresa
> 
> 2012-11-28  Teresa Johnson  <tejohnson@google.com>
> 
> 	* lto-cgraph.c (output_profile_summary): Stream out sum_all
>         and histogram.
> 	(input_profile_summary): Stream in sum_all and histogram.
> 	(merge_profile_summaries): Merge sum_all and histogram.
> 	(input_symtab): Call compute_working_sets after merging
>         summaries.
> 	* gcov-io.c (gcov_histo_index): Make extern for compiler.
> 	* gcov-io.h (gcov_histo_index): Ditto.
> 	* profile.c (compute_working_sets): Remove static keyword.
> 	* profile.h (compute_working_sets): Ditto.

OK.
> 
> Index: lto-cgraph.c
> ===================================================================
> --- lto-cgraph.c	(revision 193909)
> +++ lto-cgraph.c	(working copy)
> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-streamer.h"
>  #include "gcov-io.h"
>  #include "tree-pass.h"
> +#include "profile.h"

Please update dependencies in Makefile.in
> +      /* Count number of non-zero histogram entries, and fill in a bit vector
> +         of non-zero indices.  */
> +         counters.  */
> +      for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
> +        histo_bitvector[bv_ix] = 0;
> +      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
> +        {
> +          if (profile_info->histogram[h_ix].num_counters > 0)
> +            {
> +              histo_bitvector[h_ix / 32] |= 1 << (h_ix % 32);
> +              h_cnt++;
> +            }

I think this would be more readable if you just produced a bitpack instead of doing it
by hand, as gcov-io does.
> +	lto_gcov_summary.sum_all = MAX (lto_gcov_summary.sum_all,
> +					(file_data->profile_info.sum_all
> +					 * scale
> +					 + REG_BR_PROB_BASE / 2)
> +					/ REG_BR_PROB_BASE);

Use RDIV for the scaling.
> -#if IN_LIBGCOV || !IN_GCOV
> +#if !IN_GCOV
>  /* Determine the index into histogram for VALUE. */
>  
> -static unsigned
> +GCOV_LINKAGE unsigned
I would probably go to the trouble of declaring this static in GCOV,
so it is inlined and we do not bloat libgcov more than needed.

Thanks,
Honza

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Stream profile summary histogram through LTO files (issue6782131)
  2012-11-29 16:17 ` Jan Hubicka
@ 2012-11-29 16:46   ` Teresa Johnson
  2012-11-29 16:55     ` Jan Hubicka
  0 siblings, 1 reply; 6+ messages in thread
From: Teresa Johnson @ 2012-11-29 16:46 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: reply, gcc-patches

On Thu, Nov 29, 2012 at 8:17 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> This patch ensures that the histograms from the profile summary are streamed
>> through the LTO files so that the working set can be computed for use in
>> downstream optimizations.
>>
>> Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?
>>
>> Thanks,
>> Teresa
>>
>> 2012-11-28  Teresa Johnson  <tejohnson@google.com>
>>
>>       * lto-cgraph.c (output_profile_summary): Stream out sum_all
>>         and histogram.
>>       (input_profile_summary): Stream in sum_all and histogram.
>>       (merge_profile_summaries): Merge sum_all and histogram.
>>       (input_symtab): Call compute_working_sets after merging
>>         summaries.
>>       * gcov-io.c (gcov_histo_index): Make extern for compiler.
>>       * gcov-io.h (gcov_histo_index): Ditto.
>>       * profile.c (compute_working_sets): Remove static keyword.
>>       * profile.h (compute_working_sets): Ditto.
>
> OK.
>>
>> Index: lto-cgraph.c
>> ===================================================================
>> --- lto-cgraph.c      (revision 193909)
>> +++ lto-cgraph.c      (working copy)
>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "tree-streamer.h"
>>  #include "gcov-io.h"
>>  #include "tree-pass.h"
>> +#include "profile.h"
>
> Please update dependencies in Makefile.in

ok.

>> +      /* Count number of non-zero histogram entries, and fill in a bit vector
>> +         of non-zero indices.  */
>> +         counters.  */
>> +      for (bv_ix = 0; bv_ix < GCOV_HISTOGRAM_BITVECTOR_SIZE; bv_ix++)
>> +        histo_bitvector[bv_ix] = 0;
>> +      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
>> +        {
>> +          if (profile_info->histogram[h_ix].num_counters > 0)
>> +            {
>> +              histo_bitvector[h_ix / 32] |= 1 << (h_ix % 32);
>> +              h_cnt++;
>> +            }
>
> I think this would be more readable if you just produced a bitpack instead of doing it
> by hand, like into gcov-io.

I assume you mean use the bitpack streaming functionality used in other
places in lto-cgraph.c, and not change the way it is done in gcov-io.c
where this was cloned from? I will change the lto-cgraph.c code to use
the bitpacking.

>> +     lto_gcov_summary.sum_all = MAX (lto_gcov_summary.sum_all,
>> +                                     (file_data->profile_info.sum_all
>> +                                      * scale
>> +                                      + REG_BR_PROB_BASE / 2)
>> +                                     / REG_BR_PROB_BASE);
>
> Use RDIV for the scaling.

ok. This was cloned from the way we do other scalings in the same
function, I will change them all to use RDIV.

>> -#if IN_LIBGCOV || !IN_GCOV
>> +#if !IN_GCOV
>>  /* Determine the index into histogram for VALUE. */
>>
>> -static unsigned
>> +GCOV_LINKAGE unsigned
> I would probably go around the trouble of declaring this static in GCOV,
> so it is inlined at we do not bload libgcov more than needed.

Do you mean leave it static when IN_LIBGCOV? It isn't included when
IN_GCOV. I just need it extern when in the compiler. So do you mean
make it static when IN_LIBGCOV and GCOV_LINKAGE (i.e. extern) when
!IN_LIBGCOV?

Thanks,
Teresa

>
> Thanks,
> Honza



--
Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Stream profile summary histogram through LTO files (issue6782131)
  2012-11-29 16:46   ` Teresa Johnson
@ 2012-11-29 16:55     ` Jan Hubicka
  0 siblings, 0 replies; 6+ messages in thread
From: Jan Hubicka @ 2012-11-29 16:55 UTC (permalink / raw)
  To: Teresa Johnson; +Cc: Jan Hubicka, reply, gcc-patches

> 
> I assume you mean use the bitpack streaming functionality used other
> places in lto-cgraph.c, and not change the way it is done in gcov-io.c
> where this was cloned from? I will change the lto-cgraph.c code to use
> the bitpacking.

Yes, we don't have the facility on gcov-io, so we need to do that by hand.
> 
> >> +     lto_gcov_summary.sum_all = MAX (lto_gcov_summary.sum_all,
> >> +                                     (file_data->profile_info.sum_all
> >> +                                      * scale
> >> +                                      + REG_BR_PROB_BASE / 2)
> >> +                                     / REG_BR_PROB_BASE);
> >
> > Use RDIV for the scaling.
> 
> ok. This was cloned from the way we do other scalings in the same
> function, I will change them all to use RDIV.

Yes, thanks!
> 
> >> -#if IN_LIBGCOV || !IN_GCOV
> >> +#if !IN_GCOV
> >>  /* Determine the index into histogram for VALUE. */
> >>
> >> -static unsigned
> >> +GCOV_LINKAGE unsigned
> > I would probably go around the trouble of declaring this static in GCOV,
> > so it is inlined at we do not bload libgcov more than needed.
> 
> Do you mean leave it static when IN_LIBGCOV? It isn't included when
> IN_GCOV. I just need it extern when in the compiler. So do you mean
> make it static when IN_LIBGCOV and GCOV_LINKAGE (i.e. extern) when
> !IN_LIBGCOV?

Yes, it should help inliner to do the right things for libgcov...

Thanks,
Honza
> 
> Thanks,
> Teresa
> 
> >
> > Thanks,
> > Honza
> 
> 
> 
> --
> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Stream profile summary histogram through LTO files (issue6782131)
  2012-11-30 15:11 Teresa Johnson
@ 2012-11-30 16:17 ` Jan Hubicka
  0 siblings, 0 replies; 6+ messages in thread
From: Jan Hubicka @ 2012-11-30 16:17 UTC (permalink / raw)
  To: Teresa Johnson; +Cc: reply, hubicka, gcc-patches

> Revised patch to ensure that histograms from the profile summary are streamed
> through the LTO files so that the working set can be computed for use in
> downstream optimizations.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?
> 
> Thanks,
> Teresa
> 
> 2012-11-29  Teresa Johnson  <tejohnson@google.com>
> 
> 	* lto-cgraph.c (output_profile_summary): Stream out sum_all
>         and histogram.
> 	(input_profile_summary): Stream in sum_all and histogram.
> 	(merge_profile_summaries): Merge sum_all and histogram, and
>         change to use RDIV.
> 	(input_symtab): Call compute_working_sets after merging
>         summaries.
> 	* gcov-io.c (gcov_histo_index): Make extern for compiler.
> 	* gcov-io.h (gcov_histo_index): Ditto.
> 	* profile.c (compute_working_sets): Remove static keyword.
> 	* profile.h (compute_working_sets): Ditto.
> 	* Makefile.in (lto-cgraph.o): Depend on profile.h.

OK,
thanks!
Honza

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Stream profile summary histogram through LTO files (issue6782131)
@ 2012-11-30 15:11 Teresa Johnson
  2012-11-30 16:17 ` Jan Hubicka
  0 siblings, 1 reply; 6+ messages in thread
From: Teresa Johnson @ 2012-11-30 15:11 UTC (permalink / raw)
  To: reply, hubicka, gcc-patches

Revised patch to ensure that histograms from the profile summary are streamed
through the LTO files so that the working set can be computed for use in
downstream optimizations.

Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?

Thanks,
Teresa

2012-11-29  Teresa Johnson  <tejohnson@google.com>

	* lto-cgraph.c (output_profile_summary): Stream out sum_all
        and histogram.
	(input_profile_summary): Stream in sum_all and histogram.
	(merge_profile_summaries): Merge sum_all and histogram, and
        change to use RDIV.
	(input_symtab): Call compute_working_sets after merging
        summaries.
	* gcov-io.c (gcov_histo_index): Make extern for compiler.
	* gcov-io.h (gcov_histo_index): Ditto.
	* profile.c (compute_working_sets): Remove static keyword.
	* profile.h (compute_working_sets): Ditto.
	* Makefile.in (lto-cgraph.o): Depend on profile.h.

Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 193909)
+++ lto-cgraph.c	(working copy)
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-streamer.h"
 #include "gcov-io.h"
 #include "tree-pass.h"
+#include "profile.h"
 
 static void output_cgraph_opt_summary (void);
 static void input_cgraph_opt_summary (vec<symtab_node>  nodes);
@@ -593,14 +594,39 @@ lto_output_ref (struct lto_simple_output_block *ob
 static void
 output_profile_summary (struct lto_simple_output_block *ob)
 {
+  unsigned h_ix;
+  struct bitpack_d bp;
+
   if (profile_info)
     {
-      /* We do not output num, sum_all and run_max, they are not used by
-	 GCC profile feedback and they are difficult to merge from multiple
-	 units.  */
+      /* We do not output num and run_max, they are not used by
+         GCC profile feedback and they are difficult to merge from multiple
+         units.  */
       gcc_assert (profile_info->runs);
       streamer_write_uhwi_stream (ob->main_stream, profile_info->runs);
       streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max);
+
+      /* sum_all is needed for computing the working set with the
+         histogram.  */
+      streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all);
+
+      /* Create and output a bitpack of non-zero histogram entries indices.  */
+      bp = bitpack_create (ob->main_stream);
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        bp_pack_value (&bp, profile_info->histogram[h_ix].num_counters > 0, 1);
+      streamer_write_bitpack (&bp);
+      /* Now stream out only those non-zero entries.  */
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          if (!profile_info->histogram[h_ix].num_counters)
+            continue;
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].num_counters);
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].min_value);
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].cum_value);
+        }
     }
   else
     streamer_write_uhwi_stream (ob->main_stream, 0);
@@ -1227,11 +1253,38 @@ static void
 input_profile_summary (struct lto_input_block *ib,
 		       struct lto_file_decl_data *file_data)
 {
+  unsigned h_ix;
+  struct bitpack_d bp;
   unsigned int runs = streamer_read_uhwi (ib);
   if (runs)
     {
       file_data->profile_info.runs = runs;
       file_data->profile_info.sum_max = streamer_read_uhwi (ib);
+      file_data->profile_info.sum_all = streamer_read_uhwi (ib);
+
+      memset (file_data->profile_info.histogram, 0,
+              sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+      /* Input the bitpack of non-zero histogram indices.  */
+      bp = streamer_read_bitpack (ib);
+      /* Read in and unpack the full bitpack, flagging non-zero
+         histogram entries by setting the num_counters non-zero.  */
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          file_data->profile_info.histogram[h_ix].num_counters
+              = bp_unpack_value (&bp, 1);
+        }
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          if (!file_data->profile_info.histogram[h_ix].num_counters)
+            continue;
+
+          file_data->profile_info.histogram[h_ix].num_counters
+              = streamer_read_uhwi (ib);
+          file_data->profile_info.histogram[h_ix].min_value
+              = streamer_read_uhwi (ib);
+          file_data->profile_info.histogram[h_ix].cum_value
+              = streamer_read_uhwi (ib);
+        }
     }
 
 }
@@ -1242,10 +1295,13 @@ static void
 merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
 {
   struct lto_file_decl_data *file_data;
-  unsigned int j;
+  unsigned int j, h_ix;
   gcov_unsigned_t max_runs = 0;
   struct cgraph_node *node;
   struct cgraph_edge *edge;
+  gcov_type saved_sum_all = 0;
+  gcov_ctr_summary *saved_profile_info = 0;
+  int saved_scale = 0;
 
   /* Find unit with maximal number of runs.  If we ever get serious about
      roundoff errors, we might also consider computing smallest common
@@ -1269,6 +1325,8 @@ merge_profile_summaries (struct lto_file_decl_data
   profile_info = &lto_gcov_summary;
   lto_gcov_summary.runs = max_runs;
   lto_gcov_summary.sum_max = 0;
+  memset (lto_gcov_summary.histogram, 0,
+          sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
 
   /* Rescale all units to the maximal number of runs.
      sum_max can not be easily merged, as we have no idea what files come from
@@ -1276,16 +1334,48 @@ merge_profile_summaries (struct lto_file_decl_data
   for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
     if (file_data->profile_info.runs)
       {
-	int scale = ((REG_BR_PROB_BASE * max_runs
-		      + file_data->profile_info.runs / 2)
-		     / file_data->profile_info.runs);
+	int scale = RDIV (REG_BR_PROB_BASE * max_runs,
+                          file_data->profile_info.runs);
 	lto_gcov_summary.sum_max = MAX (lto_gcov_summary.sum_max,
-					(file_data->profile_info.sum_max
-					 * scale
-					 + REG_BR_PROB_BASE / 2)
-					/ REG_BR_PROB_BASE);
+					RDIV (file_data->profile_info.sum_max
+                                              * scale, REG_BR_PROB_BASE));
+	lto_gcov_summary.sum_all = MAX (lto_gcov_summary.sum_all,
+					RDIV (file_data->profile_info.sum_all
+                                              * scale, REG_BR_PROB_BASE));
+        /* Save a pointer to the profile_info with the largest
+           scaled sum_all and the scale for use in merging the
+           histogram.  */
+        if (lto_gcov_summary.sum_all > saved_sum_all)
+          {
+            saved_profile_info = &file_data->profile_info;
+            saved_sum_all = lto_gcov_summary.sum_all;
+            saved_scale = scale;
+          }
       }
 
+  gcc_assert (saved_profile_info);
+
+  /* Scale up the histogram from the profile that had the largest
+     scaled sum_all above.  */
+  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+    {
+      /* Scale up the min value as we did the corresponding sum_all
+         above. Use that to find the new histogram index.  */
+      int scaled_min = RDIV (saved_profile_info->histogram[h_ix].min_value
+                             * saved_scale, REG_BR_PROB_BASE);
+      unsigned new_ix = gcov_histo_index (scaled_min);
+      lto_gcov_summary.histogram[new_ix].min_value = scaled_min;
+      /* Some of the scaled counter values would ostensibly need to be placed
+         into different (larger) histogram buckets, but we keep things simple
+         here and place the scaled cumulative counter value in the bucket
+         corresponding to the scaled minimum counter value.  */
+      lto_gcov_summary.histogram[new_ix].cum_value
+          = RDIV (saved_profile_info->histogram[h_ix].cum_value
+                  * saved_scale, REG_BR_PROB_BASE);
+      lto_gcov_summary.histogram[new_ix].num_counters
+          = saved_profile_info->histogram[h_ix].num_counters;
+    }
+
   /* Watch roundoff errors.  */
   if (lto_gcov_summary.sum_max < max_runs)
     lto_gcov_summary.sum_max = max_runs;
@@ -1303,10 +1393,8 @@ merge_profile_summaries (struct lto_file_decl_data
       {
 	int scale;
 
-	scale =
-	   ((node->count_materialization_scale * max_runs
-	     + node->symbol.lto_file_data->profile_info.runs / 2)
-	    / node->symbol.lto_file_data->profile_info.runs);
+	scale = RDIV (node->count_materialization_scale * max_runs,
+                      node->symbol.lto_file_data->profile_info.runs);
 	node->count_materialization_scale = scale;
 	if (scale < 0)
 	  fatal_error ("Profile information in %s corrupted",
@@ -1315,10 +1403,8 @@ merge_profile_summaries (struct lto_file_decl_data
 	if (scale == REG_BR_PROB_BASE)
 	  continue;
 	for (edge = node->callees; edge; edge = edge->next_callee)
-	  edge->count = ((edge->count * scale + REG_BR_PROB_BASE / 2)
-			 / REG_BR_PROB_BASE);
-	node->count = ((node->count * scale + REG_BR_PROB_BASE / 2)
-		       / REG_BR_PROB_BASE);
+	  edge->count = RDIV (edge->count * scale, REG_BR_PROB_BASE);
+	node->count = RDIV (node->count * scale, REG_BR_PROB_BASE);
       }
 }
 
@@ -1365,7 +1451,9 @@ input_symtab (void)
     }
 
   merge_profile_summaries (file_data_vec);
+  compute_working_sets ();
 
+
   /* Clear out the aux field that was used to store enough state to
      tell which nodes should be overwritten.  */
   FOR_EACH_FUNCTION (node)
Index: gcov-io.c
===================================================================
--- gcov-io.c	(revision 193909)
+++ gcov-io.c	(working copy)
@@ -622,11 +622,15 @@ gcov_time (void)
 }
 #endif /* IN_GCOV */
 
-#if IN_LIBGCOV || !IN_GCOV
+#if !IN_GCOV
 /* Determine the index into histogram for VALUE. */
 
+#if IN_LIBGCOV
 static unsigned
-gcov_histo_index(gcov_type value)
+#else
+GCOV_LINKAGE unsigned
+#endif
+gcov_histo_index (gcov_type value)
 {
   gcov_type_unsigned v = (gcov_type_unsigned)value;
   unsigned r = 0;
@@ -664,8 +668,8 @@ static unsigned
    its entry's original cumulative counter value when computing the
    new merged cum_value.  */
 
-static void gcov_histogram_merge(gcov_bucket_type *tgt_histo,
-                                 gcov_bucket_type *src_histo)
+static void gcov_histogram_merge (gcov_bucket_type *tgt_histo,
+                                  gcov_bucket_type *src_histo)
 {
   int src_i, tgt_i, tmp_i = 0;
   unsigned src_num, tgt_num, merge_num;
@@ -801,4 +805,4 @@ static unsigned
   /* Finally, copy the merged histogram into tgt_histo.  */
   memcpy(tgt_histo, tmp_histo, sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
 }
-#endif /* IN_LIBGCOV || !IN_GCOV */
+#endif /* !IN_GCOV */
Index: gcov-io.h
===================================================================
--- gcov-io.h	(revision 193909)
+++ gcov-io.h	(working copy)
@@ -612,6 +612,7 @@ GCOV_LINKAGE void gcov_write_unsigned (gcov_unsign
 
 #if !IN_GCOV && !IN_LIBGCOV
 /* Available only in compiler */
+GCOV_LINKAGE unsigned gcov_histo_index (gcov_type value);
 GCOV_LINKAGE void gcov_write_string (const char *);
 GCOV_LINKAGE gcov_position_t gcov_write_tag (gcov_unsigned_t);
 GCOV_LINKAGE void gcov_write_length (gcov_position_t /*position*/);
Index: profile.c
===================================================================
--- profile.c	(revision 193909)
+++ profile.c	(working copy)
@@ -207,7 +207,7 @@ instrument_values (histogram_values values)
    the number of counters required to cover that working set percentage and
    the minimum counter value in that working set.  */
 
-static void
+void
 compute_working_sets (void)
 {
   gcov_type working_set_cum_values[NUM_GCOV_WORKING_SETS];
Index: profile.h
===================================================================
--- profile.h	(revision 193909)
+++ profile.h	(working copy)
@@ -47,4 +47,6 @@ extern gcov_type sum_edge_counts (vec<edge, va_gc>
 extern void init_node_map (void);
 extern void del_node_map (void);
 
+extern void compute_working_sets (void);
+
 #endif /* PROFILE_H */
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 193909)
+++ Makefile.in	(working copy)
@@ -2163,7 +2163,7 @@ lto-cgraph.o: lto-cgraph.c $(CONFIG_H) $(SYSTEM_H)
    $(HASHTAB_H) langhooks.h $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(CGRAPH_H) $(FUNCTION_H) $(GGC_H) $(DIAGNOSTIC_CORE_H) \
    $(EXCEPT_H) $(TIMEVAR_H) pointer-set.h $(LTO_STREAMER_H) \
-   $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H) $(TREE_PASS_H)
+   $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H) $(TREE_PASS_H) profile.h
 lto-streamer-in.o: lto-streamer-in.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) toplev.h $(DIAGNOSTIC_CORE_H) $(EXPR_H) $(FLAGS_H) $(PARAMS_H) \
    input.h $(HASHTAB_H) $(BASIC_BLOCK_H) $(TREE_FLOW_H) $(TREE_PASS_H) \

--
This patch is available for review at http://codereview.appspot.com/6782131

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-11-30 16:06 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-11-29  4:11 [PATCH] Stream profile summary histogram through LTO files (issue6782131) Teresa Johnson
2012-11-29 16:17 ` Jan Hubicka
2012-11-29 16:46   ` Teresa Johnson
2012-11-29 16:55     ` Jan Hubicka
2012-11-30 15:11 Teresa Johnson
2012-11-30 16:17 ` Jan Hubicka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).