public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/kubaneko/heads/histogram)] divided histogram_counters.hist into two .exp and .lin, changed histogram maintaining so exp does no
@ 2023-05-02 18:24 Ondrej Kubanek
  0 siblings, 0 replies; only message in thread
From: Ondrej Kubanek @ 2023-05-02 18:24 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3884cb7b0bb77e646948c0f24beabe6f7cef4322

commit 3884cb7b0bb77e646948c0f24beabe6f7cef4322
Author: kubaneko <kubanek0ondrej@gmail.com>
Date:   Tue May 2 18:24:16 2023 +0000

    divided histogram_counters.hist into two .exp and .lin, changed histogram maintaining so exp does not try to recover lin and added some cleanup to histogram versioning

Diff:
---
 gcc/cfgloop.cc                           | 74 +++++++++++++++++---------------
 gcc/cfgloop.h                            |  3 +-
 gcc/cfgloopmanip.cc                      |  9 ++--
 gcc/gimple-loop-versioning-histograms.cc |  6 +--
 gcc/lto-streamer-in.cc                   | 19 ++++----
 gcc/lto-streamer-out.cc                  | 11 +++--
 gcc/profile.cc                           | 21 ++++++---
 gcc/tree-ssa-loop-ivcanon.cc             |  8 ++--
 gcc/tree-vect-loop-manip.cc              | 18 +++++++-
 gcc/tree-vect-loop.cc                    | 14 ++++--
 10 files changed, 115 insertions(+), 68 deletions(-)

diff --git a/gcc/cfgloop.cc b/gcc/cfgloop.cc
index 0f5121117ca..834f41c2dc8 100644
--- a/gcc/cfgloop.cc
+++ b/gcc/cfgloop.cc
@@ -201,7 +201,8 @@ flow_loop_free (class loop *loop)
   ggc_free (loop->exits);
   if (loop->counters)
     {
-      va_heap::release (loop->counters->hist);
+      va_heap::release (loop->counters->lin);
+      va_heap::release (loop->counters->exp);
       if (loop->counters->mod)
 	va_heap::release (loop->counters->mod);
       ggc_free (loop->counters);
@@ -2224,60 +2225,60 @@ histogram_counters_minus_upper_bound (histogram_counters *&hist_c,
     return;
   if (hist_c->sum == 0 && !hist_c->mod)
     {
-      va_heap::release (hist_c->hist);
+      va_heap::release (hist_c->lin);
+      va_heap::release (hist_c->exp);
       ggc_free (hist_c);
       hist_c = NULL;
       return;
     }
-  auto &hist = *(hist_c->hist);
+  auto &lin = *(hist_c->lin);
+  auto &exp = *(hist_c->exp);
   hist_c->adjusted = true;
   auto &sum = hist_c->sum;
   unsigned int lin_size = param_profile_histogram_size_lin;
-  unsigned int tot_size
-    = param_profile_histogram_size_lin + param_profile_histogram_size_exp;
   // If the last linear counter does not contain other iterations
-  unsigned int i = 1;
-  for (; i < lin_size; i++)
+  unsigned int i = 0;
+  for (; i < difference && i < lin.length (); ++i)
     {
-      if (i <= difference)
-	{
-	  sum -= hist[0];
-	  hist[0] = hist[i];
-	}
-      else
-	{
-	  hist[i - difference] += hist[i];
-	}
-      hist[i] = 0;
+      sum -= lin[i];
+      lin[i] = 0;
+    }
+  for (; i < lin.length (); ++i)
+    {
+      lin[i - difference] = lin[i];
+      lin[i] = 0;
     }
+  lin.truncate (lin.length () > difference ? lin.length () - difference : 0);
   // next pow2
   gcov_type_unsigned pow2 = ((gcov_type_unsigned) 1)
 			    << (ceil_log2 (lin_size) + i + 1 - lin_size);
+  i = 0;
   // we null all counters that cannot transfer to non-zero counts
-  for (; pow2 - 1 < difference && i < tot_size - 1; ++i)
+  for (; pow2 - 1 < difference && i < exp.length () - 1; ++i)
     {
-      sum -= hist[0];
-      hist[0] = hist[i];
-      hist[i] = 0;
+      sum -= exp[0];
+      exp[0] = exp[i];
+      exp[i] = 0;
       pow2 = pow2 << 1;
     }
   // if there are no more iterations we do not care
   if (hist_c->sum == 0 && !hist_c->mod)
     {
-      va_heap::release (hist_c->hist);
+      va_heap::release (hist_c->lin);
+      va_heap::release (hist_c->exp);
       ggc_free (hist_c);
       hist_c = NULL;
       return;
     }
   // we want to change index 1/(1<<portion) of iterations
   int portion = 1;
-  for (; i < tot_size - 1 && portion < 10; ++i)
+  for (; i < exp.length () - 1 && portion < 10; ++i)
     {
       // we take a sample point and suppose by uniform distribution that all
       // lesser iterations move same as this point
       gcov_type_unsigned point = (pow2 >> 1) + (pow2 >> (1 + portion));
       unsigned int ind
-	= hist_index (point >= difference ? point - difference : 0);
+	= point >= difference ? hist_index (point - difference) : 0;
       while (ind == i)
 	{
 	  // if nothing changes we decrease the portion of iteration changed
@@ -2285,9 +2286,9 @@ histogram_counters_minus_upper_bound (histogram_counters *&hist_c,
 	  point = (pow2 >> 1) + (pow2 >> (1 + portion));
 	  ind = hist_index (point >= difference ? point - difference : 0);
 	}
-      int64_t diff = hist[i] / (1 << portion);
-      hist[ind] += diff;
-      hist[i] -= diff;
+      int64_t diff = exp[i] / (1 << portion);
+      exp[ind > lin_size ? ind - lin_size : 0] += diff;
+      exp[i] -= diff;
       pow2 <<= 1;
     }
   if (hist_c->mod)
@@ -2315,25 +2316,30 @@ histogram_counters_div_upper_bound (histogram_counters *&hist_c,
     return;
   if (hist_c->sum == 0)
     {
-      va_heap::release (hist_c->hist);
+      va_heap::release (hist_c->lin);
+      va_heap::release (hist_c->exp);
       if (hist_c->mod)
 	va_heap::release (hist_c->mod);
       ggc_free (hist_c);
       hist_c = NULL;
       return;
     }
-  auto &hist = *(hist_c->hist);
+  auto &lin = *(hist_c->lin);
+  auto &exp = *(hist_c->exp);
   hist_c->adjusted = true;
   unsigned int lin_size = param_profile_histogram_size_lin;
   unsigned int tot_size
     = param_profile_histogram_size_lin + param_profile_histogram_size_exp;
   unsigned int i = 1;
-  for (; i < lin_size && i < tot_size - 1; i++)
+  for (; i < lin.length (); i++)
     {
       // we want to take the roof of the division
-      hist[i / divisor + MIN (1, i % divisor)] += hist[i];
-      hist[i] = 0;
+      lin[i / divisor + MIN (1, i % divisor)] += lin[i];
+      lin[i] = 0;
     }
+  lin.truncate (lin.length () ? (lin.length () - 1) / divisor
+				  + MIN (1, (lin.length () - 1) % divisor) + 1
+			      : 0);
 
   for (; i < tot_size - 1; i++)
     {
@@ -2342,8 +2348,8 @@ histogram_counters_div_upper_bound (histogram_counters *&hist_c,
       gcov_type_unsigned half = (upper_pow2 >> 2) + (upper_pow2 >> 1);
       unsigned int ind = hist_index (half / divisor);
       // hist is allways different then i since we know divisor>1
-      hist[ind] += hist[MIN (1, i)];
-      hist[i] = 0;
+      exp[ind > lin_size ? ind - lin_size : 0] += exp[i];
+      exp[i] = 0;
     }
   // we do not try to maintain modulo info when dividing since it is often
   // impossible
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 2c3eee123dc..758976fd51c 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -98,7 +98,8 @@ struct GTY (()) histogram_counters
 {
   bool adjusted;
   gcov_type sum;
-  vec<gcov_type, va_heap, vl_embed> *GTY ((skip)) hist;
+  vec<gcov_type, va_heap, vl_embed> *GTY ((skip)) lin;
+  vec<gcov_type, va_heap, vl_embed> *GTY ((skip)) exp;
   vec<gcov_type, va_heap, vl_embed> *GTY ((skip)) mod;
 };
 
diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
index 42ce1a529b3..60bbe30170c 100644
--- a/gcc/cfgloopmanip.cc
+++ b/gcc/cfgloopmanip.cc
@@ -964,7 +964,8 @@ copy_loop_info (class loop *loop, class loop *target)
       target->counters = ggc_alloc<histogram_counters> ();
       target->counters->sum = loop->counters->sum;
       target->counters->adjusted = loop->counters->adjusted;
-      target->counters->hist = vec_safe_copy (loop->counters->hist);
+      target->counters->lin = vec_safe_copy (loop->counters->lin);
+      target->counters->exp = vec_safe_copy (loop->counters->exp);
       if (loop->counters->mod)
 	target->counters->mod = vec_safe_copy (loop->counters->mod);
     }
@@ -1176,17 +1177,17 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e,
 	       i++)
 	    {
 	      scale_step[i] = profile_probability::always ()
-			      - ((*loop->counters->hist)[i]
+			      - ((*loop->counters->lin)[i]
 				   ? profile_probability::always ()
 				       / (loop->counters->sum - psum)
-				       * (*loop->counters->hist)[i]
+				       * (*loop->counters->lin)[i]
 				   : profile_probability::never ());
 	      // we set whether it was adjusted or is still precise
 	      if (loop->counters->adjusted)
 		{
 		  scale_step[i].adjusted ();
 		}
-	      psum += (*loop->counters->hist)[i];
+	      psum += (*loop->counters->lin)[i];
 	    }
 	  ++i;
 	  for (; i <= ndupl; i++)
diff --git a/gcc/gimple-loop-versioning-histograms.cc b/gcc/gimple-loop-versioning-histograms.cc
index c9e2dc98634..a1e7ab8c0fc 100644
--- a/gcc/gimple-loop-versioning-histograms.cc
+++ b/gcc/gimple-loop-versioning-histograms.cc
@@ -112,9 +112,9 @@ pass_loop_histogram_versioning::execute (function *fn)
       /* Look for dominating iteration count.  */
       for (int i = 0; i < param_profile_histogram_size_lin; i++)
 	{
-	  if ((*(loop->counters->hist))[i] > best_val)
+	  if ((*(loop->counters->lin))[i] > best_val)
 	    {
-	      best_val = (*(loop->counters->hist))[i];
+	      best_val = (*(loop->counters->lin))[i];
 	      best_iters = i;
 	    }
 	}
@@ -238,7 +238,7 @@ pass_loop_histogram_versioning::execute (function *fn)
       /* Update counters of non-optiized loop.
          TODO: also recompute estimated number of itraitons
 	 instead of droping it bellow.  */
-      (*(loop->counters->hist))[best_iters] = 0;
+      (*(loop->counters->lin))[best_iters] = 0;
       loop->counters->sum -= best_val;
       loop->any_estimate = false;
       changed = true;
diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index ccaf10c3e55..fb31ca5d61b 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1131,16 +1131,19 @@ input_cfg (class lto_input_block *ib, class data_in *data_in,
 	{
 	  loop->counters = ggc_alloc<histogram_counters> ();
 	  loop->counters->sum = streamer_read_gcov_count (ib);
-	  loop->counters->hist = NULL;
+	  loop->counters->lin = NULL;
+	  loop->counters->exp = NULL;
 	  loop->counters->mod = NULL;
-	  vec_safe_grow (loop->counters->hist,
-			 param_profile_histogram_size_lin
-			   + param_profile_histogram_size_exp);
-	  for (int i = 0; i < param_profile_histogram_size_lin
-				+ param_profile_histogram_size_exp;
-	       ++i)
+	  int lin_len = streamer_read_hwi (ib);
+	  vec_safe_grow (loop->counters->lin, lin_len);
+	  vec_safe_grow (loop->counters->exp, param_profile_histogram_size_exp);
+	  for (int i = 0; i < lin_len; ++i)
 	    {
-	      (*loop->counters->hist)[i] = streamer_read_gcov_count (ib);
+	      (*loop->counters->lin)[i] = streamer_read_gcov_count (ib);
+	    }
+	  for (int i = 0; i < param_profile_histogram_size_exp; ++i)
+	    {
+	      (*loop->counters->exp)[i] = streamer_read_gcov_count (ib);
 	    }
 	  // do we have modulos
 	  if (streamer_read_hwi (ib))
diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
index 657129080ec..9ccc660dd1c 100644
--- a/gcc/lto-streamer-out.cc
+++ b/gcc/lto-streamer-out.cc
@@ -2185,12 +2185,15 @@ output_cfg (struct output_block *ob, struct function *fn)
       if (loop->counters)
 	{
 	  streamer_write_gcov_count (ob, loop->counters->sum);
+	  streamer_write_hwi (ob, loop->counters->lin->length ());
+	  for (unsigned int i = 0; i < loop->counters->lin->length (); ++i)
+	    {
+	      streamer_write_gcov_count (ob, (*loop->counters->lin)[i]);
+	    }
 	  for (unsigned int i = 0;
-	       i < (unsigned int) (param_profile_histogram_size_lin
-				   + param_profile_histogram_size_exp);
-	       ++i)
+	       i < (unsigned int) param_profile_histogram_size_exp; ++i)
 	    {
-	      streamer_write_gcov_count (ob, (*loop->counters->hist)[i]);
+	      streamer_write_gcov_count (ob, (*loop->counters->exp)[i]);
 	    }
 	  streamer_write_hwi (ob, loop->counters->mod != NULL);
 	  if (loop->counters->mod)
diff --git a/gcc/profile.cc b/gcc/profile.cc
index 4e3f74dfe1d..997b93aff2f 100644
--- a/gcc/profile.cc
+++ b/gcc/profile.cc
@@ -934,12 +934,22 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum,
 	      unsigned int tot_size = param_profile_histogram_size_lin
 				      + param_profile_histogram_size_exp;
 	      lp->counters->adjusted = false;
-	      lp->counters->hist = NULL;
+	      lp->counters->lin = NULL;
+	      lp->counters->exp = NULL;
 	      // iteration vector
-	      vec_safe_grow_cleared (lp->counters->hist, tot_size);
-	      for (int i = 0; i < (int) tot_size; ++i)
+	      vec_safe_grow_cleared (lp->counters->lin,
+				     param_profile_histogram_size_lin);
+	      vec_safe_grow_cleared (lp->counters->exp,
+				     param_profile_histogram_size_exp);
+	      for (int i = 0; i < (int) param_profile_histogram_size_lin; ++i)
 		{
-		  auto hst = lp->counters->hist;
+		  auto hst = lp->counters->lin;
+		  (*hst)[i] = act_count[t][i];
+		  sum += act_count[t][i];
+		}
+	      for (int i = 0; i < (int) param_profile_histogram_size_exp; ++i)
+		{
+		  auto hst = lp->counters->exp;
 		  (*hst)[i] = act_count[t][i];
 		  sum += act_count[t][i];
 		}
@@ -956,7 +966,8 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum,
 		}
 	      if (sum == 0)
 		{
-		  va_heap::release (lp->counters->hist);
+		  va_heap::release (lp->counters->lin);
+		  va_heap::release (lp->counters->exp);
 		  va_heap::release (lp->counters->mod);
 		  ggc_free (lp->counters);
 		  lp->counters = NULL;
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index 1fde151551b..49f0ae080ee 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -1109,16 +1109,16 @@ try_peel_loop (class loop *loop,
     dump_printf_loc (MSG_MISSED_OPTIMIZATION | MSG_PRIORITY_USER_FACING, locus,
 		     "missing loop histogram for loop %d.\n", loop->num);
   HOST_WIDE_INT nonhistogram_npeel = npeel + 1;
-  if (histogram_peeling && loop->counters->sum != 0)
+  if (histogram_peeling && loop->counters->sum != 0
+      && loop->counters->lin->length ())
     {
       gcov_type sum = loop->counters->sum;
       gcov_type rest = sum;
       gcov_type psum = 0;
       int good_percentage = param_profile_histogram_peel_prcnt;
-      /* TODO: Fix.  Profile size needs to be stored in the counters.  */
-      for (int i = 0; i < param_profile_histogram_size_lin; i++)
+      for (int i = 0; i < loop->counters->lin->length (); i++)
 	{
-	  psum += (*(loop->counters->hist))[i];
+	  psum += (*(loop->counters->lin))[i];
 	  // iteration has enough cumulated in partial sum and itself has at
 	  // least 1 percent or we have complete peeling
 	  if ((100 * psum) / sum >= good_percentage || psum == rest)
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 8eb2942b847..f9915338c6a 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2944,7 +2944,14 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       /* TODO: Update counters or at least release vectors.
          We do not produce prologs on modern x86 targets we test on
          so this is not important for proof of concept.  */
-      prolog->counters = NULL;
+      if (prolog->counters)
+	{
+	  va_heap::release (prolog->counters->lin);
+	  va_heap::release (prolog->counters->exp);
+	  if (prolog->counters->mod)
+	    va_heap::release (prolog->counters->mod);
+	  prolog->counters = NULL;
+	}
       delete_update_ssa ();
       adjust_vec_debug_stmts ();
       scev_reset ();
@@ -3082,7 +3089,14 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 	  record_niter_bound (epilog, bound - 1, false, true);
 	}
       /* TODO: Compute epilog histograms or at least release the counters.  */
-      epilog->counters = NULL;
+      if (epilog->counters)
+	{
+	  va_heap::release (epilog->counters->lin);
+	  va_heap::release (epilog->counters->exp);
+	  if (epilog->counters->mod)
+	    va_heap::release (epilog->counters->mod);
+	  epilog->counters = NULL;
+	}
 
       delete_update_ssa ();
       adjust_vec_debug_stmts ();
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 928079a7c9b..0de8ea8965f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2081,8 +2081,10 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo,
       && min_profitable_estimate <= param_profile_histogram_size_lin)
     {
       gcov_type psum = 0;
-      for (int i = 0; i < min_profitable_estimate; i++)
-	psum += (*(loop->counters->hist))[i];
+      for (int i = 0; i < min_profitable_estimate
+		      && i < (int) loop->counters->lin->length ();
+	   i++)
+	psum += (*(loop->counters->lin))[i];
       /* TODO: This is less precise than needed.  If histogram was also
 	 storing number of executions of the loop header (which may
 	 be different from loop->header->count if i.e. inlining happen).
@@ -11141,7 +11143,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
 			   assumed_vf) - 1);
   /* TODO: Fix profile after vectorization.  */
   if (loop->counters)
-    loop->counters = NULL;
+    {
+      va_heap::release (loop->counters->lin);
+      va_heap::release (loop->counters->exp);
+      if (loop->counters->mod)
+      va_heap::release (loop->counters->mod);
+      loop->counters = NULL;
+    }
 
   if (dump_enabled_p ())
     {

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-02 18:24 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-02 18:24 [gcc(refs/users/kubaneko/heads/histogram)] divided histogram_counters.hist into two .exp and .lin, changed histogram maintaining so exp does no Ondrej Kubanek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).