public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Tomas Bily <tbily@suse.cz>
To: Jan Hubicka <jh@suse.cz>
Cc: hubicka@ucw.cz, gcc-patches@gcc.gnu.org, tomby@ucw.cz
Subject: Re: PATCH: Profile driven specialization of indirect/virtual calls
Date: Thu, 11 Jan 2007 15:27:00 -0000	[thread overview]
Message-ID: <20070111152730.GA10462@atrey.karlin.mff.cuni.cz> (raw)
In-Reply-To: <20061223120923.GA8486@atrey.karlin.mff.cuni.cz>

[-- Attachment #1: Type: text/plain, Size: 2712 bytes --]

Hi,

> +   stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, fold_convert (optype, build_addr (direct_call->decl, current_function_decl)));
> 
> Please watch the length of lines.  One of my favorite asepcts of GNU
> coding standard ;)
> +   stmt3 = build3 (COND_EXPR, void_type_node,
> + 	    build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
> + 	    build1 (GOTO_EXPR, void_type_node, label_decl2),
> + 	    build1 (GOTO_EXPR, void_type_node, label_decl1));
> 
> This should be probably indented in the way so the later builds appear
> after the "(" of toplevel build.
> +   /* Fix eh edges */
> +   region = lookup_stmt_eh_region (stmt1);
> +   if (region >= 0 && !TREE_NOTHROW (new_call))
> 
> tree_could_throw (stmt1) is probably canonical way to test whether
> statement needs EH.   
> + /* Find calls inside STMT for that we want to measure histograms for 
> +    indirect/virtual call optimization. */ 
> + static void
> + tree_indirect_call_to_profile (tree stmt, histogram_values *values)
> 
> There should be vertical space in between comment and function (I know
> it is not in a lot
> of my code, I will try to fix it gradually).
ok. corrected.

> +   else
> +     c_node->needed = true;
> 
> You can't just set c_node->needed, you can use cgraph_mark_needed_node (c_node)
> if you really mean to force it to be output into file.  
> But you should not need this at first place, because the function you get called
> at is being output to final file anyway.  Why have you added this?
It's not needed now. Removed.  


> Index: gcc/libgcov.c
> ===================================================================
> *** gcc/libgcov.c	(revision 119879)
> --- gcc/libgcov.c	(working copy)
> *************** __gcov_one_value_profiler (gcov_type *co
> *** 753,758 ****
> --- 753,768 ----
>   }
>   #endif
>   
> + #ifdef L_gcov_indirect_call_profiler
> + /* Tries to determine the most common value among its inputs. */
> + void
> + __gcov_indirect_call_profiler (gcov_type* counter, gcov_type value, void* cur_func, void* callee_func)
> + {
> +   if (cur_func == callee_func)
> +     __gcov_one_value_profiler (counter, value);
> + }
> 
> You want probably to get __gcov_one_value_profiler inlined here t that is
> intentionally not done by the macro machinery.  Probably
> __gcov_one_value_profiler can be moved into static inline function body visible
> for both.
ok.  

> Otherwise the patch seems fine (I would however like to know why you fiddle
> with needed flag first) THere are discussion about tree-prof.exp changes.
> Please ensure before commiting that the tree-prof.exp your new one is based on
> didn't change.
Still same. 

Bootstraped and tested on i686-linux and x86_64-linux.
OK? 

Tomas

[-- Attachment #2: indirect-call.patch --]
[-- Type: text/plain, Size: 29518 bytes --]

Index: gcc/cgraph.c
===================================================================
--- gcc/cgraph.c	(revision 120671)
+++ gcc/cgraph.c	(working copy)
@@ -109,6 +109,9 @@ int cgraph_n_nodes;
 /* Maximal uid used in cgraph nodes.  */
 int cgraph_max_uid;
 
+/* Maximal pid used for profiling */
+int cgraph_max_pid;
+
 /* Set when whole unit has been analyzed so we can access global info.  */
 bool cgraph_global_info_ready = false;
 
@@ -161,6 +164,7 @@ cgraph_create_node (void)
   node = GGC_CNEW (struct cgraph_node);
   node->next = cgraph_nodes;
   node->uid = cgraph_max_uid++;
+  node->pid = -1;
   node->order = cgraph_order++;
   if (cgraph_nodes)
     cgraph_nodes->previous = node;
@@ -654,7 +658,7 @@ void
 dump_cgraph_node (FILE *f, struct cgraph_node *node)
 {
   struct cgraph_edge *edge;
-  fprintf (f, "%s/%i:", cgraph_node_name (node), node->uid);
+  fprintf (f, "%s/%i(%i):", cgraph_node_name (node), node->uid, node->pid);
   if (node->global.inlined_to)
     fprintf (f, " (inline copy in %s/%i)",
 	     cgraph_node_name (node->global.inlined_to),
Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h	(revision 120671)
+++ gcc/cgraph.h	(working copy)
@@ -180,6 +180,10 @@ struct cgraph_node GTY((chain_next ("%h.
      into clone before compiling so the function in original form can be
      inlined later.  This pointer points to the clone.  */
   tree inline_decl;
+
+  /* unique id for profiling. pid is not suitable because of different
+     number of cfg nodes with -fprofile-generate and -fprofile-use */
+  int pid;
 };
 
 struct cgraph_edge GTY((chain_next ("%h.next_caller"), chain_prev ("%h.prev_caller")))
@@ -253,6 +257,7 @@ struct cgraph_asm_node GTY(())
 extern GTY(()) struct cgraph_node *cgraph_nodes;
 extern GTY(()) int cgraph_n_nodes;
 extern GTY(()) int cgraph_max_uid;
+extern GTY(()) int cgraph_max_pid;
 extern bool cgraph_global_info_ready;
 enum cgraph_state
 {
Index: gcc/value-prof.c
===================================================================
--- gcc/value-prof.c	(revision 120671)
+++ gcc/value-prof.c	(working copy)
@@ -40,6 +40,7 @@ Software Foundation, 51 Franklin Street,
 #include "coverage.h"
 #include "tree.h"
 #include "gcov-io.h"
+#include "cgraph.h"
 #include "timevar.h"
 #include "tree-pass.h"
 #include "toplev.h"
@@ -60,6 +61,11 @@ static struct value_prof_hooks *value_pr
       FIXME: This transformation was removed together with RTL based value
       profiling.
 
+   3) Indirect/virtual call specialization. If we can determine most
+      common function callee in indirect/virtual call. We can use this
+      information to improve code effectivity (espetialy info for
+      inliner).
+
    Every such optimization should add its requirements for profiled values to
    insn_values_to_profile function.  This function is called from branch_prob
    in profile.c and the requested values are instrumented by it in the first
@@ -81,6 +87,7 @@ static bool tree_divmod_fixed_value_tran
 static bool tree_mod_pow2_value_transform (tree);
 static bool tree_mod_subtract_transform (tree);
 static bool tree_stringops_transform (block_stmt_iterator *);
+static bool tree_ic_transform (tree);
 
 /* Allocate histogram value.  */
 
@@ -254,6 +261,19 @@ dump_histogram_value (FILE *dump_file, h
 	}
       fprintf (dump_file, ".\n");
       break;
+    case HIST_TYPE_INDIR_CALL:
+      fprintf (dump_file, "Indirect call ");
+      if (hist->hvalue.counters)
+	{
+	   fprintf (dump_file, "value:"HOST_WIDEST_INT_PRINT_DEC
+		    " match:"HOST_WIDEST_INT_PRINT_DEC
+		    " all:"HOST_WIDEST_INT_PRINT_DEC,
+		    (HOST_WIDEST_INT) hist->hvalue.counters[0],
+		    (HOST_WIDEST_INT) hist->hvalue.counters[1],
+		    (HOST_WIDEST_INT) hist->hvalue.counters[2]);
+	}
+      fprintf (dump_file, ".\n");
+      break;
    }
 }
 
@@ -432,7 +452,8 @@ tree_value_profile_transformations (void
 	      && (tree_mod_subtract_transform (stmt)
 		  || tree_divmod_fixed_value_transform (stmt)
 		  || tree_mod_pow2_value_transform (stmt)
-		  || tree_stringops_transform (&bsi)))
+		  || tree_stringops_transform (&bsi)
+		  || tree_ic_transform (stmt)))
 	    {
 	      stmt = bsi_stmt (bsi);
 	      changed = true;
@@ -967,6 +988,201 @@ tree_mod_subtract_transform (tree stmt)
   return true;
 }
 
+static struct cgraph_node** pid_map = NULL;
+
+/* Initialize map of pids (pid -> cgraph node) */
+
+static void 
+init_pid_map (void)
+{
+  struct cgraph_node *n;
+
+  if (pid_map != NULL)
+    return;
+
+  pid_map 
+    = (struct cgraph_node**) xmalloc (sizeof (struct cgraph_node*) * cgraph_max_pid);
+
+  for (n = cgraph_nodes; n; n = n->next)
+    {
+      if (n->pid != -1)
+	pid_map [n->pid] = n;
+    }
+}
+
+/* Return cgraph node for function with pid */
+
+static inline struct cgraph_node*
+find_func_by_pid (int	pid)
+{
+  init_pid_map ();
+
+  return pid_map [pid];
+}
+
+/* Do transformation
+
+  if (actual_callee_addres == addres_of_most_common_function/method)
+    do direct call
+  else
+    old call
+ */
+
+static tree
+tree_ic (tree stmt, tree call, struct cgraph_node* direct_call, 
+	 int prob, gcov_type count, gcov_type all)
+{
+  tree stmt1, stmt2, stmt3;
+  tree tmp1, tmpv;
+  tree label_decl1 = create_artificial_label ();
+  tree label_decl2 = create_artificial_label ();
+  tree label1, label2;
+  tree bb1end, bb2end, bb3end;
+  tree new_call;
+  basic_block bb, bb2, bb3, bb4;
+  tree optype = build_pointer_type (void_type_node);
+  edge e12, e13, e23, e24, e34;
+  block_stmt_iterator bsi;
+  int region;
+
+  bb = bb_for_stmt (stmt);
+  bsi = bsi_for_stmt (stmt);
+
+  tmpv = create_tmp_var (optype, "PROF");
+  tmp1 = create_tmp_var (optype, "PROF");
+  stmt1 = build2 (GIMPLE_MODIFY_STMT, optype, tmpv, 
+		  unshare_expr (TREE_OPERAND (call, 0)));
+  stmt2 = build2 (GIMPLE_MODIFY_STMT, optype, tmp1, 
+		  fold_convert (optype, build_addr (direct_call->decl, 
+						    current_function_decl)));
+  stmt3 = build3 (COND_EXPR, void_type_node,
+		  build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
+		  build1 (GOTO_EXPR, void_type_node, label_decl2),
+		  build1 (GOTO_EXPR, void_type_node, label_decl1));
+  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
+  bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
+  bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
+  bb1end = stmt3;
+
+  label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
+  stmt1 = unshare_expr (stmt);
+  new_call = get_call_expr_in (stmt1);
+  TREE_OPERAND (new_call, 0) = build_addr (direct_call->decl, 
+					   current_function_decl);
+  bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
+  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
+  bb2end = stmt1;
+
+  label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
+  bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
+  bb3end = stmt;
+
+  /* Fix CFG. */
+  /* Edge e23 connects bb2 to bb3, etc. */
+  e12 = split_block (bb, bb1end);
+  bb2 = e12->dest;
+  bb2->count = count;
+  e23 = split_block (bb2, bb2end);
+  bb3 = e23->dest;
+  bb3->count = all - count;
+  e34 = split_block (bb3, bb3end);
+  bb4 = e34->dest;
+  bb4->count = all;
+
+  e12->flags &= ~EDGE_FALLTHRU;
+  e12->flags |= EDGE_FALSE_VALUE;
+  e12->probability = prob;
+  e12->count = count;
+
+  e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
+  e13->probability = REG_BR_PROB_BASE - prob;
+  e13->count = all - count;
+
+  remove_edge (e23);
+  
+  e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
+  e24->probability = REG_BR_PROB_BASE;
+  e24->count = count;
+  e34->probability = REG_BR_PROB_BASE;
+  e34->count = all - count;
+
+  /* Fix eh edges */
+  region = lookup_stmt_eh_region (stmt);
+  if (region >=0 && tree_could_throw_p (stmt1))
+    {
+      add_stmt_to_eh_region (stmt1, region);
+      make_eh_edges (stmt1);
+    }
+
+  if (region >=0 && tree_could_throw_p (stmt))
+    {
+      tree_purge_dead_eh_edges (bb4);
+      make_eh_edges (stmt);
+    }
+
+  return stmt1;
+}
+
+/*
+  For every checked indirect/virtual call determine if most common pid of
+  function/class method has probability more than 50%. If yes modify code of
+  this call to:
+ */
+
+static bool
+tree_ic_transform (tree stmt)
+{
+  histogram_value histogram;
+  gcov_type val, count, all;
+  int prob;
+  tree call, callee, modify;
+  struct cgraph_node *direct_call;
+  
+  call = get_call_expr_in (stmt);
+
+  if (!call || TREE_CODE (call) != CALL_EXPR)
+    return false;
+
+  callee = TREE_OPERAND (call, 0);
+
+  if (TREE_CODE (callee) == ADDR_EXPR)
+    return false;
+
+  histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_INDIR_CALL);
+  if (!histogram)
+    return false;
+
+  val = histogram->hvalue.counters [0];
+  count = histogram->hvalue.counters [1];
+  all = histogram->hvalue.counters [2];
+  gimple_remove_histogram_value (cfun, stmt, histogram);
+
+  if (4 * count <= 3 * all)
+    return false;
+
+  prob = (count * REG_BR_PROB_BASE + all / 2) / all;
+  direct_call = find_func_by_pid ((int)val);
+
+  if (direct_call == NULL)
+    return false;
+
+  modify = tree_ic (stmt, call, direct_call, prob, count, all);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Indirect call -> direct call ");
+      print_generic_expr (dump_file, call, TDF_SLIM);
+      fprintf (dump_file, "=> ");
+      print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
+      fprintf (dump_file, " transformation on insn ");
+      print_generic_stmt (dump_file, stmt, TDF_SLIM);
+      fprintf (dump_file, " to ");
+      print_generic_stmt (dump_file, modify, TDF_SLIM);
+    }
+
+  return true;
+}
+
 /* Return true if the stringop FNDECL with ARGLIST shall be profiled.  */
 static bool
 interesting_stringop_to_profile_p (tree fndecl, tree arglist)
@@ -1260,6 +1476,34 @@ tree_divmod_values_to_profile (tree stmt
     }
 }
 
+/* Find calls inside STMT for that we want to measure histograms for 
+   indirect/virtual call optimization. */ 
+
+static void
+tree_indirect_call_to_profile (tree stmt, histogram_values *values)
+{
+  tree			call;
+  tree			callee;
+
+  call = get_call_expr_in (stmt);
+
+  if (!call || TREE_CODE (call) != CALL_EXPR)
+    return;
+
+  callee = TREE_OPERAND (call, 0);
+  
+  if (TREE_CODE (callee) == ADDR_EXPR)
+    return;
+
+  VEC_reserve (histogram_value, heap, *values, 3);
+
+  VEC_quick_push (histogram_value, *values, 
+		  gimple_alloc_histogram_value (cfun, HIST_TYPE_INDIR_CALL,
+						stmt, callee));
+
+  return;
+}
+
 /* Find values inside STMT for that we want to measure histograms for
    string operations.  */
 static void
@@ -1303,6 +1547,7 @@ tree_values_to_profile (tree stmt, histo
     {
       tree_divmod_values_to_profile (stmt, values);
       tree_stringops_values_to_profile (stmt, values);
+      tree_indirect_call_to_profile (stmt, values);
     }
 }
 
@@ -1339,6 +1584,10 @@ tree_find_values_to_profile (histogram_v
 	  hist->n_counters = 4;
 	  break;
 
+ 	case HIST_TYPE_INDIR_CALL:
+ 	  hist->n_counters = 3;
+	  break;
+
 	default:
 	  gcc_unreachable ();
 	}
Index: gcc/value-prof.h
===================================================================
--- gcc/value-prof.h	(revision 120671)
+++ gcc/value-prof.h	(working copy)
@@ -29,8 +29,10 @@ enum hist_type
   HIST_TYPE_POW2,	/* Histogram of power of 2 values.  */
   HIST_TYPE_SINGLE_VALUE, /* Tries to identify the value that is (almost)
 			   always constant.  */
-  HIST_TYPE_CONST_DELTA	/* Tries to identify the (almost) always constant
+  HIST_TYPE_CONST_DELTA, /* Tries to identify the (almost) always constant
 			   difference between two evaluations of a value.  */
+  HIST_TYPE_INDIR_CALL   /* Tries to identify the function that is (almost) 
+			    called in indirect call */
 };
 
 #define COUNTER_FOR_HIST_TYPE(TYPE) ((int) (TYPE) + GCOV_FIRST_VALUE_COUNTER)
@@ -94,6 +96,9 @@ struct profile_hooks {
   /* Insert code to find the most common value of a difference between two
      evaluations of an expression.  */
   void (*gen_const_delta_profiler) (histogram_value, unsigned, unsigned);
+
+  /* Insert code to find the most common indirect call */
+  void (*gen_ic_profiler) (histogram_value, unsigned, unsigned);
 };
 
 histogram_value gimple_histogram_value (struct function *, tree);
Index: gcc/cgraphunit.c
===================================================================
--- gcc/cgraphunit.c	(revision 120671)
+++ gcc/cgraphunit.c	(working copy)
@@ -444,6 +444,7 @@ cgraph_finalize_function (tree decl, boo
   if (node->local.finalized)
     cgraph_reset_node (node);
 
+  node->pid = cgraph_max_pid ++;
   notice_global_symbol (decl);
   node->decl = decl;
   node->local.finalized = true;
Index: gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c	(revision 0)
@@ -0,0 +1,45 @@
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-tree_profile" } */
+
+static int a1 (void)
+{
+    return 10;
+}
+
+static int a2 (void)
+{
+    return 0;
+}
+
+typedef int (*tp) (void);
+
+static tp aa [] = {a2, a1, a1, a1, a1};
+
+void setp (int (**pp) (void), int i)
+{
+  if (!i)
+    *pp = aa [i];
+  else
+    *pp = aa [(i & 2) + 1];
+}
+
+int
+main (void)
+{
+  int (*p) (void);
+  int  i;
+
+  for (i = 0; i < 10; i ++)
+    {
+	setp (&p, i);
+	p ();
+    }
+  
+  return 0;
+}
+
+/* { dg-final-use { scan-tree-dump "Indirect call -> direct call.* a1 transformation on insn" "tree_profile"} } */
+/* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */                                                                                
+/* { dg-final-use { cleanup-tree-dump "optimized" } } */                                                                                              
+/* { dg-final-use { cleanup-tree-dump "tree_profile" } } */
+                                                                                           
+
Index: gcc/testsuite/g++.dg/dg.exp
===================================================================
--- gcc/testsuite/g++.dg/dg.exp	(revision 120671)
+++ gcc/testsuite/g++.dg/dg.exp	(working copy)
@@ -41,6 +41,7 @@ set tests [prune $tests $srcdir/$subdir/
 set tests [prune $tests $srcdir/$subdir/tls/*]
 set tests [prune $tests $srcdir/$subdir/vect/*]
 set tests [prune $tests $srcdir/$subdir/gomp/*]
+set tests [prune $tests $srcdir/$subdir/tree-prof/*]
 
 # Main loop.
 dg-runtest $tests "" $DEFAULT_CXXFLAGS
Index: gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
===================================================================
--- gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C	(revision 0)
+++ gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C	(revision 0)
@@ -0,0 +1,39 @@
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-tree_profile" } */
+
+struct A {
+  A () {}
+
+  virtual int AA (void)
+  { return 0; }
+
+};
+
+struct B : public A {
+  B () {}
+
+  virtual int AA (void)
+  { return 1; }
+};
+
+int
+main (void)
+{
+  A a;
+  B b;
+  
+  A* p;
+
+  p = &a;
+  p->AA ();
+
+  p = &b;
+  p->AA ();
+  
+  return 0;
+}
+
+/* { dg-final-use { scan-tree-dump "Indirect call -> direct call.* AA transformation on insn" "tree_profile"} } */
+/* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */                                                                                
+/* { dg-final-use { cleanup-tree-dump "optimized" } } */                                                                                              
+/* { dg-final-use { cleanup-tree-dump "tree_profile" } } */                                                                                           
+
Index: gcc/testsuite/g++.dg/tree-prof/tree-prof.exp
===================================================================
--- gcc/testsuite/g++.dg/tree-prof/tree-prof.exp	(revision 0)
+++ gcc/testsuite/g++.dg/tree-prof/tree-prof.exp	(revision 0)
@@ -0,0 +1,53 @@
+#   Copyright (C) 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  
+
+# Test the functionality of programs compiled with profile-directed block
+# ordering using -fprofile-generate followed by -fbranch-use.
+
+load_lib target-supports.exp
+
+# Some targets don't support tree profiling.
+if { ![check_profiling_available ""] } {
+    return
+}
+
+# The procedures in profopt.exp need these parameters.
+set tool g++
+set prof_ext "gcda gcno"
+
+# Override the list defined in profopt.exp.
+set PROFOPT_OPTIONS [list {}]
+
+if $tracelevel then {
+    strace $tracelevel
+}
+
+# Load support procs.
+load_lib profopt.exp
+
+# These are globals used by profopt-execute.  The first is options
+# needed to generate profile data, the second is options to use the
+# profile data.
+set profile_option "-fprofile-generate"
+set feedback_option "-fprofile-use"
+
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.C]] {
+    # If we're only testing specific files and this isn't one of them, skip it.
+    if ![runtest_file_p $runtests $src] then {
+        continue
+    }
+    profopt-execute $src
+}
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h	(revision 120671)
+++ gcc/gcov-io.h	(working copy)
@@ -327,23 +327,26 @@ typedef HOST_WIDEST_INT gcov_type;
 #define GCOV_COUNTER_V_SINGLE	3  /* The most common value of expression.  */
 #define GCOV_COUNTER_V_DELTA	4  /* The most common difference between
 				      consecutive values of expression.  */
-#define GCOV_LAST_VALUE_COUNTER 4  /* The last of counters used for value
+
+#define GCOV_COUNTER_V_INDIR	5  /* The most common indirect address */
+#define GCOV_LAST_VALUE_COUNTER 5  /* The last of counters used for value
 				      profiling.  */
-#define GCOV_COUNTERS		5
+#define GCOV_COUNTERS		6
 
 /* Number of counters used for value profiling.  */
 #define GCOV_N_VALUE_COUNTERS \
   (GCOV_LAST_VALUE_COUNTER - GCOV_FIRST_VALUE_COUNTER + 1)
   
   /* A list of human readable names of the counters */
-#define GCOV_COUNTER_NAMES	{"arcs", "interval", "pow2", "single", "delta"}
+#define GCOV_COUNTER_NAMES	{"arcs", "interval", "pow2", "single", "delta", "indirect_call"}
   
   /* Names of merge functions for counters.  */
 #define GCOV_MERGE_FUNCTIONS	{"__gcov_merge_add",	\
 				 "__gcov_merge_add",	\
 				 "__gcov_merge_add",	\
 				 "__gcov_merge_single",	\
-				 "__gcov_merge_delta"}
+				 "__gcov_merge_delta",  \
+				 "__gcov_merge_single" }
   
 /* Convert a counter index to a tag.  */
 #define GCOV_TAG_FOR_COUNTER(COUNT)				\
Index: gcc/predict.c
===================================================================
--- gcc/predict.c	(revision 120671)
+++ gcc/predict.c	(working copy)
@@ -1638,6 +1638,7 @@ counts_to_freqs (void)
   count_max = MAX (true_count_max, 1);
   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
     bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
+
   return true_count_max;
 }
 
Index: gcc/profile.c
===================================================================
--- gcc/profile.c	(revision 120671)
+++ gcc/profile.c	(working copy)
@@ -192,6 +192,10 @@ instrument_values (histogram_values valu
 	  t = GCOV_COUNTER_V_DELTA;
 	  break;
 
+ 	case HIST_TYPE_INDIR_CALL:
+ 	  t = GCOV_COUNTER_V_INDIR;
+ 	  break;
+
 	default:
 	  gcc_unreachable ();
 	}
@@ -216,6 +220,10 @@ instrument_values (histogram_values valu
 	  (profile_hooks->gen_const_delta_profiler) (hist, t, 0);
 	  break;
 
+ 	case HIST_TYPE_INDIR_CALL:
+ 	  (profile_hooks->gen_ic_profiler) (hist, t, 0);
+  	  break;
+
 	default:
 	  gcc_unreachable ();
 	}
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c	(revision 120671)
+++ gcc/tree-profile.c	(working copy)
@@ -45,15 +45,54 @@ Software Foundation, 51 Franklin Street,
 #include "timevar.h"
 #include "value-prof.h"
 #include "ggc.h"
+#include "cgraph.h"
 
 static GTY(()) tree gcov_type_node;
 static GTY(()) tree tree_interval_profiler_fn;
 static GTY(()) tree tree_pow2_profiler_fn;
 static GTY(()) tree tree_one_value_profiler_fn;
+static GTY(()) tree tree_indirect_call_profiler_fn;
 \f
 
+static GTY(()) tree ic_void_ptr_var;
+static GTY(()) tree ic_gcov_type_ptr_var;
+static GTY(()) tree ptr_void;
+
 /* Do initialization work for the edge profiler.  */
 
+/* Add code:
+   static gcov*	__gcov_indirect_call_counters; // pointer to actual counter
+   static void*	__gcov_indirect_call_callee; // actual callie addres
+*/
+static void
+tree_init_ic_make_global_vars (void)
+{
+  tree  gcov_type_ptr;
+
+  ptr_void = build_pointer_type (void_type_node);
+  
+  ic_void_ptr_var 
+    = build_decl (VAR_DECL, 
+		  get_identifier ("__gcov_indirect_call_callee"), 
+		  ptr_void);
+  TREE_STATIC (ic_void_ptr_var) = 1;
+  TREE_PUBLIC (ic_void_ptr_var) = 0;
+  DECL_ARTIFICIAL (ic_void_ptr_var) = 1;
+  DECL_INITIAL (ic_void_ptr_var) = NULL;
+  assemble_variable (ic_void_ptr_var, 0, 0, 0);
+
+  gcov_type_ptr = build_pointer_type (get_gcov_type ());
+  ic_gcov_type_ptr_var 
+    = build_decl (VAR_DECL, 
+		  get_identifier ("__gcov_indirect_call_counters"), 
+		  gcov_type_ptr);
+  TREE_STATIC (ic_gcov_type_ptr_var) = 1;
+  TREE_PUBLIC (ic_gcov_type_ptr_var) = 0;
+  DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1;
+  DECL_INITIAL (ic_gcov_type_ptr_var) = NULL;
+  assemble_variable (ic_gcov_type_ptr_var, 0, 0, 0);
+}
+
 static void
 tree_init_edge_profiler (void)
 {
@@ -61,6 +100,7 @@ tree_init_edge_profiler (void)
   tree pow2_profiler_fn_type;
   tree one_value_profiler_fn_type;
   tree gcov_type_ptr;
+  tree ic_profiler_fn_type;
 
   if (!gcov_type_node)
     {
@@ -93,6 +133,18 @@ tree_init_edge_profiler (void)
       tree_one_value_profiler_fn
 	      = build_fn_decl ("__gcov_one_value_profiler",
 				     one_value_profiler_fn_type);
+
+      tree_init_ic_make_global_vars ();
+      
+      /* void (*) (gcov_type *, gcov_type, void *, void *)  */
+      ic_profiler_fn_type
+	       = build_function_type_list (void_type_node,
+					  gcov_type_ptr, gcov_type_node,
+					  ptr_void,
+					  ptr_void, NULL_TREE);
+      tree_indirect_call_profiler_fn
+	      = build_fn_decl ("__gcov_indirect_call_profiler",
+				     ic_profiler_fn_type);
     }
 }
 
@@ -201,6 +253,90 @@ tree_gen_one_value_profiler (histogram_v
   bsi_insert_before (&bsi, call, BSI_SAME_STMT);
 }
 
+
+/* Output instructions as GIMPLE trees for code to find the most
+   common called function in indirect call.  
+   VALUE is the call expression whose indirect callie is profiled.
+   TAG is the tag of the section for counters, BASE is offset of the
+   counter position.  */
+
+static void
+tree_gen_ic_profiler (histogram_value value, unsigned tag, unsigned base)
+{
+  tree tmp1, stmt1, stmt2, stmt3;
+  tree stmt = value->hvalue.stmt;
+  block_stmt_iterator bsi = bsi_for_stmt (stmt);
+  tree ref = tree_coverage_counter_ref (tag, base), ref_ptr;
+
+  ref_ptr = force_gimple_operand_bsi (&bsi,
+				      build_addr (ref, current_function_decl),
+				      true, NULL_TREE);
+
+  /* Insert code:
+    
+    __gcov_indirect_call_counters = get_relevant_counter_ptr (); 
+    __gcov_indirect_call_callee = (void *) indirect call argument;
+   */
+
+  tmp1 = create_tmp_var (ptr_void, "PROF");
+  stmt1 = build2 (GIMPLE_MODIFY_STMT, 
+		  build_pointer_type (get_gcov_type ()), 
+		  ic_gcov_type_ptr_var, ref_ptr);
+  stmt2 = build2 (GIMPLE_MODIFY_STMT, ptr_void, tmp1, 
+		  unshare_expr (value->hvalue.value));
+  stmt3 = build2 (GIMPLE_MODIFY_STMT, ptr_void, 
+		  ic_void_ptr_var, tmp1);
+
+  bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
+  bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
+  bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
+}
+
+
+/* Output instructions as GIMPLE trees for code to find the most
+   common called function in indirect call. Insert instructions at the
+   begining of every possible called function.
+  */
+
+static void
+tree_gen_ic_func_profiler (void)
+{
+  struct cgraph_node * c_node = cgraph_node (current_function_decl);
+  block_stmt_iterator bsi;
+  edge e;
+  basic_block bb;
+  edge_iterator ei;
+  tree stmt1;
+  tree args, tree_uid, cur_func;
+
+  if (flag_unit_at_a_time)
+    {
+      if (!c_node->needed)
+	return;
+    }
+  
+  tree_init_edge_profiler ();
+  
+  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
+    {
+      bb = split_edge (e);
+      bsi = bsi_start (bb);
+      cur_func = force_gimple_operand_bsi (&bsi,
+					   build_addr (current_function_decl, 
+						       current_function_decl),
+					   true, NULL_TREE);
+      tree_uid = build_int_cst (gcov_type_node, c_node->pid);
+      args = tree_cons (NULL_TREE, ic_gcov_type_ptr_var,
+			tree_cons (NULL_TREE, tree_uid,
+				   tree_cons (NULL_TREE, cur_func,
+					      tree_cons (NULL_TREE, 
+							 ic_void_ptr_var,
+							 NULL_TREE))));
+      stmt1 = build_function_call_expr (tree_indirect_call_profiler_fn, args);
+      bsi_insert_after (&bsi, stmt1, BSI_SAME_STMT);
+    }
+}
+
 /* Output instructions as GIMPLE trees for code to find the most common value 
    of a difference between two evaluations of an expression.
    VALUE is the expression whose value is profiled.  TAG is the tag of the
@@ -242,6 +378,11 @@ tree_profiling (void)
   if (cgraph_state == CGRAPH_STATE_FINISHED)
     return 0;
   branch_prob ();
+
+  if (! flag_branch_probabilities 
+      && flag_profile_values)
+    tree_gen_ic_func_profiler ();
+
   if (flag_branch_probabilities
       && flag_profile_values
       && flag_value_profile_transformations)
@@ -278,7 +419,8 @@ struct profile_hooks tree_profile_hooks 
   tree_gen_interval_profiler,   /* gen_interval_profiler */
   tree_gen_pow2_profiler,       /* gen_pow2_profiler */
   tree_gen_one_value_profiler,  /* gen_one_value_profiler */
-  tree_gen_const_delta_profiler /* gen_const_delta_profiler */
+  tree_gen_const_delta_profiler,/* gen_const_delta_profiler */
+  tree_gen_ic_profiler,		/* gen_ic_profiler */
 };
 
 #include "gt-tree-profile.h"
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 120671)
+++ gcc/Makefile.in	(working copy)
@@ -1013,7 +1013,8 @@ LIB2FUNCS_ST = _eprintf __gcc_bcmp
 LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta \
     _gcov_fork _gcov_execl _gcov_execlp _gcov_execle \
     _gcov_execv _gcov_execvp _gcov_execve \
-    _gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler
+    _gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler \
+    _gcov_indirect_call_profiler
 
 FPBIT_FUNCS = _pack_sf _unpack_sf _addsub_sf _mul_sf _div_sf \
     _fpcmp_parts_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \
@@ -2354,7 +2355,7 @@ profile.o : profile.c $(CONFIG_H) $(SYST
 tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \
    $(FUNCTION_H) toplev.h $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \
-   tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H) $(GGC_H) gt-tree-profile.h
+   tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H) $(GGC_H) gt-tree-profile.h $(CGRAPH_H)
 value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \
    $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(GGC_H) $(DIAGNOSTIC_H) \
Index: gcc/libgcov.c
===================================================================
--- gcc/libgcov.c	(revision 120671)
+++ gcc/libgcov.c	(working copy)
@@ -726,7 +726,6 @@ __gcov_pow2_profiler (gcov_type *counter
 }
 #endif
 
-#ifdef L_gcov_one_value_profiler
 /* Tries to determine the most common value among its inputs.  Checks if the
    value stored in COUNTERS[0] matches VALUE.  If this is the case, COUNTERS[1]
    is incremented.  If this is not the case and COUNTERS[1] is not zero,
@@ -737,8 +736,8 @@ __gcov_pow2_profiler (gcov_type *counter
 
    In any case, COUNTERS[2] is incremented.  */
 
-void
-__gcov_one_value_profiler (gcov_type *counters, gcov_type value)
+static inline void
+__gcov_one_value_profiler_body (gcov_type *counters, gcov_type value)
 {
   if (value == counters[0])
     counters[1]++;
@@ -751,6 +750,24 @@ __gcov_one_value_profiler (gcov_type *co
     counters[1]--;
   counters[2]++;
 }
+
+#ifdef L_gcov_one_value_profiler
+void
+__gcov_one_value_profiler (gcov_type *counters, gcov_type value)
+{
+  __gcov_one_value_profiler_body (counters, value);
+}
+#endif
+
+#ifdef L_gcov_indirect_call_profiler
+/* Tries to determine the most common value among its inputs. */
+void
+__gcov_indirect_call_profiler (gcov_type* counter, gcov_type value, 
+			       void* cur_func, void* callee_func)
+{
+  if (cur_func == callee_func)
+    __gcov_one_value_profiler_body (counter, value);
+}
 #endif
 
 #ifdef L_gcov_fork

  reply	other threads:[~2007-01-11 15:27 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-12-13  1:55 Tomas Bily
2006-12-13 10:11 ` Richard Guenther
2006-12-13 10:13   ` Jan Hubicka
2006-12-13 10:32     ` Richard Guenther
2006-12-13 17:21       ` Tomas Bily
2006-12-19  3:19         ` Seongbae Park
2006-12-27 23:34           ` Tomas Bily
2006-12-27 23:39             ` Richard Guenther
2006-12-28  0:52               ` Jan Hubicka
2006-12-19  0:15     ` Tomas Bily
2006-12-23 12:09       ` Jan Hubicka
2007-01-11 15:27         ` Tomas Bily [this message]
2007-01-17  0:23           ` Jan Hubicka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070111152730.GA10462@atrey.karlin.mff.cuni.cz \
    --to=tbily@suse.cz \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hubicka@ucw.cz \
    --cc=jh@suse.cz \
    --cc=tomby@ucw.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).