public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Propagate profile counts during switch expansion
@ 2012-10-03  1:09 Easwaran Raman
  2012-10-03 16:12 ` Xinliang David Li
  2012-10-04 13:19 ` Jan Hubicka
  0 siblings, 2 replies; 15+ messages in thread
From: Easwaran Raman @ 2012-10-03  1:09 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jan Hubicka, Steven Bosscher, David Li

Hi,
 This patch propagates the profile counts during RTL expansion. In
many cases, there is no way to determine the exact count of an edge
generated during the expansion. So this patch uses some simple
heuristics to estimate the edge counts but ensures that the counts of
the basic blocks corresponding to the cases are (nearly the) same as
at the gimple level.

Bootstrapped and profile-bootstrapped on an x86_64/linux machine. OK for trunk?

- Easwaran

------
2012-10-02   Easwaran Raman  <eraman@google.com>

* cfgbuild.c (gen_probabilities_from_existing_counts): New function.
(compute_outgoing_frequencies): If at least one successor of a
BB has non-zero profile count, use the counts to compute
probabilities.
* expr.c (do_tablejump): Add a REG_BR_PROB note on the
jump to default label.
(try_tablejump): Add a parameter to specify the probability
of jumping to the default label.
* expr.h (try_tablejump): Add a new parameter.
* stmt.c (case_node): Add new fields COUNT and SUBTREE_COUNT.
(do_jump_if_equal): Pass probability for REG_BR_PROB note.
(add_case_node): Pass execution count of the case node and use
it to initialize COUNT field.
(emit_case_decision_tree): Pass default_count to emit_case_nodes.
(get_outgoing_edge_counts): New function.
(add_prob_note_to_last_insn): Likewise.
(case_probability): New macro.
(emit_case_dispatch_table): Compute probability of jumping to default
label and apply note to the jump.
(expand_case): Compute and propagate default edge count to
emit_case_dispatch_table.
(expand_sjlj_dispatch_table): Update calls to add_case_node and
emit_case_dispatch_table.
(balance_case_nodes): Update subtree_counts.
(emit_case_nodes): Compute edge probabilities and add note.

gcc/testsuite/ChangeLog:
2012-10-02   Easwaran Raman  <eraman@google.com>
* gcc.dg/tree-prof/switch-case-1.c: New test case.
* gcc.dg/tree-prof/switch-case-2.c: New test case.

Index: gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c (revision 0)
@@ -0,0 +1,40 @@
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+int g;
+
+__attribute__((noinline)) void foo (int  n)
+{
+  switch (n)
+    {
+    case 1:
+      g++; break;
+    case 2:
+      g += 2; break;
+    case 3:
+      g += 1; break;
+    case 4:
+      g += 3; break;
+    case 5:
+      g += 4; break;
+    case 6:
+      g += 5; break;
+    case 7:
+      g += 6; break;
+    case 8:
+      g += 7; break;
+    case 9:
+      g += 8; break;
+    default:
+      g += 8; break;
+   }
+}
+
+int main ()
+{
+ int i;
+ for (i = 0; i < 10000; i++)
+   foo ((i * i) % 5);
+ return 0;
+}
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
4000" 2 "expand"} } */
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
2000" 1 "expand"} } */
+/* { dg-final-use { cleanup-rtl-dump "expand" } } */
Index: gcc/testsuite/gcc.dg/tree-prof/switch-case-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-prof/switch-case-2.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-prof/switch-case-2.c (revision 0)
@@ -0,0 +1,40 @@
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+int g;
+
+__attribute__((noinline)) void foo (int  n)
+{
+  switch (n)
+    {
+    case 99:
+      g += 2; break;
+    case 1:
+      g++; break;
+    case 100:
+      g += 1; break;
+    case 4:
+      g += 3; break;
+    case 5:
+      g += 4; break;
+    case 6:
+      g += 5; break;
+    case 7:
+      g += 6; break;
+    case 8:
+      g += 7; break;
+    case 9:
+      g += 8; break;
+    default:
+      g += 8; break;
+   }
+}
+
+int main ()
+{
+ int i;
+ for (i = 0; i < 10000; i++)
+   foo ((i * i) % 5);
+ return 0;
+}
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
4000" 2 "expand"} } */
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
2000" 1 "expand"} } */
+/* { dg-final-use { cleanup-rtl-dump "expand" } } */
Index: gcc/expr.c
===================================================================
--- gcc/expr.c (revision 191879)
+++ gcc/expr.c (working copy)
@@ -154,7 +154,7 @@ static rtx do_store_flag (sepops, rtx, enum machin
 #ifdef PUSH_ROUNDING
 static void emit_single_push_insn (enum machine_mode, rtx, tree);
 #endif
-static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
+static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
 static rtx const_vector_from_tree (tree);
 static void write_complex_part (rtx, rtx, bool);

@@ -10894,7 +10894,7 @@ try_casesi (tree index_type, tree index_expr, tree

 static void
 do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
-      rtx default_label)
+      rtx default_label, int default_probability)
 {
   rtx temp, vector;

@@ -10910,9 +10910,17 @@ do_tablejump (rtx index, enum machine_mode mode, r
      the maximum value of the range.  */

   if (default_label)
-    emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
-     default_label);
+    {
+      emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
+       default_label);
+      if (default_probability != -1)
+        {
+          rtx jump_insn = get_last_insn();
+          add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (default_probability));
+        }
+    }

+
   /* If index is in range, it must fit in Pmode.
      Convert to Pmode so we can index with it.  */
   if (mode != Pmode)
@@ -10954,7 +10962,7 @@ do_tablejump (rtx index, enum machine_mode mode, r

 int
 try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
-       rtx table_label, rtx default_label)
+       rtx table_label, rtx default_label, int default_probability)
 {
   rtx index;

@@ -10972,7 +10980,7 @@ try_tablejump (tree index_type, tree index_expr, t
        TYPE_MODE (TREE_TYPE (range)),
        expand_normal (range),
        TYPE_UNSIGNED (TREE_TYPE (range))),
- table_label, default_label);
+ table_label, default_label, default_probability);
   return 1;
 }

Index: gcc/cfgbuild.c
===================================================================
--- gcc/cfgbuild.c (revision 191879)
+++ gcc/cfgbuild.c (working copy)
@@ -533,6 +533,23 @@ find_bb_boundaries (basic_block bb)
     purge_dead_tablejump_edges (bb, table);
 }

+/* If there is at least one edge in EDGES with a non-zero count, then
+   compute probabilities based on the existing counts.  */
+
+static bool
+gen_probabilities_from_existing_counts ( VEC(edge,gc) *edges) {
+  edge e;
+  edge_iterator ei;
+  gcov_type count_sum = 0;
+  FOR_EACH_EDGE(e, ei, edges)
+    count_sum += e->count;
+  if (count_sum == 0)
+    return false;
+  FOR_EACH_EDGE(e, ei, edges)
+    e->probability = e->count * REG_BR_PROB_BASE / count_sum;
+  return true;
+}
+
 /*  Assume that frequency of basic block B is known.  Compute frequencies
     and probabilities of outgoing edges.  */

@@ -560,7 +577,6 @@ compute_outgoing_frequencies (basic_block b)
   return;
  }
     }
-
   if (single_succ_p (b))
     {
       e = single_succ_edge (b);
@@ -568,7 +584,10 @@ compute_outgoing_frequencies (basic_block b)
       e->count = b->count;
       return;
     }
-  guess_outgoing_edge_probabilities (b);
+  else if (!gen_probabilities_from_existing_counts (b->succs)){
+    /* All outgoing edges of B have zero count. Guess probabilities.  */
+    guess_outgoing_edge_probabilities (b);
+  }
   if (b->count)
     FOR_EACH_EDGE (e, ei, b->succs)
       e->count = ((b->count * e->probability + REG_BR_PROB_BASE / 2)
Index: gcc/expr.h
===================================================================
--- gcc/expr.h (revision 191879)
+++ gcc/expr.h (working copy)
@@ -486,7 +486,7 @@ extern void do_compare_rtx_and_jump (rtx, rtx, enu

 /* Two different ways of generating switch statements.  */
 extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx);
-extern int try_tablejump (tree, tree, tree, tree, rtx, rtx);
+extern int try_tablejump (tree, tree, tree, tree, rtx, rtx, int);

 /* Functions from alias.c */
 #include "alias.h"
Index: gcc/stmt.c
===================================================================
--- gcc/stmt.c (revision 191879)
+++ gcc/stmt.c (working copy)
@@ -94,11 +94,13 @@ struct case_node
   tree low; /* Lowest index value for this label */
   tree high; /* Highest index value for this label */
   tree code_label; /* Label to jump to when node matches */
+  gcov_type             subtree_count, count; /* Execution counts */
 };

 typedef struct case_node case_node;
 typedef struct case_node *case_node_ptr;

+extern basic_block label_to_block_fn (struct function *, tree);

 static int n_occurrences (int, const char *);
 static bool tree_conflicts_with_clobbers_p (tree, HARD_REG_SET *);
@@ -112,7 +114,7 @@ static void balance_case_nodes (case_node_ptr *, c
 static int node_has_low_bound (case_node_ptr, tree);
 static int node_has_high_bound (case_node_ptr, tree);
 static int node_is_bounded (case_node_ptr, tree);
-static void emit_case_nodes (rtx, case_node_ptr, rtx, tree);
+static void emit_case_nodes (rtx, case_node_ptr, rtx, gcov_type, tree);

 /* Return the rtx-label that corresponds to a LABEL_DECL,
    creating it if necessary.  */
@@ -1648,13 +1650,15 @@ expand_stack_restore (tree var)
   fixup_args_size_notes (prev, get_last_insn (), 0);
 }

-/* Generate code to jump to LABEL if OP0 and OP1 are equal in mode MODE.  */
+/* Generate code to jump to LABEL if OP0 and OP1 are equal in mode MODE. PROB
+   is the probability of jumping to LABEL.  */
 static void
 do_jump_if_equal (enum machine_mode mode, rtx op0, rtx op1, rtx label,
-  int unsignedp)
+  int unsignedp, int prob)
 {
+  gcc_assert (prob <= REG_BR_PROB_BASE);
   do_compare_rtx_and_jump (op0, op1, EQ, unsignedp, mode,
-   NULL_RTX, NULL_RTX, label, -1);
+   NULL_RTX, NULL_RTX, label, prob);
 }

 /* Do the insertion of a case label into case_list.  The labels are
@@ -1664,7 +1668,7 @@ do_jump_if_equal (enum machine_mode mode, rtx op0,

 static struct case_node *
 add_case_node (struct case_node *head, tree low, tree high,
-               tree label, alloc_pool case_node_pool)
+               tree label, gcov_type count, alloc_pool case_node_pool)
 {
   struct case_node *r;

@@ -1677,6 +1681,8 @@ add_case_node (struct case_node *head, tree low, t
   r->high = high;
   r->code_label = label;
   r->parent = r->left = NULL;
+  r->count = count;
+  r->subtree_count = count;
   r->right = head;
   return r;
 }
@@ -1803,7 +1809,8 @@ expand_switch_as_decision_tree_p (tree range,

 static void
 emit_case_decision_tree (tree index_expr, tree index_type,
- struct case_node *case_list, rtx default_label)
+ struct case_node *case_list, rtx default_label,
+                         gcov_type default_count)
 {
   rtx index = expand_normal (index_expr);

@@ -1839,15 +1846,29 @@ emit_case_decision_tree (tree index_expr, tree ind
       dump_case_nodes (dump_file, case_list, indent_step, 0);
     }

-  emit_case_nodes (index, case_list, default_label, index_type);
+  emit_case_nodes (index, case_list, default_label, default_count, index_type);
   if (default_label)
     emit_jump (default_label);
 }

+static gcov_type
+get_outgoing_edge_counts (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+  gcov_type count_sum = 0;
+  FOR_EACH_EDGE(e, ei, bb->succs)
+    count_sum += e->count;
+  return count_sum;
+}
+
+#define case_probability(x, y) \
+    ((y) ? (gcc_assert (x <= y), (x) * REG_BR_PROB_BASE  / (y))  : -1)
+
 /* Generate a dispatch tabler, switching on INDEX_EXPR and jumping to
    one of the labels in CASE_LIST or to the DEFAULT_LABEL.
    MINVAL, MAXVAL, and RANGE are the extrema and range of the case
-   labels in CASE_LIST.
+   labels in CASE_LIST. STMT_BB is the basic block containing the statement.

    First, a jump insn is emitted.  First we try "casesi".  If that
    fails, try "tablejump".   A target *must* have one of them (or both).
@@ -1860,19 +1881,23 @@ emit_case_decision_tree (tree index_expr, tree ind
 static void
 emit_case_dispatch_table (tree index_expr, tree index_type,
   struct case_node *case_list, rtx default_label,
-  tree minval, tree maxval, tree range)
+  tree minval, tree maxval, tree range,
+                          basic_block stmt_bb)
 {
   int i, ncases;
   struct case_node *n;
   rtx *labelvec;
   rtx fallback_label = label_rtx (case_list->code_label);
   rtx table_label = gen_label_rtx ();
+  bool has_gaps = false;
+  edge default_edge = EDGE_SUCC(stmt_bb, 0);
+  gcov_type default_count = default_edge->count;
+  gcov_type count = get_outgoing_edge_counts (stmt_bb);
+  bool try_with_tablejump = false;

   if (! try_casesi (index_type, index_expr, minval, range,
     table_label, default_label, fallback_label))
     {
-      bool ok;
-
       /* Index jumptables from zero for suitable values of minval to avoid
  a subtraction.  For the rationale see:
  "http://gcc.gnu.org/ml/gcc-patches/2001-10/msg01234.html".  */
@@ -1882,11 +1907,9 @@ emit_case_dispatch_table (tree index_expr, tree in
  {
   minval = build_int_cst (index_type, 0);
   range = maxval;
+          has_gaps = true;
  }
-
-      ok = try_tablejump (index_type, index_expr, minval, range,
-  table_label, default_label);
-      gcc_assert (ok);
+      try_with_tablejump = true;
     }

   /* Get table of labels to jump to, in order of case index.  */
@@ -1921,8 +1944,47 @@ emit_case_dispatch_table (tree index_expr, tree in
     default_label = fallback_label;
   for (i = 0; i < ncases; i++)
     if (labelvec[i] == 0)
-      labelvec[i] = gen_rtx_LABEL_REF (Pmode, default_label);
+      {
+        has_gaps = true;
+        labelvec[i] = gen_rtx_LABEL_REF (Pmode, default_label);
+      }

+  int default_probability;
+  if (has_gaps)
+    {
+      /* There is at least one entry in the jump table that jumps
+         to default label. The default label can either be reached
+         through the indirect jump or the direct conditional jump
+         before that. Split the probability of reaching the
+         default label among these two jumps.  */
+      default_probability = case_probability (default_count/2,
+                                              count);
+      default_count /= 2;
+      count -= default_count;
+    }
+  else
+    {
+      default_probability = case_probability (default_count,
+                                              count);
+      count -= default_count;
+      default_count = 0;
+    }
+
+  default_edge->count = default_count;
+  if (count)
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
+        e->probability = e->count * REG_BR_PROB_BASE / count;
+    }
+
+  if (try_with_tablejump)
+    {
+      bool ok = try_tablejump (index_type, index_expr, minval, range,
+                               table_label, default_label,
default_probability);
+      gcc_assert (ok);
+    }
   /* Output the table.  */
   emit_label (table_label);

@@ -1956,6 +2018,7 @@ expand_case (gimple stmt)
   tree index_expr = gimple_switch_index (stmt);
   tree index_type = TREE_TYPE (index_expr);
   tree elt;
+  basic_block bb = gimple_bb (stmt);

   /* A list of case labels; it is first built as a list and it may then
      be rearranged into a nearly balanced binary tree.  */
@@ -1981,6 +2044,8 @@ expand_case (gimple stmt)

   /* Find the default case target label.  */
   default_label = label_rtx (CASE_LABEL (gimple_switch_default_label (stmt)));
+  edge default_edge = EDGE_SUCC(bb, 0);
+  gcov_type default_count = default_edge->count;

   /* Get upper and lower bounds of case values.  */
   elt = gimple_switch_label (stmt, 1);
@@ -1999,7 +2064,7 @@ expand_case (gimple stmt)
   uniq = 0;
   count = 0;
   struct pointer_set_t *seen_labels = pointer_set_create ();
-  for (i = gimple_switch_num_labels (stmt) - 1; i >= 1; --i)
+  for (i = ncases - 1; i >= 1; --i)
     {
       elt = gimple_switch_label (stmt, i);
       tree low = CASE_LOW (elt);
@@ -2041,8 +2106,10 @@ expand_case (gimple stmt)
    TREE_INT_CST_LOW (high),
    TREE_INT_CST_HIGH (high));

+      basic_block case_bb = label_to_block_fn (cfun, lab);
+      edge case_edge = find_edge (bb, case_bb);
       case_list = add_case_node (case_list, low, high, lab,
- case_node_pool);
+ case_edge->count, case_node_pool);
     }
   pointer_set_destroy (seen_labels);

@@ -2060,11 +2127,12 @@ expand_case (gimple stmt)

   if (expand_switch_as_decision_tree_p (range, uniq, count))
     emit_case_decision_tree (index_expr, index_type,
-     case_list, default_label);
+                             case_list, default_label,
+                             default_count);
   else
     emit_case_dispatch_table (index_expr, index_type,
       case_list, default_label,
-      minval, maxval, range);
+      minval, maxval, range, bb);

   reorder_insns (NEXT_INSN (before_case), get_last_insn (), before_case);

@@ -2126,7 +2194,7 @@ expand_sjlj_dispatch_table (rtx dispatch_index,
         {
   tree elt = VEC_index (tree, dispatch_table, i);
   rtx lab = label_rtx (CASE_LABEL (elt));
-  do_jump_if_equal (index_mode, index, zero, lab, 0);
+  do_jump_if_equal (index_mode, index, zero, lab, 0, -1);
   force_expand_binop (index_mode, sub_optab,
       index, CONST1_RTX (index_mode),
       index, 0, OPTAB_DIRECT);
@@ -2150,12 +2218,12 @@ expand_sjlj_dispatch_table (rtx dispatch_index,
   tree elt = VEC_index (tree, dispatch_table, i);
   tree low = CASE_LOW (elt);
   tree lab = CASE_LABEL (elt);
-  case_list = add_case_node (case_list, low, low, lab, case_node_pool);
+  case_list = add_case_node (case_list, low, low, lab, 0, case_node_pool);
  }

       emit_case_dispatch_table (index_expr, index_type,
  case_list, default_label,
- minval, maxval, range);
+ minval, maxval, range, NULL);
       emit_label (default_label);
       free_alloc_pool (case_node_pool);
     }
@@ -2237,6 +2305,9 @@ balance_case_nodes (case_node_ptr *head, case_node
   /* Optimize each of the two split parts.  */
   balance_case_nodes (&np->left, np);
   balance_case_nodes (&np->right, np);
+          np->subtree_count = np->count;
+          np->subtree_count += np->left->subtree_count;
+          np->subtree_count += np->right->subtree_count;
  }
       else
  {
@@ -2244,8 +2315,12 @@ balance_case_nodes (case_node_ptr *head, case_node
      but fill in `parent' fields.  */
   np = *head;
   np->parent = parent;
+          np->subtree_count = np->count;
   for (; np->right; np = np->right)
-    np->right->parent = np;
+            {
+      np->right->parent = np;
+              (*head)->subtree_count += np->right->subtree_count;
+            }
  }
     }
 }
@@ -2358,6 +2433,20 @@ node_is_bounded (case_node_ptr node, tree index_ty
   && node_has_high_bound (node, index_type));
 }

+
+/* Attach a REG_BR_PROB note to the last created RTX instruction if
+   PROBABILITY is not -1.  */
+
+static void
+add_prob_note_to_last_insn(int probability)
+{
+  if (probability != -1)
+    {
+      rtx jump_insn = get_last_insn();
+      add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (probability));
+    }
+}
+
 /* Emit step-by-step code to select a case for the value of INDEX.
    The thus generated decision tree follows the form of the
    case-node binary tree NODE, whose nodes represent test conditions.
@@ -2386,10 +2475,12 @@ node_is_bounded (case_node_ptr node, tree index_ty

 static void
 emit_case_nodes (rtx index, case_node_ptr node, rtx default_label,
- tree index_type)
+ gcov_type default_count, tree index_type)
 {
   /* If INDEX has an unsigned type, we must make unsigned branches.  */
   int unsignedp = TYPE_UNSIGNED (index_type);
+  int probability;
+  gcov_type count = node->count, subtree_count = node->subtree_count;
   enum machine_mode mode = GET_MODE (index);
   enum machine_mode imode = TYPE_MODE (index_type);

@@ -2404,15 +2495,17 @@ emit_case_nodes (rtx index, case_node_ptr node, rt

   else if (tree_int_cst_equal (node->low, node->high))
     {
+      probability = case_probability (count, subtree_count + default_count);
       /* Node is single valued.  First see if the index expression matches
  this node and then check our children, if any.  */
-
       do_jump_if_equal (mode, index,
  convert_modes (mode, imode,
        expand_normal (node->low),
        unsignedp),
- label_rtx (node->code_label), unsignedp);
-
+ label_rtx (node->code_label), unsignedp, probability);
+      /* Since this case is taken at this point, reduce its weight from
+         subtree_weight.  */
+      subtree_count -= count;
       if (node->right != 0 && node->left != 0)
  {
   /* This node has children on both sides.
@@ -2430,7 +2523,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        GT, NULL_RTX, mode, unsignedp,
        label_rtx (node->right->code_label));
-      emit_case_nodes (index, node->left, default_label, index_type);
+              probability = case_probability (node->right->count,
+                                              subtree_count + default_count);
+              add_prob_note_to_last_insn (probability);
+      emit_case_nodes (index, node->left, default_label, default_count,
+                               index_type);
     }

   else if (node_is_bounded (node->left, index_type))
@@ -2442,7 +2539,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        LT, NULL_RTX, mode, unsignedp,
        label_rtx (node->left->code_label));
-      emit_case_nodes (index, node->right, default_label, index_type);
+              probability = case_probability (node->left->count,
+                                              subtree_count + default_count);
+              add_prob_note_to_last_insn (probability);
+      emit_case_nodes (index, node->right, default_label,
default_count, index_type);
     }

   /* If both children are single-valued cases with no
@@ -2460,21 +2560,25 @@ emit_case_nodes (rtx index, case_node_ptr node, rt

       /* See if the value matches what the right hand side
  wants.  */
+              probability = case_probability (node->right->count,
+                                              subtree_count + default_count);
       do_jump_if_equal (mode, index,
  convert_modes (mode, imode,
        expand_normal (node->right->low),
        unsignedp),
  label_rtx (node->right->code_label),
- unsignedp);
+ unsignedp, probability);

       /* See if the value matches what the left hand side
  wants.  */
+              probability = case_probability (node->left->count,
+                                              subtree_count + default_count);
       do_jump_if_equal (mode, index,
  convert_modes (mode, imode,
        expand_normal (node->left->low),
        unsignedp),
  label_rtx (node->left->code_label),
- unsignedp);
+ unsignedp, probability);
     }

   else
@@ -2494,10 +2598,18 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        GT, NULL_RTX, mode, unsignedp,
        label_rtx (test_label));
+              /* The default label could be reached either through the right
+                 subtree or the left subtree. Divide the probability
+                 equally.  */
+              probability = case_probability (
+                  node->right->subtree_count + default_count/2,
+                  subtree_count + default_count);
+              default_count /= 2;
+              add_prob_note_to_last_insn (probability);

       /* Value must be on the left.
  Handle the left-hand subtree.  */
-      emit_case_nodes (index, node->left, default_label, index_type);
+      emit_case_nodes (index, node->left, default_label,
default_count, index_type);
       /* If left-hand subtree does nothing,
  go to default.  */
       if (default_label)
@@ -2505,7 +2617,7 @@ emit_case_nodes (rtx index, case_node_ptr node, rt

       /* Code branches here for the right-hand subtree.  */
       expand_label (test_label);
-      emit_case_nodes (index, node->right, default_label, index_type);
+      emit_case_nodes (index, node->right, default_label,
default_count, index_type);
     }
  }

@@ -2530,21 +2642,29 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
     unsignedp),
    LT, NULL_RTX, mode, unsignedp,
    default_label);
+                  probability = case_probability (default_count/2,
+                                                  subtree_count +
default_count);
+                  default_count /= 2;
+                  add_prob_note_to_last_insn (probability);
  }

-      emit_case_nodes (index, node->right, default_label, index_type);
+      emit_case_nodes (index, node->right, default_label,
default_count, index_type);
     }
   else
-    /* We cannot process node->right normally
-       since we haven't ruled out the numbers less than
-       this node's value.  So handle node->right explicitly.  */
-    do_jump_if_equal (mode, index,
-      convert_modes
-      (mode, imode,
-       expand_normal (node->right->low),
-       unsignedp),
-      label_rtx (node->right->code_label), unsignedp);
- }
+            {
+              probability = case_probability (node->right->subtree_count,
+                                              subtree_count + default_count);
+      /* We cannot process node->right normally
+         since we haven't ruled out the numbers less than
+         this node's value.  So handle node->right explicitly.  */
+      do_jump_if_equal (mode, index,
+        convert_modes
+        (mode, imode,
+         expand_normal (node->right->low),
+         unsignedp),
+        label_rtx (node->right->code_label), unsignedp, probability);
+            }
+  }

       else if (node->right == 0 && node->left != 0)
  {
@@ -2561,20 +2681,29 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
     unsignedp),
    GT, NULL_RTX, mode, unsignedp,
    default_label);
+                  probability = case_probability (
+                      default_count/2, subtree_count + default_count);
+                  default_count /= 2;
+                  add_prob_note_to_last_insn (probability);
  }

-      emit_case_nodes (index, node->left, default_label, index_type);
+      emit_case_nodes (index, node->left, default_label,
+                               default_count, index_type);
     }
   else
-    /* We cannot process node->left normally
-       since we haven't ruled out the numbers less than
-       this node's value.  So handle node->left explicitly.  */
-    do_jump_if_equal (mode, index,
-      convert_modes
-      (mode, imode,
-       expand_normal (node->left->low),
-       unsignedp),
-      label_rtx (node->left->code_label), unsignedp);
+            {
+              probability = case_probability (node->left->subtree_count,
+                                              subtree_count + default_count);
+      /* We cannot process node->left normally
+         since we haven't ruled out the numbers less than
+         this node's value.  So handle node->left explicitly.  */
+      do_jump_if_equal (mode, index,
+        convert_modes
+        (mode, imode,
+         expand_normal (node->left->low),
+         unsignedp),
+        label_rtx (node->left->code_label), unsignedp, probability);
+            }
  }
     }
   else
@@ -2593,15 +2722,20 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
   tree test_label = 0;

   if (node_is_bounded (node->right, index_type))
-    /* Right hand node is fully bounded so we can eliminate any
-       testing and branch directly to the target code.  */
-    emit_cmp_and_jump_insns (index,
-     convert_modes
-     (mode, imode,
-      expand_normal (node->high),
-      unsignedp),
-     GT, NULL_RTX, mode, unsignedp,
-     label_rtx (node->right->code_label));
+            {
+      /* Right hand node is fully bounded so we can eliminate any
+         testing and branch directly to the target code.  */
+      emit_cmp_and_jump_insns (index,
+       convert_modes
+       (mode, imode,
+        expand_normal (node->high),
+        unsignedp),
+       GT, NULL_RTX, mode, unsignedp,
+       label_rtx (node->right->code_label));
+              probability = case_probability (node->right->subtree_count,
+                                              subtree_count + default_count);
+              add_prob_note_to_last_insn (probability);
+            }
   else
     {
       /* Right hand node requires testing.
@@ -2616,6 +2750,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        GT, NULL_RTX, mode, unsignedp,
        label_rtx (test_label));
+              probability = case_probability
(node->right->subtree_count + default_count/2,
+                                              subtree_count + default_count);
+              default_count /= 2;
+              add_prob_note_to_last_insn (probability);
     }

   /* Value belongs to this node or to the left-hand subtree.  */
@@ -2627,9 +2765,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
     unsignedp),
    GE, NULL_RTX, mode, unsignedp,
    label_rtx (node->code_label));
+          probability = case_probability (count, subtree_count +
default_count);
+          add_prob_note_to_last_insn (probability);

   /* Handle the left-hand subtree.  */
-  emit_case_nodes (index, node->left, default_label, index_type);
+  emit_case_nodes (index, node->left, default_label, default_count,
index_type);

   /* If right node had to be handled later, do that now.  */

@@ -2641,7 +2781,7 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  emit_jump (default_label);

       expand_label (test_label);
-      emit_case_nodes (index, node->right, default_label, index_type);
+      emit_case_nodes (index, node->right, default_label,
default_count, index_type);
     }
  }

@@ -2658,6 +2798,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        LT, NULL_RTX, mode, unsignedp,
        default_label);
+              probability = case_probability (default_count/2,
+                                              subtree_count + default_count);
+              default_count /= 2;
+              add_prob_note_to_last_insn (probability);
     }

   /* Value belongs to this node or to the right-hand subtree.  */
@@ -2669,8 +2813,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
     unsignedp),
    LE, NULL_RTX, mode, unsignedp,
    label_rtx (node->code_label));
+          probability = case_probability (count, subtree_count +
default_count);
+          add_prob_note_to_last_insn (probability);

-  emit_case_nodes (index, node->right, default_label, index_type);
+  emit_case_nodes (index, node->right, default_label, default_count,
index_type);
  }

       else if (node->right == 0 && node->left != 0)
@@ -2686,6 +2832,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        GT, NULL_RTX, mode, unsignedp,
        default_label);
+              probability = case_probability (default_count/2,
+                                              subtree_count + default_count);
+              default_count /= 2;
+              add_prob_note_to_last_insn (probability);
     }

   /* Value belongs to this node or to the left-hand subtree.  */
@@ -2697,8 +2847,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
     unsignedp),
    GE, NULL_RTX, mode, unsignedp,
    label_rtx (node->code_label));
+          probability = case_probability (count, subtree_count +
default_count);
+          add_prob_note_to_last_insn (probability);

-  emit_case_nodes (index, node->left, default_label, index_type);
+  emit_case_nodes (index, node->left, default_label, default_count,
index_type);
  }

       else
@@ -2718,6 +2870,9 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        GT, NULL_RTX, mode, unsignedp,
        default_label);
+              probability = case_probability (default_count,
+                                              subtree_count + default_count);
+              add_prob_note_to_last_insn (probability);
     }

   else if (!low_bound && high_bound)
@@ -2729,6 +2884,9 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
  unsignedp),
        LT, NULL_RTX, mode, unsignedp,
        default_label);
+              probability = case_probability (default_count,
+                                              subtree_count + default_count);
+              add_prob_note_to_last_insn (probability);
     }
   else if (!low_bound && !high_bound)
     {
@@ -2750,6 +2908,9 @@ emit_case_nodes (rtx index, case_node_ptr node, rt

       emit_cmp_and_jump_insns (new_index, new_bound, GT, NULL_RTX,
        mode, 1, default_label);
+              probability = case_probability (default_count,
+                                              subtree_count + default_count);
+              add_prob_note_to_last_insn (probability);
     }

   emit_jump (label_rtx (node->code_label));

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-03  1:09 Propagate profile counts during switch expansion Easwaran Raman
@ 2012-10-03 16:12 ` Xinliang David Li
  2012-10-03 17:38   ` Steven Bosscher
  2012-10-04 13:19 ` Jan Hubicka
  1 sibling, 1 reply; 15+ messages in thread
From: Xinliang David Li @ 2012-10-03 16:12 UTC (permalink / raw)
  To: Easwaran Raman; +Cc: gcc-patches, Jan Hubicka, Steven Bosscher

What is the status of switch expansion GIMPLE rewrite? If it is not
planned for 4.8, It will be desirable to include this fix into trunk.
It also helps set up a good base line to test against regression.


thanks,

David

On Tue, Oct 2, 2012 at 6:09 PM, Easwaran Raman <eraman@google.com> wrote:
> Hi,
>  This patch propagates the profile counts during RTL expansion. In
> many cases, there is no way to determine the exact count of an edge
> generated during the expansion. So this patch uses some simple
> heuristics to estimate the edge counts but ensures that the counts of
> the basic blocks corresponding to the cases are (nearly the) same as
> at the gimple level.
>
> Bootstrapped and profile-bootstrapped on an x86_64/linux machine. OK for trunk?
>
> - Easwaran
>
> ------
> 2012-10-02   Easwaran Raman  <eraman@google.com>
>
> * cfgbuild.c (gen_probabilities_from_existing_counts): New function.
> (compute_outgoing_frequencies): If at least one successor of a
> BB has non-zero profile count, use the counts to compute
> probabilities.
> * expr.c (do_tablejump): Add a REG_BR_PROB note on the
> jump to default label.
> (try_tablejump): Add a parameter to specify the probability
> of jumping to the default label.
> * expr.h (try_tablejump): Add a new parameter.
> * stmt.c (case_node): Add new fields COUNT and SUBTREE_COUNT.
> (do_jump_if_equal): Pass probability for REG_BR_PROB note.
> (add_case_node): Pass execution count of the case node and use
> it to initialize COUNT field.
> (emit_case_decision_tree): Pass default_count to emit_case_nodes.
> (get_outgoing_edge_counts): New function.
> (add_prob_note_to_last_insn): Likewise.
> (case_probability): New macro.
> (emit_case_dispatch_table): Compute probability of jumping to default
> label and apply note to the jump.
> (expand_case): Compute and propagate default edge count to
> emit_case_dispatch_table.
> (expand_sjlj_dispatch_table): Update calls to add_case_node and
> emit_case_dispatch_table.
> (balance_case_nodes): Update subtree_counts.
> (emit_case_nodes): Compute edge probabilities and add note.
>
> gcc/testsuite/ChangeLog:
> 2012-10-02   Easwaran Raman  <eraman@google.com>
> * gcc.dg/tree-prof/switch-case-1.c: New test case.
> * gcc.dg/tree-prof/switch-case-2.c: New test case.
>
> Index: gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-prof/switch-case-1.c (revision 0)
> @@ -0,0 +1,40 @@
> +/* { dg-options "-O2 -fdump-rtl-expand-all" } */
> +int g;
> +
> +__attribute__((noinline)) void foo (int  n)
> +{
> +  switch (n)
> +    {
> +    case 1:
> +      g++; break;
> +    case 2:
> +      g += 2; break;
> +    case 3:
> +      g += 1; break;
> +    case 4:
> +      g += 3; break;
> +    case 5:
> +      g += 4; break;
> +    case 6:
> +      g += 5; break;
> +    case 7:
> +      g += 6; break;
> +    case 8:
> +      g += 7; break;
> +    case 9:
> +      g += 8; break;
> +    default:
> +      g += 8; break;
> +   }
> +}
> +
> +int main ()
> +{
> + int i;
> + for (i = 0; i < 10000; i++)
> +   foo ((i * i) % 5);
> + return 0;
> +}
> +/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
> 4000" 2 "expand"} } */
> +/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
> 2000" 1 "expand"} } */
> +/* { dg-final-use { cleanup-rtl-dump "expand" } } */
> Index: gcc/testsuite/gcc.dg/tree-prof/switch-case-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-prof/switch-case-2.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-prof/switch-case-2.c (revision 0)
> @@ -0,0 +1,40 @@
> +/* { dg-options "-O2 -fdump-rtl-expand-all" } */
> +int g;
> +
> +__attribute__((noinline)) void foo (int  n)
> +{
> +  switch (n)
> +    {
> +    case 99:
> +      g += 2; break;
> +    case 1:
> +      g++; break;
> +    case 100:
> +      g += 1; break;
> +    case 4:
> +      g += 3; break;
> +    case 5:
> +      g += 4; break;
> +    case 6:
> +      g += 5; break;
> +    case 7:
> +      g += 6; break;
> +    case 8:
> +      g += 7; break;
> +    case 9:
> +      g += 8; break;
> +    default:
> +      g += 8; break;
> +   }
> +}
> +
> +int main ()
> +{
> + int i;
> + for (i = 0; i < 10000; i++)
> +   foo ((i * i) % 5);
> + return 0;
> +}
> +/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
> 4000" 2 "expand"} } */
> +/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count
> 2000" 1 "expand"} } */
> +/* { dg-final-use { cleanup-rtl-dump "expand" } } */
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c (revision 191879)
> +++ gcc/expr.c (working copy)
> @@ -154,7 +154,7 @@ static rtx do_store_flag (sepops, rtx, enum machin
>  #ifdef PUSH_ROUNDING
>  static void emit_single_push_insn (enum machine_mode, rtx, tree);
>  #endif
> -static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
> +static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
>  static rtx const_vector_from_tree (tree);
>  static void write_complex_part (rtx, rtx, bool);
>
> @@ -10894,7 +10894,7 @@ try_casesi (tree index_type, tree index_expr, tree
>
>  static void
>  do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
> -      rtx default_label)
> +      rtx default_label, int default_probability)
>  {
>    rtx temp, vector;
>
> @@ -10910,9 +10910,17 @@ do_tablejump (rtx index, enum machine_mode mode, r
>       the maximum value of the range.  */
>
>    if (default_label)
> -    emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
> -     default_label);
> +    {
> +      emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
> +       default_label);
> +      if (default_probability != -1)
> +        {
> +          rtx jump_insn = get_last_insn();
> +          add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (default_probability));
> +        }
> +    }
>
> +
>    /* If index is in range, it must fit in Pmode.
>       Convert to Pmode so we can index with it.  */
>    if (mode != Pmode)
> @@ -10954,7 +10962,7 @@ do_tablejump (rtx index, enum machine_mode mode, r
>
>  int
>  try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
> -       rtx table_label, rtx default_label)
> +       rtx table_label, rtx default_label, int default_probability)
>  {
>    rtx index;
>
> @@ -10972,7 +10980,7 @@ try_tablejump (tree index_type, tree index_expr, t
>         TYPE_MODE (TREE_TYPE (range)),
>         expand_normal (range),
>         TYPE_UNSIGNED (TREE_TYPE (range))),
> - table_label, default_label);
> + table_label, default_label, default_probability);
>    return 1;
>  }
>
> Index: gcc/cfgbuild.c
> ===================================================================
> --- gcc/cfgbuild.c (revision 191879)
> +++ gcc/cfgbuild.c (working copy)
> @@ -533,6 +533,23 @@ find_bb_boundaries (basic_block bb)
>      purge_dead_tablejump_edges (bb, table);
>  }
>
> +/* If there is at least one edge in EDGES with a non-zero count, then
> +   compute probabilities based on the existing counts.  */
> +
> +static bool
> +gen_probabilities_from_existing_counts ( VEC(edge,gc) *edges) {
> +  edge e;
> +  edge_iterator ei;
> +  gcov_type count_sum = 0;
> +  FOR_EACH_EDGE(e, ei, edges)
> +    count_sum += e->count;
> +  if (count_sum == 0)
> +    return false;
> +  FOR_EACH_EDGE(e, ei, edges)
> +    e->probability = e->count * REG_BR_PROB_BASE / count_sum;
> +  return true;
> +}
> +
>  /*  Assume that frequency of basic block B is known.  Compute frequencies
>      and probabilities of outgoing edges.  */
>
> @@ -560,7 +577,6 @@ compute_outgoing_frequencies (basic_block b)
>    return;
>   }
>      }
> -
>    if (single_succ_p (b))
>      {
>        e = single_succ_edge (b);
> @@ -568,7 +584,10 @@ compute_outgoing_frequencies (basic_block b)
>        e->count = b->count;
>        return;
>      }
> -  guess_outgoing_edge_probabilities (b);
> +  else if (!gen_probabilities_from_existing_counts (b->succs)){
> +    /* All outgoing edges of B have zero count. Guess probabilities.  */
> +    guess_outgoing_edge_probabilities (b);
> +  }
>    if (b->count)
>      FOR_EACH_EDGE (e, ei, b->succs)
>        e->count = ((b->count * e->probability + REG_BR_PROB_BASE / 2)
> Index: gcc/expr.h
> ===================================================================
> --- gcc/expr.h (revision 191879)
> +++ gcc/expr.h (working copy)
> @@ -486,7 +486,7 @@ extern void do_compare_rtx_and_jump (rtx, rtx, enu
>
>  /* Two different ways of generating switch statements.  */
>  extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx);
> -extern int try_tablejump (tree, tree, tree, tree, rtx, rtx);
> +extern int try_tablejump (tree, tree, tree, tree, rtx, rtx, int);
>
>  /* Functions from alias.c */
>  #include "alias.h"
> Index: gcc/stmt.c
> ===================================================================
> --- gcc/stmt.c (revision 191879)
> +++ gcc/stmt.c (working copy)
> @@ -94,11 +94,13 @@ struct case_node
>    tree low; /* Lowest index value for this label */
>    tree high; /* Highest index value for this label */
>    tree code_label; /* Label to jump to when node matches */
> +  gcov_type             subtree_count, count; /* Execution counts */
>  };
>
>  typedef struct case_node case_node;
>  typedef struct case_node *case_node_ptr;
>
> +extern basic_block label_to_block_fn (struct function *, tree);
>
>  static int n_occurrences (int, const char *);
>  static bool tree_conflicts_with_clobbers_p (tree, HARD_REG_SET *);
> @@ -112,7 +114,7 @@ static void balance_case_nodes (case_node_ptr *, c
>  static int node_has_low_bound (case_node_ptr, tree);
>  static int node_has_high_bound (case_node_ptr, tree);
>  static int node_is_bounded (case_node_ptr, tree);
> -static void emit_case_nodes (rtx, case_node_ptr, rtx, tree);
> +static void emit_case_nodes (rtx, case_node_ptr, rtx, gcov_type, tree);
>
>  /* Return the rtx-label that corresponds to a LABEL_DECL,
>     creating it if necessary.  */
> @@ -1648,13 +1650,15 @@ expand_stack_restore (tree var)
>    fixup_args_size_notes (prev, get_last_insn (), 0);
>  }
>
> -/* Generate code to jump to LABEL if OP0 and OP1 are equal in mode MODE.  */
> +/* Generate code to jump to LABEL if OP0 and OP1 are equal in mode MODE. PROB
> +   is the probability of jumping to LABEL.  */
>  static void
>  do_jump_if_equal (enum machine_mode mode, rtx op0, rtx op1, rtx label,
> -  int unsignedp)
> +  int unsignedp, int prob)
>  {
> +  gcc_assert (prob <= REG_BR_PROB_BASE);
>    do_compare_rtx_and_jump (op0, op1, EQ, unsignedp, mode,
> -   NULL_RTX, NULL_RTX, label, -1);
> +   NULL_RTX, NULL_RTX, label, prob);
>  }
>
>  /* Do the insertion of a case label into case_list.  The labels are
> @@ -1664,7 +1668,7 @@ do_jump_if_equal (enum machine_mode mode, rtx op0,
>
>  static struct case_node *
>  add_case_node (struct case_node *head, tree low, tree high,
> -               tree label, alloc_pool case_node_pool)
> +               tree label, gcov_type count, alloc_pool case_node_pool)
>  {
>    struct case_node *r;
>
> @@ -1677,6 +1681,8 @@ add_case_node (struct case_node *head, tree low, t
>    r->high = high;
>    r->code_label = label;
>    r->parent = r->left = NULL;
> +  r->count = count;
> +  r->subtree_count = count;
>    r->right = head;
>    return r;
>  }
> @@ -1803,7 +1809,8 @@ expand_switch_as_decision_tree_p (tree range,
>
>  static void
>  emit_case_decision_tree (tree index_expr, tree index_type,
> - struct case_node *case_list, rtx default_label)
> + struct case_node *case_list, rtx default_label,
> +                         gcov_type default_count)
>  {
>    rtx index = expand_normal (index_expr);
>
> @@ -1839,15 +1846,29 @@ emit_case_decision_tree (tree index_expr, tree ind
>        dump_case_nodes (dump_file, case_list, indent_step, 0);
>      }
>
> -  emit_case_nodes (index, case_list, default_label, index_type);
> +  emit_case_nodes (index, case_list, default_label, default_count, index_type);
>    if (default_label)
>      emit_jump (default_label);
>  }
>
> +static gcov_type
> +get_outgoing_edge_counts (basic_block bb)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  gcov_type count_sum = 0;
> +  FOR_EACH_EDGE(e, ei, bb->succs)
> +    count_sum += e->count;
> +  return count_sum;
> +}
> +
> +#define case_probability(x, y) \
> +    ((y) ? (gcc_assert (x <= y), (x) * REG_BR_PROB_BASE  / (y))  : -1)
> +
>  /* Generate a dispatch tabler, switching on INDEX_EXPR and jumping to
>     one of the labels in CASE_LIST or to the DEFAULT_LABEL.
>     MINVAL, MAXVAL, and RANGE are the extrema and range of the case
> -   labels in CASE_LIST.
> +   labels in CASE_LIST. STMT_BB is the basic block containing the statement.
>
>     First, a jump insn is emitted.  First we try "casesi".  If that
>     fails, try "tablejump".   A target *must* have one of them (or both).
> @@ -1860,19 +1881,23 @@ emit_case_decision_tree (tree index_expr, tree ind
>  static void
>  emit_case_dispatch_table (tree index_expr, tree index_type,
>    struct case_node *case_list, rtx default_label,
> -  tree minval, tree maxval, tree range)
> +  tree minval, tree maxval, tree range,
> +                          basic_block stmt_bb)
>  {
>    int i, ncases;
>    struct case_node *n;
>    rtx *labelvec;
>    rtx fallback_label = label_rtx (case_list->code_label);
>    rtx table_label = gen_label_rtx ();
> +  bool has_gaps = false;
> +  edge default_edge = EDGE_SUCC(stmt_bb, 0);
> +  gcov_type default_count = default_edge->count;
> +  gcov_type count = get_outgoing_edge_counts (stmt_bb);
> +  bool try_with_tablejump = false;
>
>    if (! try_casesi (index_type, index_expr, minval, range,
>      table_label, default_label, fallback_label))
>      {
> -      bool ok;
> -
>        /* Index jumptables from zero for suitable values of minval to avoid
>   a subtraction.  For the rationale see:
>   "http://gcc.gnu.org/ml/gcc-patches/2001-10/msg01234.html".  */
> @@ -1882,11 +1907,9 @@ emit_case_dispatch_table (tree index_expr, tree in
>   {
>    minval = build_int_cst (index_type, 0);
>    range = maxval;
> +          has_gaps = true;
>   }
> -
> -      ok = try_tablejump (index_type, index_expr, minval, range,
> -  table_label, default_label);
> -      gcc_assert (ok);
> +      try_with_tablejump = true;
>      }
>
>    /* Get table of labels to jump to, in order of case index.  */
> @@ -1921,8 +1944,47 @@ emit_case_dispatch_table (tree index_expr, tree in
>      default_label = fallback_label;
>    for (i = 0; i < ncases; i++)
>      if (labelvec[i] == 0)
> -      labelvec[i] = gen_rtx_LABEL_REF (Pmode, default_label);
> +      {
> +        has_gaps = true;
> +        labelvec[i] = gen_rtx_LABEL_REF (Pmode, default_label);
> +      }
>
> +  int default_probability;
> +  if (has_gaps)
> +    {
> +      /* There is at least one entry in the jump table that jumps
> +         to default label. The default label can either be reached
> +         through the indirect jump or the direct conditional jump
> +         before that. Split the probability of reaching the
> +         default label among these two jumps.  */
> +      default_probability = case_probability (default_count/2,
> +                                              count);
> +      default_count /= 2;
> +      count -= default_count;
> +    }
> +  else
> +    {
> +      default_probability = case_probability (default_count,
> +                                              count);
> +      count -= default_count;
> +      default_count = 0;
> +    }
> +
> +  default_edge->count = default_count;
> +  if (count)
> +    {
> +      edge e;
> +      edge_iterator ei;
> +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
> +        e->probability = e->count * REG_BR_PROB_BASE / count;
> +    }
> +
> +  if (try_with_tablejump)
> +    {
> +      bool ok = try_tablejump (index_type, index_expr, minval, range,
> +                               table_label, default_label,
> default_probability);
> +      gcc_assert (ok);
> +    }
>    /* Output the table.  */
>    emit_label (table_label);
>
> @@ -1956,6 +2018,7 @@ expand_case (gimple stmt)
>    tree index_expr = gimple_switch_index (stmt);
>    tree index_type = TREE_TYPE (index_expr);
>    tree elt;
> +  basic_block bb = gimple_bb (stmt);
>
>    /* A list of case labels; it is first built as a list and it may then
>       be rearranged into a nearly balanced binary tree.  */
> @@ -1981,6 +2044,8 @@ expand_case (gimple stmt)
>
>    /* Find the default case target label.  */
>    default_label = label_rtx (CASE_LABEL (gimple_switch_default_label (stmt)));
> +  edge default_edge = EDGE_SUCC(bb, 0);
> +  gcov_type default_count = default_edge->count;
>
>    /* Get upper and lower bounds of case values.  */
>    elt = gimple_switch_label (stmt, 1);
> @@ -1999,7 +2064,7 @@ expand_case (gimple stmt)
>    uniq = 0;
>    count = 0;
>    struct pointer_set_t *seen_labels = pointer_set_create ();
> -  for (i = gimple_switch_num_labels (stmt) - 1; i >= 1; --i)
> +  for (i = ncases - 1; i >= 1; --i)
>      {
>        elt = gimple_switch_label (stmt, i);
>        tree low = CASE_LOW (elt);
> @@ -2041,8 +2106,10 @@ expand_case (gimple stmt)
>     TREE_INT_CST_LOW (high),
>     TREE_INT_CST_HIGH (high));
>
> +      basic_block case_bb = label_to_block_fn (cfun, lab);
> +      edge case_edge = find_edge (bb, case_bb);
>        case_list = add_case_node (case_list, low, high, lab,
> - case_node_pool);
> + case_edge->count, case_node_pool);
>      }
>    pointer_set_destroy (seen_labels);
>
> @@ -2060,11 +2127,12 @@ expand_case (gimple stmt)
>
>    if (expand_switch_as_decision_tree_p (range, uniq, count))
>      emit_case_decision_tree (index_expr, index_type,
> -     case_list, default_label);
> +                             case_list, default_label,
> +                             default_count);
>    else
>      emit_case_dispatch_table (index_expr, index_type,
>        case_list, default_label,
> -      minval, maxval, range);
> +      minval, maxval, range, bb);
>
>    reorder_insns (NEXT_INSN (before_case), get_last_insn (), before_case);
>
> @@ -2126,7 +2194,7 @@ expand_sjlj_dispatch_table (rtx dispatch_index,
>          {
>    tree elt = VEC_index (tree, dispatch_table, i);
>    rtx lab = label_rtx (CASE_LABEL (elt));
> -  do_jump_if_equal (index_mode, index, zero, lab, 0);
> +  do_jump_if_equal (index_mode, index, zero, lab, 0, -1);
>    force_expand_binop (index_mode, sub_optab,
>        index, CONST1_RTX (index_mode),
>        index, 0, OPTAB_DIRECT);
> @@ -2150,12 +2218,12 @@ expand_sjlj_dispatch_table (rtx dispatch_index,
>    tree elt = VEC_index (tree, dispatch_table, i);
>    tree low = CASE_LOW (elt);
>    tree lab = CASE_LABEL (elt);
> -  case_list = add_case_node (case_list, low, low, lab, case_node_pool);
> +  case_list = add_case_node (case_list, low, low, lab, 0, case_node_pool);
>   }
>
>        emit_case_dispatch_table (index_expr, index_type,
>   case_list, default_label,
> - minval, maxval, range);
> + minval, maxval, range, NULL);
>        emit_label (default_label);
>        free_alloc_pool (case_node_pool);
>      }
> @@ -2237,6 +2305,9 @@ balance_case_nodes (case_node_ptr *head, case_node
>    /* Optimize each of the two split parts.  */
>    balance_case_nodes (&np->left, np);
>    balance_case_nodes (&np->right, np);
> +          np->subtree_count = np->count;
> +          np->subtree_count += np->left->subtree_count;
> +          np->subtree_count += np->right->subtree_count;
>   }
>        else
>   {
> @@ -2244,8 +2315,12 @@ balance_case_nodes (case_node_ptr *head, case_node
>       but fill in `parent' fields.  */
>    np = *head;
>    np->parent = parent;
> +          np->subtree_count = np->count;
>    for (; np->right; np = np->right)
> -    np->right->parent = np;
> +            {
> +      np->right->parent = np;
> +              (*head)->subtree_count += np->right->subtree_count;
> +            }
>   }
>      }
>  }
> @@ -2358,6 +2433,20 @@ node_is_bounded (case_node_ptr node, tree index_ty
>    && node_has_high_bound (node, index_type));
>  }
>
> +
> +/* Attach a REG_BR_PROB note to the last created RTX instruction if
> +   PROBABILITY is not -1.  */
> +
> +static void
> +add_prob_note_to_last_insn(int probability)
> +{
> +  if (probability != -1)
> +    {
> +      rtx jump_insn = get_last_insn();
> +      add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (probability));
> +    }
> +}
> +
>  /* Emit step-by-step code to select a case for the value of INDEX.
>     The thus generated decision tree follows the form of the
>     case-node binary tree NODE, whose nodes represent test conditions.
> @@ -2386,10 +2475,12 @@ node_is_bounded (case_node_ptr node, tree index_ty
>
>  static void
>  emit_case_nodes (rtx index, case_node_ptr node, rtx default_label,
> - tree index_type)
> + gcov_type default_count, tree index_type)
>  {
>    /* If INDEX has an unsigned type, we must make unsigned branches.  */
>    int unsignedp = TYPE_UNSIGNED (index_type);
> +  int probability;
> +  gcov_type count = node->count, subtree_count = node->subtree_count;
>    enum machine_mode mode = GET_MODE (index);
>    enum machine_mode imode = TYPE_MODE (index_type);
>
> @@ -2404,15 +2495,17 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>
>    else if (tree_int_cst_equal (node->low, node->high))
>      {
> +      probability = case_probability (count, subtree_count + default_count);
>        /* Node is single valued.  First see if the index expression matches
>   this node and then check our children, if any.  */
> -
>        do_jump_if_equal (mode, index,
>   convert_modes (mode, imode,
>         expand_normal (node->low),
>         unsignedp),
> - label_rtx (node->code_label), unsignedp);
> -
> + label_rtx (node->code_label), unsignedp, probability);
> +      /* Since this case is taken at this point, reduce its weight from
> +         subtree_weight.  */
> +      subtree_count -= count;
>        if (node->right != 0 && node->left != 0)
>   {
>    /* This node has children on both sides.
> @@ -2430,7 +2523,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         GT, NULL_RTX, mode, unsignedp,
>         label_rtx (node->right->code_label));
> -      emit_case_nodes (index, node->left, default_label, index_type);
> +              probability = case_probability (node->right->count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
> +      emit_case_nodes (index, node->left, default_label, default_count,
> +                               index_type);
>      }
>
>    else if (node_is_bounded (node->left, index_type))
> @@ -2442,7 +2539,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         LT, NULL_RTX, mode, unsignedp,
>         label_rtx (node->left->code_label));
> -      emit_case_nodes (index, node->right, default_label, index_type);
> +              probability = case_probability (node->left->count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
> +      emit_case_nodes (index, node->right, default_label,
> default_count, index_type);
>      }
>
>    /* If both children are single-valued cases with no
> @@ -2460,21 +2560,25 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>
>        /* See if the value matches what the right hand side
>   wants.  */
> +              probability = case_probability (node->right->count,
> +                                              subtree_count + default_count);
>        do_jump_if_equal (mode, index,
>   convert_modes (mode, imode,
>         expand_normal (node->right->low),
>         unsignedp),
>   label_rtx (node->right->code_label),
> - unsignedp);
> + unsignedp, probability);
>
>        /* See if the value matches what the left hand side
>   wants.  */
> +              probability = case_probability (node->left->count,
> +                                              subtree_count + default_count);
>        do_jump_if_equal (mode, index,
>   convert_modes (mode, imode,
>         expand_normal (node->left->low),
>         unsignedp),
>   label_rtx (node->left->code_label),
> - unsignedp);
> + unsignedp, probability);
>      }
>
>    else
> @@ -2494,10 +2598,18 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         GT, NULL_RTX, mode, unsignedp,
>         label_rtx (test_label));
> +              /* The default label could be reached either through the right
> +                 subtree or the left subtree. Divide the probability
> +                 equally.  */
> +              probability = case_probability (
> +                  node->right->subtree_count + default_count/2,
> +                  subtree_count + default_count);
> +              default_count /= 2;
> +              add_prob_note_to_last_insn (probability);
>
>        /* Value must be on the left.
>   Handle the left-hand subtree.  */
> -      emit_case_nodes (index, node->left, default_label, index_type);
> +      emit_case_nodes (index, node->left, default_label,
> default_count, index_type);
>        /* If left-hand subtree does nothing,
>   go to default.  */
>        if (default_label)
> @@ -2505,7 +2617,7 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>
>        /* Code branches here for the right-hand subtree.  */
>        expand_label (test_label);
> -      emit_case_nodes (index, node->right, default_label, index_type);
> +      emit_case_nodes (index, node->right, default_label,
> default_count, index_type);
>      }
>   }
>
> @@ -2530,21 +2642,29 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>      unsignedp),
>     LT, NULL_RTX, mode, unsignedp,
>     default_label);
> +                  probability = case_probability (default_count/2,
> +                                                  subtree_count +
> default_count);
> +                  default_count /= 2;
> +                  add_prob_note_to_last_insn (probability);
>   }
>
> -      emit_case_nodes (index, node->right, default_label, index_type);
> +      emit_case_nodes (index, node->right, default_label,
> default_count, index_type);
>      }
>    else
> -    /* We cannot process node->right normally
> -       since we haven't ruled out the numbers less than
> -       this node's value.  So handle node->right explicitly.  */
> -    do_jump_if_equal (mode, index,
> -      convert_modes
> -      (mode, imode,
> -       expand_normal (node->right->low),
> -       unsignedp),
> -      label_rtx (node->right->code_label), unsignedp);
> - }
> +            {
> +              probability = case_probability (node->right->subtree_count,
> +                                              subtree_count + default_count);
> +      /* We cannot process node->right normally
> +         since we haven't ruled out the numbers less than
> +         this node's value.  So handle node->right explicitly.  */
> +      do_jump_if_equal (mode, index,
> +        convert_modes
> +        (mode, imode,
> +         expand_normal (node->right->low),
> +         unsignedp),
> +        label_rtx (node->right->code_label), unsignedp, probability);
> +            }
> +  }
>
>        else if (node->right == 0 && node->left != 0)
>   {
> @@ -2561,20 +2681,29 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>      unsignedp),
>     GT, NULL_RTX, mode, unsignedp,
>     default_label);
> +                  probability = case_probability (
> +                      default_count/2, subtree_count + default_count);
> +                  default_count /= 2;
> +                  add_prob_note_to_last_insn (probability);
>   }
>
> -      emit_case_nodes (index, node->left, default_label, index_type);
> +      emit_case_nodes (index, node->left, default_label,
> +                               default_count, index_type);
>      }
>    else
> -    /* We cannot process node->left normally
> -       since we haven't ruled out the numbers less than
> -       this node's value.  So handle node->left explicitly.  */
> -    do_jump_if_equal (mode, index,
> -      convert_modes
> -      (mode, imode,
> -       expand_normal (node->left->low),
> -       unsignedp),
> -      label_rtx (node->left->code_label), unsignedp);
> +            {
> +              probability = case_probability (node->left->subtree_count,
> +                                              subtree_count + default_count);
> +      /* We cannot process node->left normally
> +         since we haven't ruled out the numbers less than
> +         this node's value.  So handle node->left explicitly.  */
> +      do_jump_if_equal (mode, index,
> +        convert_modes
> +        (mode, imode,
> +         expand_normal (node->left->low),
> +         unsignedp),
> +        label_rtx (node->left->code_label), unsignedp, probability);
> +            }
>   }
>      }
>    else
> @@ -2593,15 +2722,20 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>    tree test_label = 0;
>
>    if (node_is_bounded (node->right, index_type))
> -    /* Right hand node is fully bounded so we can eliminate any
> -       testing and branch directly to the target code.  */
> -    emit_cmp_and_jump_insns (index,
> -     convert_modes
> -     (mode, imode,
> -      expand_normal (node->high),
> -      unsignedp),
> -     GT, NULL_RTX, mode, unsignedp,
> -     label_rtx (node->right->code_label));
> +            {
> +      /* Right hand node is fully bounded so we can eliminate any
> +         testing and branch directly to the target code.  */
> +      emit_cmp_and_jump_insns (index,
> +       convert_modes
> +       (mode, imode,
> +        expand_normal (node->high),
> +        unsignedp),
> +       GT, NULL_RTX, mode, unsignedp,
> +       label_rtx (node->right->code_label));
> +              probability = case_probability (node->right->subtree_count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
> +            }
>    else
>      {
>        /* Right hand node requires testing.
> @@ -2616,6 +2750,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         GT, NULL_RTX, mode, unsignedp,
>         label_rtx (test_label));
> +              probability = case_probability
> (node->right->subtree_count + default_count/2,
> +                                              subtree_count + default_count);
> +              default_count /= 2;
> +              add_prob_note_to_last_insn (probability);
>      }
>
>    /* Value belongs to this node or to the left-hand subtree.  */
> @@ -2627,9 +2765,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>      unsignedp),
>     GE, NULL_RTX, mode, unsignedp,
>     label_rtx (node->code_label));
> +          probability = case_probability (count, subtree_count +
> default_count);
> +          add_prob_note_to_last_insn (probability);
>
>    /* Handle the left-hand subtree.  */
> -  emit_case_nodes (index, node->left, default_label, index_type);
> +  emit_case_nodes (index, node->left, default_label, default_count,
> index_type);
>
>    /* If right node had to be handled later, do that now.  */
>
> @@ -2641,7 +2781,7 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   emit_jump (default_label);
>
>        expand_label (test_label);
> -      emit_case_nodes (index, node->right, default_label, index_type);
> +      emit_case_nodes (index, node->right, default_label,
> default_count, index_type);
>      }
>   }
>
> @@ -2658,6 +2798,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         LT, NULL_RTX, mode, unsignedp,
>         default_label);
> +              probability = case_probability (default_count/2,
> +                                              subtree_count + default_count);
> +              default_count /= 2;
> +              add_prob_note_to_last_insn (probability);
>      }
>
>    /* Value belongs to this node or to the right-hand subtree.  */
> @@ -2669,8 +2813,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>      unsignedp),
>     LE, NULL_RTX, mode, unsignedp,
>     label_rtx (node->code_label));
> +          probability = case_probability (count, subtree_count +
> default_count);
> +          add_prob_note_to_last_insn (probability);
>
> -  emit_case_nodes (index, node->right, default_label, index_type);
> +  emit_case_nodes (index, node->right, default_label, default_count,
> index_type);
>   }
>
>        else if (node->right == 0 && node->left != 0)
> @@ -2686,6 +2832,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         GT, NULL_RTX, mode, unsignedp,
>         default_label);
> +              probability = case_probability (default_count/2,
> +                                              subtree_count + default_count);
> +              default_count /= 2;
> +              add_prob_note_to_last_insn (probability);
>      }
>
>    /* Value belongs to this node or to the left-hand subtree.  */
> @@ -2697,8 +2847,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>      unsignedp),
>     GE, NULL_RTX, mode, unsignedp,
>     label_rtx (node->code_label));
> +          probability = case_probability (count, subtree_count +
> default_count);
> +          add_prob_note_to_last_insn (probability);
>
> -  emit_case_nodes (index, node->left, default_label, index_type);
> +  emit_case_nodes (index, node->left, default_label, default_count,
> index_type);
>   }
>
>        else
> @@ -2718,6 +2870,9 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         GT, NULL_RTX, mode, unsignedp,
>         default_label);
> +              probability = case_probability (default_count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
>      }
>
>    else if (!low_bound && high_bound)
> @@ -2729,6 +2884,9 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         LT, NULL_RTX, mode, unsignedp,
>         default_label);
> +              probability = case_probability (default_count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
>      }
>    else if (!low_bound && !high_bound)
>      {
> @@ -2750,6 +2908,9 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>
>        emit_cmp_and_jump_insns (new_index, new_bound, GT, NULL_RTX,
>         mode, 1, default_label);
> +              probability = case_probability (default_count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
>      }
>
>    emit_jump (label_rtx (node->code_label));

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-03 16:12 ` Xinliang David Li
@ 2012-10-03 17:38   ` Steven Bosscher
  2012-10-03 18:02     ` Xinliang David Li
  0 siblings, 1 reply; 15+ messages in thread
From: Steven Bosscher @ 2012-10-03 17:38 UTC (permalink / raw)
  To: Xinliang David Li; +Cc: Easwaran Raman, gcc-patches, Jan Hubicka

On Wed, Oct 3, 2012 at 6:12 PM, Xinliang David Li <davidxl@google.com> wrote:
> What is the status of switch expansion GIMPLE rewrite? If it is not
> planned for 4.8, It will be desirable to include this fix into trunk.

I could work on it for GCC 4.8 (there's not a lot of work left to be
done for it now) but we haven't really decided yet where the pass
should be scheduled and I also would like to wait a bit to see how the
SJLJ changes work out.  So I talked about this with Easwaran, I think
his patch should be included into the trunk now. I'll adapt it for the
move to GIMPLE.

> It also helps set up a good base line to test against regression.

Agreed.

Ciao!
Steven

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-03 17:38   ` Steven Bosscher
@ 2012-10-03 18:02     ` Xinliang David Li
  2012-10-03 18:50       ` Jan Hubicka
  0 siblings, 1 reply; 15+ messages in thread
From: Xinliang David Li @ 2012-10-03 18:02 UTC (permalink / raw)
  To: Steven Bosscher; +Cc: Easwaran Raman, gcc-patches, Jan Hubicka

thanks for the update!

David

On Wed, Oct 3, 2012 at 10:37 AM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> On Wed, Oct 3, 2012 at 6:12 PM, Xinliang David Li <davidxl@google.com> wrote:
>> What is the status of switch expansion GIMPLE rewrite? If it is not
>> planned for 4.8, It will be desirable to include this fix into trunk.
>
> I could work on it for GCC 4.8 (there's not a lot of work left to be
> done for it now) but we haven't really decided yet where the pass
> should be scheduled and I also would like to wait a bit to see how the
> SJLJ changes work out.  So I talked about this with Easwaran, I think
> his patch should be included into the trunk now. I'll adapt it for the
> move to GIMPLE.
>
>> It also helps set up a good base line to test against regression.
>
> Agreed.
>
> Ciao!
> Steven

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-03 18:02     ` Xinliang David Li
@ 2012-10-03 18:50       ` Jan Hubicka
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Hubicka @ 2012-10-03 18:50 UTC (permalink / raw)
  To: Xinliang David Li
  Cc: Steven Bosscher, Easwaran Raman, gcc-patches, Jan Hubicka

> thanks for the update!
OK, I will review the patch tomorrow then. It is a good incremental step.  I
would certainly like to see the gimple expansion in 4.8 however.

Honza

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-03  1:09 Propagate profile counts during switch expansion Easwaran Raman
  2012-10-03 16:12 ` Xinliang David Li
@ 2012-10-04 13:19 ` Jan Hubicka
  2012-10-04 20:47   ` Easwaran Raman
  2012-10-09  0:46   ` Easwaran Raman
  1 sibling, 2 replies; 15+ messages in thread
From: Jan Hubicka @ 2012-10-04 13:19 UTC (permalink / raw)
  To: Easwaran Raman; +Cc: gcc-patches, Jan Hubicka, Steven Bosscher, David Li

> Hi,
>  This patch propagates the profile counts during RTL expansion. In
> many cases, there is no way to determine the exact count of an edge
> generated during the expansion. So this patch uses some simple
> heuristics to estimate the edge counts but ensures that the counts of
> the basic blocks corresponding to the cases are (nearly the) same as
> at the gimple level.
> 
> Bootstrapped and profile-bootstrapped on an x86_64/linux machine. OK for trunk?
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c (revision 191879)
> +++ gcc/expr.c (working copy)
> @@ -154,7 +154,7 @@ static rtx do_store_flag (sepops, rtx, enum machin
>  #ifdef PUSH_ROUNDING
>  static void emit_single_push_insn (enum machine_mode, rtx, tree);
>  #endif
> -static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
> +static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
>  static rtx const_vector_from_tree (tree);
>  static void write_complex_part (rtx, rtx, bool);
> 
> @@ -10894,7 +10894,7 @@ try_casesi (tree index_type, tree index_expr, tree
> 
>  static void
>  do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
> -      rtx default_label)
> +      rtx default_label, int default_probability)

Please document default_probability.
>  {
>    rtx temp, vector;
> 
> @@ -10910,9 +10910,17 @@ do_tablejump (rtx index, enum machine_mode mode, r
>       the maximum value of the range.  */
> 
>    if (default_label)
> -    emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
> -     default_label);
> +    {
> +      emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
> +       default_label);
> +      if (default_probability != -1)
> +        {
> +          rtx jump_insn = get_last_insn();
> +          add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (default_probability));
> +        }
> +    }

dojump already does this kind of logic, but it is bit more cureful:
      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
                               if_true_label);
      if (prob != -1 && profile_status != PROFILE_ABSENT)
        { 
          for (last = NEXT_INSN (last);
               last && NEXT_INSN (last);
               last = NEXT_INSN (last))
            if (JUMP_P (last))
              break;
          if (last
              && JUMP_P (last)
              && ! NEXT_INSN (last)
              && any_condjump_p (last))
            { 
              gcc_assert (!find_reg_note (last, REG_BR_PROB, 0));
              add_reg_note (last, REG_BR_PROB, GEN_INT (prob));
            }
        }

What about making emit_cmp_and_jump_insns taking the probability argument
and moving the code above inside?  Most of other places need updating to
propagate probabilities.

(compare_and_jump_seq in loop-unswitch probably also can be updated)
> @@ -10954,7 +10962,7 @@ do_tablejump (rtx index, enum machine_mode mode, r
> 
>  int
>  try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
> -       rtx table_label, rtx default_label)
> +       rtx table_label, rtx default_label, int default_probability)

Simiarly here.
> Index: gcc/cfgbuild.c
> ===================================================================
> --- gcc/cfgbuild.c (revision 191879)
> +++ gcc/cfgbuild.c (working copy)
> @@ -533,6 +533,23 @@ find_bb_boundaries (basic_block bb)
>      purge_dead_tablejump_edges (bb, table);
>  }
> 
> +/* If there is at least one edge in EDGES with a non-zero count, then
> +   compute probabilities based on the existing counts.  */
> +
> +static bool
> +gen_probabilities_from_existing_counts ( VEC(edge,gc) *edges) {
> +  edge e;
> +  edge_iterator ei;
> +  gcov_type count_sum = 0;
> +  FOR_EACH_EDGE(e, ei, edges)
> +    count_sum += e->count;
> +  if (count_sum == 0)
> +    return false;
> +  FOR_EACH_EDGE(e, ei, edges)
> +    e->probability = e->count * REG_BR_PROB_BASE / count_sum;
> +  return true;
> +}
> +
>  /*  Assume that frequency of basic block B is known.  Compute frequencies
>      and probabilities of outgoing edges.  */
> 
> @@ -560,7 +577,6 @@ compute_outgoing_frequencies (basic_block b)
>    return;
>   }
>      }
> -
>    if (single_succ_p (b))
>      {
>        e = single_succ_edge (b);
> @@ -568,7 +584,10 @@ compute_outgoing_frequencies (basic_block b)
>        e->count = b->count;
>        return;
>      }
> -  guess_outgoing_edge_probabilities (b);
> +  else if (!gen_probabilities_from_existing_counts (b->succs)){
> +    /* All outgoing edges of B have zero count. Guess probabilities.  */
> +    guess_outgoing_edge_probabilities (b);
> +  }

Hmm, I do not quite follow logic here.  
basic block B is one of many basic blocks that the original BB was split from.
It is possible that B may have some of original edges, but there may be new ones.
How you can guess the outgoing probabilitie shere.  Do you have an example?

Also gen_probabilities_from_existing_counts could probably also work based
on original edge frequencies.
> @@ -1664,7 +1668,7 @@ do_jump_if_equal (enum machine_mode mode, rtx op0,
> 
>  static struct case_node *
>  add_case_node (struct case_node *head, tree low, tree high,
> -               tree label, alloc_pool case_node_pool)
> +               tree label, gcov_type count, alloc_pool case_node_pool)

Please document  COUNT here.
>  {
>    struct case_node *r;
> 
> @@ -1677,6 +1681,8 @@ add_case_node (struct case_node *head, tree low, t
>    r->high = high;
>    r->code_label = label;
>    r->parent = r->left = NULL;
> +  r->count = count;
> +  r->subtree_count = count;
>    r->right = head;
>    return r;
>  }

.. and here
> @@ -1803,7 +1809,8 @@ expand_switch_as_decision_tree_p (tree range,
> 
>  static void
>  emit_case_decision_tree (tree index_expr, tree index_type,
> - struct case_node *case_list, rtx default_label)
> + struct case_node *case_list, rtx default_label,
> +                         gcov_type default_count)
>  {
>    rtx index = expand_normal (index_expr);
> 
> @@ -1839,15 +1846,29 @@ emit_case_decision_tree (tree index_expr, tree ind
>        dump_case_nodes (dump_file, case_list, indent_step, 0);
>      }
> 
> -  emit_case_nodes (index, case_list, default_label, index_type);
> +  emit_case_nodes (index, case_list, default_label, default_count, index_type);
>    if (default_label)
>      emit_jump (default_label);
>  }
> 

And document functio nhere.

> +static gcov_type
> +get_outgoing_edge_counts (basic_block bb)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  gcov_type count_sum = 0;
> +  FOR_EACH_EDGE(e, ei, bb->succs)
> +    count_sum += e->count;
> +  return count_sum;
> +}
> +
> +#define case_probability(x, y) \
> +    ((y) ? (gcc_assert (x <= y), (x) * REG_BR_PROB_BASE  / (y))  : -1)

You want to use RDIV here for better rounding.  I would make this an inline
functions these days.
> +
>  /* Generate a dispatch tabler, switching on INDEX_EXPR and jumping to
>     one of the labels in CASE_LIST or to the DEFAULT_LABEL.
>     MINVAL, MAXVAL, and RANGE are the extrema and range of the case
> -   labels in CASE_LIST.
> +   labels in CASE_LIST. STMT_BB is the basic block containing the statement.
> 
>     First, a jump insn is emitted.  First we try "casesi".  If that
>     fails, try "tablejump".   A target *must* have one of them (or both).
> @@ -1860,19 +1881,23 @@ emit_case_decision_tree (tree index_expr, tree ind
>  static void
>  emit_case_dispatch_table (tree index_expr, tree index_type,
>    struct case_node *case_list, rtx default_label,
> -  tree minval, tree maxval, tree range)
> +  tree minval, tree maxval, tree range,
> +                          basic_block stmt_bb)
>  {
>    int i, ncases;
>    struct case_node *n;
>    rtx *labelvec;
>    rtx fallback_label = label_rtx (case_list->code_label);
>    rtx table_label = gen_label_rtx ();
> +  bool has_gaps = false;
> +  edge default_edge = EDGE_SUCC(stmt_bb, 0);
> +  gcov_type default_count = default_edge->count;
> +  gcov_type count = get_outgoing_edge_counts (stmt_bb);
> +  bool try_with_tablejump = false;

Here, I think you want to first decide whether you base expansion on counts
(i.e. any counts involved is non-0) or probabilities.  We really want to maintain
the profile up-to-date even with guessed profile.  The inconsistencies
propagate across CFG and easilly confuse later optimizers.
Everywhere you are using count comming from the original statement, you can also use
the probability.

The newly produced control flow will be optimized and the edges frequencies will
be propagated along.

> +
> +  default_edge->count = default_count;
> +  if (count)
> +    {
> +      edge e;
> +      edge_iterator ei;
> +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
> +        e->probability = e->count * REG_BR_PROB_BASE / count;
> +    }

Hmm, this updates origina BB containing the switch statement?  Of course,
modulo roundoff errors, this should hold.  I wonder where did you got the
diferences and why do you need this?  You are going to output new control flow
and find_many_sub_basic_blocks will recompute all the counts/frequencies inside
anyway?
Also you want to use RDIV here.
> @@ -2358,6 +2433,20 @@ node_is_bounded (case_node_ptr node, tree index_ty
>    && node_has_high_bound (node, index_type));
>  }
> 
> +
> +/* Attach a REG_BR_PROB note to the last created RTX instruction if
> +   PROBABILITY is not -1.  */
> +
> +static void
> +add_prob_note_to_last_insn(int probability)
> +{
> +  if (probability != -1)
> +    {
> +      rtx jump_insn = get_last_insn();
> +      add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (probability));
> +    }
> +}

Well, I am not sure this is safe thing to do since you do not really
know what the last instruction is.  In any case you want to test
it is an conditional jump instruction as in the code I quoted
above.

> @@ -2430,7 +2523,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         GT, NULL_RTX, mode, unsignedp,
>         label_rtx (node->right->code_label));
> -      emit_case_nodes (index, node->left, default_label, index_type);
> +              probability = case_probability (node->right->count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);
> +      emit_case_nodes (index, node->left, default_label, default_count,
> +                               index_type);

Hmm, here you seem to be distributing the probabilities of the counts reached.
What happens in the case when the edge probability needs to be distributed across
nodes of the decision tree. I.e. in
t(int a)
{
  switch (a)
   { 
     case 100:
     case 200:
     case 300:
     case 400:
     case 500:
     case 600:
     case 700:
     case 800:
     case 900:
       t();
     case 101:
     case 202:
     case 303:
     case 404:
     case 505:
     case 606:
     case 707:
     case 808:
     case 909:
       q();
   }
}

>      }
> 
>    else if (node_is_bounded (node->left, index_type))
> @@ -2442,7 +2539,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>   unsignedp),
>         LT, NULL_RTX, mode, unsignedp,
>         label_rtx (node->left->code_label));
> -      emit_case_nodes (index, node->right, default_label, index_type);
> +              probability = case_probability (node->left->count,
> +                                              subtree_count + default_count);
> +              add_prob_note_to_last_insn (probability);

Here, and apparently in all uses of add_prob_note_to_last_insn, you want to use
the new parameter of emit_cmp_and_jump_insn.

Otherwise the patch seems to make sense.  Thanks for looking into this issue.

Honza

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-04 13:19 ` Jan Hubicka
@ 2012-10-04 20:47   ` Easwaran Raman
  2012-10-05 22:15     ` Easwaran Raman
  2012-10-09  0:46   ` Easwaran Raman
  1 sibling, 1 reply; 15+ messages in thread
From: Easwaran Raman @ 2012-10-04 20:47 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches, Steven Bosscher, David Li

Hi Honza,
 I am addressing some of the questions you raise here. Will send an
updated patch later.


On Thu, Oct 4, 2012 at 6:19 AM, Jan Hubicka <hubicka@ucw.cz> wrote:

> > @@ -560,7 +577,6 @@ compute_outgoing_frequencies (basic_block b)
> >    return;
> >   }
> >      }
> > -
> >    if (single_succ_p (b))
> >      {
> >        e = single_succ_edge (b);
> > @@ -568,7 +584,10 @@ compute_outgoing_frequencies (basic_block b)
> >        e->count = b->count;
> >        return;
> >      }
> > -  guess_outgoing_edge_probabilities (b);
> > +  else if (!gen_probabilities_from_existing_counts (b->succs)){
> > +    /* All outgoing edges of B have zero count. Guess probabilities.  */
> > +    guess_outgoing_edge_probabilities (b);
> > +  }
>
> Hmm, I do not quite follow logic here.
> basic block B is one of many basic blocks that the original BB was split from.
> It is possible that B may have some of original edges, but there may be new ones.
> How you can guess the outgoing probabilitie shere.  Do you have an example?

The code reaches here only when the BB has more than two successor. I
assume that this happens only when the BB is terminated by a switch
statement. During conversion to RTL, the outgoing edges of this BB and
their counts ar untouched. So I am just using those counts to compute
the edge probabilities.

> Also gen_probabilities_from_existing_counts could probably also work based
> on original edge frequencies.

Just to be clear I understand it right, you want me to use the
frequency instead of count since the frequency is derived from the
counts in profile use builds?

>
> > +
> > +  default_edge->count = default_count;
> > +  if (count)
> > +    {
> > +      edge e;
> > +      edge_iterator ei;
> > +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
> > +        e->probability = e->count * REG_BR_PROB_BASE / count;
> > +    }
>
> Hmm, this updates origina BB containing the switch statement?
The out_edges of the original BB.
> Of course,
> modulo roundoff errors, this should hold.  I wonder where did you got the
> diferences and why do you need this?

Originally, I obtained e->probability as e->count * REG_BR_PROB_BASE /
bb->count. During profile bootstrap, I noticed BBs whose sum of
outgoing edge counts exceeded the BB's count and hence I normalized
with the sum of edge counts.

> You are going to output new control flow
> and find_many_sub_basic_blocks will recompute all the counts/frequencies inside
> anyway?
Sorry, I am lost here. I am just updating the probability of
stmt_bb->succs right?


>
> > @@ -2430,7 +2523,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
> >   unsignedp),
> >         GT, NULL_RTX, mode, unsignedp,
> >         label_rtx (node->right->code_label));
> > -      emit_case_nodes (index, node->left, default_label, index_type);
> > +              probability = case_probability (node->right->count,
> > +                                              subtree_count + default_count);
> > +              add_prob_note_to_last_insn (probability);
> > +      emit_case_nodes (index, node->left, default_label, default_count,
> > +                               index_type);
>
> Hmm, here you seem to be distributing the probabilities of the counts reached.
> What happens in the case when the edge probability needs to be distributed across
> nodes of the decision tree. I.e. in
> t(int a)
> {
>   switch (a)
>    {
>      case 100:
>      case 200:
>      case 300:
>      case 400:
>      case 500:
>      case 600:
>      case 700:
>      case 800:
>      case 900:
>        t();
>      case 101:
>      case 202:
>      case 303:
>      case 404:
>      case 505:
>      case 606:
>      case 707:
>      case 808:
>      case 909:
>        q();
>    }
> }
>
Ok, that's a bug in my patch. In the above example, there are two
outgoing edges from the BB containing switch but many case nodes. I
would end up multiplying the counts by the number of cases that lead
to a label. If I divide the counts of each case_node by the number of
case nodes that jump to the same label, will that be reasonable? In
the above case, if t() is called 900 times, I will assume that each of
the 9 cases contribute a count of 100.

Thanks,
Easwaran

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-04 20:47   ` Easwaran Raman
@ 2012-10-05 22:15     ` Easwaran Raman
  2012-10-06 11:19       ` Jan Hubicka
  0 siblings, 1 reply; 15+ messages in thread
From: Easwaran Raman @ 2012-10-05 22:15 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches, Steven Bosscher, David Li

>
>>
>> > +
>> > +  default_edge->count = default_count;
>> > +  if (count)
>> > +    {
>> > +      edge e;
>> > +      edge_iterator ei;
>> > +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
>> > +        e->probability = e->count * REG_BR_PROB_BASE / count;
>> > +    }
>>
>> Hmm, this updates origina BB containing the switch statement?
> The out_edges of the original BB.
>> Of course,
>> modulo roundoff errors, this should hold.  I wonder where did you got the
>> diferences and why do you need this?

The count of the outgoing edge of BB that jumps to the default label
needs to be updated (to 0 or original_default_count/2, depending on
whether default label is a jump table target). So I need to
redistribute the probabilities of the rest of the edges. That's why I
do this here.

> Originally, I obtained e->probability as e->count * REG_BR_PROB_BASE /
> bb->count. During profile bootstrap, I noticed BBs whose sum of
> outgoing edge counts exceeded the BB's count and hence I normalized
> with the sum of edge counts.
>
>> You are going to output new control flow
>> and find_many_sub_basic_blocks will recompute all the counts/frequencies inside
>> anyway?
> Sorry, I am lost here. I am just updating the probability of
> stmt_bb->succs right?
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-05 22:15     ` Easwaran Raman
@ 2012-10-06 11:19       ` Jan Hubicka
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Hubicka @ 2012-10-06 11:19 UTC (permalink / raw)
  To: Easwaran Raman; +Cc: Jan Hubicka, gcc-patches, Steven Bosscher, David Li

> >
> >>
> >> > +
> >> > +  default_edge->count = default_count;
> >> > +  if (count)
> >> > +    {
> >> > +      edge e;
> >> > +      edge_iterator ei;
> >> > +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
> >> > +        e->probability = e->count * REG_BR_PROB_BASE / count;
> >> > +    }
> >>
> >> Hmm, this updates origina BB containing the switch statement?
> > The out_edges of the original BB.
> >> Of course,
> >> modulo roundoff errors, this should hold.  I wonder where did you got the
> >> diferences and why do you need this?
> 
> The count of the outgoing edge of BB that jumps to the default label
> needs to be updated (to 0 or original_default_count/2, depending on
> whether default label is a jump table target). So I need to
> redistribute the probabilities of the rest of the edges. That's why I
> do this here.

Hmm, I see, OK then.  Please add a comment.

Honza
> 
> > Originally, I obtained e->probability as e->count * REG_BR_PROB_BASE /
> > bb->count. During profile bootstrap, I noticed BBs whose sum of
> > outgoing edge counts exceeded the BB's count and hence I normalized
> > with the sum of edge counts.
> >
> >> You are going to output new control flow
> >> and find_many_sub_basic_blocks will recompute all the counts/frequencies inside
> >> anyway?
> > Sorry, I am lost here. I am just updating the probability of
> > stmt_bb->succs right?
> >
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-04 13:19 ` Jan Hubicka
  2012-10-04 20:47   ` Easwaran Raman
@ 2012-10-09  0:46   ` Easwaran Raman
  2012-10-14  7:02     ` Easwaran Raman
                       ` (2 more replies)
  1 sibling, 3 replies; 15+ messages in thread
From: Easwaran Raman @ 2012-10-09  0:46 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches, Steven Bosscher, David Li

[-- Attachment #1: Type: text/plain, Size: 14843 bytes --]

I have attached a revised patch. The updated ChangeLog is given below
and I have responded to your comments inline:

2012-10-08   Easwaran Raman  <eraman@google.com>
* optabs.c (emit_cmp_and_jump_insn_1): Add a new parameter to
specificy the probability of taking the jump.
(emit_cmp_and_jump_insns): Likewise.
(expand_compare_and_swap_loop): Make the jump predicted not taken.
* dojump.c (do_compare_rtx_and_jump): Remove the code attaching
REG_BR_PROB note and pass probability to emit_cmp_and_jump_insns.
* cfgbuild.c (compute_outgoing_frequencies): Do not guess outgoing
probabilities for branches with more than two successors.
* expr.c (emit_block_move_via_loop): Predict the loop backedge loop
to be highly taken.
(try_casesi): Pass the probability of jumping to the default label.
(try_tablejump): Likewise.
(do_tablejump): Likewise.
* expr.h (try_tablejump): Add a new parameter.
(try_casesi): Likewise.
(emit_cmp_and_jump_insns): Add probability as default parameter with a
default value of -1.
* except.c (sjlj_emit_function_enter): Pass probability to
emit_cmp_and_jump_insns.
* stmt.c (case_node): Add new fields PROB and SUBTREE_PROB.
(do_jump_if_equal): Pass probability for REG_BR_PROB note.
(add_case_node): Pass estimated probability of jumping to the case
label.
(emit_case_decision_tree): Pass default_prob to emit_case_nodes.
(get_outgoing_edge_probs): New function.
(conditional_probability): Likewise.
(reset_out_edges_aux): Likewise.
(compute_cases_per_edge): Likewise.
(emit_case_dispatch_table): Update probabilities of edges coming out
of the switch statement.
(expand_case): Compute and propagate default edge probability to
emit_case_dispatch_table.
(expand_sjlj_dispatch_table): Update calls to add_case_node and
emit_case_dispatch_table.
(balance_case_nodes): Update subtree_prob values.
(emit_case_nodes): Compute edge probabilities and add pass them to
emit_cmp_and_jump_insns.

gcc/testsuite/ChangeLog:
2012-10-02   Easwaran Raman  <eraman@google.com>
* gcc.dg/tree-prof/switch-case-1.c: New test case.
* gcc.dg/tree-prof/switch-case-2.c: New test case.




On Thu, Oct 4, 2012 at 6:19 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> Hi,
>>  This patch propagates the profile counts during RTL expansion. In
>> many cases, there is no way to determine the exact count of an edge
>> generated during the expansion. So this patch uses some simple
>> heuristics to estimate the edge counts but ensures that the counts of
>> the basic blocks corresponding to the cases are (nearly the) same as
>> at the gimple level.
>>
>> Bootstrapped and profile-bootstrapped on an x86_64/linux machine. OK for trunk?
>> Index: gcc/expr.c
>> ===================================================================
>> --- gcc/expr.c (revision 191879)
>> +++ gcc/expr.c (working copy)
>> @@ -154,7 +154,7 @@ static rtx do_store_flag (sepops, rtx, enum machin
>>  #ifdef PUSH_ROUNDING
>>  static void emit_single_push_insn (enum machine_mode, rtx, tree);
>>  #endif
>> -static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
>> +static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
>>  static rtx const_vector_from_tree (tree);
>>  static void write_complex_part (rtx, rtx, bool);
>>
>> @@ -10894,7 +10894,7 @@ try_casesi (tree index_type, tree index_expr, tree
>>
>>  static void
>>  do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
>> -      rtx default_label)
>> +      rtx default_label, int default_probability)
>
> Please document default_probability.
Done.

>>  {
>>    rtx temp, vector;
>>
>> @@ -10910,9 +10910,17 @@ do_tablejump (rtx index, enum machine_mode mode, r
>>       the maximum value of the range.  */
>>
>>    if (default_label)
>> -    emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
>> -     default_label);
>> +    {
>> +      emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
>> +       default_label);
>> +      if (default_probability != -1)
>> +        {
>> +          rtx jump_insn = get_last_insn();
>> +          add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (default_probability));
>> +        }
>> +    }
>
> dojump already does this kind of logic, but it is bit more cureful:
>       emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
>                                if_true_label);
>       if (prob != -1 && profile_status != PROFILE_ABSENT)
>         {
>           for (last = NEXT_INSN (last);
>                last && NEXT_INSN (last);
>                last = NEXT_INSN (last))
>             if (JUMP_P (last))
>               break;
>           if (last
>               && JUMP_P (last)
>               && ! NEXT_INSN (last)
>               && any_condjump_p (last))
>             {
>               gcc_assert (!find_reg_note (last, REG_BR_PROB, 0));
>               add_reg_note (last, REG_BR_PROB, GEN_INT (prob));
>             }
>         }
>
> What about making emit_cmp_and_jump_insns taking the probability argument
> and moving the code above inside?  Most of other places need updating to
> propagate probabilities.

Done. Made probability a default parameter with a default parameter of
-1  and so didn't update all callsites.

> (compare_and_jump_seq in loop-unswitch probably also can be updated)
>> @@ -10954,7 +10962,7 @@ do_tablejump (rtx index, enum machine_mode mode, r
>>
>>  int
>>  try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
>> -       rtx table_label, rtx default_label)
>> +       rtx table_label, rtx default_label, int default_probability)
>
> Simiarly here.
>> Index: gcc/cfgbuild.c
>> ===================================================================
>> --- gcc/cfgbuild.c (revision 191879)
>> +++ gcc/cfgbuild.c (working copy)
>> @@ -533,6 +533,23 @@ find_bb_boundaries (basic_block bb)
>>      purge_dead_tablejump_edges (bb, table);
>>  }
>>
>> +/* If there is at least one edge in EDGES with a non-zero count, then
>> +   compute probabilities based on the existing counts.  */
>> +
>> +static bool
>> +gen_probabilities_from_existing_counts ( VEC(edge,gc) *edges) {
>> +  edge e;
>> +  edge_iterator ei;
>> +  gcov_type count_sum = 0;
>> +  FOR_EACH_EDGE(e, ei, edges)
>> +    count_sum += e->count;
>> +  if (count_sum == 0)
>> +    return false;
>> +  FOR_EACH_EDGE(e, ei, edges)
>> +    e->probability = e->count * REG_BR_PROB_BASE / count_sum;
>> +  return true;
>> +}
>> +
>>  /*  Assume that frequency of basic block B is known.  Compute frequencies
>>      and probabilities of outgoing edges.  */
>>
>> @@ -560,7 +577,6 @@ compute_outgoing_frequencies (basic_block b)
>>    return;
>>   }
>>      }
>> -
>>    if (single_succ_p (b))
>>      {
>>        e = single_succ_edge (b);
>> @@ -568,7 +584,10 @@ compute_outgoing_frequencies (basic_block b)
>>        e->count = b->count;
>>        return;
>>      }
>> -  guess_outgoing_edge_probabilities (b);
>> +  else if (!gen_probabilities_from_existing_counts (b->succs)){
>> +    /* All outgoing edges of B have zero count. Guess probabilities.  */
>> +    guess_outgoing_edge_probabilities (b);
>> +  }
>
> Hmm, I do not quite follow logic here.
> basic block B is one of many basic blocks that the original BB was split from.
> It is possible that B may have some of original edges, but there may be new ones.
> How you can guess the outgoing probabilitie shere.  Do you have an example?
I have changed this code. If the bb has >2 edges, I just use the
probability as it is without guessing. This should work for switch
statements since the edge probabilities are adjusted in stmt.c. I am
not sure if there are other cases of multi-way branches that I need to
worry about here.

> Also gen_probabilities_from_existing_counts could probably also work based
> on original edge frequencies.
>> @@ -1664,7 +1668,7 @@ do_jump_if_equal (enum machine_mode mode, rtx op0,
>>
>>  static struct case_node *
>>  add_case_node (struct case_node *head, tree low, tree high,
>> -               tree label, alloc_pool case_node_pool)
>> +               tree label, gcov_type count, alloc_pool case_node_pool)
>
> Please document  COUNT here.
Done.

>>  {
>>    struct case_node *r;
>>
>> @@ -1677,6 +1681,8 @@ add_case_node (struct case_node *head, tree low, t
>>    r->high = high;
>>    r->code_label = label;
>>    r->parent = r->left = NULL;
>> +  r->count = count;
>> +  r->subtree_count = count;
>>    r->right = head;
>>    return r;
>>  }
>
> .. and here
Done.
>> @@ -1803,7 +1809,8 @@ expand_switch_as_decision_tree_p (tree range,
>>
>>  static void
>>  emit_case_decision_tree (tree index_expr, tree index_type,
>> - struct case_node *case_list, rtx default_label)
>> + struct case_node *case_list, rtx default_label,
>> +                         gcov_type default_count)
>>  {
>>    rtx index = expand_normal (index_expr);
>>
>> @@ -1839,15 +1846,29 @@ emit_case_decision_tree (tree index_expr, tree ind
>>        dump_case_nodes (dump_file, case_list, indent_step, 0);
>>      }
>>
>> -  emit_case_nodes (index, case_list, default_label, index_type);
>> +  emit_case_nodes (index, case_list, default_label, default_count, index_type);
>>    if (default_label)
>>      emit_jump (default_label);
>>  }
>>
>
> And document functio nhere.
Done.
>
>> +static gcov_type
>> +get_outgoing_edge_counts (basic_block bb)
>> +{
>> +  edge e;
>> +  edge_iterator ei;
>> +  gcov_type count_sum = 0;
>> +  FOR_EACH_EDGE(e, ei, bb->succs)
>> +    count_sum += e->count;
>> +  return count_sum;
>> +}
>> +
>> +#define case_probability(x, y) \
>> +    ((y) ? (gcc_assert (x <= y), (x) * REG_BR_PROB_BASE  / (y))  : -1)
>
> You want to use RDIV here for better rounding.  I would make this an inline
> functions these days.
Done.

>> +
>>  /* Generate a dispatch tabler, switching on INDEX_EXPR and jumping to
>>     one of the labels in CASE_LIST or to the DEFAULT_LABEL.
>>     MINVAL, MAXVAL, and RANGE are the extrema and range of the case
>> -   labels in CASE_LIST.
>> +   labels in CASE_LIST. STMT_BB is the basic block containing the statement.
>>
>>     First, a jump insn is emitted.  First we try "casesi".  If that
>>     fails, try "tablejump".   A target *must* have one of them (or both).
>> @@ -1860,19 +1881,23 @@ emit_case_decision_tree (tree index_expr, tree ind
>>  static void
>>  emit_case_dispatch_table (tree index_expr, tree index_type,
>>    struct case_node *case_list, rtx default_label,
>> -  tree minval, tree maxval, tree range)
>> +  tree minval, tree maxval, tree range,
>> +                          basic_block stmt_bb)
>>  {
>>    int i, ncases;
>>    struct case_node *n;
>>    rtx *labelvec;
>>    rtx fallback_label = label_rtx (case_list->code_label);
>>    rtx table_label = gen_label_rtx ();
>> +  bool has_gaps = false;
>> +  edge default_edge = EDGE_SUCC(stmt_bb, 0);
>> +  gcov_type default_count = default_edge->count;
>> +  gcov_type count = get_outgoing_edge_counts (stmt_bb);
>> +  bool try_with_tablejump = false;
>
> Here, I think you want to first decide whether you base expansion on counts
> (i.e. any counts involved is non-0) or probabilities.  We really want to maintain
> the profile up-to-date even with guessed profile.  The inconsistencies
> propagate across CFG and easilly confuse later optimizers.
> Everywhere you are using count comming from the original statement, you can also use
> the probability.

I have changed the code to use probabilities.

> The newly produced control flow will be optimized and the edges frequencies will
> be propagated along.
>
>> +
>> +  default_edge->count = default_count;
>> +  if (count)
>> +    {
>> +      edge e;
>> +      edge_iterator ei;
>> +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
>> +        e->probability = e->count * REG_BR_PROB_BASE / count;
>> +    }
>
> Hmm, this updates origina BB containing the switch statement?  Of course,
> modulo roundoff errors, this should hold.  I wonder where did you got the
> diferences and why do you need this?  You are going to output new control flow
> and find_many_sub_basic_blocks will recompute all the counts/frequencies inside
> anyway?
> Also you want to use RDIV here.
I need to do this to account for the change in the count of default
edge. Changed to use RDIV.

>> @@ -2358,6 +2433,20 @@ node_is_bounded (case_node_ptr node, tree index_ty
>>    && node_has_high_bound (node, index_type));
>>  }
>>
>> +
>> +/* Attach a REG_BR_PROB note to the last created RTX instruction if
>> +   PROBABILITY is not -1.  */
>> +
>> +static void
>> +add_prob_note_to_last_insn(int probability)
>> +{
>> +  if (probability != -1)
>> +    {
>> +      rtx jump_insn = get_last_insn();
>> +      add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (probability));
>> +    }
>> +}
>
> Well, I am not sure this is safe thing to do since you do not really
> know what the last instruction is.  In any case you want to test
> it is an conditional jump instruction as in the code I quoted
> above.
>
>> @@ -2430,7 +2523,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>>   unsignedp),
>>         GT, NULL_RTX, mode, unsignedp,
>>         label_rtx (node->right->code_label));
>> -      emit_case_nodes (index, node->left, default_label, index_type);
>> +              probability = case_probability (node->right->count,
>> +                                              subtree_count + default_count);
>> +              add_prob_note_to_last_insn (probability);
>> +      emit_case_nodes (index, node->left, default_label, default_count,
>> +                               index_type);
>
> Hmm, here you seem to be distributing the probabilities of the counts reached.
> What happens in the case when the edge probability needs to be distributed across
> nodes of the decision tree. I.e. in
> t(int a)
> {
>   switch (a)
>    {
>      case 100:
>      case 200:
>      case 300:
>      case 400:
>      case 500:
>      case 600:
>      case 700:
>      case 800:
>      case 900:
>        t();
>      case 101:
>      case 202:
>      case 303:
>      case 404:
>      case 505:
>      case 606:
>      case 707:
>      case 808:
>      case 909:
>        q();
>    }
> }

I have fixed this.
>
>>      }
>>
>>    else if (node_is_bounded (node->left, index_type))
>> @@ -2442,7 +2539,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>>   unsignedp),
>>         LT, NULL_RTX, mode, unsignedp,
>>         label_rtx (node->left->code_label));
>> -      emit_case_nodes (index, node->right, default_label, index_type);
>> +              probability = case_probability (node->left->count,
>> +                                              subtree_count + default_count);
>> +              add_prob_note_to_last_insn (probability);
>
> Here, and apparently in all uses of add_prob_note_to_last_insn, you want to use
> the new parameter of emit_cmp_and_jump_insn.
>
> Otherwise the patch seems to make sense.  Thanks for looking into this issue.
>
> Honza

[-- Attachment #2: switch_revised.patch --]
[-- Type: application/octet-stream, Size: 41988 bytes --]

Index: optabs.c
===================================================================
--- optabs.c	(revision 191879)
+++ optabs.c	(working copy)
@@ -4249,7 +4249,7 @@ prepare_operand (enum insn_code icode, rtx x, int
    we can do the branch.  */
 
 static void
-emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label)
+emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label, int prob)
 {
   enum machine_mode optab_mode;
   enum mode_class mclass;
@@ -4261,7 +4261,16 @@ static void
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
-  emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));
+  rtx insn = emit_insn (
+      GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));
+  if (prob != -1
+      && insn
+      && JUMP_P (insn)
+      && any_condjump_p (insn))
+    {
+      gcc_assert (!find_reg_note (insn, REG_BR_PROB, 0));
+      add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
+    }
 }
 
 /* Generate code to compare X with Y so that the condition codes are
@@ -4279,11 +4288,14 @@ static void
 
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).
    It will be potentially converted into an unsigned variant based on
-   UNSIGNEDP to select a proper jump instruction.  */
+   UNSIGNEDP to select a proper jump instruction.
+   
+   PROB is the probability of jumping to LABEL.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 enum machine_mode mode, int unsignedp, rtx label)
+			 enum machine_mode mode, int unsignedp, rtx label,
+                         int prob)
 {
   rtx op0 = x, op1 = y;
   rtx test;
@@ -4307,7 +4319,7 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_co
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label);
+  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
 }
 
 \f
@@ -6943,9 +6955,9 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg
   if (oldval != cmp_reg)
     emit_move_insn (cmp_reg, oldval);
 
-  /* ??? Mark this jump predicted not taken?  */
+  /* Mark this jump predicted not taken.  */
   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
-			   GET_MODE (success), 1, label);
+			   GET_MODE (success), 1, label, 0);
   return true;
 }
 
Index: testsuite/gcc.dg/tree-prof/switch-case-1.c
===================================================================
--- testsuite/gcc.dg/tree-prof/switch-case-1.c	(revision 0)
+++ testsuite/gcc.dg/tree-prof/switch-case-1.c	(revision 0)
@@ -0,0 +1,40 @@
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+int g;
+
+__attribute__((noinline)) void foo (int  n)
+{
+  switch (n)
+    {
+    case 1:
+      g++; break;
+    case 2:
+      g += 2; break;
+    case 3:
+      g += 1; break;
+    case 4:
+      g += 3; break;
+    case 5:
+      g += 4; break;
+    case 6:
+      g += 5; break;
+    case 7:
+      g += 6; break;
+    case 8:
+      g += 7; break;
+    case 9:
+      g += 8; break;
+    default:
+      g += 8; break;
+   }
+}
+
+int main ()
+{
+ int i;
+ for (i = 0; i < 10000; i++)
+   foo ((i * i) % 5);
+ return 0;
+}
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count 4000" 2 "expand"} } */
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count 2000" 1 "expand"} } */
+/* { dg-final-use { cleanup-rtl-dump "expand" } } */
Index: testsuite/gcc.dg/tree-prof/switch-case-2.c
===================================================================
--- testsuite/gcc.dg/tree-prof/switch-case-2.c	(revision 0)
+++ testsuite/gcc.dg/tree-prof/switch-case-2.c	(revision 0)
@@ -0,0 +1,40 @@
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+int g;
+
+__attribute__((noinline)) void foo (int  n)
+{
+  switch (n)
+    {
+    case 99:
+      g += 2; break;
+    case 1:
+      g++; break;
+    case 100:
+      g += 1; break;
+    case 4:
+      g += 3; break;
+    case 5:
+      g += 4; break;
+    case 6:
+      g += 5; break;
+    case 7:
+      g += 6; break;
+    case 8:
+      g += 7; break;
+    case 9:
+      g += 8; break;
+    default:
+      g += 8; break;
+   }
+}
+
+int main ()
+{
+ int i;
+ for (i = 0; i < 10000; i++)
+   foo ((i * i) % 5);
+ return 0;
+}
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count 4000" 2 "expand"} } */
+/* { dg-final-use { scan-rtl-dump-times ";; basic block\[^\\n\]*count 2000" 1 "expand"} } */
+/* { dg-final-use { cleanup-rtl-dump "expand" } } */
Index: dojump.c
===================================================================
--- dojump.c	(revision 191879)
+++ dojump.c	(working copy)
@@ -886,7 +886,6 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rt
 {
   rtx tem;
   rtx dummy_label = NULL_RTX;
-  rtx last;
 
   /* Reverse the comparison if that is safe and we want to jump if it is
      false.  Also convert to the reverse comparison if the target can
@@ -1069,25 +1068,8 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rt
 	    }
 	}
 
-      last = get_last_insn ();
       emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
-			       if_true_label);
-      if (prob != -1 && profile_status != PROFILE_ABSENT)
-	{
-	  for (last = NEXT_INSN (last);
-	       last && NEXT_INSN (last);
-	       last = NEXT_INSN (last))
-	    if (JUMP_P (last))
-	      break;
-	  if (last
-	      && JUMP_P (last)
-	      && ! NEXT_INSN (last)
-	      && any_condjump_p (last))
-	    {
-	      gcc_assert (!find_reg_note (last, REG_BR_PROB, 0));
-	      add_reg_note (last, REG_BR_PROB, GEN_INT (prob));
-	    }
-	}
+			       if_true_label, prob);
     }
 
   if (if_false_label)
Index: expr.c
===================================================================
--- expr.c	(revision 191879)
+++ expr.c	(working copy)
@@ -154,7 +154,7 @@ static rtx do_store_flag (sepops, rtx, enum machin
 #ifdef PUSH_ROUNDING
 static void emit_single_push_insn (enum machine_mode, rtx, tree);
 #endif
-static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
+static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
 static rtx const_vector_from_tree (tree);
 static void write_complex_part (rtx, rtx, bool);
 
@@ -1483,7 +1483,7 @@ emit_block_move_via_loop (rtx x, rtx y, rtx size,
   emit_label (cmp_label);
 
   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
-			   true, top_label);
+			   true, top_label, REG_BR_PROB_BASE * 90 / 100);
 }
 \f
 /* Copy all or part of a value X into registers starting at REGNO.
@@ -10819,10 +10819,14 @@ do_store_flag (sepops ops, rtx target, enum machin
 #endif
 
 /* Attempt to generate a casesi instruction.  Returns 1 if successful,
-   0 otherwise (i.e. if there is no casesi instruction).  */
+   0 otherwise (i.e. if there is no casesi instruction).
+
+   DEFAULT_PROBABILITY is the probability of jumping to the default
+   label.  */
 int
 try_casesi (tree index_type, tree index_expr, tree minval, tree range,
-	    rtx table_label, rtx default_label, rtx fallback_label)
+	    rtx table_label, rtx default_label, rtx fallback_label,
+            int default_probability)
 {
   struct expand_operand ops[5];
   enum machine_mode index_mode = SImode;
@@ -10844,7 +10848,8 @@ try_casesi (tree index_type, tree index_expr, tree
       index = expand_normal (index_expr);
       if (default_label)
         emit_cmp_and_jump_insns (rangertx, index, LTU, NULL_RTX,
-				 omode, 1, default_label);
+				 omode, 1, default_label,
+                                 default_probability);
       /* Now we can safely truncate.  */
       index = convert_to_mode (index_mode, index, 0);
     }
@@ -10890,11 +10895,13 @@ try_casesi (tree index_type, tree index_expr, tree
    TABLE_LABEL is a CODE_LABEL rtx for the table itself.
 
    DEFAULT_LABEL is a CODE_LABEL rtx to jump to if the
-   index value is out of range.  */
+   index value is out of range.
+   DEFAULT_PROBABILITY is the probability of jumping to
+   the default label.  */
 
 static void
 do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
-	      rtx default_label)
+	      rtx default_label, int default_probability)
 {
   rtx temp, vector;
 
@@ -10911,8 +10918,9 @@ do_tablejump (rtx index, enum machine_mode mode, r
 
   if (default_label)
     emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
-			     default_label);
+			     default_label, default_probability);
 
+
   /* If index is in range, it must fit in Pmode.
      Convert to Pmode so we can index with it.  */
   if (mode != Pmode)
@@ -10954,7 +10962,7 @@ do_tablejump (rtx index, enum machine_mode mode, r
 
 int
 try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
-	       rtx table_label, rtx default_label)
+	       rtx table_label, rtx default_label, int default_probability)
 {
   rtx index;
 
@@ -10972,7 +10980,7 @@ try_tablejump (tree index_type, tree index_expr, t
 			       TYPE_MODE (TREE_TYPE (range)),
 			       expand_normal (range),
 			       TYPE_UNSIGNED (TREE_TYPE (range))),
-		table_label, default_label);
+		table_label, default_label, default_probability);
   return 1;
 }
 
Index: cfgbuild.c
===================================================================
--- cfgbuild.c	(revision 191879)
+++ cfgbuild.c	(working copy)
@@ -559,8 +559,11 @@ compute_outgoing_frequencies (basic_block b)
 	  f->count = b->count - e->count;
 	  return;
 	}
+      else
+        {
+          guess_outgoing_edge_probabilities (b);
+        }
     }
-
   if (single_succ_p (b))
     {
       e = single_succ_edge (b);
@@ -568,7 +571,6 @@ compute_outgoing_frequencies (basic_block b)
       e->count = b->count;
       return;
     }
-  guess_outgoing_edge_probabilities (b);
   if (b->count)
     FOR_EACH_EDGE (e, ei, b->succs)
       e->count = ((b->count * e->probability + REG_BR_PROB_BASE / 2)
Index: expr.h
===================================================================
--- expr.h	(revision 191879)
+++ expr.h	(working copy)
@@ -190,7 +190,7 @@ extern int have_sub2_insn (rtx, rtx);
 /* Emit a pair of rtl insns to compare two rtx's and to jump
    to a label if the comparison is true.  */
 extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
-				     enum machine_mode, int, rtx);
+				     enum machine_mode, int, rtx, int prob=-1);
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
@@ -485,8 +485,8 @@ extern void do_compare_rtx_and_jump (rtx, rtx, enu
 				     enum machine_mode, rtx, rtx, rtx, int);
 
 /* Two different ways of generating switch statements.  */
-extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx);
-extern int try_tablejump (tree, tree, tree, tree, rtx, rtx);
+extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, int);
+extern int try_tablejump (tree, tree, tree, tree, rtx, rtx, int);
 
 /* Functions from alias.c */
 #include "alias.h"
Index: except.c
===================================================================
--- except.c	(revision 191879)
+++ except.c	(working copy)
@@ -1161,13 +1161,7 @@ sjlj_emit_function_enter (rtx dispatch_label)
 
       emit_cmp_and_jump_insns (x, const0_rtx, NE, 0,
 			       TYPE_MODE (integer_type_node), 0,
-			       dispatch_label);
-      last = get_last_insn ();
-      if (JUMP_P (last) && any_condjump_p (last))
-	{
-	  gcc_assert (!find_reg_note (last, REG_BR_PROB, 0));
-	  add_reg_note (last, REG_BR_PROB, GEN_INT (REG_BR_PROB_BASE / 100));
-	}
+			       dispatch_label, REG_BR_PROB_BASE / 100);
 #else
       expand_builtin_setjmp_setup (plus_constant (Pmode, XEXP (fc, 0),
 						  sjlj_fc_jbuf_ofs),
Index: stmt.c
===================================================================
--- stmt.c	(revision 191879)
+++ stmt.c	(working copy)
@@ -94,11 +94,15 @@ struct case_node
   tree			low;	/* Lowest index value for this label */
   tree			high;	/* Highest index value for this label */
   tree			code_label; /* Label to jump to when node matches */
+  int                   prob; /* Probability of taking this case.  */
+  /* Probability of reaching subtree rooted at this node */
+  int                   subtree_prob;
 };
 
 typedef struct case_node case_node;
 typedef struct case_node *case_node_ptr;
 
+extern basic_block label_to_block_fn (struct function *, tree);
 \f
 static int n_occurrences (int, const char *);
 static bool tree_conflicts_with_clobbers_p (tree, HARD_REG_SET *);
@@ -112,7 +116,7 @@ static void balance_case_nodes (case_node_ptr *, c
 static int node_has_low_bound (case_node_ptr, tree);
 static int node_has_high_bound (case_node_ptr, tree);
 static int node_is_bounded (case_node_ptr, tree);
-static void emit_case_nodes (rtx, case_node_ptr, rtx, tree);
+static void emit_case_nodes (rtx, case_node_ptr, rtx, int, tree);
 \f
 /* Return the rtx-label that corresponds to a LABEL_DECL,
    creating it if necessary.  */
@@ -1648,23 +1652,29 @@ expand_stack_restore (tree var)
   fixup_args_size_notes (prev, get_last_insn (), 0);
 }
 
-/* Generate code to jump to LABEL if OP0 and OP1 are equal in mode MODE.  */
+/* Generate code to jump to LABEL if OP0 and OP1 are equal in mode MODE. PROB
+   is the probability of jumping to LABEL.  */
 static void
 do_jump_if_equal (enum machine_mode mode, rtx op0, rtx op1, rtx label,
-		  int unsignedp)
+		  int unsignedp, int prob)
 {
+  gcc_assert (prob <= REG_BR_PROB_BASE);
   do_compare_rtx_and_jump (op0, op1, EQ, unsignedp, mode,
-			   NULL_RTX, NULL_RTX, label, -1);
+			   NULL_RTX, NULL_RTX, label, prob);
 }
 \f
 /* Do the insertion of a case label into case_list.  The labels are
    fed to us in descending order from the sorted vector of case labels used
    in the tree part of the middle end.  So the list we construct is
-   sorted in ascending order.  */
+   sorted in ascending order.
+   
+   LABEL is the case label to be inserted. LOW and HIGH are the bounds
+   against which the index is compared to jump to LABEL and PROB is the
+   estimated probability LABEL is reached from the switch statement.  */
 
 static struct case_node *
 add_case_node (struct case_node *head, tree low, tree high,
-               tree label, alloc_pool case_node_pool)
+               tree label, int prob, alloc_pool case_node_pool)
 {
   struct case_node *r;
 
@@ -1677,6 +1687,8 @@ add_case_node (struct case_node *head, tree low, t
   r->high = high;
   r->code_label = label;
   r->parent = r->left = NULL;
+  r->prob = prob;
+  r->subtree_prob = prob;
   r->right = head;
   return r;
 }
@@ -1778,6 +1790,8 @@ expand_switch_as_decision_tree_p (tree range,
 
 /* Generate a decision tree, switching on INDEX_EXPR and jumping to
    one of the labels in CASE_LIST or to the DEFAULT_LABEL.
+   DEFAULT_PROB is the estimated probability that it jumps to
+   DEFAULT_LABEL.
    
    We generate a binary decision tree to select the appropriate target
    code.  This is done as follows:
@@ -1803,7 +1817,8 @@ expand_switch_as_decision_tree_p (tree range,
 
 static void
 emit_case_decision_tree (tree index_expr, tree index_type,
-			 struct case_node *case_list, rtx default_label)
+			 struct case_node *case_list, rtx default_label,
+                         int default_prob)
 {
   rtx index = expand_normal (index_expr);
 
@@ -1839,15 +1854,47 @@ emit_case_decision_tree (tree index_expr, tree ind
       dump_case_nodes (dump_file, case_list, indent_step, 0);
     }
 
-  emit_case_nodes (index, case_list, default_label, index_type);
+  emit_case_nodes (index, case_list, default_label, default_prob, index_type);
   if (default_label)
     emit_jump (default_label);
 }
 
+/* Return the sum of probabilities of outgoing edges of basic block BB.  */
+
+static int
+get_outgoing_edge_probs (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+  int prob_sum = 0;
+  FOR_EACH_EDGE(e, ei, bb->succs)
+    prob_sum += e->probability;
+  return prob_sum;
+}
+
+/* Computes the conditional probability of jumping to a target if the branch
+   instruction is executed.
+   TARGET_PROB is the estimated probability of jumping to a target relative
+   to some basic block BB.
+   BASE_PROB is the probability of reaching the branch instruction relative
+   to the same basic block BB.  */
+
+static inline int
+conditional_probability (int target_prob, int base_prob)
+{
+  if (base_prob > 0)
+    {
+      gcc_assert (target_prob >= 0);
+      gcc_assert (target_prob <= base_prob);
+      return RDIV (target_prob * REG_BR_PROB_BASE, base_prob);
+    }
+  return -1;
+}
+
 /* Generate a dispatch tabler, switching on INDEX_EXPR and jumping to
    one of the labels in CASE_LIST or to the DEFAULT_LABEL.
    MINVAL, MAXVAL, and RANGE are the extrema and range of the case
-   labels in CASE_LIST.
+   labels in CASE_LIST. STMT_BB is the basic block containing the statement.
 
    First, a jump insn is emitted.  First we try "casesi".  If that
    fails, try "tablejump".   A target *must* have one of them (or both).
@@ -1860,19 +1907,27 @@ emit_case_decision_tree (tree index_expr, tree ind
 static void
 emit_case_dispatch_table (tree index_expr, tree index_type,
 			  struct case_node *case_list, rtx default_label,
-			  tree minval, tree maxval, tree range)
+			  tree minval, tree maxval, tree range,
+                          basic_block stmt_bb)
 {
   int i, ncases;
   struct case_node *n;
   rtx *labelvec;
   rtx fallback_label = label_rtx (case_list->code_label);
   rtx table_label = gen_label_rtx ();
+  bool has_gaps = false;
+  edge default_edge = EDGE_SUCC(stmt_bb, 0);
+  int default_prob = default_edge->probability;
+  int base = get_outgoing_edge_probs (stmt_bb);
+  bool try_with_tablejump = false;
 
+  int new_default_prob = conditional_probability (default_prob,
+                                                  base);
+
   if (! try_casesi (index_type, index_expr, minval, range,
-		    table_label, default_label, fallback_label))
+		    table_label, default_label, fallback_label,
+                    new_default_prob))
     {
-      bool ok;
-
       /* Index jumptables from zero for suitable values of minval to avoid
 	 a subtraction.  For the rationale see:
 	 "http://gcc.gnu.org/ml/gcc-patches/2001-10/msg01234.html".  */
@@ -1882,11 +1937,9 @@ emit_case_dispatch_table (tree index_expr, tree in
 	{
 	  minval = build_int_cst (index_type, 0);
 	  range = maxval;
+          has_gaps = true;
 	}
-
-      ok = try_tablejump (index_type, index_expr, minval, range,
-			  table_label, default_label);
-      gcc_assert (ok);
+      try_with_tablejump = true;
     }
 
   /* Get table of labels to jump to, in order of case index.  */
@@ -1921,8 +1974,48 @@ emit_case_dispatch_table (tree index_expr, tree in
     default_label = fallback_label;
   for (i = 0; i < ncases; i++)
     if (labelvec[i] == 0)
-      labelvec[i] = gen_rtx_LABEL_REF (Pmode, default_label);
+      {
+        has_gaps = true;
+        labelvec[i] = gen_rtx_LABEL_REF (Pmode, default_label);
+      }
 
+  if (has_gaps)
+    {
+      /* There is at least one entry in the jump table that jumps
+         to default label. The default label can either be reached
+         through the indirect jump or the direct conditional jump
+         before that. Split the probability of reaching the
+         default label among these two jumps.  */
+      new_default_prob = conditional_probability (default_prob/2,
+                                                  base);
+      default_prob /= 2;
+      base -= default_prob;
+    }
+  else
+    {
+      base -= default_prob;
+      default_prob = 0;
+    }
+
+  default_edge->probability = default_prob;
+
+  /* We have altered the probability of the default edge. So the probabilities
+     of all other edges need to be adjusted so that it sums up to
+     REG_BR_PROB_BASE.  */
+  if (base)
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
+        e->probability = RDIV (e->probability * REG_BR_PROB_BASE,  base);
+    }
+
+  if (try_with_tablejump)
+    {
+      bool ok = try_tablejump (index_type, index_expr, minval, range,
+                               table_label, default_label, new_default_prob);
+      gcc_assert (ok);
+    }
   /* Output the table.  */
   emit_label (table_label);
 
@@ -1939,6 +2032,30 @@ emit_case_dispatch_table (tree index_expr, tree in
   emit_barrier ();
 }
 
+static inline void
+reset_out_edges_aux (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE(e, ei, bb->succs)
+    e->aux = (void *)0;
+}
+static inline void
+compute_cases_per_edge (gimple stmt)
+{
+  basic_block bb = gimple_bb (stmt);
+  reset_out_edges_aux (bb);
+  int ncases = gimple_switch_num_labels (stmt);
+  for (int i = ncases - 1; i >= 1; --i)
+    {
+      tree elt = gimple_switch_label (stmt, i);
+      tree lab = CASE_LABEL (elt);
+      basic_block case_bb = label_to_block_fn (cfun, lab);
+      edge case_edge = find_edge (bb, case_bb);
+      case_edge->aux = (void *)((long)(case_edge->aux) + 1);
+    }
+}
+
 /* Terminate a case (Pascal/Ada) or switch (C) statement
    in which ORIG_INDEX is the expression to be tested.
    If ORIG_TYPE is not NULL, it is the original ORIG_INDEX
@@ -1956,6 +2073,7 @@ expand_case (gimple stmt)
   tree index_expr = gimple_switch_index (stmt);
   tree index_type = TREE_TYPE (index_expr);
   tree elt;
+  basic_block bb = gimple_bb (stmt);
 
   /* A list of case labels; it is first built as a list and it may then
      be rearranged into a nearly balanced binary tree.  */
@@ -1981,6 +2099,8 @@ expand_case (gimple stmt)
 
   /* Find the default case target label.  */
   default_label = label_rtx (CASE_LABEL (gimple_switch_default_label (stmt)));
+  edge default_edge = EDGE_SUCC(bb, 0);
+  int default_prob = default_edge->probability;
 
   /* Get upper and lower bounds of case values.  */
   elt = gimple_switch_label (stmt, 1);
@@ -1999,7 +2119,9 @@ expand_case (gimple stmt)
   uniq = 0;
   count = 0;
   struct pointer_set_t *seen_labels = pointer_set_create ();
-  for (i = gimple_switch_num_labels (stmt) - 1; i >= 1; --i)
+  compute_cases_per_edge (stmt);
+
+  for (i = ncases - 1; i >= 1; --i)
     {
       elt = gimple_switch_label (stmt, i);
       tree low = CASE_LOW (elt);
@@ -2041,10 +2163,15 @@ expand_case (gimple stmt)
 				   TREE_INT_CST_LOW (high),
 				   TREE_INT_CST_HIGH (high));
 
-      case_list = add_case_node (case_list, low, high, lab,
-				 case_node_pool);
+      basic_block case_bb = label_to_block_fn (cfun, lab);
+      edge case_edge = find_edge (bb, case_bb);
+      case_list = add_case_node (
+          case_list, low, high, lab,
+          case_edge->probability / (long)(case_edge->aux),
+          case_node_pool);
     }
   pointer_set_destroy (seen_labels);
+  reset_out_edges_aux (bb);
 
   /* cleanup_tree_cfg removes all SWITCH_EXPR with a single
      destination, such as one with a default case only.
@@ -2060,11 +2187,12 @@ expand_case (gimple stmt)
 
   if (expand_switch_as_decision_tree_p (range, uniq, count))
     emit_case_decision_tree (index_expr, index_type,
-			     case_list, default_label);
+                             case_list, default_label,
+                             default_prob);
   else
     emit_case_dispatch_table (index_expr, index_type,
 			      case_list, default_label,
-			      minval, maxval, range);
+			      minval, maxval, range, bb);
 
   reorder_insns (NEXT_INSN (before_case), get_last_insn (), before_case);
 
@@ -2126,7 +2254,7 @@ expand_sjlj_dispatch_table (rtx dispatch_index,
         {
 	  tree elt = VEC_index (tree, dispatch_table, i);
 	  rtx lab = label_rtx (CASE_LABEL (elt));
-	  do_jump_if_equal (index_mode, index, zero, lab, 0);
+	  do_jump_if_equal (index_mode, index, zero, lab, 0, -1);
 	  force_expand_binop (index_mode, sub_optab,
 			      index, CONST1_RTX (index_mode),
 			      index, 0, OPTAB_DIRECT);
@@ -2150,12 +2278,12 @@ expand_sjlj_dispatch_table (rtx dispatch_index,
 	  tree elt = VEC_index (tree, dispatch_table, i);
 	  tree low = CASE_LOW (elt);
 	  tree lab = CASE_LABEL (elt);
-	  case_list = add_case_node (case_list, low, low, lab, case_node_pool);
+	  case_list = add_case_node (case_list, low, low, lab, 0, case_node_pool);
 	}
 
       emit_case_dispatch_table (index_expr, index_type,
 				case_list, default_label,
-				minval, maxval, range);
+				minval, maxval, range, NULL);
       emit_label (default_label);
       free_alloc_pool (case_node_pool);
     }
@@ -2237,6 +2365,9 @@ balance_case_nodes (case_node_ptr *head, case_node
 	  /* Optimize each of the two split parts.  */
 	  balance_case_nodes (&np->left, np);
 	  balance_case_nodes (&np->right, np);
+          np->subtree_prob = np->prob;
+          np->subtree_prob += np->left->subtree_prob;
+          np->subtree_prob += np->right->subtree_prob;
 	}
       else
 	{
@@ -2244,8 +2375,12 @@ balance_case_nodes (case_node_ptr *head, case_node
 	     but fill in `parent' fields.  */
 	  np = *head;
 	  np->parent = parent;
+          np->subtree_prob = np->prob;
 	  for (; np->right; np = np->right)
-	    np->right->parent = np;
+            {
+	      np->right->parent = np;
+              (*head)->subtree_prob += np->right->subtree_prob;
+            }
 	}
     }
 }
@@ -2358,6 +2493,7 @@ node_is_bounded (case_node_ptr node, tree index_ty
 	  && node_has_high_bound (node, index_type));
 }
 \f
+
 /* Emit step-by-step code to select a case for the value of INDEX.
    The thus generated decision tree follows the form of the
    case-node binary tree NODE, whose nodes represent test conditions.
@@ -2386,10 +2522,12 @@ node_is_bounded (case_node_ptr node, tree index_ty
 
 static void
 emit_case_nodes (rtx index, case_node_ptr node, rtx default_label,
-		 tree index_type)
+		 int default_prob, tree index_type)
 {
   /* If INDEX has an unsigned type, we must make unsigned branches.  */
   int unsignedp = TYPE_UNSIGNED (index_type);
+  int probability;
+  int prob = node->prob, subtree_prob = node->subtree_prob;
   enum machine_mode mode = GET_MODE (index);
   enum machine_mode imode = TYPE_MODE (index_type);
 
@@ -2404,15 +2542,17 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 
   else if (tree_int_cst_equal (node->low, node->high))
     {
+      probability = conditional_probability (prob, subtree_prob + default_prob);
       /* Node is single valued.  First see if the index expression matches
 	 this node and then check our children, if any.  */
-
       do_jump_if_equal (mode, index,
 			convert_modes (mode, imode,
 				       expand_normal (node->low),
 				       unsignedp),
-			label_rtx (node->code_label), unsignedp);
-
+			label_rtx (node->code_label), unsignedp, probability);
+      /* Since this case is taken at this point, reduce its weight from
+         subtree_weight.  */
+      subtree_prob -= prob;
       if (node->right != 0 && node->left != 0)
 	{
 	  /* This node has children on both sides.
@@ -2423,26 +2563,35 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 
 	  if (node_is_bounded (node->right, index_type))
 	    {
+              probability = conditional_probability (
+                  node->right->prob,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
 				       (mode, imode,
 					expand_normal (node->high),
 					unsignedp),
 				       GT, NULL_RTX, mode, unsignedp,
-				       label_rtx (node->right->code_label));
-	      emit_case_nodes (index, node->left, default_label, index_type);
+				       label_rtx (node->right->code_label),
+                                       probability);
+	      emit_case_nodes (index, node->left, default_label, default_prob,
+                               index_type);
 	    }
 
 	  else if (node_is_bounded (node->left, index_type))
 	    {
+              probability = conditional_probability (
+                  node->left->prob,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
 				       (mode, imode,
 					expand_normal (node->high),
 					unsignedp),
 				       LT, NULL_RTX, mode, unsignedp,
-				       label_rtx (node->left->code_label));
-	      emit_case_nodes (index, node->right, default_label, index_type);
+				       label_rtx (node->left->code_label),
+                                       probability);
+	      emit_case_nodes (index, node->right, default_label, default_prob, index_type);
 	    }
 
 	  /* If both children are single-valued cases with no
@@ -2460,21 +2609,27 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 
 	      /* See if the value matches what the right hand side
 		 wants.  */
+              probability = conditional_probability (
+                  node->right->prob,
+                  subtree_prob + default_prob);
 	      do_jump_if_equal (mode, index,
 				convert_modes (mode, imode,
 					       expand_normal (node->right->low),
 					       unsignedp),
 				label_rtx (node->right->code_label),
-				unsignedp);
+				unsignedp, probability);
 
 	      /* See if the value matches what the left hand side
 		 wants.  */
+              probability = conditional_probability (
+                  node->left->prob,
+                  subtree_prob + default_prob);
 	      do_jump_if_equal (mode, index,
 				convert_modes (mode, imode,
 					       expand_normal (node->left->low),
 					       unsignedp),
 				label_rtx (node->left->code_label),
-				unsignedp);
+				unsignedp, probability);
 	    }
 
 	  else
@@ -2486,6 +2641,12 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 		= build_decl (curr_insn_location (),
 			      LABEL_DECL, NULL_TREE, NULL_TREE);
 
+              /* The default label could be reached either through the right
+                 subtree or the left subtree. Divide the probability
+                 equally.  */
+              probability = conditional_probability (
+                  node->right->subtree_prob + default_prob/2,
+                  subtree_prob + default_prob);
 	      /* See if the value is on the right.  */
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
@@ -2493,11 +2654,13 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 					expand_normal (node->high),
 					unsignedp),
 				       GT, NULL_RTX, mode, unsignedp,
-				       label_rtx (test_label));
+				       label_rtx (test_label),
+                                       probability);
+              default_prob /= 2;
 
 	      /* Value must be on the left.
 		 Handle the left-hand subtree.  */
-	      emit_case_nodes (index, node->left, default_label, index_type);
+	      emit_case_nodes (index, node->left, default_label, default_prob, index_type);
 	      /* If left-hand subtree does nothing,
 		 go to default.  */
 	      if (default_label)
@@ -2505,7 +2668,7 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 
 	      /* Code branches here for the right-hand subtree.  */
 	      expand_label (test_label);
-	      emit_case_nodes (index, node->right, default_label, index_type);
+	      emit_case_nodes (index, node->right, default_label, default_prob, index_type);
 	    }
 	}
 
@@ -2523,28 +2686,38 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 	    {
 	      if (!node_has_low_bound (node, index_type))
 		{
+                  probability = conditional_probability (
+                      default_prob/2,
+                      subtree_prob + default_prob);
 		  emit_cmp_and_jump_insns (index,
 					   convert_modes
 					   (mode, imode,
 					    expand_normal (node->high),
 					    unsignedp),
 					   LT, NULL_RTX, mode, unsignedp,
-					   default_label);
+					   default_label,
+                                           probability);
+                  default_prob /= 2;
 		}
 
-	      emit_case_nodes (index, node->right, default_label, index_type);
+	      emit_case_nodes (index, node->right, default_label, default_prob, index_type);
 	    }
 	  else
-	    /* We cannot process node->right normally
-	       since we haven't ruled out the numbers less than
-	       this node's value.  So handle node->right explicitly.  */
-	    do_jump_if_equal (mode, index,
-			      convert_modes
-			      (mode, imode,
-			       expand_normal (node->right->low),
-			       unsignedp),
-			      label_rtx (node->right->code_label), unsignedp);
-	}
+            {
+              probability = conditional_probability (
+                  node->right->subtree_prob,
+                  subtree_prob + default_prob);
+	      /* We cannot process node->right normally
+	         since we haven't ruled out the numbers less than
+	         this node's value.  So handle node->right explicitly.  */
+	      do_jump_if_equal (mode, index,
+			        convert_modes
+			        (mode, imode,
+			         expand_normal (node->right->low),
+			         unsignedp),
+			        label_rtx (node->right->code_label), unsignedp, probability);
+            }
+	  }
 
       else if (node->right == 0 && node->left != 0)
 	{
@@ -2554,27 +2727,38 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 	    {
 	      if (!node_has_high_bound (node, index_type))
 		{
+                  probability = conditional_probability (
+                      default_prob/2,
+                      subtree_prob + default_prob);
 		  emit_cmp_and_jump_insns (index,
 					   convert_modes
 					   (mode, imode,
 					    expand_normal (node->high),
 					    unsignedp),
 					   GT, NULL_RTX, mode, unsignedp,
-					   default_label);
+					   default_label,
+                                           probability);
+                  default_prob /= 2;
 		}
 
-	      emit_case_nodes (index, node->left, default_label, index_type);
+	      emit_case_nodes (index, node->left, default_label,
+                               default_prob, index_type);
 	    }
 	  else
-	    /* We cannot process node->left normally
-	       since we haven't ruled out the numbers less than
-	       this node's value.  So handle node->left explicitly.  */
-	    do_jump_if_equal (mode, index,
-			      convert_modes
-			      (mode, imode,
-			       expand_normal (node->left->low),
-			       unsignedp),
-			      label_rtx (node->left->code_label), unsignedp);
+            {
+              probability = conditional_probability (
+                  node->left->subtree_prob,
+                  subtree_prob + default_prob);
+	      /* We cannot process node->left normally
+	         since we haven't ruled out the numbers less than
+	         this node's value.  So handle node->left explicitly.  */
+	      do_jump_if_equal (mode, index,
+			        convert_modes
+			        (mode, imode,
+			         expand_normal (node->left->low),
+			         unsignedp),
+			        label_rtx (node->left->code_label), unsignedp, probability);
+            }
 	}
     }
   else
@@ -2593,15 +2777,21 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 	  tree test_label = 0;
 
 	  if (node_is_bounded (node->right, index_type))
-	    /* Right hand node is fully bounded so we can eliminate any
-	       testing and branch directly to the target code.  */
-	    emit_cmp_and_jump_insns (index,
-				     convert_modes
-				     (mode, imode,
-				      expand_normal (node->high),
-				      unsignedp),
-				     GT, NULL_RTX, mode, unsignedp,
-				     label_rtx (node->right->code_label));
+            {
+	      /* Right hand node is fully bounded so we can eliminate any
+	         testing and branch directly to the target code.  */
+              probability = conditional_probability (
+                  node->right->subtree_prob,
+                  subtree_prob + default_prob);
+	      emit_cmp_and_jump_insns (index,
+				       convert_modes
+				       (mode, imode,
+				        expand_normal (node->high),
+				        unsignedp),
+				       GT, NULL_RTX, mode, unsignedp,
+				       label_rtx (node->right->code_label),
+                                       probability);
+            }
 	  else
 	    {
 	      /* Right hand node requires testing.
@@ -2609,27 +2799,36 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 
 	      test_label = build_decl (curr_insn_location (),
 				       LABEL_DECL, NULL_TREE, NULL_TREE);
+              probability = conditional_probability (
+                  node->right->subtree_prob + default_prob/2,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
 				       (mode, imode,
 					expand_normal (node->high),
 					unsignedp),
 				       GT, NULL_RTX, mode, unsignedp,
-				       label_rtx (test_label));
+				       label_rtx (test_label),
+                                       probability);
+              default_prob /= 2;
 	    }
 
 	  /* Value belongs to this node or to the left-hand subtree.  */
 
+          probability = conditional_probability (
+              prob,
+              subtree_prob + default_prob);
 	  emit_cmp_and_jump_insns (index,
 				   convert_modes
 				   (mode, imode,
 				    expand_normal (node->low),
 				    unsignedp),
 				   GE, NULL_RTX, mode, unsignedp,
-				   label_rtx (node->code_label));
+				   label_rtx (node->code_label),
+                                   probability);
 
 	  /* Handle the left-hand subtree.  */
-	  emit_case_nodes (index, node->left, default_label, index_type);
+	  emit_case_nodes (index, node->left, default_label, default_prob, index_type);
 
 	  /* If right node had to be handled later, do that now.  */
 
@@ -2641,7 +2840,7 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 		emit_jump (default_label);
 
 	      expand_label (test_label);
-	      emit_case_nodes (index, node->right, default_label, index_type);
+	      emit_case_nodes (index, node->right, default_label, default_prob, index_type);
 	    }
 	}
 
@@ -2651,26 +2850,35 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 	     if they are possible.  */
 	  if (!node_has_low_bound (node, index_type))
 	    {
+              probability = conditional_probability (
+                  default_prob/2,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
 				       (mode, imode,
 					expand_normal (node->low),
 					unsignedp),
 				       LT, NULL_RTX, mode, unsignedp,
-				       default_label);
+				       default_label,
+                                       probability);
+              default_prob /= 2;
 	    }
 
 	  /* Value belongs to this node or to the right-hand subtree.  */
 
+          probability = conditional_probability (
+              prob,
+              subtree_prob + default_prob);
 	  emit_cmp_and_jump_insns (index,
 				   convert_modes
 				   (mode, imode,
 				    expand_normal (node->high),
 				    unsignedp),
 				   LE, NULL_RTX, mode, unsignedp,
-				   label_rtx (node->code_label));
+				   label_rtx (node->code_label),
+                                   probability);
 
-	  emit_case_nodes (index, node->right, default_label, index_type);
+	  emit_case_nodes (index, node->right, default_label, default_prob, index_type);
 	}
 
       else if (node->right == 0 && node->left != 0)
@@ -2679,26 +2887,35 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 	     if they are possible.  */
 	  if (!node_has_high_bound (node, index_type))
 	    {
+              probability = conditional_probability (
+                  default_prob/2,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
 				       (mode, imode,
 					expand_normal (node->high),
 					unsignedp),
 				       GT, NULL_RTX, mode, unsignedp,
-				       default_label);
+				       default_label,
+                                       probability);
+              default_prob /= 2;
 	    }
 
 	  /* Value belongs to this node or to the left-hand subtree.  */
 
+          probability = conditional_probability (
+              prob,
+              subtree_prob + default_prob);
 	  emit_cmp_and_jump_insns (index,
 				   convert_modes
 				   (mode, imode,
 				    expand_normal (node->low),
 				    unsignedp),
 				   GE, NULL_RTX, mode, unsignedp,
-				   label_rtx (node->code_label));
+				   label_rtx (node->code_label),
+                                   probability);
 
-	  emit_case_nodes (index, node->left, default_label, index_type);
+	  emit_case_nodes (index, node->left, default_label, default_prob, index_type);
 	}
 
       else
@@ -2711,24 +2928,32 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 
 	  if (!high_bound && low_bound)
 	    {
+              probability = conditional_probability (
+                  default_prob,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
 				       (mode, imode,
 					expand_normal (node->high),
 					unsignedp),
 				       GT, NULL_RTX, mode, unsignedp,
-				       default_label);
+				       default_label,
+                                       probability);
 	    }
 
 	  else if (!low_bound && high_bound)
 	    {
+              probability = conditional_probability (
+                  default_prob,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (index,
 				       convert_modes
 				       (mode, imode,
 					expand_normal (node->low),
 					unsignedp),
 				       LT, NULL_RTX, mode, unsignedp,
-				       default_label);
+				       default_label,
+                                       probability);
 	    }
 	  else if (!low_bound && !high_bound)
 	    {
@@ -2748,8 +2973,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
 						    high, low),
 				       NULL_RTX, mode, EXPAND_NORMAL);
 
+              probability = conditional_probability (
+                  default_prob,
+                  subtree_prob + default_prob);
 	      emit_cmp_and_jump_insns (new_index, new_bound, GT, NULL_RTX,
-				       mode, 1, default_label);
+				       mode, 1, default_label, probability);
 	    }
 
 	  emit_jump (label_rtx (node->code_label));

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-09  0:46   ` Easwaran Raman
@ 2012-10-14  7:02     ` Easwaran Raman
  2012-10-14 16:22     ` Jan Hubicka
  2012-10-17 16:05     ` Gary Funck
  2 siblings, 0 replies; 15+ messages in thread
From: Easwaran Raman @ 2012-10-14  7:02 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches, Steven Bosscher, David Li

Ping.


On Mon, Oct 8, 2012 at 5:46 PM, Easwaran Raman <eraman@google.com> wrote:
> I have attached a revised patch. The updated ChangeLog is given below
> and I have responded to your comments inline:
>
> 2012-10-08   Easwaran Raman  <eraman@google.com>
> * optabs.c (emit_cmp_and_jump_insn_1): Add a new parameter to
> specificy the probability of taking the jump.
> (emit_cmp_and_jump_insns): Likewise.
> (expand_compare_and_swap_loop): Make the jump predicted not taken.
> * dojump.c (do_compare_rtx_and_jump): Remove the code attaching
> REG_BR_PROB note and pass probability to emit_cmp_and_jump_insns.
> * cfgbuild.c (compute_outgoing_frequencies): Do not guess outgoing
> probabilities for branches with more than two successors.
> * expr.c (emit_block_move_via_loop): Predict the loop backedge loop
> to be highly taken.
> (try_casesi): Pass the probability of jumping to the default label.
> (try_tablejump): Likewise.
> (do_tablejump): Likewise.
> * expr.h (try_tablejump): Add a new parameter.
> (try_casesi): Likewise.
> (emit_cmp_and_jump_insns): Add probability as default parameter with a
> default value of -1.
> * except.c (sjlj_emit_function_enter): Pass probability to
> emit_cmp_and_jump_insns.
> * stmt.c (case_node): Add new fields PROB and SUBTREE_PROB.
> (do_jump_if_equal): Pass probability for REG_BR_PROB note.
> (add_case_node): Pass estimated probability of jumping to the case
> label.
> (emit_case_decision_tree): Pass default_prob to emit_case_nodes.
> (get_outgoing_edge_probs): New function.
> (conditional_probability): Likewise.
> (reset_out_edges_aux): Likewise.
> (compute_cases_per_edge): Likewise.
> (emit_case_dispatch_table): Update probabilities of edges coming out
> of the switch statement.
> (expand_case): Compute and propagate default edge probability to
> emit_case_dispatch_table.
> (expand_sjlj_dispatch_table): Update calls to add_case_node and
> emit_case_dispatch_table.
> (balance_case_nodes): Update subtree_prob values.
> (emit_case_nodes): Compute edge probabilities and add pass them to
> emit_cmp_and_jump_insns.
>
> gcc/testsuite/ChangeLog:
> 2012-10-02   Easwaran Raman  <eraman@google.com>
> * gcc.dg/tree-prof/switch-case-1.c: New test case.
> * gcc.dg/tree-prof/switch-case-2.c: New test case.
>
>
>
>
> On Thu, Oct 4, 2012 at 6:19 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>>> Hi,
>>>  This patch propagates the profile counts during RTL expansion. In
>>> many cases, there is no way to determine the exact count of an edge
>>> generated during the expansion. So this patch uses some simple
>>> heuristics to estimate the edge counts but ensures that the counts of
>>> the basic blocks corresponding to the cases are (nearly the) same as
>>> at the gimple level.
>>>
>>> Bootstrapped and profile-bootstrapped on an x86_64/linux machine. OK for trunk?
>>> Index: gcc/expr.c
>>> ===================================================================
>>> --- gcc/expr.c (revision 191879)
>>> +++ gcc/expr.c (working copy)
>>> @@ -154,7 +154,7 @@ static rtx do_store_flag (sepops, rtx, enum machin
>>>  #ifdef PUSH_ROUNDING
>>>  static void emit_single_push_insn (enum machine_mode, rtx, tree);
>>>  #endif
>>> -static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
>>> +static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
>>>  static rtx const_vector_from_tree (tree);
>>>  static void write_complex_part (rtx, rtx, bool);
>>>
>>> @@ -10894,7 +10894,7 @@ try_casesi (tree index_type, tree index_expr, tree
>>>
>>>  static void
>>>  do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
>>> -      rtx default_label)
>>> +      rtx default_label, int default_probability)
>>
>> Please document default_probability.
> Done.
>
>>>  {
>>>    rtx temp, vector;
>>>
>>> @@ -10910,9 +10910,17 @@ do_tablejump (rtx index, enum machine_mode mode, r
>>>       the maximum value of the range.  */
>>>
>>>    if (default_label)
>>> -    emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
>>> -     default_label);
>>> +    {
>>> +      emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
>>> +       default_label);
>>> +      if (default_probability != -1)
>>> +        {
>>> +          rtx jump_insn = get_last_insn();
>>> +          add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (default_probability));
>>> +        }
>>> +    }
>>
>> dojump already does this kind of logic, but it is bit more cureful:
>>       emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
>>                                if_true_label);
>>       if (prob != -1 && profile_status != PROFILE_ABSENT)
>>         {
>>           for (last = NEXT_INSN (last);
>>                last && NEXT_INSN (last);
>>                last = NEXT_INSN (last))
>>             if (JUMP_P (last))
>>               break;
>>           if (last
>>               && JUMP_P (last)
>>               && ! NEXT_INSN (last)
>>               && any_condjump_p (last))
>>             {
>>               gcc_assert (!find_reg_note (last, REG_BR_PROB, 0));
>>               add_reg_note (last, REG_BR_PROB, GEN_INT (prob));
>>             }
>>         }
>>
>> What about making emit_cmp_and_jump_insns taking the probability argument
>> and moving the code above inside?  Most of other places need updating to
>> propagate probabilities.
>
> Done. Made probability a default parameter with a default parameter of
> -1  and so didn't update all callsites.
>
>> (compare_and_jump_seq in loop-unswitch probably also can be updated)
>>> @@ -10954,7 +10962,7 @@ do_tablejump (rtx index, enum machine_mode mode, r
>>>
>>>  int
>>>  try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
>>> -       rtx table_label, rtx default_label)
>>> +       rtx table_label, rtx default_label, int default_probability)
>>
>> Simiarly here.
>>> Index: gcc/cfgbuild.c
>>> ===================================================================
>>> --- gcc/cfgbuild.c (revision 191879)
>>> +++ gcc/cfgbuild.c (working copy)
>>> @@ -533,6 +533,23 @@ find_bb_boundaries (basic_block bb)
>>>      purge_dead_tablejump_edges (bb, table);
>>>  }
>>>
>>> +/* If there is at least one edge in EDGES with a non-zero count, then
>>> +   compute probabilities based on the existing counts.  */
>>> +
>>> +static bool
>>> +gen_probabilities_from_existing_counts ( VEC(edge,gc) *edges) {
>>> +  edge e;
>>> +  edge_iterator ei;
>>> +  gcov_type count_sum = 0;
>>> +  FOR_EACH_EDGE(e, ei, edges)
>>> +    count_sum += e->count;
>>> +  if (count_sum == 0)
>>> +    return false;
>>> +  FOR_EACH_EDGE(e, ei, edges)
>>> +    e->probability = e->count * REG_BR_PROB_BASE / count_sum;
>>> +  return true;
>>> +}
>>> +
>>>  /*  Assume that frequency of basic block B is known.  Compute frequencies
>>>      and probabilities of outgoing edges.  */
>>>
>>> @@ -560,7 +577,6 @@ compute_outgoing_frequencies (basic_block b)
>>>    return;
>>>   }
>>>      }
>>> -
>>>    if (single_succ_p (b))
>>>      {
>>>        e = single_succ_edge (b);
>>> @@ -568,7 +584,10 @@ compute_outgoing_frequencies (basic_block b)
>>>        e->count = b->count;
>>>        return;
>>>      }
>>> -  guess_outgoing_edge_probabilities (b);
>>> +  else if (!gen_probabilities_from_existing_counts (b->succs)){
>>> +    /* All outgoing edges of B have zero count. Guess probabilities.  */
>>> +    guess_outgoing_edge_probabilities (b);
>>> +  }
>>
>> Hmm, I do not quite follow logic here.
>> basic block B is one of many basic blocks that the original BB was split from.
>> It is possible that B may have some of original edges, but there may be new ones.
>> How you can guess the outgoing probabilitie shere.  Do you have an example?
> I have changed this code. If the bb has >2 edges, I just use the
> probability as it is without guessing. This should work for switch
> statements since the edge probabilities are adjusted in stmt.c. I am
> not sure if there are other cases of multi-way branches that I need to
> worry about here.
>
>> Also gen_probabilities_from_existing_counts could probably also work based
>> on original edge frequencies.
>>> @@ -1664,7 +1668,7 @@ do_jump_if_equal (enum machine_mode mode, rtx op0,
>>>
>>>  static struct case_node *
>>>  add_case_node (struct case_node *head, tree low, tree high,
>>> -               tree label, alloc_pool case_node_pool)
>>> +               tree label, gcov_type count, alloc_pool case_node_pool)
>>
>> Please document  COUNT here.
> Done.
>
>>>  {
>>>    struct case_node *r;
>>>
>>> @@ -1677,6 +1681,8 @@ add_case_node (struct case_node *head, tree low, t
>>>    r->high = high;
>>>    r->code_label = label;
>>>    r->parent = r->left = NULL;
>>> +  r->count = count;
>>> +  r->subtree_count = count;
>>>    r->right = head;
>>>    return r;
>>>  }
>>
>> .. and here
> Done.
>>> @@ -1803,7 +1809,8 @@ expand_switch_as_decision_tree_p (tree range,
>>>
>>>  static void
>>>  emit_case_decision_tree (tree index_expr, tree index_type,
>>> - struct case_node *case_list, rtx default_label)
>>> + struct case_node *case_list, rtx default_label,
>>> +                         gcov_type default_count)
>>>  {
>>>    rtx index = expand_normal (index_expr);
>>>
>>> @@ -1839,15 +1846,29 @@ emit_case_decision_tree (tree index_expr, tree ind
>>>        dump_case_nodes (dump_file, case_list, indent_step, 0);
>>>      }
>>>
>>> -  emit_case_nodes (index, case_list, default_label, index_type);
>>> +  emit_case_nodes (index, case_list, default_label, default_count, index_type);
>>>    if (default_label)
>>>      emit_jump (default_label);
>>>  }
>>>
>>
>> And document functio nhere.
> Done.
>>
>>> +static gcov_type
>>> +get_outgoing_edge_counts (basic_block bb)
>>> +{
>>> +  edge e;
>>> +  edge_iterator ei;
>>> +  gcov_type count_sum = 0;
>>> +  FOR_EACH_EDGE(e, ei, bb->succs)
>>> +    count_sum += e->count;
>>> +  return count_sum;
>>> +}
>>> +
>>> +#define case_probability(x, y) \
>>> +    ((y) ? (gcc_assert (x <= y), (x) * REG_BR_PROB_BASE  / (y))  : -1)
>>
>> You want to use RDIV here for better rounding.  I would make this an inline
>> functions these days.
> Done.
>
>>> +
>>>  /* Generate a dispatch tabler, switching on INDEX_EXPR and jumping to
>>>     one of the labels in CASE_LIST or to the DEFAULT_LABEL.
>>>     MINVAL, MAXVAL, and RANGE are the extrema and range of the case
>>> -   labels in CASE_LIST.
>>> +   labels in CASE_LIST. STMT_BB is the basic block containing the statement.
>>>
>>>     First, a jump insn is emitted.  First we try "casesi".  If that
>>>     fails, try "tablejump".   A target *must* have one of them (or both).
>>> @@ -1860,19 +1881,23 @@ emit_case_decision_tree (tree index_expr, tree ind
>>>  static void
>>>  emit_case_dispatch_table (tree index_expr, tree index_type,
>>>    struct case_node *case_list, rtx default_label,
>>> -  tree minval, tree maxval, tree range)
>>> +  tree minval, tree maxval, tree range,
>>> +                          basic_block stmt_bb)
>>>  {
>>>    int i, ncases;
>>>    struct case_node *n;
>>>    rtx *labelvec;
>>>    rtx fallback_label = label_rtx (case_list->code_label);
>>>    rtx table_label = gen_label_rtx ();
>>> +  bool has_gaps = false;
>>> +  edge default_edge = EDGE_SUCC(stmt_bb, 0);
>>> +  gcov_type default_count = default_edge->count;
>>> +  gcov_type count = get_outgoing_edge_counts (stmt_bb);
>>> +  bool try_with_tablejump = false;
>>
>> Here, I think you want to first decide whether you base expansion on counts
>> (i.e. any counts involved is non-0) or probabilities.  We really want to maintain
>> the profile up-to-date even with guessed profile.  The inconsistencies
>> propagate across CFG and easilly confuse later optimizers.
>> Everywhere you are using count comming from the original statement, you can also use
>> the probability.
>
> I have changed the code to use probabilities.
>
>> The newly produced control flow will be optimized and the edges frequencies will
>> be propagated along.
>>
>>> +
>>> +  default_edge->count = default_count;
>>> +  if (count)
>>> +    {
>>> +      edge e;
>>> +      edge_iterator ei;
>>> +      FOR_EACH_EDGE (e, ei, stmt_bb->succs)
>>> +        e->probability = e->count * REG_BR_PROB_BASE / count;
>>> +    }
>>
>> Hmm, this updates origina BB containing the switch statement?  Of course,
>> modulo roundoff errors, this should hold.  I wonder where did you got the
>> diferences and why do you need this?  You are going to output new control flow
>> and find_many_sub_basic_blocks will recompute all the counts/frequencies inside
>> anyway?
>> Also you want to use RDIV here.
> I need to do this to account for the change in the count of default
> edge. Changed to use RDIV.
>
>>> @@ -2358,6 +2433,20 @@ node_is_bounded (case_node_ptr node, tree index_ty
>>>    && node_has_high_bound (node, index_type));
>>>  }
>>>
>>> +
>>> +/* Attach a REG_BR_PROB note to the last created RTX instruction if
>>> +   PROBABILITY is not -1.  */
>>> +
>>> +static void
>>> +add_prob_note_to_last_insn(int probability)
>>> +{
>>> +  if (probability != -1)
>>> +    {
>>> +      rtx jump_insn = get_last_insn();
>>> +      add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (probability));
>>> +    }
>>> +}
>>
>> Well, I am not sure this is safe thing to do since you do not really
>> know what the last instruction is.  In any case you want to test
>> it is an conditional jump instruction as in the code I quoted
>> above.
>>
>>> @@ -2430,7 +2523,11 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>>>   unsignedp),
>>>         GT, NULL_RTX, mode, unsignedp,
>>>         label_rtx (node->right->code_label));
>>> -      emit_case_nodes (index, node->left, default_label, index_type);
>>> +              probability = case_probability (node->right->count,
>>> +                                              subtree_count + default_count);
>>> +              add_prob_note_to_last_insn (probability);
>>> +      emit_case_nodes (index, node->left, default_label, default_count,
>>> +                               index_type);
>>
>> Hmm, here you seem to be distributing the probabilities of the counts reached.
>> What happens in the case when the edge probability needs to be distributed across
>> nodes of the decision tree. I.e. in
>> t(int a)
>> {
>>   switch (a)
>>    {
>>      case 100:
>>      case 200:
>>      case 300:
>>      case 400:
>>      case 500:
>>      case 600:
>>      case 700:
>>      case 800:
>>      case 900:
>>        t();
>>      case 101:
>>      case 202:
>>      case 303:
>>      case 404:
>>      case 505:
>>      case 606:
>>      case 707:
>>      case 808:
>>      case 909:
>>        q();
>>    }
>> }
>
> I have fixed this.
>>
>>>      }
>>>
>>>    else if (node_is_bounded (node->left, index_type))
>>> @@ -2442,7 +2539,10 @@ emit_case_nodes (rtx index, case_node_ptr node, rt
>>>   unsignedp),
>>>         LT, NULL_RTX, mode, unsignedp,
>>>         label_rtx (node->left->code_label));
>>> -      emit_case_nodes (index, node->right, default_label, index_type);
>>> +              probability = case_probability (node->left->count,
>>> +                                              subtree_count + default_count);
>>> +              add_prob_note_to_last_insn (probability);
>>
>> Here, and apparently in all uses of add_prob_note_to_last_insn, you want to use
>> the new parameter of emit_cmp_and_jump_insn.
>>
>> Otherwise the patch seems to make sense.  Thanks for looking into this issue.
>>
>> Honza

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-09  0:46   ` Easwaran Raman
  2012-10-14  7:02     ` Easwaran Raman
@ 2012-10-14 16:22     ` Jan Hubicka
  2012-10-15 19:08       ` Easwaran Raman
  2012-10-17 16:05     ` Gary Funck
  2 siblings, 1 reply; 15+ messages in thread
From: Jan Hubicka @ 2012-10-14 16:22 UTC (permalink / raw)
  To: Easwaran Raman; +Cc: Jan Hubicka, gcc-patches, Steven Bosscher, David Li

Hi,

Index: optabs.c
===================================================================
--- optabs.c	(revision 191879)
+++ optabs.c	(working copy)
@@ -4249,7 +4249,7 @@ prepare_operand (enum insn_code icode, rtx x, int
    we can do the branch.  */
 
 static void
-emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label)
+emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label, int prob)
 {
   enum machine_mode optab_mode;
   enum mode_class mclass;
@@ -4261,7 +4261,16 @@ static void
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
-  emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));
+  rtx insn = emit_insn (
+      GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));

I think we did not change to style of mixing declaration and code yet.  So
please put declaration ahead.

I think you want to keep emit_jump_insn.  Also do nothing when profile_status
== PROFILE_ABSENT.

Index: cfgbuild.c
===================================================================
--- cfgbuild.c	(revision 191879)
+++ cfgbuild.c	(working copy)
@@ -559,8 +559,11 @@ compute_outgoing_frequencies (basic_block b)
 	  f->count = b->count - e->count;
 	  return;
 	}
+      else
+        {
+          guess_outgoing_edge_probabilities (b);
+        }

Add comment here that we rely on multiway BBs having sane probabilities already.
You still want to do guessing when the edges out are EH. Those also can be many.
Index: expr.h
===================================================================
--- expr.h	(revision 191879)
+++ expr.h	(working copy)
@@ -190,7 +190,7 @@ extern int have_sub2_insn (rtx, rtx);
 /* Emit a pair of rtl insns to compare two rtx's and to jump
    to a label if the comparison is true.  */
 extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
-				     enum machine_mode, int, rtx);
+				     enum machine_mode, int, rtx, int prob=-1);

Hmm, probably first appreance of this C++ construct. I suppose it is OK.
 
+static inline void
+reset_out_edges_aux (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE(e, ei, bb->succs)
+    e->aux = (void *)0;
+}
+static inline void
+compute_cases_per_edge (gimple stmt)
+{
+  basic_block bb = gimple_bb (stmt);
+  reset_out_edges_aux (bb);
+  int ncases = gimple_switch_num_labels (stmt);
+  for (int i = ncases - 1; i >= 1; --i)
+    {
+      tree elt = gimple_switch_label (stmt, i);
+      tree lab = CASE_LABEL (elt);
+      basic_block case_bb = label_to_block_fn (cfun, lab);
+      edge case_edge = find_edge (bb, case_bb);
+      case_edge->aux = (void *)((long)(case_edge->aux) + 1);
+    }
+}

Comments and newlines per coding standard.

With the these changes, the patch is OK

Thanks,
Honza

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-14 16:22     ` Jan Hubicka
@ 2012-10-15 19:08       ` Easwaran Raman
  2012-10-16  0:33         ` Jan Hubicka
  0 siblings, 1 reply; 15+ messages in thread
From: Easwaran Raman @ 2012-10-15 19:08 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: gcc-patches, Steven Bosscher, David Li

On Sun, Oct 14, 2012 at 8:09 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
>
> Index: optabs.c
> ===================================================================
> --- optabs.c    (revision 191879)
> +++ optabs.c    (working copy)
> @@ -4249,7 +4249,7 @@ prepare_operand (enum insn_code icode, rtx x, int
>     we can do the branch.  */
>
>  static void
> -emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label)
> +emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label, int prob)
>  {
>    enum machine_mode optab_mode;
>    enum mode_class mclass;
> @@ -4261,7 +4261,16 @@ static void
>
>    gcc_assert (icode != CODE_FOR_nothing);
>    gcc_assert (insn_operand_matches (icode, 0, test));
> -  emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));
> +  rtx insn = emit_insn (
> +      GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));
>
> I think we did not change to style of mixing declaration and code yet.  So
> please put declaration ahead.
Ok.

>
> I think you want to keep emit_jump_insn.  Also do nothing when profile_status
> == PROFILE_ABSENT.

Why should this be dependent on profile_status? The PROB passed could
also come from static prediction right.

> Index: cfgbuild.c
> ===================================================================
> --- cfgbuild.c  (revision 191879)
> +++ cfgbuild.c  (working copy)
> @@ -559,8 +559,11 @@ compute_outgoing_frequencies (basic_block b)
>           f->count = b->count - e->count;
>           return;
>         }
> +      else
> +        {
> +          guess_outgoing_edge_probabilities (b);
> +        }
>
> Add comment here that we rely on multiway BBs having sane probabilities already.
> You still want to do guessing when the edges out are EH. Those also can be many.
I think this should work:

-  if (single_succ_p (b))
+  else if (single_succ_p (b))
     {
       e = single_succ_edge (b);
       e->probability = REG_BR_PROB_BASE;
       e->count = b->count;
       return;
     }
-  guess_outgoing_edge_probabilities (b);
+  else
+    {
+      /* We rely on BBs with more than two successors to have sane
probabilities
+         and do not guess them here. For BBs terminated by switch statements
+         expanded to jump-table jump, we have done the right thing during
+         expansion. For EH edges, we still guess the probabilities here.  */
+      bool complex_edge = false;
+      FOR_EACH_EDGE (e, ei, b->succs)
+        if (e->flags & EDGE_COMPLEX)
+          {
+            complex_edge = true;
+            break;
+          }
+      if (complex_edge)
+        guess_outgoing_edge_probabilities (b);
+    }
+


> Index: expr.h
> ===================================================================
> --- expr.h      (revision 191879)
> +++ expr.h      (working copy)
> @@ -190,7 +190,7 @@ extern int have_sub2_insn (rtx, rtx);
>  /* Emit a pair of rtl insns to compare two rtx's and to jump
>     to a label if the comparison is true.  */
>  extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
> -                                    enum machine_mode, int, rtx);
> +                                    enum machine_mode, int, rtx, int prob=-1);
>
> Hmm, probably first appreance of this C++ construct. I suppose it is OK.
http://gcc.gnu.org/codingconventions.html#Default says it is ok for POD values.

>
> +static inline void
> +reset_out_edges_aux (basic_block bb)
> +{
> +  edge e;
> +  edge_iterator ei;
> +  FOR_EACH_EDGE(e, ei, bb->succs)
> +    e->aux = (void *)0;
> +}
> +static inline void
> +compute_cases_per_edge (gimple stmt)
> +{
> +  basic_block bb = gimple_bb (stmt);
> +  reset_out_edges_aux (bb);
> +  int ncases = gimple_switch_num_labels (stmt);
> +  for (int i = ncases - 1; i >= 1; --i)
> +    {
> +      tree elt = gimple_switch_label (stmt, i);
> +      tree lab = CASE_LABEL (elt);
> +      basic_block case_bb = label_to_block_fn (cfun, lab);
> +      edge case_edge = find_edge (bb, case_bb);
> +      case_edge->aux = (void *)((long)(case_edge->aux) + 1);
> +    }
> +}
>
> Comments and newlines per coding standard.
Ok.

> With the these changes, the patch is OK

Thanks,
Easwaran
>
> Thanks,
> Honza

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-15 19:08       ` Easwaran Raman
@ 2012-10-16  0:33         ` Jan Hubicka
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Hubicka @ 2012-10-16  0:33 UTC (permalink / raw)
  To: Easwaran Raman; +Cc: Jan Hubicka, gcc-patches, Steven Bosscher, David Li

> On Sun, Oct 14, 2012 at 8:09 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
> > Hi,
> >
> > Index: optabs.c
> > ===================================================================
> > --- optabs.c    (revision 191879)
> > +++ optabs.c    (working copy)
> > @@ -4249,7 +4249,7 @@ prepare_operand (enum insn_code icode, rtx x, int
> >     we can do the branch.  */
> >
> >  static void
> > -emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label)
> > +emit_cmp_and_jump_insn_1 (rtx test, enum machine_mode mode, rtx label, int prob)
> >  {
> >    enum machine_mode optab_mode;
> >    enum mode_class mclass;
> > @@ -4261,7 +4261,16 @@ static void
> >
> >    gcc_assert (icode != CODE_FOR_nothing);
> >    gcc_assert (insn_operand_matches (icode, 0, test));
> > -  emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));
> > +  rtx insn = emit_insn (
> > +      GEN_FCN (icode) (test, XEXP (test, 0), XEXP (test, 1), label));
> >
> > I think we did not change to style of mixing declaration and code yet.  So
> > please put declaration ahead.
> Ok.
> 
> >
> > I think you want to keep emit_jump_insn.  Also do nothing when profile_status
> > == PROFILE_ABSENT.
> 
> Why should this be dependent on profile_status? The PROB passed could
> also come from static prediction right.

In that case profile_status is PROFILE_GUESSED.
> I think this should work:
> 
> -  if (single_succ_p (b))
> +  else if (single_succ_p (b))
>      {
>        e = single_succ_edge (b);
>        e->probability = REG_BR_PROB_BASE;
>        e->count = b->count;
>        return;
>      }
> -  guess_outgoing_edge_probabilities (b);
> +  else
> +    {
> +      /* We rely on BBs with more than two successors to have sane
> probabilities
> +         and do not guess them here. For BBs terminated by switch statements
> +         expanded to jump-table jump, we have done the right thing during
> +         expansion. For EH edges, we still guess the probabilities here.  */
> +      bool complex_edge = false;
> +      FOR_EACH_EDGE (e, ei, b->succs)
> +        if (e->flags & EDGE_COMPLEX)
> +          {
> +            complex_edge = true;
> +            break;
> +          }
> +      if (complex_edge)
> +        guess_outgoing_edge_probabilities (b);
> +    }
> +

 OK.

Honza

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Propagate profile counts during switch expansion
  2012-10-09  0:46   ` Easwaran Raman
  2012-10-14  7:02     ` Easwaran Raman
  2012-10-14 16:22     ` Jan Hubicka
@ 2012-10-17 16:05     ` Gary Funck
  2 siblings, 0 replies; 15+ messages in thread
From: Gary Funck @ 2012-10-17 16:05 UTC (permalink / raw)
  To: Easwaran Raman; +Cc: Jan Hubicka, gcc-patches, Steven Bosscher, David Li

On 10/08/12 17:46:03, Easwaran Raman wrote:
> I have attached a revised patch. The updated ChangeLog is given below
> and I have responded to your comments inline:
> 
> 2012-10-08   Easwaran Raman  <eraman@google.com>
> * optabs.c (emit_cmp_and_jump_insn_1): Add a new parameter to
> specificy the probability of taking the jump.

Typo above: specificy
 
> * except.c (sjlj_emit_function_enter): Pass probability to
> emit_cmp_and_jump_insns.

On some targets (e. g., IA64), the following patch leads to an unused
variable warning-as-error:

> Index: except.c
> ===================================================================
> --- except.c	(revision 191879)
> +++ except.c	(working copy)
> @@ -1161,13 +1161,7 @@ sjlj_emit_function_enter (rtx dispatch_label)
>  
>        emit_cmp_and_jump_insns (x, const0_rtx, NE, 0,
>  			       TYPE_MODE (integer_type_node), 0,
> -			       dispatch_label);
> -      last = get_last_insn ();
> -      if (JUMP_P (last) && any_condjump_p (last))
> -	{
> -	  gcc_assert (!find_reg_note (last, REG_BR_PROB, 0));
> -	  add_reg_note (last, REG_BR_PROB, GEN_INT (REG_BR_PROB_BASE / 100));
> -	}
> +			       dispatch_label, REG_BR_PROB_BASE / 100);
>  #else
>        expand_builtin_setjmp_setup (plus_constant (Pmode, XEXP (fc, 0),
>  						  sjlj_fc_jbuf_ofs),


[...] gcc-trunk/src/gcc/except.c:1156:14: error: unused variable ‘last’
[-Werror=unused-variable]
       rtx x, last;
              ^
cc1plus: all warnings being treated as errors

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2012-10-17 15:38 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-03  1:09 Propagate profile counts during switch expansion Easwaran Raman
2012-10-03 16:12 ` Xinliang David Li
2012-10-03 17:38   ` Steven Bosscher
2012-10-03 18:02     ` Xinliang David Li
2012-10-03 18:50       ` Jan Hubicka
2012-10-04 13:19 ` Jan Hubicka
2012-10-04 20:47   ` Easwaran Raman
2012-10-05 22:15     ` Easwaran Raman
2012-10-06 11:19       ` Jan Hubicka
2012-10-09  0:46   ` Easwaran Raman
2012-10-14  7:02     ` Easwaran Raman
2012-10-14 16:22     ` Jan Hubicka
2012-10-15 19:08       ` Easwaran Raman
2012-10-16  0:33         ` Jan Hubicka
2012-10-17 16:05     ` Gary Funck

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).