public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6086] Fix loop split incorrect count and probability
@ 2021-12-21  3:50 Xiong Hu Luo
  0 siblings, 0 replies; only message in thread
From: Xiong Hu Luo @ 2021-12-21  3:50 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:cd5ae148c47c6dee05adb19acd6a523f7187be7f

commit r12-6086-gcd5ae148c47c6dee05adb19acd6a523f7187be7f
Author: Xionghu Luo <luoxhu@linux.ibm.com>
Date:   Tue Dec 7 23:17:51 2021 -0600

    Fix loop split incorrect count and probability
    
    In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two
    kind of split. split_loop only works for single loop and insert edge at
    exit when split, while split_loop_on_cond is not limited to single loop
    and insert edge at latch when split.  Both split behavior should consider
    loop count and probability update.  For split_loop, loop split condition
    is moved in front of loop1 and loop2; But split_loop_on_cond moves the
    condition between loop1 and loop2, this patch does:
     1) profile count proportion for both original loop and copied loop
    without dropping down the true branch's count;
     2) probability update in the two loops and between the two loops.
    
    Regression tested pass.
    
    Changes diff for split_loop and split_loop_on_cond cases:
    
    1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit
    ...
       <bb 2> [local count: 118111600]:
       if (beg_5(D) < end_8(D))
         goto <bb 14>; [89.00%]
       else
         goto <bb 6>; [11.00%]
    
       <bb 14> [local count: 105119324]:
       if (beg2_6(D) < c_9(D))
    -    goto <bb 15>; [100.00%]
    +    goto <bb 15>; [33.00%]
       else
    -    goto <bb 16>; [100.00%]
    +    goto <bb 16>; [67.00%]
    
    -  <bb 15> [local count: 105119324]:
    +  <bb 15> [local count: 34689377]:
       _25 = beg_5(D) + 1;
       _26 = end_8(D) - beg_5(D);
       _27 = beg2_6(D) + _26;
       _28 = MIN_EXPR <c_9(D), _27>;
    
    -  <bb 3> [local count: 955630225]:
    +  <bb 3> [local count: 315357973]:
       # i_16 = PHI <i_11(8), beg_5(D)(15)>
       # j_17 = PHI <j_12(8), beg2_6(D)(15)>
       printf ("a: %d %d\n", i_16, j_17);
       i_11 = i_16 + 1;
       j_12 = j_17 + 1;
       if (j_12 < _28)
    -    goto <bb 8>; [89.00%]
    +    goto <bb 8>; [29.37%]
       else
    -    goto <bb 17>; [11.00%]
    +    goto <bb 17>; [70.63%]
    
    -  <bb 8> [local count: 850510901]:
    +  <bb 8> [local count: 280668596]:
       goto <bb 3>; [100.00%]
    
    -  <bb 16> [local count: 105119324]:
    +  <bb 16> [local count: 70429947]:
       # i_22 = PHI <beg_5(D)(14), i_29(17)>
       # j_23 = PHI <beg2_6(D)(14), j_30(17)>
    
       <bb 10> [local count: 955630225]:
       # i_2 = PHI <i_22(16), i_20(13)>
       # j_1 = PHI <j_23(16), j_21(13)>
       i_20 = i_2 + 1;
       j_21 = j_1 + 1;
       if (end_8(D) > i_20)
    -    goto <bb 13>; [89.00%]
    +    goto <bb 13>; [59.63%]
       else
    -    goto <bb 9>; [11.00%]
    +    goto <bb 9>; [40.37%]
    
    -  <bb 13> [local count: 850510901]:
    +  <bb 13> [local count: 569842305]:
       goto <bb 10>; [100.00%]
    
       <bb 17> [local count: 105119324]:
       # i_29 = PHI <i_11(3)>
       # j_30 = PHI <j_12(3)>
       if (end_8(D) > i_29)
         goto <bb 16>; [80.00%]
       else
         goto <bb 9>; [20.00%]
    
       <bb 9> [local count: 105119324]:
    
       <bb 6> [local count: 118111600]:
       return 0;
    
     }
       <bb 2> [local count: 118111600]:
    -  if (beg_5(D) < end_8(D))
    +  _1 = end_6(D) - beg_7(D);
    +  j_9 = _1 + beg2_8(D);
    +  if (end_6(D) > beg_7(D))
         goto <bb 14>; [89.00%]
       else
         goto <bb 6>; [11.00%]
    
       <bb 14> [local count: 105119324]:
    -  if (beg2_6(D) < c_9(D))
    -    goto <bb 15>; [100.00%]
    +  if (j_9 >= c_11(D))
    +    goto <bb 15>; [33.00%]
       else
    -    goto <bb 16>; [100.00%]
    +    goto <bb 16>; [67.00%]
    
    -  <bb 15> [local count: 105119324]:
    -  _25 = beg_5(D) + 1;
    -  _26 = end_8(D) - beg_5(D);
    -  _27 = beg2_6(D) + _26;
    -  _28 = MIN_EXPR <c_9(D), _27>;
    -
    -  <bb 3> [local count: 955630225]:
    -  # i_16 = PHI <i_11(8), beg_5(D)(15)>
    -  # j_17 = PHI <j_12(8), beg2_6(D)(15)>
    -  printf ("a: %d %d\n", i_16, j_17);
    -  i_11 = i_16 + 1;
    -  j_12 = j_17 + 1;
    -  if (j_12 < _28)
    -    goto <bb 8>; [89.00%]
    +  <bb 15> [local count: 34689377]:
    +  _27 = end_6(D) + -1;
    +  _28 = beg_7(D) - end_6(D);
    +  _29 = j_9 + _28;
    +  _30 = _29 + 1;
    +  _31 = MAX_EXPR <c_11(D), _30>;
    +
    +  <bb 3> [local count: 315357973]:
    +  # i_18 = PHI <i_13(8), end_6(D)(15)>
    +  # j_19 = PHI <j_14(8), j_9(15)>
    +  printf ("a: %d %d\n", i_18, j_19);
    +  i_13 = i_18 + -1;
    +  j_14 = j_19 + -1;
    +  if (j_14 >= _31)
    +    goto <bb 8>; [29.37%]
       else
    -    goto <bb 17>; [11.00%]
    +    goto <bb 17>; [70.63%]
    
    -  <bb 8> [local count: 850510901]:
    +  <bb 8> [local count: 280668596]:
       goto <bb 3>; [100.00%]
    
    -  <bb 16> [local count: 105119324]:
    -  # i_22 = PHI <beg_5(D)(14), i_29(17)>
    -  # j_23 = PHI <beg2_6(D)(14), j_30(17)>
    +  <bb 16> [local count: 70429947]:
    +  # i_24 = PHI <end_6(D)(14), i_32(17)>
    +  # j_25 = PHI <j_9(14), j_33(17)>
    
       <bb 10> [local count: 955630225]:
    -  # i_2 = PHI <i_22(16), i_20(13)>
    -  # j_1 = PHI <j_23(16), j_21(13)>
    -  i_20 = i_2 + 1;
    -  j_21 = j_1 + 1;
    -  if (end_8(D) > i_20)
    +  # i_3 = PHI <i_24(16), i_22(13)>
    +  # j_2 = PHI <j_25(16), j_23(13)>
    +  i_22 = i_3 + -1;
    +  j_23 = j_2 + -1;
    +  if (beg_7(D) < i_22)
         goto <bb 13>; [89.00%]
       else
         goto <bb 9>; [11.00%]
    
    -  <bb 13> [local count: 850510901]:
    +  <bb 13> [local count: 569842305]:
       goto <bb 10>; [100.00%]
    
       <bb 17> [local count: 105119324]:
    -  # i_29 = PHI <i_11(3)>
    -  # j_30 = PHI <j_12(3)>
    -  if (end_8(D) > i_29)
    +  # i_32 = PHI <i_13(3)>
    +  # j_33 = PHI <j_14(3)>
    +  if (beg_7(D) < i_32)
         goto <bb 16>; [80.00%]
       else
         goto <bb 9>; [20.00%]
    
       <bb 9> [local count: 105119324]:
    
       <bb 6> [local count: 118111600]:
       return 0;
    
     }
    
    2) diff base/loop-cond-split-1.c.151t.lsplit  patched/loop-cond-split-1.c.151t.lsplit:
    ...
       <bb 2> [local count: 118111600]:
       if (n_7(D) > 0)
         goto <bb 4>; [89.00%]
       else
         goto <bb 3>; [11.00%]
    
       <bb 3> [local count: 118111600]:
       return;
    
       <bb 4> [local count: 105119324]:
       pretmp_3 = ga;
    
    -  <bb 5> [local count: 955630225]:
    +  <bb 5> [local count: 315357973]:
       # i_13 = PHI <i_10(20), 0(4)>
       # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
       if (prephitmp_12 != 0)
         goto <bb 6>; [33.00%]
       else
         goto <bb 7>; [67.00%]
    
       <bb 6> [local count: 315357972]:
       _2 = do_something ();
       ga = _2;
    
    -  <bb 7> [local count: 955630225]:
    +  <bb 7> [local count: 315357973]:
       # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
       i_10 = inc (i_13);
       if (n_7(D) > i_10)
         goto <bb 21>; [89.00%]
       else
         goto <bb 11>; [11.00%]
    
       <bb 11> [local count: 105119324]:
       goto <bb 3>; [100.00%]
    
    -  <bb 21> [local count: 850510901]:
    +  <bb 21> [local count: 280668596]:
       if (prephitmp_12 != 0)
    -    goto <bb 20>; [100.00%]
    +    goto <bb 20>; [33.00%]
       else
    -    goto <bb 19>; [INV]
    +    goto <bb 19>; [67.00%]
    
    -  <bb 20> [local count: 850510901]:
    +  <bb 20> [local count: 280668596]:
       goto <bb 5>; [100.00%]
    
    -  <bb 19> [count: 0]:
    +  <bb 19> [local count: 70429947]:
       # i_23 = PHI <i_10(21)>
       # prephitmp_25 = PHI <prephitmp_5(21)>
    
    -  <bb 12> [local count: 955630225]:
    +  <bb 12> [local count: 640272252]:
       # i_15 = PHI <i_23(19), i_22(16)>
       # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
       i_22 = inc (i_15);
       if (n_7(D) > i_22)
         goto <bb 16>; [89.00%]
       else
         goto <bb 11>; [11.00%]
    
    -  <bb 16> [local count: 850510901]:
    +  <bb 16> [local count: 569842305]:
       goto <bb 12>; [100.00%]
    
     }
    
    gcc/ChangeLog:
    
    2021-12-21  Xionghu Luo  <luoxhu@linux.ibm.com>
    
            * tree-ssa-loop-split.c (split_loop): Fix incorrect
            profile_count and probability.
            (do_split_loop_on_cond): Likewise.

Diff:
---
 gcc/tree-ssa-loop-split.c | 85 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 77 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3f6ad046623..33128061aab 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -575,7 +575,10 @@ split_loop (class loop *loop1)
 					    stmts2);
 	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
 	if (!initial_true)
-	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
+	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+
+	edge true_edge, false_edge;
+	extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
 
 	/* Now version the loop, placing loop2 after loop1 connecting
 	   them, and fix up SSA form for that.  */
@@ -583,11 +586,11 @@ split_loop (class loop *loop1)
 	basic_block cond_bb;
 
 	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   true);
+					  true_edge->probability,
+					  true_edge->probability.invert (),
+					  profile_probability::always (),
+					  profile_probability::always (),
+					  true);
 	gcc_assert (loop2);
 
 	edge new_e = connect_loops (loop1, loop2);
@@ -607,6 +610,38 @@ split_loop (class loop *loop1)
 	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
 	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
 
+	update_ssa (TODO_update_ssa);
+
+	/* Proportion first loop's bb counts except those dominated by true
+	   branch to avoid drop 1s down.  */
+	basic_block *bbs1, *bbs2;
+	bbs1 = get_loop_body (loop1);
+	unsigned j;
+	for (j = 0; j < loop1->num_nodes; j++)
+	  if (bbs1[j] == loop1->latch
+	      || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
+	    bbs1[j]->count
+	      = bbs1[j]->count.apply_probability (true_edge->probability);
+	free (bbs1);
+
+	/* Fix first loop's exit probability after scaling.  */
+	edge exit_to_latch1 = single_pred_edge (loop1->latch);
+	exit_to_latch1->probability = exit_to_latch1->probability.apply_scale (
+	  true_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
+	single_exit (loop1)->probability
+	  = exit_to_latch1->probability.invert ();
+
+	/* Proportion second loop's bb counts except those dominated by false
+	   branch to avoid drop 1s down.  */
+	basic_block bbi_copy = get_bb_copy (false_edge->dest);
+	bbs2 = get_loop_body (loop2);
+	for (j = 0; j < loop2->num_nodes; j++)
+	  if (bbs2[j] == loop2->latch
+	      || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
+	    bbs2[j]->count = bbs2[j]->count.apply_probability (
+	      true_edge->probability.invert ());
+	free (bbs2);
+
 	/* Finally patch out the two copies of the condition to be always
 	   true/false (or opposite).  */
 	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
@@ -1486,8 +1521,8 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   initialize_original_copy_tables ();
 
   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
-				     profile_probability::always (),
-				     profile_probability::never (),
+				     invar_branch->probability.invert (),
+				     invar_branch->probability,
 				     profile_probability::always (),
 				     profile_probability::always (),
 				     true);
@@ -1535,6 +1570,40 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
      between loop1 and loop2.  */
   connect_loop_phis (loop1, loop2, to_loop2);
 
+  update_ssa (TODO_update_ssa);
+
+  edge true_edge, false_edge, skip_edge1, skip_edge2;
+  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
+
+  /* Proportion first loop's bb counts except those dominated by true
+     branch to avoid drop 1s down.  */
+  skip_edge1 = true_invar ? false_edge : true_edge;
+  skip_edge2 = true_invar ? true_edge : false_edge;
+  basic_block *bbs1, *bbs2;
+  bbs1 = get_loop_body (loop1);
+  unsigned j;
+  for (j = 0; j < loop1->num_nodes; j++)
+    if (bbs1[j] == loop1->latch
+	|| !dominated_by_p (CDI_DOMINATORS, bbs1[j], skip_edge1->dest))
+      bbs1[j]->count
+	= bbs1[j]->count.apply_probability (skip_edge1->probability);
+  free (bbs1);
+
+  /* Fix first loop's exit probability after scaling.  */
+  to_loop1->probability = invar_branch->probability.invert ();
+  to_loop2->probability = invar_branch->probability;
+
+  /* Proportion second loop's bb counts except those dominated by false
+     branch to avoid drop 1s down.  */
+  basic_block bbi_copy = get_bb_copy (skip_edge2->dest);
+  bbs2 = get_loop_body (loop2);
+  for (j = 0; j < loop2->num_nodes; j++)
+    if (bbs2[j] == loop2->latch
+	|| !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
+      bbs2[j]->count
+	= bbs2[j]->count.apply_probability (skip_edge2->probability);
+  free (bbs2);
+
   free_original_copy_tables ();
 
   return true;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-12-21  3:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-21  3:50 [gcc r12-6086] Fix loop split incorrect count and probability Xiong Hu Luo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).