public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Xionghu Luo <luoxhu@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: segher@kernel.crashing.org, dje.gcc@gmail.com,
	wschmidt@linux.ibm.com, guojiufu@linux.ibm.com,
	linkw@gcc.gnu.org, rguenther@suse.de, hubicka@ucw.cz
Subject: Re: [PATCH] Fix loop split incorrect count and probability
Date: Wed, 4 Aug 2021 10:42:05 +0800	[thread overview]
Message-ID: <dbb59d53-4b8a-d07a-3460-7daeba635ec4@linux.ibm.com> (raw)
In-Reply-To: <20210803085813.2217766-1-luoxhu@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 538 bytes --]

I' like to split this patch:

https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576488.html

to two patches:

0001-Fix-loop-split-incorrect-count-and-probability.patch
0002-Don-t-move-cold-code-out-of-loop-by-checking-bb-coun.patch

since they are solving two different things, please help to review
the attached series.  They show obvious performance improvement on
both P8 and P9 for CPU2017, and I am not sure how it will affect other
platforms like X86 and AArch64, it will be grateful if someone could
try it.  Thanks.


Xionghu

[-- Attachment #2: 0001-Fix-loop-split-incorrect-count-and-probability.patch --]
[-- Type: text/plain, Size: 4898 bytes --]

From 4e1ef5b1f423484a6789750e7cc0cf2e94517f20 Mon Sep 17 00:00:00 2001
From: Xionghu Luo <luoxhu@linux.ibm.com>
Date: Tue, 3 Aug 2021 03:44:14 -0500
Subject: [PATCH 1/2] Fix loop split incorrect count and probability

loop split condition is moved between loop1 and loop2, the split bb's
count and probability should also be duplicated instead of (100% vs INV),
secondly, the original loop1 and loop2 count need be propotional from the
original loop.

Regression tested pass, OK for master?

diff base/loop-cond-split-1.c.151t.lsplit  patched/loop-cond-split-1.c.151t.lsplit:
...
   int prephitmp_16;
   int prephitmp_25;

   <bb 2> [local count: 118111600]:
   if (n_7(D) > 0)
     goto <bb 4>; [89.00%]
   else
     goto <bb 3>; [11.00%]

   <bb 3> [local count: 118111600]:
   return;

   <bb 4> [local count: 105119324]:
   pretmp_3 = ga;

-  <bb 5> [local count: 955630225]:
+  <bb 5> [local count: 315357973]:
   # i_13 = PHI <i_10(20), 0(4)>
   # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
   if (prephitmp_12 != 0)
     goto <bb 6>; [33.00%]
   else
     goto <bb 7>; [67.00%]

-  <bb 6> [local count: 315357972]:
+  <bb 6> [local count: 104068130]:
   _2 = do_something ();
   ga = _2;

-  <bb 7> [local count: 955630225]:
+  <bb 7> [local count: 315357973]:
   # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
   i_10 = inc (i_13);
   if (n_7(D) > i_10)
     goto <bb 21>; [89.00%]
   else
     goto <bb 11>; [11.00%]

   <bb 11> [local count: 105119324]:
   goto <bb 3>; [100.00%]

-  <bb 21> [local count: 850510901]:
+  <bb 21> [local count: 280668596]:
   if (prephitmp_12 != 0)
-    goto <bb 20>; [100.00%]
+    goto <bb 20>; [33.00%]
   else
-    goto <bb 19>; [INV]
+    goto <bb 19>; [67.00%]

-  <bb 20> [local count: 850510901]:
+  <bb 20> [local count: 280668596]:
   goto <bb 5>; [100.00%]

-  <bb 19> [count: 0]:
+  <bb 19> [local count: 70429947]:
   # i_23 = PHI <i_10(21)>
   # prephitmp_25 = PHI <prephitmp_5(21)>

-  <bb 12> [local count: 955630225]:
+  <bb 12> [local count: 640272252]:
   # i_15 = PHI <i_23(19), i_22(16)>
   # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
   i_22 = inc (i_15);
   if (n_7(D) > i_22)
     goto <bb 16>; [89.00%]
   else
     goto <bb 11>; [11.00%]

-  <bb 16> [local count: 850510901]:
+  <bb 16> [local count: 569842305]:
   goto <bb 12>; [100.00%]

 }

gcc/ChangeLog:

	* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
	(do_split_loop_on_cond): Likewise.
---
 gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3f6ad046623..d30782888f3 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -575,7 +575,11 @@ split_loop (class loop *loop1)
 					    stmts2);
 	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
 	if (!initial_true)
-	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
+	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+
+	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
+			   ? EDGE_SUCC (bbs[i], 0)
+			   : EDGE_SUCC (bbs[i], 1);
 
 	/* Now version the loop, placing loop2 after loop1 connecting
 	   them, and fix up SSA form for that.  */
@@ -583,10 +587,10 @@ split_loop (class loop *loop1)
 	basic_block cond_bb;
 
 	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
+					   true_edge->probability,
+					   true_edge->probability.invert (),
+					   true_edge->probability,
+					   true_edge->probability.invert (),
 					   true);
 	gcc_assert (loop2);
 
@@ -1486,10 +1490,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   initialize_original_copy_tables ();
 
   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
-				     profile_probability::always (),
-				     profile_probability::never (),
-				     profile_probability::always (),
-				     profile_probability::always (),
+				     invar_branch->probability.invert (),
+				     invar_branch->probability,
+				     invar_branch->probability.invert (),
+				     invar_branch->probability,
 				     true);
   if (!loop2)
     {
@@ -1530,6 +1534,9 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
 
+  to_loop1->probability = invar_branch->probability.invert ();
+  to_loop2->probability = invar_branch->probability;
+
   /* Due to introduction of a control flow edge from loop1 latch to loop2
      pre-header, we should update PHIs in loop2 to reflect this connection
      between loop1 and loop2.  */
-- 
2.27.0.90.geebb51ba8c


[-- Attachment #3: 0002-Don-t-move-cold-code-out-of-loop-by-checking-bb-coun.patch --]
[-- Type: text/plain, Size: 8766 bytes --]

From b015708a4a3a3cdadb184012174f813c11bcdec2 Mon Sep 17 00:00:00 2001
From: Xiong Hu Luo <luoxhu@linux.ibm.com>
Date: Mon, 5 Jul 2021 03:57:11 -0500
Subject: [PATCH 2/2] Don't move cold code out of loop by checking bb count

There was a patch trying to avoid move cold block out of loop:

https://gcc.gnu.org/pipermail/gcc/2014-November/215551.html

Richard suggested to "never hoist anything from a bb with lower execution
frequency to a bb with higher one in LIM invariantness_dom_walker
before_dom_children".

This patch does this profile count check in both gimple LIM
move_computations_worker and RTL loop-invariant.c find_invariants_bb,
if the loop bb is colder than loop preheader, don't hoist it out of
loop.

SPEC2017 performance evaluation shows 1% performance improvement for
intrate GEOMEAN and no obvious regression for others.  Especially,
500.perlbench_r +7.52% (Perf shows function S_regtry of perlbench is
largely improved.), and 548.exchange2_r+1.98%, 526.blender_r +1.00%
on P8LE.

Regression and bootstrap tested pass on P8LE, any comments?  Thanks.

gcc/ChangeLog:

	* loop-invariant.c (find_invariants_bb): Check profile count
	before motion.
	(find_invariants_body): Add argument.
	* tree-ssa-loop-im.c (move_computations_worker): Check profile
	count before motion.
	(execute_sm): Likewise.
	(execute_sm_exit): Check pointer validness.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/recip-3.c: Adjust.
---
 gcc/loop-invariant.c                    |  10 +-
 gcc/testsuite/gcc.dg/tree-ssa/recip-3.c |   2 +-
 gcc/tree-ssa-loop-im.c                  | 154 +++++++++++++++++++++++-
 3 files changed, 157 insertions(+), 9 deletions(-)

diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index fca0c2b24be..5c3be7bf0eb 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -1183,9 +1183,14 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
    call.  */
 
 static void
-find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
+find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
+		    bool always_executed)
 {
   rtx_insn *insn;
+  basic_block preheader = loop_preheader_edge (loop)->src;
+
+  if (preheader->count > bb->count)
+    return;
 
   FOR_BB_INSNS (bb, insn)
     {
@@ -1214,8 +1219,7 @@ find_invariants_body (class loop *loop, basic_block *body,
   unsigned i;
 
   for (i = 0; i < loop->num_nodes; i++)
-    find_invariants_bb (body[i],
-			bitmap_bit_p (always_reached, i),
+    find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i),
 			bitmap_bit_p (always_executed, i));
 }
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
index 638bf38db8c..641c91e719e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-3.c
@@ -23,4 +23,4 @@ float h ()
 	F[0] += E / d;
 }
 
-/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
+/* { dg-final { scan-tree-dump-times " / " 5 "recip" } } */
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index d9f75d5025e..80d61ddf531 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -1154,6 +1154,58 @@ move_computations_worker (basic_block bb)
 	  continue;
 	}
 
+      edge e = loop_preheader_edge (level);
+      if (e->src->count > bb->count)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "PHI node NOT moved to %d from %d:\n",
+		       e->src->index, bb->index);
+	      print_gimple_stmt (dump_file, stmt, 0);
+	      fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost,
+		       level->num);
+	    }
+	  gsi_next (&bsi);
+	  continue;
+	}
+      else
+	{
+	  unsigned i;
+	  bool skip_phi_move = false;
+	  for (i = 0; i < gimple_phi_num_args (stmt); i++)
+	    {
+	      tree def = PHI_ARG_DEF (stmt, i);
+
+	      if (TREE_CODE (def) != SSA_NAME)
+		continue;
+
+	      gimple *def_stmt = SSA_NAME_DEF_STMT (def);
+
+	      if (!gimple_bb (def_stmt))
+		continue;
+
+	      if (!dominated_by_p (CDI_DOMINATORS, e->src,
+				   gimple_bb (def_stmt)))
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    {
+		      fprintf (dump_file, "PHI node NOT moved to %d from %d\n",
+			       e->src->index, bb->index);
+		      print_gimple_stmt (dump_file, stmt, 0);
+		      fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost,
+			       level->num);
+		    }
+		  skip_phi_move = true;
+		  break;
+		}
+	    }
+	  if (skip_phi_move)
+	    {
+	      gsi_next (&bsi);
+	      continue;
+	    }
+	}
+
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
 	  fprintf (dump_file, "Moving PHI node\n");
@@ -1191,14 +1243,13 @@ move_computations_worker (basic_block bb)
 	  tree lhs = gimple_assign_lhs (new_stmt);
 	  SSA_NAME_RANGE_INFO (lhs) = NULL;
 	}
-      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
+      gsi_insert_on_edge (e, new_stmt);
       remove_phi_node (&bsi, false);
     }
 
   for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
     {
       edge e;
-
       gimple *stmt = gsi_stmt (bsi);
 
       lim_data = get_lim_data (stmt);
@@ -1221,7 +1272,83 @@ move_computations_worker (basic_block bb)
       /* We do not really want to move conditionals out of the loop; we just
 	 placed it here to force its operands to be moved if necessary.  */
       if (gimple_code (stmt) == GIMPLE_COND)
-	continue;
+	{
+	  gsi_next (&bsi);
+	  continue;
+	}
+
+      e = loop_preheader_edge (level);
+      if (e->src->count > bb->count)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "stmt: Statement NOT moved to %d from %d \n",
+		       e->src->index, bb->index);
+	      print_gimple_stmt (dump_file, stmt, 0);
+	      fprintf (dump_file, "(cost %u) out of loop %d.\n\n", cost,
+		       level->num);
+	    }
+	  gsi_next (&bsi);
+	  continue;
+	}
+      else
+	{
+	  if (is_gimple_assign (stmt))
+	    {
+	      tree rhs1 = gimple_assign_rhs1 (stmt);
+	      tree rhs2 = gimple_assign_rhs2 (stmt);
+	      if (TREE_CODE (rhs1) == MEM_REF)
+		{
+		  rhs2 = TREE_OPERAND (rhs1, 1);
+		  rhs1 = TREE_OPERAND (rhs1, 0);
+		}
+	      gimple *stmt1 = NULL, *stmt2 = NULL;
+	      basic_block def_bb;
+	      if (rhs1 && TREE_CODE (rhs1) == SSA_NAME)
+		{
+		  stmt1 = SSA_NAME_DEF_STMT (rhs1);
+		  def_bb = gimple_bb (stmt1);
+		  if (stmt1
+		      && def_bb
+		      && (def_bb == bb
+			  || !dominated_by_p (CDI_DOMINATORS, e->src, def_bb)))
+		    {
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file,
+				   "stmt1: Statement NOT moved to %d from %d\n",
+				   e->src->index, bb->index);
+			  print_gimple_stmt (dump_file, stmt, 0);
+			  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
+				   cost, level->num);
+			}
+		      gsi_next (&bsi);
+		      continue;
+		    }
+		}
+	      if (rhs2 && TREE_CODE (rhs2) == SSA_NAME)
+		{
+		  stmt2 = SSA_NAME_DEF_STMT (rhs2);
+		  def_bb = gimple_bb (stmt2);
+		  if (stmt2 && def_bb
+		      && (def_bb == bb
+			  || !dominated_by_p (CDI_DOMINATORS, e->src, def_bb)))
+		    {
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file,
+				   "stmt2: Statement NOT moved to %d from %d\n",
+				   e->src->index, bb->index);
+			  print_gimple_stmt (dump_file, stmt, 0);
+			  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
+				   cost, level->num);
+			}
+		      gsi_next (&bsi);
+		      continue;
+		    }
+		}
+	    }
+	}
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
@@ -1231,7 +1358,6 @@ move_computations_worker (basic_block bb)
 		   cost, level->num);
 	}
 
-      e = loop_preheader_edge (level);
       gcc_assert (!gimple_vdef (stmt));
       if (gimple_vuse (stmt))
 	{
@@ -2133,6 +2259,19 @@ execute_sm (class loop *loop, im_mem_ref *ref,
   bool multi_threaded_model_p = false;
   gimple_stmt_iterator gsi;
   sm_aux *aux = new sm_aux;
+  basic_block bb = gimple_bb (first_mem_ref_loc (loop, ref)->stmt);
+
+  edge e = loop_preheader_edge (loop);
+  if (e->src->count > bb->count)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "Don't execute store motion of ");
+	  print_generic_expr (dump_file, ref->mem.ref);
+	  fprintf (dump_file, " from loop %d\n", loop->num);
+	}
+      return;
+    }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -2241,7 +2380,12 @@ execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
 	}
       else
 	{
-	  sm_aux *aux = *aux_map.get (ref);
+	  sm_aux **paux = aux_map.get (ref);
+	  sm_aux *aux;
+	  if (paux)
+	    aux = *paux;
+	  else
+	    continue;
 	  if (!aux->store_flag || kind == sm_ord)
 	    {
 	      gassign *store;
-- 
2.27.0.90.geebb51ba8c


  reply	other threads:[~2021-08-04  2:42 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-03  8:58 Xionghu Luo
2021-08-04  2:42 ` Xionghu Luo [this message]
2021-08-06 11:46 ` Richard Biener
2021-08-09  2:37   ` Xionghu Luo
2021-08-10 14:47     ` Richard Biener
2021-08-11  3:03       ` Feng Xue OS
2021-10-26 13:05         ` Jan Hubicka
2021-10-27  1:42           ` Xionghu Luo
2021-08-11  8:32       ` Xionghu Luo
2021-08-11  9:16         ` Richard Biener
2021-08-12  3:24           ` Xionghu Luo
2021-09-22  8:40           ` Xionghu Luo
2021-09-23 12:17             ` Richard Biener
2021-10-15  5:51               ` Xionghu Luo
2021-10-21  8:43                 ` Xionghu Luo
2021-10-21 10:55                   ` Richard Biener
2021-10-26  5:40                     ` Xionghu Luo
2021-10-26 11:59                       ` Richard Biener
2021-10-26 12:19                         ` Jan Hubicka
2021-08-09  4:33   ` Feng Xue OS

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dbb59d53-4b8a-d07a-3460-7daeba635ec4@linux.ibm.com \
    --to=luoxhu@linux.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=guojiufu@linux.ibm.com \
    --cc=hubicka@ucw.cz \
    --cc=linkw@gcc.gnu.org \
    --cc=rguenther@suse.de \
    --cc=segher@kernel.crashing.org \
    --cc=wschmidt@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).