public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Jiufu Guo <guojiufu@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: guojiufu@linux.ibm.com, wschmidt@linux.ibm.com,
	segher@kernel.crashing.org,        pthaugen@us.ibm.com,
	hubicka@ucw.cz
Subject: [PATCH] correct COUNT and PROB for unrolled loop
Date: Mon, 03 Feb 2020 08:17:00 -0000	[thread overview]
Message-ID: <1580717822-6073-1-git-send-email-guojiufu@linux.ibm.com> (raw)

Hi,
PR68212 reported that the COUNT of an unrolled loop was not correct, and
comments on this PR also mentioned that the loop becomes 'cold'.  The patches
in the PR fixed part of the issue.  With reference to the patch
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02368.html) and the comment
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02380.html), the patch below
is drafted to fix the remaining part of this issue.

The following patch fixes the wrong COUNT/PROB of an unrolled loop.  It also
handles the case where unrolling with an unreliable count can
cause a loop to no longer look hot and therefore not get aligned.  This
patch corrects the PROB of the loop exit edge, the PROB/COUNT of the
latch block, and the loop count after the last peeling.  It scales by
profile_probability::likely () if the unrolled count gets unrealistically small.

Bootstrapped and regtested on powerpc64le with no new regressions.
The SPEC2017 results are fine: a couple of INT benchmarks showed around
1.7% improvement; everything else was within +/- 1%.

Ok for trunk?

Jiufu Guo

2020-02-03  Jiufu Guo   <guojiufu@cn.ibm.com>
	    Pat Haugen  <pthaugen@us.ibm.com>

	PR rtl-optimization/68212
	* cfgloopmanip.c (duplicate_loop_to_header_edge): Correct COUNT/PROB
	for unrolled/peeled blocks.

testsuite/ChangeLog:
2020-02-03  Jiufu Guo   <guojiufu@cn.ibm.com>
	    Pat Haugen  <pthaugen@us.ibm.com>
	PR rtl-optimization/68212
	* gcc.dg/pr68212.c: New test.


---
 gcc/cfgloopmanip.c             | 53 ++++++++++++++++++++++++++++++++++++++++--
 gcc/testsuite/gcc.dg/pr68212.c | 13 +++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr68212.c

diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 727e951..ded0046 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify-me.h"
 #include "tree-ssa-loop-manip.h"
 #include "dumpfile.h"
+#include "cfgrtl.h"
 
 static void copy_loops_to (class loop **, int,
 			   class loop *);
@@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  /* If original loop is executed COUNT_IN times, the unrolled
 	     loop will account SCALE_MAIN_DEN times.  */
 	  scale_main = count_in.probability_in (scale_main_den);
+
+	  /* If we are guessing at the number of iterations and count_in
+	     becomes unrealistically small, reset probability.  */
+	  if (!(count_in.reliable_p () || loop->any_estimate))
+	    {
+	      profile_count new_count_in = count_in.apply_probability (scale_main);
+	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	      if (new_count_in.apply_scale (1, 10) < preheader_count)
+		scale_main = profile_probability::likely ();
+	    }
+
 	  scale_act = scale_main * prob_pass_main;
 	}
       else
 	{
+	  profile_count new_loop_count;
 	  profile_count preheader_count = e->count ();
-	  for (i = 0; i < ndupl; i++)
-	    scale_main = scale_main * scale_step[i];
 	  scale_act = preheader_count.probability_in (count_in);
+	  /* Compute final preheader count after peeling NDUPL copies.  */
+	  for (i = 0; i < ndupl; i++)
+	    preheader_count = preheader_count.apply_probability (scale_step[i]);
+	  /* Subtract out exit(s) from peeled copies.  */
+	  new_loop_count = count_in - (e->count () - preheader_count);
+	  scale_main = new_loop_count.probability_in (count_in);
 	}
     }
 
@@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  scale_bbs_frequencies (new_bbs, n, scale_act);
 	  scale_act = scale_act * scale_step[j];
 	}
+
+      /* Need to update PROB of exit edge and corresponding COUNT.  */
+      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
+	  && bbs_to_scale)
+	{
+	  edge new_exit = new_spec_edges[SE_ORIG];
+	  profile_count new_count_in = new_exit->src->count;
+	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	  edge e;
+	  edge_iterator ei;
+
+	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
+	    if (e != new_exit)
+	      break;
+
+	  gcc_assert (e && e != new_exit);
+
+	  new_exit->probability = preheader_count.probability_in (new_count_in);
+	  e->probability = new_exit->probability.invert ();
+
+	  profile_count new_latch_count
+	    = new_exit->src->count.apply_probability (e->probability);
+	  profile_count old_latch_count = e->dest->count;
+
+	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
+						 new_latch_count,
+						 old_latch_count);
+
+	  if (current_ir_type () != IR_GIMPLE)
+	    update_br_prob_note (e->src);
+	}
     }
   free (new_bbs);
   free (orig_loops);
diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
new file mode 100644
index 0000000..f3b7c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68212.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments -fdump-rtl-loop2_unroll" } */
+
+void foo(long int *a, long int *b, long int n)
+{
+  long int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = *b;
+}
+
+/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
+/* { dg-final { scan-rtl-dump-times "REG_BR_PROB 937042044" 1 "loop2_unroll"} } */
-- 
2.7.4

             reply	other threads:[~2020-02-03  8:17 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-03  8:17 Jiufu Guo [this message]
2020-02-03 16:04 ` Pat Haugen
2020-02-03 16:20   ` Jeff Law
2020-02-03 16:23     ` Jan Hubicka
2020-02-11  2:29       ` Jiufu Guo
2020-02-17  6:23         ` [PATCH V2] " Jiufu Guo
2020-02-28  7:56           ` Jiufu Guo
2020-03-19  2:21           ` Jiufu Guo
2020-05-19  6:15             ` Jiufu Guo
2020-06-03  5:22               ` [PATCH V2] PING^ " Jiufu Guo
2020-06-18  1:22                 ` [PATCH V2] PING^2 " Jiufu Guo
2020-07-02  2:35                   ` Jiufu Guo
2020-07-09 11:55                     ` Martin Liška
2020-07-10  2:14                       ` Jiufu Guo
2020-07-10  7:37                         ` Martin Liška
2020-07-10 13:09                           ` Jiufu Guo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1580717822-6073-1-git-send-email-guojiufu@linux.ibm.com \
    --to=guojiufu@linux.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hubicka@ucw.cz \
    --cc=pthaugen@us.ibm.com \
    --cc=segher@kernel.crashing.org \
    --cc=wschmidt@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).