public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] correct COUNT and PROB for unrolled loop
@ 2020-02-03  8:17 Jiufu Guo
  2020-02-03 16:04 ` Pat Haugen
  0 siblings, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-02-03  8:17 UTC (permalink / raw)
  To: gcc-patches; +Cc: guojiufu, wschmidt, segher, pthaugen, hubicka

Hi,
PR68212 mentioned that the COUNT of unrolled loop was not correct, and
comments of this PR also mentioned that loop become 'cold'.  The patches
of the PR fixed part of the issue.  With reference the patch
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02368.html) and comment
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02380.html), below patch
is drafted to fix other part of this issue.

The following patch fixes the wrong COUNT/PROB of unrolled loop.  And the
patch handles the case where unrolling in unreliable count number can
cause a loop to no longer look hot and therefor not get aligned.  This
patch corrects the PROB of loop exit edge, and corrects RPOB/COUNT of
latch block, and the loop count after last peeling.  This patch scale by
profile_probability::likely () if unrolled count gets unrealistically small.

Bootstrap/regtest on powerpc64le with no new regressions.
And spec2017 result is fine: a couple INT benchmarks that showed around
1.7% improvement, everything else was +/- <= 1%.

Ok for trunk?

Jiufu Guo

2020-02-03  Jiufu Guo   <guojiufu@cn.ibm.com>
	    Pat Haugen  <pthaugen@us.ibm.com>

	PR rtl-optimization/68212
	* cfgloopmanip.c (duplicate_loop_to_header_edge): Correct COUNT/PROB
	for unrolled/peeled blocks.

testsuite/ChangeLog:
2020-02-03  Jiufu Guo   <guojiufu@cn.ibm.com>
	    Pat Haugen  <pthaugen@us.ibm.com>
	PR rtl-optimization/68212
	* gcc.dg/pr68212.c: New test.


---
 gcc/cfgloopmanip.c             | 53 ++++++++++++++++++++++++++++++++++++++++--
 gcc/testsuite/gcc.dg/pr68212.c | 13 +++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr68212.c

diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 727e951..ded0046 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify-me.h"
 #include "tree-ssa-loop-manip.h"
 #include "dumpfile.h"
+#include "cfgrtl.h"
 
 static void copy_loops_to (class loop **, int,
 			   class loop *);
@@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  /* If original loop is executed COUNT_IN times, the unrolled
 	     loop will account SCALE_MAIN_DEN times.  */
 	  scale_main = count_in.probability_in (scale_main_den);
+
+	  /* If we are guessing at the number of iterations and count_in
+	     becomes unrealistically small, reset probability.  */
+	  if (!(count_in.reliable_p () || loop->any_estimate))
+	    {
+	      profile_count new_count_in = count_in.apply_probability (scale_main);
+	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	      if (new_count_in.apply_scale (1, 10) < preheader_count)
+		scale_main = profile_probability::likely ();
+	    }
+
 	  scale_act = scale_main * prob_pass_main;
 	}
       else
 	{
+	  profile_count new_loop_count;
 	  profile_count preheader_count = e->count ();
-	  for (i = 0; i < ndupl; i++)
-	    scale_main = scale_main * scale_step[i];
 	  scale_act = preheader_count.probability_in (count_in);
+	  /* Compute final preheader count after peeling NDUPL copies.  */
+	  for (i = 0; i < ndupl; i++)
+	    preheader_count = preheader_count.apply_probability (scale_step[i]);
+	  /* Subtract out exit(s) from peeled copies.  */
+	  new_loop_count = count_in - (e->count () - preheader_count);
+	  scale_main = new_loop_count.probability_in (count_in);
 	}
     }
 
@@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  scale_bbs_frequencies (new_bbs, n, scale_act);
 	  scale_act = scale_act * scale_step[j];
 	}
+
+      /* Need to update PROB of exit edge and corresponding COUNT.  */
+      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
+	  && bbs_to_scale)
+	{
+	  edge new_exit = new_spec_edges[SE_ORIG];
+	  profile_count new_count_in = new_exit->src->count;
+	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	  edge e;
+	  edge_iterator ei;
+
+	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
+	    if (e != new_exit)
+	      break;
+
+	  gcc_assert (e && e != new_exit);
+
+	  new_exit->probability = preheader_count.probability_in (new_count_in);
+	  e->probability = new_exit->probability.invert ();
+
+	  profile_count new_latch_count
+	    = new_exit->src->count.apply_probability (e->probability);
+	  profile_count old_latch_count = e->dest->count;
+
+	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
+						 new_latch_count,
+						 old_latch_count);
+
+	  if (current_ir_type () != IR_GIMPLE)
+	    update_br_prob_note (e->src);
+	}
     }
   free (new_bbs);
   free (orig_loops);
diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
new file mode 100644
index 0000000..f3b7c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68212.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments -fdump-rtl-loop2_unroll" } */
+
+void foo(long int *a, long int *b, long int n)
+{
+  long int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = *b;
+}
+
+/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
+/* { dg-final { scan-rtl-dump-times "REG_BR_PROB 937042044" 1 "loop2_unroll"} } */
-- 
2.7.4

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] correct COUNT and PROB for unrolled loop
  2020-02-03  8:17 [PATCH] correct COUNT and PROB for unrolled loop Jiufu Guo
@ 2020-02-03 16:04 ` Pat Haugen
  2020-02-03 16:20   ` Jeff Law
  0 siblings, 1 reply; 16+ messages in thread
From: Pat Haugen @ 2020-02-03 16:04 UTC (permalink / raw)
  To: Jiufu Guo, gcc-patches; +Cc: wschmidt, segher, pthaugen, hubicka

On 2/3/20 2:17 AM, Jiufu Guo wrote:
> +/* { dg-final { scan-rtl-dump-times "REG_BR_PROB 937042044" 1 "loop2_unroll"} } */

Sorry I didn't catch this addition to the original testcase earlier, but I wonder how stable this test is going to be. If there are future changes to default count/probability, or changes in their representation, this may fail and need to be updated. The fact that the loop is still getting aligned is the main concern.

-Pat

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] correct COUNT and PROB for unrolled loop
  2020-02-03 16:04 ` Pat Haugen
@ 2020-02-03 16:20   ` Jeff Law
  2020-02-03 16:23     ` Jan Hubicka
  0 siblings, 1 reply; 16+ messages in thread
From: Jeff Law @ 2020-02-03 16:20 UTC (permalink / raw)
  To: Pat Haugen, Jiufu Guo, gcc-patches; +Cc: wschmidt, segher, pthaugen, hubicka

On Mon, 2020-02-03 at 10:04 -0600, Pat Haugen wrote:
> On 2/3/20 2:17 AM, Jiufu Guo wrote:
> > +/* { dg-final { scan-rtl-dump-times "REG_BR_PROB 937042044" 1 "loop2_unroll"} } */
> 
> Sorry I didn't catch this addition to the original testcase earlier, but I wonder how stable this test is going to be. If there are future changes to default count/probability, or changes in their representation, this may fail and need to be updated. The fact that the loop is still getting aligned is the main concern.
Unless you're really interested in those probabilities, I'd suggest not
testing for them.  If you really need to test for them, then I'd
suggest testing for them being "close" rather than a specific value for
REG_BR_PROB.

jeff

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] correct COUNT and PROB for unrolled loop
  2020-02-03 16:20   ` Jeff Law
@ 2020-02-03 16:23     ` Jan Hubicka
  2020-02-11  2:29       ` Jiufu Guo
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Hubicka @ 2020-02-03 16:23 UTC (permalink / raw)
  To: Jeff Law; +Cc: Pat Haugen, Jiufu Guo, gcc-patches, wschmidt, segher, pthaugen

> On Mon, 2020-02-03 at 10:04 -0600, Pat Haugen wrote:
> > On 2/3/20 2:17 AM, Jiufu Guo wrote:
> > > +/* { dg-final { scan-rtl-dump-times "REG_BR_PROB 937042044" 1 "loop2_unroll"} } */
> > 
> > Sorry I didn't catch this addition to the original testcase earlier, but I wonder how stable this test is going to be. If there are future changes to default count/probability, or changes in their representation, this may fail and need to be updated. The fact that the loop is still getting aligned is the main concern.
> Unless you're really interested in those probabilities, I'd suggest not
> testing for them.  If you really need to test for them, then I'd
> suggest testing for them being "close" rather than a specific value for
> REG_BR_PROB.

Note that REG_BR_PROB now encodes the actual probability as well as the
profile quality (i.e. it is m_val * 8 + m_quality).
We may want to invent better way to dump them, but it is better to match
for CFG edge probability rather than the REG_BR_PROB_NOTE.

honza
> 
> jeff
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] correct COUNT and PROB for unrolled loop
  2020-02-03 16:23     ` Jan Hubicka
@ 2020-02-11  2:29       ` Jiufu Guo
  2020-02-17  6:23         ` [PATCH V2] " Jiufu Guo
  0 siblings, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-02-11  2:29 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Jeff Law, Pat Haugen, gcc-patches, wschmidt, segher

Jan Hubicka <hubicka@ucw.cz> writes:

>> On Mon, 2020-02-03 at 10:04 -0600, Pat Haugen wrote:
>> > On 2/3/20 2:17 AM, Jiufu Guo wrote:
>> > > +/* { dg-final { scan-rtl-dump-times "REG_BR_PROB 937042044" 1 "loop2_unroll"} } */
>> > 
>> > Sorry I didn't catch this addition to the original testcase
>> > earlier, but I wonder how stable this test is going to be. If
>> > there are future changes to default count/probability, or changes
>> > in their representation, this may fail and need to be updated. The
>> > fact that the loop is still getting aligned is the main concern.
>> Unless you're really interested in those probabilities, I'd suggest not
>> testing for them.  If you really need to test for them, then I'd
>> suggest testing for them being "close" rather than a specific value for
>> REG_BR_PROB.
>
> Note that REG_BR_PROB now encodes the actual probability as well as the
> profile quality (i.e. it is m_val * 8 + m_quality).
> We may want to invent better way to dump them, but it is better to match
> for CFG edge probability rather than the REG_BR_PROB_NOTE.
Thanks Honza, Pat, Jeff.
String like "count 661119332" in dump file may also not perfect.
I would removing this line before commit.

Welcome for any other comments!

Thanks,
Jiufu Guo.

>
> honza
>> 
>> jeff
>> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] correct COUNT and PROB for unrolled loop
  2020-02-11  2:29       ` Jiufu Guo
@ 2020-02-17  6:23         ` Jiufu Guo
  2020-02-28  7:56           ` Jiufu Guo
  2020-03-19  2:21           ` Jiufu Guo
  0 siblings, 2 replies; 16+ messages in thread
From: Jiufu Guo @ 2020-02-17  6:23 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Jeff Law, Pat Haugen, gcc-patches, wschmidt, segher


Hi Honza and all,

I updated the patch a little as below. Bootstrap and regtest are ok
on powerpc64le.

Is OK for trunk?

Thanks for comments.
Jiufu

diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 727e951..ded0046 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify-me.h"
 #include "tree-ssa-loop-manip.h"
 #include "dumpfile.h"
+#include "cfgrtl.h"
 
 static void copy_loops_to (class loop **, int,
 			   class loop *);
@@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  /* If original loop is executed COUNT_IN times, the unrolled
 	     loop will account SCALE_MAIN_DEN times.  */
 	  scale_main = count_in.probability_in (scale_main_den);
+
+	  /* If we are guessing at the number of iterations and count_in
+	     becomes unrealistically small, reset probability.  */
+	  if (!(count_in.reliable_p () || loop->any_estimate))
+	    {
+	      profile_count new_count_in = count_in.apply_probability (scale_main);
+	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	      if (new_count_in.apply_scale (1, 10) < preheader_count)
+		scale_main = profile_probability::likely ();
+	    }
+
 	  scale_act = scale_main * prob_pass_main;
 	}
       else
 	{
+	  profile_count new_loop_count;
 	  profile_count preheader_count = e->count ();
-	  for (i = 0; i < ndupl; i++)
-	    scale_main = scale_main * scale_step[i];
 	  scale_act = preheader_count.probability_in (count_in);
+	  /* Compute final preheader count after peeling NDUPL copies.  */
+	  for (i = 0; i < ndupl; i++)
+	    preheader_count = preheader_count.apply_probability (scale_step[i]);
+	  /* Subtract out exit(s) from peeled copies.  */
+	  new_loop_count = count_in - (e->count () - preheader_count);
+	  scale_main = new_loop_count.probability_in (count_in);
 	}
     }
 
@@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  scale_bbs_frequencies (new_bbs, n, scale_act);
 	  scale_act = scale_act * scale_step[j];
 	}
+
+      /* Need to update PROB of exit edge and corresponding COUNT.  */
+      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
+	  && bbs_to_scale)
+	{
+	  edge new_exit = new_spec_edges[SE_ORIG];
+	  profile_count new_count_in = new_exit->src->count;
+	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	  edge e;
+	  edge_iterator ei;
+
+	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
+	    if (e != new_exit)
+	      break;
+
+	  gcc_assert (e && e != new_exit);
+
+	  new_exit->probability = preheader_count.probability_in (new_count_in);
+	  e->probability = new_exit->probability.invert ();
+
+	  profile_count new_latch_count
+	    = new_exit->src->count.apply_probability (e->probability);
+	  profile_count old_latch_count = e->dest->count;
+
+	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
+						 new_latch_count,
+						 old_latch_count);
+
+	  if (current_ir_type () != IR_GIMPLE)
+	    update_br_prob_note (e->src);
+	}
     }
   free (new_bbs);
   free (orig_loops);
diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
new file mode 100644
index 0000000..f3b7c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68212.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
+
+void foo(long int *a, long int *b, long int n)
+{
+  long int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = *b;
+}
+
+/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
+
-- 
2.7.4

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] correct COUNT and PROB for unrolled loop
  2020-02-17  6:23         ` [PATCH V2] " Jiufu Guo
@ 2020-02-28  7:56           ` Jiufu Guo
  2020-03-19  2:21           ` Jiufu Guo
  1 sibling, 0 replies; 16+ messages in thread
From: Jiufu Guo @ 2020-02-28  7:56 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Jeff Law, Pat Haugen, gcc-patches, wschmidt, segher

Jiufu Guo <guojiufu@linux.ibm.com> writes:

Hi!

I'd like to ping following patch. just in case it may make sense to
include in GCC 10. Thanks!

https://gcc.gnu.org/ml/gcc-patches/2020-02/msg00927.html

Jiufu

> Hi Honza and all,
>
> I updated the patch a little as below. Bootstrap and regtest are ok
> on powerpc64le.
>
> Is OK for trunk?
>
> Thanks for comments.
> Jiufu
>
> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
> index 727e951..ded0046 100644
> --- a/gcc/cfgloopmanip.c
> +++ b/gcc/cfgloopmanip.c
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify-me.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "dumpfile.h"
> +#include "cfgrtl.h"
>  
>  static void copy_loops_to (class loop **, int,
>  			   class loop *);
> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>  	  /* If original loop is executed COUNT_IN times, the unrolled
>  	     loop will account SCALE_MAIN_DEN times.  */
>  	  scale_main = count_in.probability_in (scale_main_den);
> +
> +	  /* If we are guessing at the number of iterations and count_in
> +	     becomes unrealistically small, reset probability.  */
> +	  if (!(count_in.reliable_p () || loop->any_estimate))
> +	    {
> +	      profile_count new_count_in = count_in.apply_probability (scale_main);
> +	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +	      if (new_count_in.apply_scale (1, 10) < preheader_count)
> +		scale_main = profile_probability::likely ();
> +	    }
> +
>  	  scale_act = scale_main * prob_pass_main;
>  	}
>        else
>  	{
> +	  profile_count new_loop_count;
>  	  profile_count preheader_count = e->count ();
> -	  for (i = 0; i < ndupl; i++)
> -	    scale_main = scale_main * scale_step[i];
>  	  scale_act = preheader_count.probability_in (count_in);
> +	  /* Compute final preheader count after peeling NDUPL copies.  */
> +	  for (i = 0; i < ndupl; i++)
> +	    preheader_count = preheader_count.apply_probability (scale_step[i]);
> +	  /* Subtract out exit(s) from peeled copies.  */
> +	  new_loop_count = count_in - (e->count () - preheader_count);
> +	  scale_main = new_loop_count.probability_in (count_in);
>  	}
>      }
>  
> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>  	  scale_bbs_frequencies (new_bbs, n, scale_act);
>  	  scale_act = scale_act * scale_step[j];
>  	}
> +
> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
> +	  && bbs_to_scale)
> +	{
> +	  edge new_exit = new_spec_edges[SE_ORIG];
> +	  profile_count new_count_in = new_exit->src->count;
> +	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +	  edge e;
> +	  edge_iterator ei;
> +
> +	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
> +	    if (e != new_exit)
> +	      break;
> +
> +	  gcc_assert (e && e != new_exit);
> +
> +	  new_exit->probability = preheader_count.probability_in (new_count_in);
> +	  e->probability = new_exit->probability.invert ();
> +
> +	  profile_count new_latch_count
> +	    = new_exit->src->count.apply_probability (e->probability);
> +	  profile_count old_latch_count = e->dest->count;
> +
> +	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
> +	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
> +						 new_latch_count,
> +						 old_latch_count);
> +
> +	  if (current_ir_type () != IR_GIMPLE)
> +	    update_br_prob_note (e->src);
> +	}
>      }
>    free (new_bbs);
>    free (orig_loops);
> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
> new file mode 100644
> index 0000000..f3b7c22
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr68212.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
> +
> +void foo(long int *a, long int *b, long int n)
> +{
> +  long int i;
> +
> +  for (i = 0; i < n; i++)
> +    a[i] = *b;
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] correct COUNT and PROB for unrolled loop
  2020-02-17  6:23         ` [PATCH V2] " Jiufu Guo
  2020-02-28  7:56           ` Jiufu Guo
@ 2020-03-19  2:21           ` Jiufu Guo
  2020-05-19  6:15             ` Jiufu Guo
  1 sibling, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-03-19  2:21 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Jeff Law, Pat Haugen, gcc-patches, wschmidt, segher

Jiufu Guo <guojiufu@linux.ibm.com> writes:

Hi!

I'd like to ping following patch. As near end of gcc10 stage 4, it seems
I would ask approval for GCC11 trunk.

Thanks,
Jiufu Guo

> Hi Honza and all,
>
> I updated the patch a little as below. Bootstrap and regtest are ok
> on powerpc64le.
>
> Is OK for trunk?
>
> Thanks for comments.
> Jiufu
>
> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
> index 727e951..ded0046 100644
> --- a/gcc/cfgloopmanip.c
> +++ b/gcc/cfgloopmanip.c
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify-me.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "dumpfile.h"
> +#include "cfgrtl.h"
>  
>  static void copy_loops_to (class loop **, int,
>  			   class loop *);
> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>  	  /* If original loop is executed COUNT_IN times, the unrolled
>  	     loop will account SCALE_MAIN_DEN times.  */
>  	  scale_main = count_in.probability_in (scale_main_den);
> +
> +	  /* If we are guessing at the number of iterations and count_in
> +	     becomes unrealistically small, reset probability.  */
> +	  if (!(count_in.reliable_p () || loop->any_estimate))
> +	    {
> +	      profile_count new_count_in = count_in.apply_probability (scale_main);
> +	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +	      if (new_count_in.apply_scale (1, 10) < preheader_count)
> +		scale_main = profile_probability::likely ();
> +	    }
> +
>  	  scale_act = scale_main * prob_pass_main;
>  	}
>        else
>  	{
> +	  profile_count new_loop_count;
>  	  profile_count preheader_count = e->count ();
> -	  for (i = 0; i < ndupl; i++)
> -	    scale_main = scale_main * scale_step[i];
>  	  scale_act = preheader_count.probability_in (count_in);
> +	  /* Compute final preheader count after peeling NDUPL copies.  */
> +	  for (i = 0; i < ndupl; i++)
> +	    preheader_count = preheader_count.apply_probability (scale_step[i]);
> +	  /* Subtract out exit(s) from peeled copies.  */
> +	  new_loop_count = count_in - (e->count () - preheader_count);
> +	  scale_main = new_loop_count.probability_in (count_in);
>  	}
>      }
>  
> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>  	  scale_bbs_frequencies (new_bbs, n, scale_act);
>  	  scale_act = scale_act * scale_step[j];
>  	}
> +
> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
> +	  && bbs_to_scale)
> +	{
> +	  edge new_exit = new_spec_edges[SE_ORIG];
> +	  profile_count new_count_in = new_exit->src->count;
> +	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
> +	  edge e;
> +	  edge_iterator ei;
> +
> +	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
> +	    if (e != new_exit)
> +	      break;
> +
> +	  gcc_assert (e && e != new_exit);
> +
> +	  new_exit->probability = preheader_count.probability_in (new_count_in);
> +	  e->probability = new_exit->probability.invert ();
> +
> +	  profile_count new_latch_count
> +	    = new_exit->src->count.apply_probability (e->probability);
> +	  profile_count old_latch_count = e->dest->count;
> +
> +	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
> +	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
> +						 new_latch_count,
> +						 old_latch_count);
> +
> +	  if (current_ir_type () != IR_GIMPLE)
> +	    update_br_prob_note (e->src);
> +	}
>      }
>    free (new_bbs);
>    free (orig_loops);
> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
> new file mode 100644
> index 0000000..f3b7c22
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr68212.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
> +
> +void foo(long int *a, long int *b, long int n)
> +{
> +  long int i;
> +
> +  for (i = 0; i < n; i++)
> +    a[i] = *b;
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] correct COUNT and PROB for unrolled loop
  2020-03-19  2:21           ` Jiufu Guo
@ 2020-05-19  6:15             ` Jiufu Guo
  2020-06-03  5:22               ` [PATCH V2] PING^ " Jiufu Guo
  0 siblings, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-05-19  6:15 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Jeff Law, Pat Haugen, gcc-patches, wschmidt, segher

Jiufu Guo <guojiufu@linux.ibm.com> writes:

Hi,

I'd like to ping this patch for trunk on stage 1.

This patch could fix the issue on incorrect COUNT/FREQUENCES of loop
unrolled blocks, and also could help the improve the cold/hot issue of
the unrolled loops.

patch is also at
https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

Thanks,
Jiufu

> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
> Hi!
>
> I'd like to ping following patch. As near end of gcc10 stage 4, it seems
> I would ask approval for GCC11 trunk.
>
> Thanks,
> Jiufu Guo
>
>> Hi Honza and all,
>>
>> I updated the patch a little as below. Bootstrap and regtest are ok
>> on powerpc64le.
>>
>> Is OK for trunk?
>>
>> Thanks for comments.
>> Jiufu
>>
>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>> index 727e951..ded0046 100644
>> --- a/gcc/cfgloopmanip.c
>> +++ b/gcc/cfgloopmanip.c
>> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "gimplify-me.h"
>>  #include "tree-ssa-loop-manip.h"
>>  #include "dumpfile.h"
>> +#include "cfgrtl.h"
>>  
>>  static void copy_loops_to (class loop **, int,
>>  			   class loop *);
>> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>  	  /* If original loop is executed COUNT_IN times, the unrolled
>>  	     loop will account SCALE_MAIN_DEN times.  */
>>  	  scale_main = count_in.probability_in (scale_main_den);
>> +
>> +	  /* If we are guessing at the number of iterations and count_in
>> +	     becomes unrealistically small, reset probability.  */
>> +	  if (!(count_in.reliable_p () || loop->any_estimate))
>> +	    {
>> +	      profile_count new_count_in = count_in.apply_probability (scale_main);
>> +	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
>> +	      if (new_count_in.apply_scale (1, 10) < preheader_count)
>> +		scale_main = profile_probability::likely ();
>> +	    }
>> +
>>  	  scale_act = scale_main * prob_pass_main;
>>  	}
>>        else
>>  	{
>> +	  profile_count new_loop_count;
>>  	  profile_count preheader_count = e->count ();
>> -	  for (i = 0; i < ndupl; i++)
>> -	    scale_main = scale_main * scale_step[i];
>>  	  scale_act = preheader_count.probability_in (count_in);
>> +	  /* Compute final preheader count after peeling NDUPL copies.  */
>> +	  for (i = 0; i < ndupl; i++)
>> +	    preheader_count = preheader_count.apply_probability (scale_step[i]);
>> +	  /* Subtract out exit(s) from peeled copies.  */
>> +	  new_loop_count = count_in - (e->count () - preheader_count);
>> +	  scale_main = new_loop_count.probability_in (count_in);
>>  	}
>>      }
>>  
>> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>  	  scale_bbs_frequencies (new_bbs, n, scale_act);
>>  	  scale_act = scale_act * scale_step[j];
>>  	}
>> +
>> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
>> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
>> +	  && bbs_to_scale)
>> +	{
>> +	  edge new_exit = new_spec_edges[SE_ORIG];
>> +	  profile_count new_count_in = new_exit->src->count;
>> +	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
>> +	  edge e;
>> +	  edge_iterator ei;
>> +
>> +	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
>> +	    if (e != new_exit)
>> +	      break;
>> +
>> +	  gcc_assert (e && e != new_exit);
>> +
>> +	  new_exit->probability = preheader_count.probability_in (new_count_in);
>> +	  e->probability = new_exit->probability.invert ();
>> +
>> +	  profile_count new_latch_count
>> +	    = new_exit->src->count.apply_probability (e->probability);
>> +	  profile_count old_latch_count = e->dest->count;
>> +
>> +	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
>> +	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
>> +						 new_latch_count,
>> +						 old_latch_count);
>> +
>> +	  if (current_ir_type () != IR_GIMPLE)
>> +	    update_br_prob_note (e->src);
>> +	}
>>      }
>>    free (new_bbs);
>>    free (orig_loops);
>> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
>> new file mode 100644
>> index 0000000..f3b7c22
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr68212.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
>> +
>> +void foo(long int *a, long int *b, long int n)
>> +{
>> +  long int i;
>> +
>> +  for (i = 0; i < n; i++)
>> +    a[i] = *b;
>> +}
>> +
>> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
>> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH V2] PING^ correct COUNT and PROB for unrolled loop
  2020-05-19  6:15             ` Jiufu Guo
@ 2020-06-03  5:22               ` Jiufu Guo
  2020-06-18  1:22                 ` [PATCH V2] PING^2 " Jiufu Guo
  0 siblings, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-06-03  5:22 UTC (permalink / raw)
  To: Jiufu Guo via Gcc-patches; +Cc: Jan Hubicka, wschmidt, segher

Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:

Hi,

I would like to reping this, hope to get approval for this patch.
https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

BR,
Jiufu Guo

> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
> Hi,
>
> I'd like to ping this patch for trunk on stage 1.
>
> This patch could fix the issue on incorrect COUNT/FREQUENCES of loop
> unrolled blocks, and also could help the improve the cold/hot issue of
> the unrolled loops.
>
> patch is also at
> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>
> Thanks,
> Jiufu
>
>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>
>> Hi!
>>
>> I'd like to ping following patch. As near end of gcc10 stage 4, it seems
>> I would ask approval for GCC11 trunk.
>>
>> Thanks,
>> Jiufu Guo
>>
>>> Hi Honza and all,
>>>
>>> I updated the patch a little as below. Bootstrap and regtest are ok
>>> on powerpc64le.
>>>
>>> Is OK for trunk?
>>>
>>> Thanks for comments.
>>> Jiufu
>>>
>>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>>> index 727e951..ded0046 100644
>>> --- a/gcc/cfgloopmanip.c
>>> +++ b/gcc/cfgloopmanip.c
>>> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>>>  #include "gimplify-me.h"
>>>  #include "tree-ssa-loop-manip.h"
>>>  #include "dumpfile.h"
>>> +#include "cfgrtl.h"
>>>  
>>>  static void copy_loops_to (class loop **, int,
>>>  			   class loop *);
>>> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>  	  /* If original loop is executed COUNT_IN times, the unrolled
>>>  	     loop will account SCALE_MAIN_DEN times.  */
>>>  	  scale_main = count_in.probability_in (scale_main_den);
>>> +
>>> +	  /* If we are guessing at the number of iterations and count_in
>>> +	     becomes unrealistically small, reset probability.  */
>>> +	  if (!(count_in.reliable_p () || loop->any_estimate))
>>> +	    {
>>> +	      profile_count new_count_in = count_in.apply_probability (scale_main);
>>> +	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>> +	      if (new_count_in.apply_scale (1, 10) < preheader_count)
>>> +		scale_main = profile_probability::likely ();
>>> +	    }
>>> +
>>>  	  scale_act = scale_main * prob_pass_main;
>>>  	}
>>>        else
>>>  	{
>>> +	  profile_count new_loop_count;
>>>  	  profile_count preheader_count = e->count ();
>>> -	  for (i = 0; i < ndupl; i++)
>>> -	    scale_main = scale_main * scale_step[i];
>>>  	  scale_act = preheader_count.probability_in (count_in);
>>> +	  /* Compute final preheader count after peeling NDUPL copies.  */
>>> +	  for (i = 0; i < ndupl; i++)
>>> +	    preheader_count = preheader_count.apply_probability (scale_step[i]);
>>> +	  /* Subtract out exit(s) from peeled copies.  */
>>> +	  new_loop_count = count_in - (e->count () - preheader_count);
>>> +	  scale_main = new_loop_count.probability_in (count_in);
>>>  	}
>>>      }
>>>  
>>> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>  	  scale_bbs_frequencies (new_bbs, n, scale_act);
>>>  	  scale_act = scale_act * scale_step[j];
>>>  	}
>>> +
>>> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
>>> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
>>> +	  && bbs_to_scale)
>>> +	{
>>> +	  edge new_exit = new_spec_edges[SE_ORIG];
>>> +	  profile_count new_count_in = new_exit->src->count;
>>> +	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>> +	  edge e;
>>> +	  edge_iterator ei;
>>> +
>>> +	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
>>> +	    if (e != new_exit)
>>> +	      break;
>>> +
>>> +	  gcc_assert (e && e != new_exit);
>>> +
>>> +	  new_exit->probability = preheader_count.probability_in (new_count_in);
>>> +	  e->probability = new_exit->probability.invert ();
>>> +
>>> +	  profile_count new_latch_count
>>> +	    = new_exit->src->count.apply_probability (e->probability);
>>> +	  profile_count old_latch_count = e->dest->count;
>>> +
>>> +	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
>>> +	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
>>> +						 new_latch_count,
>>> +						 old_latch_count);
>>> +
>>> +	  if (current_ir_type () != IR_GIMPLE)
>>> +	    update_br_prob_note (e->src);
>>> +	}
>>>      }
>>>    free (new_bbs);
>>>    free (orig_loops);
>>> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
>>> new file mode 100644
>>> index 0000000..f3b7c22
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/pr68212.c
>>> @@ -0,0 +1,13 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
>>> +
>>> +void foo(long int *a, long int *b, long int n)
>>> +{
>>> +  long int i;
>>> +
>>> +  for (i = 0; i < n; i++)
>>> +    a[i] = *b;
>>> +}
>>> +
>>> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
>>> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH V2] PING^2 correct COUNT and PROB for unrolled loop
  2020-06-03  5:22               ` [PATCH V2] PING^ " Jiufu Guo
@ 2020-06-18  1:22                 ` Jiufu Guo
  2020-07-02  2:35                   ` Jiufu Guo
  0 siblings, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-06-18  1:22 UTC (permalink / raw)
  To: Jiufu Guo via Gcc-patches; +Cc: Jan Hubicka, wschmidt, segher

Jiufu Guo <guojiufu@linux.ibm.com> writes:

Gentle ping.
https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

BR,
Jiufu Guo

> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>
> Hi,
>
> I would like to reping this, hope to get approval for this patch.
> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>
> BR,
> Jiufu Guo
>
>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>
>> Hi,
>>
>> I'd like to ping this patch for trunk on stage 1.
>>
>> This patch could fix the issue on incorrect COUNT/FREQUENCES of loop
>> unrolled blocks, and also could help the improve the cold/hot issue of
>> the unrolled loops.
>>
>> patch is also at
>> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>>
>> Thanks,
>> Jiufu
>>
>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>
>>> Hi!
>>>
>>> I'd like to ping following patch. As near end of gcc10 stage 4, it seems
>>> I would ask approval for GCC11 trunk.
>>>
>>> Thanks,
>>> Jiufu Guo
>>>
>>>> Hi Honza and all,
>>>>
>>>> I updated the patch a little as below. Bootstrap and regtest are ok
>>>> on powerpc64le.
>>>>
>>>> Is OK for trunk?
>>>>
>>>> Thanks for comments.
>>>> Jiufu
>>>>
>>>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>>>> index 727e951..ded0046 100644
>>>> --- a/gcc/cfgloopmanip.c
>>>> +++ b/gcc/cfgloopmanip.c
>>>> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>>>>  #include "gimplify-me.h"
>>>>  #include "tree-ssa-loop-manip.h"
>>>>  #include "dumpfile.h"
>>>> +#include "cfgrtl.h"
>>>>  
>>>>  static void copy_loops_to (class loop **, int,
>>>>  			   class loop *);
>>>> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>>  	  /* If original loop is executed COUNT_IN times, the unrolled
>>>>  	     loop will account SCALE_MAIN_DEN times.  */
>>>>  	  scale_main = count_in.probability_in (scale_main_den);
>>>> +
>>>> +	  /* If we are guessing at the number of iterations and count_in
>>>> +	     becomes unrealistically small, reset probability.  */
>>>> +	  if (!(count_in.reliable_p () || loop->any_estimate))
>>>> +	    {
>>>> +	      profile_count new_count_in = count_in.apply_probability (scale_main);
>>>> +	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>>> +	      if (new_count_in.apply_scale (1, 10) < preheader_count)
>>>> +		scale_main = profile_probability::likely ();
>>>> +	    }
>>>> +
>>>>  	  scale_act = scale_main * prob_pass_main;
>>>>  	}
>>>>        else
>>>>  	{
>>>> +	  profile_count new_loop_count;
>>>>  	  profile_count preheader_count = e->count ();
>>>> -	  for (i = 0; i < ndupl; i++)
>>>> -	    scale_main = scale_main * scale_step[i];
>>>>  	  scale_act = preheader_count.probability_in (count_in);
>>>> +	  /* Compute final preheader count after peeling NDUPL copies.  */
>>>> +	  for (i = 0; i < ndupl; i++)
>>>> +	    preheader_count = preheader_count.apply_probability (scale_step[i]);
>>>> +	  /* Subtract out exit(s) from peeled copies.  */
>>>> +	  new_loop_count = count_in - (e->count () - preheader_count);
>>>> +	  scale_main = new_loop_count.probability_in (count_in);
>>>>  	}
>>>>      }
>>>>  
>>>> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>>  	  scale_bbs_frequencies (new_bbs, n, scale_act);
>>>>  	  scale_act = scale_act * scale_step[j];
>>>>  	}
>>>> +
>>>> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
>>>> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
>>>> +	  && bbs_to_scale)
>>>> +	{
>>>> +	  edge new_exit = new_spec_edges[SE_ORIG];
>>>> +	  profile_count new_count_in = new_exit->src->count;
>>>> +	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>>> +	  edge e;
>>>> +	  edge_iterator ei;
>>>> +
>>>> +	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
>>>> +	    if (e != new_exit)
>>>> +	      break;
>>>> +
>>>> +	  gcc_assert (e && e != new_exit);
>>>> +
>>>> +	  new_exit->probability = preheader_count.probability_in (new_count_in);
>>>> +	  e->probability = new_exit->probability.invert ();
>>>> +
>>>> +	  profile_count new_latch_count
>>>> +	    = new_exit->src->count.apply_probability (e->probability);
>>>> +	  profile_count old_latch_count = e->dest->count;
>>>> +
>>>> +	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
>>>> +	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
>>>> +						 new_latch_count,
>>>> +						 old_latch_count);
>>>> +
>>>> +	  if (current_ir_type () != IR_GIMPLE)
>>>> +	    update_br_prob_note (e->src);
>>>> +	}
>>>>      }
>>>>    free (new_bbs);
>>>>    free (orig_loops);
>>>> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
>>>> new file mode 100644
>>>> index 0000000..f3b7c22
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/pr68212.c
>>>> @@ -0,0 +1,13 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
>>>> +
>>>> +void foo(long int *a, long int *b, long int n)
>>>> +{
>>>> +  long int i;
>>>> +
>>>> +  for (i = 0; i < n; i++)
>>>> +    a[i] = *b;
>>>> +}
>>>> +
>>>> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
>>>> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] PING^2 correct COUNT and PROB for unrolled loop
  2020-06-18  1:22                 ` [PATCH V2] PING^2 " Jiufu Guo
@ 2020-07-02  2:35                   ` Jiufu Guo
  2020-07-09 11:55                     ` Martin Liška
  0 siblings, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-07-02  2:35 UTC (permalink / raw)
  To: Jiufu Guo via Gcc-patches, Jan Hubicka; +Cc: wschmidt, segher

Jiufu Guo <guojiufu@linux.ibm.com> writes:

I would like to reping this patch.
Since this is correcting COUNT and PROB for hot blocks, it helps some
cases.

https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

Thanks,
Jiufu Guo

> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>
> Gentle ping.
> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>
> BR,
> Jiufu Guo
>
>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>
>> Hi,
>>
>> I would like to reping this, hope to get approval for this patch.
>> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>>
>> BR,
>> Jiufu Guo
>>
>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>
>>> Hi,
>>>
>>> I'd like to ping this patch for trunk on stage 1.
>>>
>>> This patch could fix the issue on incorrect COUNT/FREQUENCES of loop
>>> unrolled blocks, and also could help the improve the cold/hot issue of
>>> the unrolled loops.
>>>
>>> patch is also at
>>> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>>>
>>> Thanks,
>>> Jiufu
>>>
>>>> Jiufu Guo <guojiufu@linux.ibm.com> writes:
>>>>
>>>> Hi!
>>>>
>>>> I'd like to ping following patch. As near end of gcc10 stage 4, it seems
>>>> I would ask approval for GCC11 trunk.
>>>>
>>>> Thanks,
>>>> Jiufu Guo
>>>>
>>>>> Hi Honza and all,
>>>>>
>>>>> I updated the patch a little as below. Bootstrap and regtest are ok
>>>>> on powerpc64le.
>>>>>
>>>>> Is OK for trunk?
>>>>>
>>>>> Thanks for comments.
>>>>> Jiufu
>>>>>
>>>>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>>>>> index 727e951..ded0046 100644
>>>>> --- a/gcc/cfgloopmanip.c
>>>>> +++ b/gcc/cfgloopmanip.c
>>>>> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>>>>>  #include "gimplify-me.h"
>>>>>  #include "tree-ssa-loop-manip.h"
>>>>>  #include "dumpfile.h"
>>>>> +#include "cfgrtl.h"
>>>>>  
>>>>>  static void copy_loops_to (class loop **, int,
>>>>>  			   class loop *);
>>>>> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>>>  	  /* If original loop is executed COUNT_IN times, the unrolled
>>>>>  	     loop will account SCALE_MAIN_DEN times.  */
>>>>>  	  scale_main = count_in.probability_in (scale_main_den);
>>>>> +
>>>>> +	  /* If we are guessing at the number of iterations and count_in
>>>>> +	     becomes unrealistically small, reset probability.  */
>>>>> +	  if (!(count_in.reliable_p () || loop->any_estimate))
>>>>> +	    {
>>>>> +	      profile_count new_count_in = count_in.apply_probability (scale_main);
>>>>> +	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>>>> +	      if (new_count_in.apply_scale (1, 10) < preheader_count)
>>>>> +		scale_main = profile_probability::likely ();
>>>>> +	    }
>>>>> +
>>>>>  	  scale_act = scale_main * prob_pass_main;
>>>>>  	}
>>>>>        else
>>>>>  	{
>>>>> +	  profile_count new_loop_count;
>>>>>  	  profile_count preheader_count = e->count ();
>>>>> -	  for (i = 0; i < ndupl; i++)
>>>>> -	    scale_main = scale_main * scale_step[i];
>>>>>  	  scale_act = preheader_count.probability_in (count_in);
>>>>> +	  /* Compute final preheader count after peeling NDUPL copies.  */
>>>>> +	  for (i = 0; i < ndupl; i++)
>>>>> +	    preheader_count = preheader_count.apply_probability (scale_step[i]);
>>>>> +	  /* Subtract out exit(s) from peeled copies.  */
>>>>> +	  new_loop_count = count_in - (e->count () - preheader_count);
>>>>> +	  scale_main = new_loop_count.probability_in (count_in);
>>>>>  	}
>>>>>      }
>>>>>  
>>>>> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>>>  	  scale_bbs_frequencies (new_bbs, n, scale_act);
>>>>>  	  scale_act = scale_act * scale_step[j];
>>>>>  	}
>>>>> +
>>>>> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
>>>>> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
>>>>> +	  && bbs_to_scale)
>>>>> +	{
>>>>> +	  edge new_exit = new_spec_edges[SE_ORIG];
>>>>> +	  profile_count new_count_in = new_exit->src->count;
>>>>> +	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>>>> +	  edge e;
>>>>> +	  edge_iterator ei;
>>>>> +
>>>>> +	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
>>>>> +	    if (e != new_exit)
>>>>> +	      break;
>>>>> +
>>>>> +	  gcc_assert (e && e != new_exit);
>>>>> +
>>>>> +	  new_exit->probability = preheader_count.probability_in (new_count_in);
>>>>> +	  e->probability = new_exit->probability.invert ();
>>>>> +
>>>>> +	  profile_count new_latch_count
>>>>> +	    = new_exit->src->count.apply_probability (e->probability);
>>>>> +	  profile_count old_latch_count = e->dest->count;
>>>>> +
>>>>> +	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
>>>>> +	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
>>>>> +						 new_latch_count,
>>>>> +						 old_latch_count);
>>>>> +
>>>>> +	  if (current_ir_type () != IR_GIMPLE)
>>>>> +	    update_br_prob_note (e->src);
>>>>> +	}
>>>>>      }
>>>>>    free (new_bbs);
>>>>>    free (orig_loops);
>>>>> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
>>>>> new file mode 100644
>>>>> index 0000000..f3b7c22
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/pr68212.c
>>>>> @@ -0,0 +1,13 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
>>>>> +
>>>>> +void foo(long int *a, long int *b, long int n)
>>>>> +{
>>>>> +  long int i;
>>>>> +
>>>>> +  for (i = 0; i < n; i++)
>>>>> +    a[i] = *b;
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
>>>>> +

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] PING^2 correct COUNT and PROB for unrolled loop
  2020-07-02  2:35                   ` Jiufu Guo
@ 2020-07-09 11:55                     ` Martin Liška
  2020-07-10  2:14                       ` Jiufu Guo
  0 siblings, 1 reply; 16+ messages in thread
From: Martin Liška @ 2020-07-09 11:55 UTC (permalink / raw)
  To: Jiufu Guo, Jiufu Guo via Gcc-patches, Jan Hubicka
  Cc: wschmidt, segher, Richard Biener, Bin.Cheng

On 7/2/20 4:35 AM, Jiufu Guo via Gcc-patches wrote:
> I would like to reping this patch.
> Since this is correcting COUNT and PROB for hot blocks, it helps some
> cases.
> 
> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

Hey.

I've just briefly looked at the patch and I don't feel the right person
to make a review for it.

I believe Richi, Bin or Honza can help us here?
Martin

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] PING^2 correct COUNT and PROB for unrolled loop
  2020-07-09 11:55                     ` Martin Liška
@ 2020-07-10  2:14                       ` Jiufu Guo
  2020-07-10  7:37                         ` Martin Liška
  0 siblings, 1 reply; 16+ messages in thread
From: Jiufu Guo @ 2020-07-10  2:14 UTC (permalink / raw)
  To: Martin Liška
  Cc: Jiufu Guo via Gcc-patches, Jan Hubicka, wschmidt, segher,
	Richard Biener, Bin.Cheng


Hi Martin,

Thanks so much for your time and kindly help!!!

Wish Richi, Bin or Honza have time to review this patch. ;-)

--Here is a summmary---

PR68212 mentioned that the COUNT of unrolled loop was not correct, and
comments of this PR also mentioned that loop become 'cold'.

The following patch fixes the wrong COUNT/PROB of unrolled loop.  And
the patch resets the COUNT the case where unrolling in unreliable count
number can cause a loop to no longer look hot and therefor not get aligned.

Belows messages are referenced.
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02368.html) and comment
(https://gcc.gnu.org/ml/gcc-patches/2018-11/msg02380.html,
https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00044.html),

Bootstrap and regtest are ok on powerpc64le.  Is this ok for trunk?

ChangeLog:
2020-07-10  Jiufu Guo   <guojiufu@cn.ibm.com>
	    Pat Haugen  <pthaugen@us.ibm.com>

	PR rtl-optimization/68212
	* cfgloopmanip.c (duplicate_loop_to_header_edge): Correct COUNT/PROB
	for unrolled/peeled blocks.

testsuite/ChangeLog:
2020-07-10  Jiufu Guo   <guojiufu@cn.ibm.com>
	    Pat Haugen  <pthaugen@us.ibm.com>
	PR rtl-optimization/68212
	* gcc.dg/pr68212.c: New test.

diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 727e951..ded0046 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify-me.h"
 #include "tree-ssa-loop-manip.h"
 #include "dumpfile.h"
+#include "cfgrtl.h"
 
 static void copy_loops_to (class loop **, int,
 			   class loop *);
@@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  /* If original loop is executed COUNT_IN times, the unrolled
 	     loop will account SCALE_MAIN_DEN times.  */
 	  scale_main = count_in.probability_in (scale_main_den);
+
+	  /* If we are guessing at the number of iterations and count_in
+	     becomes unrealistically small, reset probability.  */
+	  if (!(count_in.reliable_p () || loop->any_estimate))
+	    {
+	      profile_count new_count_in = count_in.apply_probability (scale_main);
+	      profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	      if (new_count_in.apply_scale (1, 10) < preheader_count)
+		scale_main = profile_probability::likely ();
+	    }
+
 	  scale_act = scale_main * prob_pass_main;
 	}
       else
 	{
+	  profile_count new_loop_count;
 	  profile_count preheader_count = e->count ();
-	  for (i = 0; i < ndupl; i++)
-	    scale_main = scale_main * scale_step[i];
 	  scale_act = preheader_count.probability_in (count_in);
+	  /* Compute final preheader count after peeling NDUPL copies.  */
+	  for (i = 0; i < ndupl; i++)
+	    preheader_count = preheader_count.apply_probability (scale_step[i]);
+	  /* Subtract out exit(s) from peeled copies.  */
+	  new_loop_count = count_in - (e->count () - preheader_count);
+	  scale_main = new_loop_count.probability_in (count_in);
 	}
     }
 
@@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
 	  scale_bbs_frequencies (new_bbs, n, scale_act);
 	  scale_act = scale_act * scale_step[j];
 	}
+
+      /* Need to update PROB of exit edge and corresponding COUNT.  */
+      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
+	  && bbs_to_scale)
+	{
+	  edge new_exit = new_spec_edges[SE_ORIG];
+	  profile_count new_count_in = new_exit->src->count;
+	  profile_count preheader_count = loop_preheader_edge (loop)->count ();
+	  edge e;
+	  edge_iterator ei;
+
+	  FOR_EACH_EDGE (e, ei, new_exit->src->succs)
+	    if (e != new_exit)
+	      break;
+
+	  gcc_assert (e && e != new_exit);
+
+	  new_exit->probability = preheader_count.probability_in (new_count_in);
+	  e->probability = new_exit->probability.invert ();
+
+	  profile_count new_latch_count
+	    = new_exit->src->count.apply_probability (e->probability);
+	  profile_count old_latch_count = e->dest->count;
+
+	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+	    scale_bbs_frequencies_profile_count (new_bbs + i, 1,
+						 new_latch_count,
+						 old_latch_count);
+
+	  if (current_ir_type () != IR_GIMPLE)
+	    update_br_prob_note (e->src);
+	}
     }
   free (new_bbs);
   free (orig_loops);
diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
new file mode 100644
index 0000000..f3b7c22
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68212.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
+
+void foo(long int *a, long int *b, long int n)
+{
+  long int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = *b;
+}
+
+/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
+
-- 
2.7.4

Thanks!

Jiufu Guo.


Martin Liška <mliska@suse.cz> writes:

> On 7/2/20 4:35 AM, Jiufu Guo via Gcc-patches wrote:
>> I would like to reping this patch.
>> Since this is correcting COUNT and PROB for hot blocks, it helps some
>> cases.
>>
>> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>
> Hey.
>
> I've just briefly looked at the patch and I don't feel the right person
> to make a review for it.
>
> I believe Richi, Bin or Honza can help us here?
> Martin


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] PING^2 correct COUNT and PROB for unrolled loop
  2020-07-10  2:14                       ` Jiufu Guo
@ 2020-07-10  7:37                         ` Martin Liška
  2020-07-10 13:09                           ` Jiufu Guo
  0 siblings, 1 reply; 16+ messages in thread
From: Martin Liška @ 2020-07-10  7:37 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: Jiufu Guo via Gcc-patches, Jan Hubicka, wschmidt, segher,
	Richard Biener, Bin.Cheng

On 7/10/20 4:14 AM, Jiufu Guo wrote:
> Thanks so much for your time and kindly help!!!

And I run your patch on SPEC2006 with:
https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549728.html

Doing that I see just few changes:

diff -qr /tmp/before /tmp/after
Files /tmp/before/Meat.fppized.f90.000i.profile-report and /tmp/after/Meat.fppized.f90.000i.profile-report differ
Files /tmp/before/bezier.cpp.000i.profile-report and /tmp/after/bezier.cpp.000i.profile-report differ
Files /tmp/before/module_big_step_utilities_em.fppized.f90.000i.profile-report and /tmp/after/module_big_step_utilities_em.fppized.f90.000i.profile-report differ
Files /tmp/before/module_cu_bmj.fppized.f90.000i.profile-report and /tmp/after/module_cu_bmj.fppized.f90.000i.profile-report differ
Files /tmp/before/momx2.f.000i.profile-report and /tmp/after/momx2.f.000i.profile-report differ
Files /tmp/before/momx3.f.000i.profile-report and /tmp/after/momx3.f.000i.profile-report differ
Files /tmp/before/tml.f.000i.profile-report and /tmp/after/tml.f.000i.profile-report differ
Files /tmp/before/tranx2.f.000i.profile-report and /tmp/after/tranx2.f.000i.profile-report differ
Files /tmp/before/tranx3.f.000i.profile-report and /tmp/after/tranx3.f.000i.profile-report differ

But I see few regression, e.g.:

$ cat bezier.ii
int bezier_value_t, bezier_value_du_1;
int bezier_value_u[4], bezier_value_du[4];
void bezier_value() {
   int i = 1;
   for (; i < 4; i++) {
     bezier_value_u[i] = 1;
     bezier_value_du[i] = i * bezier_value_u[i - 1];
     bezier_value_t = bezier_value_du_1;
   }
}

$ g++ bezier.ii -c -march=native -O3 -Wno-multichar -Wno-aggressive-loop-optimizations -fdump-tree-pcom=/tmp/bad.txt
...

And your patch changed:

-  <bb 15> [local count: 134217728]:
+  <bb 15> [local count: 89478486]:

where the function looks like:

   <bb 3> [local count: 268435456]:
...
   if (ivtmp_45 > 1)
     goto <bb 15>; [50.00%]
   else
     goto <bb 10>; [50.00%]

   <bb 15> [local count: 89478486]:
   goto <bb 3>; [100.00%]

So 89478486 != 268435456 / 2. That seems a regression caused by your patch.

Can you please check it?
Martin

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH V2] PING^2 correct COUNT and PROB for unrolled loop
  2020-07-10  7:37                         ` Martin Liška
@ 2020-07-10 13:09                           ` Jiufu Guo
  0 siblings, 0 replies; 16+ messages in thread
From: Jiufu Guo @ 2020-07-10 13:09 UTC (permalink / raw)
  To: Martin Liška
  Cc: Jiufu Guo via Gcc-patches, Jan Hubicka, wschmidt, segher,
	Richard Biener, Bin.Cheng


Hi Martin,

Martin Liška <mliska@suse.cz> writes:

> On 7/10/20 4:14 AM, Jiufu Guo wrote:
>> Thanks so much for your time and kindly help!!!
>
> And I run your patch on SPEC2006 with:
> https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549728.html
>
> Doing that I see just few changes:
>
> diff -qr /tmp/before /tmp/after
> Files /tmp/before/Meat.fppized.f90.000i.profile-report and /tmp/after/Meat.fppized.f90.000i.profile-report differ
> Files /tmp/before/bezier.cpp.000i.profile-report and /tmp/after/bezier.cpp.000i.profile-report differ
> Files /tmp/before/module_big_step_utilities_em.fppized.f90.000i.profile-report and /tmp/after/module_big_step_utilities_em.fppized.f90.000i.profile-report differ
> Files /tmp/before/module_cu_bmj.fppized.f90.000i.profile-report and /tmp/after/module_cu_bmj.fppized.f90.000i.profile-report differ
> Files /tmp/before/momx2.f.000i.profile-report and /tmp/after/momx2.f.000i.profile-report differ
> Files /tmp/before/momx3.f.000i.profile-report and /tmp/after/momx3.f.000i.profile-report differ
> Files /tmp/before/tml.f.000i.profile-report and /tmp/after/tml.f.000i.profile-report differ
> Files /tmp/before/tranx2.f.000i.profile-report and /tmp/after/tranx2.f.000i.profile-report differ
> Files /tmp/before/tranx3.f.000i.profile-report and /tmp/after/tranx3.f.000i.profile-report differ
>
> But I see few regression, e.g.:
>
> $ cat bezier.ii
> int bezier_value_t, bezier_value_du_1;
> int bezier_value_u[4], bezier_value_du[4];
> void bezier_value() {
>   int i = 1;
>   for (; i < 4; i++) {
>     bezier_value_u[i] = 1;
>     bezier_value_du[i] = i * bezier_value_u[i - 1];
>     bezier_value_t = bezier_value_du_1;
>   }
> }
>
> $ g++ bezier.ii -c -march=native -O3 -Wno-multichar -Wno-aggressive-loop-optimizations -fdump-tree-pcom=/tmp/bad.txt
> ...
>
> And your patch changed:
>
> -  <bb 15> [local count: 134217728]:
> +  <bb 15> [local count: 89478486]:
>
> where the function looks like:
>
>   <bb 3> [local count: 268435456]:
> ...
>   if (ivtmp_45 > 1)
>     goto <bb 15>; [50.00%]
>   else
>     goto <bb 10>; [50.00%]
>
>   <bb 15> [local count: 89478486]:
>   goto <bb 3>; [100.00%]
>
> So 89478486 != 268435456 / 2. That seems a regression caused by your patch.
>
> Can you please check it?
Thanks a lot for your tests and findings!
Sure, I will have a check.

BR.
Jiufu,

> Martin

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-07-10 13:09 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-03  8:17 [PATCH] correct COUNT and PROB for unrolled loop Jiufu Guo
2020-02-03 16:04 ` Pat Haugen
2020-02-03 16:20   ` Jeff Law
2020-02-03 16:23     ` Jan Hubicka
2020-02-11  2:29       ` Jiufu Guo
2020-02-17  6:23         ` [PATCH V2] " Jiufu Guo
2020-02-28  7:56           ` Jiufu Guo
2020-03-19  2:21           ` Jiufu Guo
2020-05-19  6:15             ` Jiufu Guo
2020-06-03  5:22               ` [PATCH V2] PING^ " Jiufu Guo
2020-06-18  1:22                 ` [PATCH V2] PING^2 " Jiufu Guo
2020-07-02  2:35                   ` Jiufu Guo
2020-07-09 11:55                     ` Martin Liška
2020-07-10  2:14                       ` Jiufu Guo
2020-07-10  7:37                         ` Martin Liška
2020-07-10 13:09                           ` Jiufu Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).