public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Here is an updated patch. (issue4438079)
@ 2011-04-28  4:56 Sharad Singhai
  2011-04-28 15:22 ` Diego Novillo
  0 siblings, 1 reply; 4+ messages in thread
From: Sharad Singhai @ 2011-04-28  4:56 UTC (permalink / raw)
  To: reply, dnovillo, gcc-patches

Hi Diego,

Thanks for the quick feedback. Here is a an updated version of the patch.

2011-04-27  Sharad Singhai  <singhai@google.com>

	ChangeLog.google-main
	* params.def: Add new parameters to control peeling.
	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use
	different peeling parameters when profile feedback is available.
	* loop-unroll.c (decide_peel_once_rolling): Ditto.
	(decide_peel_completely): Ditto.
	* doc/invoke.texi: Document new peeling parameters.

	testsuite/ChangeLog.google-main
	* gcc.dg/vect/O3-vect-pr34223.c: Add new peeling
	parameters.
	* gcc.dg/vect/vect.exp: Allow reading flags in individual
	tests.

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 172904)
+++ doc/invoke.texi	(working copy)
@@ -8523,11 +8523,28 @@
 The maximum number of peelings of a single loop.
 
 @item max-completely-peeled-insns
+@item max-completely-peeled-insns-feedback
 The maximum number of insns of a completely peeled loop.
 
+The @option{max-completely-peeled-insns-feedback} is used only when profile
+feedback is available and the loop is hot. Because of the real profiles, this
+value may set to be larger for hot loops.
+
+@item max-once-peeled-insns
+@item max-once-peeled-insns-feedback
+The maximum number of insns of a peeled loop that rolls only once.
+The @option{max-once-peeled-insns-feedback}  is used only when profile feedback
+is available and the loop is hot. Because of the real profiles, this value
+may set to be larger for hot loops.
+
 @item max-completely-peel-times
+@item max-completely-peel-times-feedback
 The maximum number of iterations of a loop to be suitable for complete peeling.
 
+The @option{max-completely-peel-times-feedback} is used only when profile feedback
+is available and the loop is hot. Because of the real profiles, this value may
+set to be larger for hot loops.
+
 @item max-completely-peel-loop-nest-depth
 The maximum depth of a loop nest suitable for complete peeling.
 
Index: testsuite/gcc.dg/vect/O3-vect-pr34223.c
===================================================================
--- testsuite/gcc.dg/vect/O3-vect-pr34223.c	(revision 172904)
+++ testsuite/gcc.dg/vect/O3-vect-pr34223.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-options "[vect_cflags] --param max-completely-peel-times=16" } */
 
 #include "tree-vect.h"
 
Index: testsuite/gcc.dg/vect/vect.exp
===================================================================
--- testsuite/gcc.dg/vect/vect.exp	(revision 172904)
+++ testsuite/gcc.dg/vect/vect.exp	(working copy)
@@ -24,6 +24,12 @@
 global DEFAULT_VECTCFLAGS
 set DEFAULT_VECTCFLAGS ""
 
+# So that we can read flags in individual tests.
+proc vect_cflags { } {
+  global DEFAULT_VECTCFLAGS
+  return $DEFAULT_VECTCFLAGS
+}
+
 # If the target system supports vector instructions, the default action
 # for a test is 'run', otherwise it's 'compile'.  Save current default.
 # Executing vector instructions on a system without hardware vector support
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 172904)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -326,6 +326,7 @@
 			    enum unroll_level ul)
 {
   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
+  unsigned HOST_WIDE_INT max_peeled_insns;
   gimple cond;
   struct loop_size size;
 
@@ -336,9 +337,23 @@
     return false;
   n_unroll = tree_low_cst (niter, 1);
 
-  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+  if (profile_info
+      && flag_branch_probabilities
+      && optimize_loop_for_speed_p (loop))
+    max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
+  else
+    max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+
   if (n_unroll > max_unroll)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "  Not unrolling loop %d limited by max unroll"
+                   " (%d > %d)\n",
+                   loop->num, (int) n_unroll, (int) max_unroll);
+        }
     return false;
+  }
 
   if (n_unroll)
     {
@@ -356,14 +371,21 @@
 		   (int) unr_insns);
 	}
 
-      if (unr_insns > ninsns
-	  && (unr_insns
-	      > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
+      if (profile_info
+          && flag_branch_probabilities
+          && optimize_loop_for_speed_p (loop))
+        max_peeled_insns =
+          PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK);
+      else
+        max_peeled_insns = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS);
+
+      if (unr_insns > max_peeled_insns)
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    fprintf (dump_file, "Not unrolling loop %d "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
+		     "(--param max-completely-peeled-insns(-feedback) limit. "
+                     "(%u > %u)).\n",
+                     loop->num, (unsigned) unr_insns, (unsigned) max_peeled_insns);
 	  return false;
 	}
 
@@ -371,7 +393,8 @@
 	  && unr_insns > ninsns)
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
+	    fprintf (dump_file, "Not unrolling loop %d (NO_GROWTH %d > %d).\n",
+                     loop->num, (int) unr_insns, (int) ninsns);
 	  return false;
 	}
     }
@@ -418,8 +441,9 @@
   update_stmt (cond);
   update_ssa (TODO_update_ssa);
 
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
+  if (dump_file)
+    fprintf (dump_file, "Unrolled loop %d completely by factor %d.\n",
+             loop->num, (int) n_unroll);
 
   return true;
 }
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 172904)
+++ loop-unroll.c	(working copy)
@@ -324,15 +324,23 @@
 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 {
   struct niter_desc *desc;
+  unsigned max_peeled_insns;
 
+  if (profile_info && flag_branch_probabilities)
+    max_peeled_insns =
+      (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK);
+  else
+    max_peeled_insns = (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS);
+
   if (dump_file)
     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 
   /* Is the loop small enough?  */
-  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
+  if (max_peeled_insns < loop->ninsns)
     {
       if (dump_file)
-	fprintf (dump_file, ";; Not considering loop, is too big\n");
+	fprintf (dump_file, ";; Not considering loop, is too big (%d > %u)\n",
+                 loop->ninsns, max_peeled_insns);
       return;
     }
 
@@ -362,7 +370,7 @@
 static void
 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 {
-  unsigned npeel;
+  unsigned npeel, max_insns, max_peel;
   struct niter_desc *desc;
 
   if (dump_file)
@@ -393,16 +401,30 @@
       return;
     }
 
+  if (profile_info && flag_branch_probabilities)
+    {
+      max_insns =
+        (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK);
+      max_peel =
+        (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
+    }
+  else
+    {
+      max_insns = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS);
+      max_peel = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+    }
+
   /* npeel = number of iterations to peel.  */
-  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
-  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
-    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+  npeel = max_insns / loop->ninsns;
+  if (npeel > max_peel)
+    npeel = max_peel;
 
   /* Is the loop small enough?  */
   if (!npeel)
     {
       if (dump_file)
-	fprintf (dump_file, ";; Not considering loop, is too big\n");
+	fprintf (dump_file, ";; Not considering loop, is too big, npeel=%u.\n",
+                 npeel);
       return;
     }
 
@@ -435,7 +457,7 @@
 
   /* Success.  */
   if (dump_file)
-    fprintf (dump_file, ";; Decided to peel loop completely\n");
+    fprintf (dump_file, ";; Decided to peel loop completely npeel %u\n", npeel);
   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 }
 
Index: params.def
===================================================================
--- params.def	(revision 172904)
+++ params.def	(working copy)
@@ -299,16 +299,37 @@
 	"max-completely-peeled-insns",
 	"The maximum number of insns of a completely peeled loop",
 	400, 0, 0)
+/* The maximum number of insns of a peeled loop, when feedback
+   information is available.  */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK,
+	"max-completely-peeled-insns-feedback",
+	"The maximum number of insns of a completely peeled loop when profile "
+         "feedback is available",
+	600, 0, 0)
 /* The maximum number of peelings of a single loop that is peeled completely.  */
 DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
 	"max-completely-peel-times",
 	"The maximum number of peelings of a single loop that is peeled completely",
-	16, 0, 0)
+	8, 0, 0)
+/* The maximum number of peelings of a single loop that is peeled
+   completely, when feedback information is available.  */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK,
+	"max-completely-peel-times-feedback",
+	"The maximum number of peelings of a single loop that is peeled "
+         "completely, when profile feedback is available",
+ 	16, 0, 0)
 /* The maximum number of insns of a peeled loop that rolls only once.  */
 DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS,
 	"max-once-peeled-insns",
 	"The maximum number of insns of a peeled loop that rolls only once",
 	400, 0, 0)
+/* The maximum number of insns of a peeled loop that rolls only once,
+   when feedback information is available.  */
+DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK,
+	"max-once-peeled-insns-feedback",
+	"The maximum number of insns of a peeled loop that rolls only once, "
+         "when profile feedback is available",
+         600, 0, 0)
 /* The maximum depth of a loop nest we completely peel.  */
 DEFPARAM(PARAM_MAX_UNROLL_ITERATIONS,
 	 "max-completely-peel-loop-nest-depth",

--
This patch is available for review at http://codereview.appspot.com/4438079

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Here is an updated patch. (issue4438079)
  2011-04-28  4:56 Here is an updated patch. (issue4438079) Sharad Singhai
@ 2011-04-28 15:22 ` Diego Novillo
  2011-04-28 15:35   ` Richard Guenther
  0 siblings, 1 reply; 4+ messages in thread
From: Diego Novillo @ 2011-04-28 15:22 UTC (permalink / raw)
  To: Sharad Singhai; +Cc: reply, gcc-patches

On Wed, Apr 27, 2011 at 22:09, Sharad Singhai <singhai@google.com> wrote:
> Hi Diego,
>
> Thanks for the quick feedback. Here is a an updated version of the patch.
>
> 2011-04-27  Sharad Singhai  <singhai@google.com>
>
>        ChangeLog.google-main
>        * params.def: Add new parameters to control peeling.
>        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use
>        different peeling parameters when profile feedback is available.
>        * loop-unroll.c (decide_peel_once_rolling): Ditto.
>        (decide_peel_completely): Ditto.
>        * doc/invoke.texi: Document new peeling parameters.
>
>        testsuite/ChangeLog.google-main
>        * gcc.dg/vect/O3-vect-pr34223.c: Add new peeling
>        parameters.
>        * gcc.dg/vect/vect.exp: Allow reading flags in individual
>        tests.

OK for google/main.

Will you be submitting this patch for trunk as well?  If you get it
approved for trunk then you will not need to maintain it in
google/main :)


Diego.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Here is an updated patch. (issue4438079)
  2011-04-28 15:22 ` Diego Novillo
@ 2011-04-28 15:35   ` Richard Guenther
  2011-04-28 23:19     ` Xinliang David Li
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Guenther @ 2011-04-28 15:35 UTC (permalink / raw)
  To: Diego Novillo; +Cc: Sharad Singhai, reply, gcc-patches

On Thu, Apr 28, 2011 at 4:47 PM, Diego Novillo <dnovillo@google.com> wrote:
> On Wed, Apr 27, 2011 at 22:09, Sharad Singhai <singhai@google.com> wrote:
>> Hi Diego,
>>
>> Thanks for the quick feedback. Here is a an updated version of the patch.
>>
>> 2011-04-27  Sharad Singhai  <singhai@google.com>
>>
>>        ChangeLog.google-main
>>        * params.def: Add new parameters to control peeling.
>>        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use
>>        different peeling parameters when profile feedback is available.
>>        * loop-unroll.c (decide_peel_once_rolling): Ditto.
>>        (decide_peel_completely): Ditto.
>>        * doc/invoke.texi: Document new peeling parameters.
>>
>>        testsuite/ChangeLog.google-main
>>        * gcc.dg/vect/O3-vect-pr34223.c: Add new peeling
>>        parameters.
>>        * gcc.dg/vect/vect.exp: Allow reading flags in individual
>>        tests.
>
> OK for google/main.
>
> Will you be submitting this patch for trunk as well?  If you get it
> approved for trunk then you will not need to maintain it in
> google/main :)

The doc change looks weird, you should have separate entries and
not combine them.  Checking just for profile_info != NULL is bogus,
please instead check profile_status == PROFILE_READ.

Did you do any measurements to decide on the param defaults?
Parameter defaults should be documented.  Did you consider
adding a single parameter that scales the existing parameters for
hot loops with profile-feedback?

Richard.

> Diego.
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Here is an updated patch. (issue4438079)
  2011-04-28 15:35   ` Richard Guenther
@ 2011-04-28 23:19     ` Xinliang David Li
  0 siblings, 0 replies; 4+ messages in thread
From: Xinliang David Li @ 2011-04-28 23:19 UTC (permalink / raw)
  To: Richard Guenther; +Cc: Diego Novillo, Sharad Singhai, reply, gcc-patches

Introducing the parameters for FDO allows FDO specific tunings.   In
general, these parameters are kludges lacking a better way of doing
it. In the long run, we are working on smarter mechanism to make
decisions based on hot program traces and locality regions as well as
information such as uArch differences -- however this mechanism is not
yet in place.

Thanks,

David

On Thu, Apr 28, 2011 at 8:08 AM, Richard Guenther
<richard.guenther@gmail.com> wrote:
> On Thu, Apr 28, 2011 at 4:47 PM, Diego Novillo <dnovillo@google.com> wrote:
>> On Wed, Apr 27, 2011 at 22:09, Sharad Singhai <singhai@google.com> wrote:
>>> Hi Diego,
>>>
>>> Thanks for the quick feedback. Here is a an updated version of the patch.
>>>
>>> 2011-04-27  Sharad Singhai  <singhai@google.com>
>>>
>>>        ChangeLog.google-main
>>>        * params.def: Add new parameters to control peeling.
>>>        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use
>>>        different peeling parameters when profile feedback is available.
>>>        * loop-unroll.c (decide_peel_once_rolling): Ditto.
>>>        (decide_peel_completely): Ditto.
>>>        * doc/invoke.texi: Document new peeling parameters.
>>>
>>>        testsuite/ChangeLog.google-main
>>>        * gcc.dg/vect/O3-vect-pr34223.c: Add new peeling
>>>        parameters.
>>>        * gcc.dg/vect/vect.exp: Allow reading flags in individual
>>>        tests.
>>
>> OK for google/main.
>>
>> Will you be submitting this patch for trunk as well?  If you get it
>> approved for trunk then you will not need to maintain it in
>> google/main :)
>
> The doc change looks weird, you should have separate entries and
> not combine them.  Checking just for profile_info != NULL is bogus,
> please instead check profile_status == PROFILE_READ.
>
> Did you do any measurements to decide on the param defaults?
> Parameter defaults should be documented.  Did you consider
> adding a single parameter that scales the existing parameters for
> hot loops with profile-feedback?
>
> Richard.
>
>> Diego.
>>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-04-28 22:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-28  4:56 Here is an updated patch. (issue4438079) Sharad Singhai
2011-04-28 15:22 ` Diego Novillo
2011-04-28 15:35   ` Richard Guenther
2011-04-28 23:19     ` Xinliang David Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).