public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2 3/3] Consider doloop cmp use in ivopts
@ 2019-05-14  3:10 linkw
  2019-05-14  7:26 ` Richard Biener
  2019-06-19 11:47 ` [PATCH v3 3/3] PR80791 " Kewen.Lin
  0 siblings, 2 replies; 43+ messages in thread
From: linkw @ 2019-05-14  3:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: segher, wschmidt, bin.cheng, rguenther, jakub, Kewen Lin

From: Kewen Lin <linkw@linux.ibm.com>

Previous version link for background:
https://gcc.gnu.org/ml/gcc-patches/2019-04/msg00912.html

Firstly, it's to call predict_doloop_p hook to check this
loop will be transformed to doloop or not, if yes, find
the expected comp iv use and its dependent original iv,
set the iv candidate as bind_cand of the group.
In following candidate selection process, we will bypass
the group with bind_cand, since we don't want to affect
global decision making for an iv use which will be
eliminated eventually.  At the time of iv candidate set
finalization, we will fill the cost pair for the group
with bind_cand.  If the bind_cand is already in the final
set, then just use it. Otherwise, we can check whether one
of existing final set is better and fill with that if so.

Bootstrapped and regression testing passed on powerpc64le.

Is it ok for trunk?

gcc/ChangeLog

2019-05-14  Kewen Lin  <linkw@gcc.gnu.org>

	PR middle-end/80791
	* tree-ssa-loop-ivopts.c (tree_ssa_iv_optimize_loop): Call
	predict_doloop_p hook and bind_cand_for_doloop_uses.
	(bind_cand_for_doloop_uses): New function.
	(find_optimal_iv_set): Call handle_groups_with_bind_cand.
	(handle_groups_with_bind_cand): New function.
	(record_group): Init bind_cand.
	(set_group_iv_cost): Consider bind_cand group.
	(iv_ca_dump): Add dump for bind_cand.
	(try_add_cand_for): Bypass bind_cand group.
	(iv_ca_extend): Likewise.
	(iv_ca_narrow): Likewise.
	(iv_ca_replace): Likewise.

gcc/testsuite/ChangeLog

2019-05-14  Kewen Lin  <linkw@gcc.gnu.org>

	PR middle-end/80791
	* gcc.dg/tree-ssa/ivopts-lt.c : Adjust.

---
 gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c |   7 +-
 gcc/tree-ssa-loop-ivopts.c                | 155 +++++++++++++++++++++++++++++-
 2 files changed, 156 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
index 171c85a..f61288c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
@@ -17,6 +17,7 @@ f1 (char *p, uintptr_t i, uintptr_t n)
   while (i < n);
 }
 
-/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */
-/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts"} } */
-/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" } } */
+/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "PHI" 2 "ivopts" { target { powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 885c8e8..50b5900 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -393,6 +393,8 @@ struct iv_group
   struct cost_pair *cost_map;
   /* The selected candidate for the group.  */
   struct iv_cand *selected;
+  /* The bind candidate for this group, for doloop only so far.  */
+  struct iv_cand *bind_cand;
   /* Uses in the group.  */
   vec<struct iv_use *> vuses;
 };
@@ -1592,6 +1594,7 @@ record_group (struct ivopts_data *data, enum use_type type)
   group->type = type;
   group->related_cands = BITMAP_ALLOC (NULL);
   group->vuses.create (1);
+  group->bind_cand = NULL;
 
   data->vgroups.safe_push (group);
   return group;
@@ -3612,7 +3615,9 @@ set_group_iv_cost (struct ivopts_data *data,
 {
   unsigned i, s;
 
-  if (cost.infinite_cost_p ())
+  gcc_assert (cand);
+  /* For the group with bind_cand, make it always have cost pair.  */
+  if (cost.infinite_cost_p () && group->bind_cand != cand)
     {
       BITMAP_FREE (inv_vars);
       BITMAP_FREE (inv_exprs);
@@ -6067,7 +6072,8 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
 		 group->id, cp->cand->id, cp->cost.cost,
 		 cp->cost.complexity);
       else
-	fprintf (file, "   group:%d --> ??\n", group->id);
+	fprintf (file, "   group:%d --> ?? %s\n", group->id,
+		 group->bind_cand ? "(bind)" : "");
     }
 
   const char *pref = "";
@@ -6110,6 +6116,9 @@ iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
   for (i = 0; i < ivs->upto; i++)
     {
       group = data->vgroups[i];
+      /* Ignore groups binded with some cand.  */
+      if (group->bind_cand)
+	continue;
       old_cp = iv_ca_cand_for_group (ivs, group);
 
       if (old_cp
@@ -6165,7 +6174,9 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
   for (i = 0; i < data->vgroups.length (); i++)
     {
       group = data->vgroups[i];
-
+      /* Ignore groups binded with some cand.  */
+      if (group->bind_cand)
+	continue;
       old_cp = iv_ca_cand_for_group (ivs, group);
       if (old_cp->cand != cand)
 	continue;
@@ -6348,6 +6359,9 @@ iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
       for (j = 0; j < ivs->upto; j++)
 	{
 	  struct iv_group *group = data->vgroups[j];
+	  /* Ignore groups binded with some cand.  */
+	  if (group->bind_cand)
+	    continue;
 	  old_cp = iv_ca_cand_for_group (ivs, group);
 
 	  if (old_cp->cand != cand)
@@ -6406,6 +6420,15 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
   struct iv_ca_delta *best_delta = NULL, *act_delta;
   struct cost_pair *cp;
 
+  /* Bypass the candidate selection for the groups with bind_cand, but need
+     to keep upto up to date, to avoid the count of visited groups becomes
+     inconsistent in futher handlings.  */
+  if (group->bind_cand)
+    {
+      ivs->upto++;
+      return true;
+    }
+
   iv_ca_add_group (data, ivs, group);
   best_cost = iv_ca_cost (ivs);
   cp = iv_ca_cand_for_group (ivs, group);
@@ -6635,6 +6658,59 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
   return set;
 }
 
+/* Since we bypass the candidate determination process for the groups with
+   bind_cand previously, now we want to fill the cost pair for them.  The
+   simplest way is to fill the bind_cand directly, but for some cases it
+   exposes more opportunities for downstream optimization if we rewrite the
+   cmp use with one candidate in the final set.  So the idea is:
+     1) if the bind_cand is already in final set, use bind_cand.
+     2) otherwise, check whether final set has better candidate,
+	fill with it if yes; or still go with bind_cand.  */
+
+static void
+handle_groups_with_bind_cand (struct ivopts_data *data, struct iv_ca *set)
+{
+  for (unsigned i = 0; i < data->vgroups.length (); i++)
+    {
+      struct iv_group *group = data->vgroups[i];
+      if (group->bind_cand)
+	{
+	  /* Since we always bypass it before.  */
+	  gcc_assert (!iv_ca_cand_for_group (set, group));
+
+	  struct cost_pair *best_cp
+	    = get_group_iv_cost (data, group, group->bind_cand);
+	  gcc_assert (best_cp);
+
+	  if (!bitmap_bit_p (set->cands, group->bind_cand->id))
+	    {
+	      /* Count in cost of bind_cand.  */
+	      best_cp->cost.cost += best_cp->cand->cost;
+	      unsigned j;
+	      bitmap_iterator bi;
+	      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, j, bi)
+	      {
+		struct iv_cand *cand = data->vcands[j];
+		/* Since the purpose of rewrite here is to expose more
+		   opportunities to downstream, the cost saving isn't
+		   critical because this cmp use gets elimintated
+		   finally.  So far we can't see any gains to replace
+		   original non memory base iv cand with memory based
+		   iv cand, but the rewrite could cause doloop unable
+		   to find it's finite.  */
+		if (group->bind_cand->iv->base_object == NULL_TREE
+		    && cand->iv->base_object != NULL_TREE)
+		  continue;
+		struct cost_pair *cp = get_group_iv_cost (data, group, cand);
+		if (cp && cp->cost < best_cp->cost)
+		  best_cp = cp;
+	      }
+	    }
+	  iv_ca_set_cp (data, set, group, best_cp);
+	}
+    }
+}
+
 static struct iv_ca *
 find_optimal_iv_set (struct ivopts_data *data)
 {
@@ -6672,6 +6748,8 @@ find_optimal_iv_set (struct ivopts_data *data)
   else if (origset)
     iv_ca_free (&origset);
 
+  handle_groups_with_bind_cand (data, set);
+
   for (i = 0; i < data->vgroups.length (); i++)
     {
       struct iv_group *group = data->vgroups[i];
@@ -7442,12 +7520,69 @@ loop_body_includes_call (basic_block *body, unsigned num_nodes)
   return false;
 }
 
+/* Doloop optimization RTL pass can make the related comparison computation
+   become dead and get eliminated, then these comparison IV uses should NOT
+   be considered in optimal IVs determination, set bind_cand for this kind
+   of group, bypass them in later candidate determination algorithm.  */
+
+static void
+bind_cand_for_doloop_uses (struct ivopts_data *data)
+{
+  struct loop *loop = data->current_loop;
+
+  for (unsigned i = 0; i < data->vgroups.length (); i++)
+    {
+      struct iv_group *group = data->vgroups[i];
+      if (group->type == USE_COMPARE)
+	{
+	  gcc_assert (group->vuses.length () == 1);
+	  struct iv_use *use = group->vuses[0];
+	  gimple *stmt = use->stmt;
+	  if (gimple_code (stmt) == GIMPLE_COND)
+	    {
+	      basic_block bb = gimple_bb (stmt);
+	      edge true_edge, false_edge;
+	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
+
+	      /* This comparison is used for loop latch.  Require latch is empty
+		 for now.  */
+	      if ((loop->latch == true_edge->dest
+		   || loop->latch == false_edge->dest)
+		  && empty_block_p (loop->latch))
+		{
+		  for (unsigned j = 0; j < data->vcands.length (); j++)
+		    {
+		      if (bitmap_bit_p (group->related_cands, j))
+			{
+			  struct iv_cand *cand = data->vcands[j];
+			  tree op = use->iv->ssa_name;
+			  if (op == cand->var_before || op == cand->var_after)
+			    {
+			      group->bind_cand = cand;
+			      if (dump_file && (dump_flags & TDF_DETAILS))
+				{
+				  fprintf (dump_file, "Doloop cmp iv use: ");
+				  print_gimple_stmt (dump_file, stmt,
+						     TDF_DETAILS);
+				  dump_cand (dump_file, cand);
+				}
+			      break;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+    }
+}
+
 /* Optimizes the LOOP.  Returns true if anything changed.  */
 
 static bool
 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
 {
   bool changed = false;
+  bool bind_for_doloop_p = false;
   struct iv_ca *iv_ca;
   edge exit = single_dom_exit (loop);
   basic_block *body;
@@ -7496,6 +7631,20 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
   /* Finds candidates for the induction variables (item 2).  */
   find_iv_candidates (data);
 
+  bind_for_doloop_p = targetm.predict_doloop_p (loop);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file,
+	       "Predict loop %d can %sperform doloop optimization later.\n",
+	       loop->num, bind_for_doloop_p ? "" : "not ");
+      flow_loop_dump (loop, dump_file, NULL, 1);
+    }
+
+  /* Some compare iv_use is probably useless once the doloop optimization
+     performs.  Set bind_cand for the use (group).  */
+  if (bind_for_doloop_p)
+    bind_cand_for_doloop_uses (data);
+
   /* Calculates the costs (item 3, part 1).  */
   determine_iv_costs (data);
   determine_group_iv_costs (data);
-- 
2.7.4

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2019-09-14  9:35 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-14  3:10 [PATCH v2 3/3] Consider doloop cmp use in ivopts linkw
2019-05-14  7:26 ` Richard Biener
2019-05-15  5:03   ` Kewen.Lin
2019-05-15  8:47     ` Richard Biener
2019-05-15 16:17       ` Segher Boessenkool
2019-05-16  7:25         ` Richard Biener
2019-05-16 17:35           ` Segher Boessenkool
2019-05-16  3:53       ` Kewen.Lin
2019-05-16 18:41       ` Jeff Law
2019-05-16 21:42         ` Segher Boessenkool
2019-06-19 11:47 ` [PATCH v3 3/3] PR80791 " Kewen.Lin
2019-06-20  9:09   ` Segher Boessenkool
2019-06-20 12:08     ` Kewen.Lin
2019-06-20 12:17       ` Kewen.Lin
2019-07-10  2:31         ` [PING^1][PATCH v4 " Kewen.Lin
2019-07-12 12:40           ` Richard Biener
2019-07-12 14:10             ` Segher Boessenkool
2019-07-15  6:40             ` Kewen.Lin
2019-07-15  6:50             ` Bin.Cheng
2019-07-21  9:06   ` [PATCH v3 " Bin.Cheng
2019-07-22  5:42     ` Kewen.Lin
2019-07-22  6:53       ` Segher Boessenkool
2019-07-22  7:18         ` Kewen.Lin
2019-07-22  8:02         ` Richard Biener
2019-07-22 21:47           ` Segher Boessenkool
2019-07-23  6:14             ` Kewen.Lin
2019-07-23  7:38             ` Richard Biener
2019-07-23  6:09           ` Kewen.Lin
2019-07-23  8:05             ` Richard Biener
2019-07-23  6:28       ` [PATCH v5 " Kewen.Lin
2019-08-14  7:48         ` [PATCH v6 " Kewen.Lin
2019-08-21 13:42           ` Bin.Cheng
2019-08-22  7:09             ` Kewen.Lin
2019-08-22  8:07               ` Bin.Cheng
2019-08-22  9:16                 ` Kewen.Lin
2019-08-23  5:31                   ` Bin.Cheng
2019-08-23  9:57                     ` Kewen.Lin
2019-08-23 10:43                       ` Bin.Cheng
2019-08-23 11:02                         ` Segher Boessenkool
2019-09-11  6:18                           ` Kewen.Lin
2019-09-12  8:14                             ` Richard Biener
2019-09-14  9:35                               ` Kewen.Lin
2019-08-24 22:43                         ` Kewen.Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).