public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] modulo-sched: Carefully process loop counter initialization [PR97421]
@ 2020-11-29 19:24 Roman Zhuykov
  0 siblings, 0 replies; only message in thread
From: Roman Zhuykov @ 2020-11-29 19:24 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Alex Coplan, Jakub Jelinek, Andrey Belevantsev,
	Alexander Monakov

[-- Attachment #1: Type: text/plain, Size: 8686 bytes --]

Hi all!

The same patch is attached (with its commit message) and also inlined below. It was successfully regstrapped (bootstrapped and regression-tested) on aarch64-linux. I also plan to briefly check an amd64 build before pushing.

I will push it in a few days if there are no objections. Any opinions about backports?


Roman

--
    modulo-sched: Carefully process loop counter initialization [PR97421]
    
    Do not allow direct adjustment of pre-header initialization instruction for
    count register if it is read in some instruction below in that basic block.
    
    gcc/ChangeLog:
    
    	PR rtl-optimization/97421
    	* modulo-sched.c (generate_prolog_epilog): Remove forward
    	declaration, adjust last argument name and type.
    	(const_iteration_count): Add bool pointer parameter to return
    	whether count register is read in pre-header after its
    	initialization.
    	(sms_schedule): Fix count register initialization adjustment
    	procedure according to what const_iteration_count said.
    
    gcc/testsuite/ChangeLog:
    
    	PR rtl-optimization/97421
    	* gcc.c-torture/execute/pr97421-1.c: New test.
    	* gcc.c-torture/execute/pr97421-2.c: New test.
    	* gcc.c-torture/execute/pr97421-3.c: New test.

diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 6f699a874e..4568674aa6 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
 static void set_node_sched_params (ddg_ptr);
 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
-static void generate_prolog_epilog (partial_schedule_ptr, class loop *,
-                                    rtx, rtx);
 static int calculate_stage_count (partial_schedule_ptr, int);
 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
 					   int, int, sbitmap, sbitmap, sbitmap);
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
    this constant.  Otherwise return 0.  */
 static rtx_insn *
 const_iteration_count (rtx count_reg, basic_block pre_header,
-		       int64_t * count)
+		       int64_t *count, bool* adjust_inplace)
 {
   rtx_insn *insn;
   rtx_insn *head, *tail;
 
+  *adjust_inplace = false;
+  bool read_after = false;
+
   if (! pre_header)
     return NULL;
 
   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
 
   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
-    if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
-	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
+    if (single_set (insn) && rtx_equal_p (count_reg,
+					  SET_DEST (single_set (insn))))
       {
 	rtx pat = single_set (insn);
 
 	if (CONST_INT_P (SET_SRC (pat)))
 	  {
 	    *count = INTVAL (SET_SRC (pat));
+	    *adjust_inplace = !read_after;
 	    return insn;
 	  }
 
 	return NULL;
       }
+    else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
+      {
+	read_after = true;
+	if (reg_set_p (count_reg, insn))
+	   break;
+      }
 
   return NULL;
 }
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
 static void
 generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
-                        rtx count_reg, rtx count_init)
+			rtx count_reg, bool adjust_init)
 {
   int i;
   int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
   start_sequence ();
 
-  if (!count_init)
+  if (adjust_init)
     {
       /* Generate instructions at the beginning of the prolog to
-         adjust the loop count by STAGE_COUNT.  If loop count is constant
-         (count_init), this constant is adjusted by STAGE_COUNT in
-         generate_prolog_epilog function.  */
+	 adjust the loop count by STAGE_COUNT.  If loop count is constant
+	 and it is not used anywhere in prologue, this constant is adjusted by
+	 STAGE_COUNT outside of generate_prolog_epilog function.  */
       rtx sub_reg = NULL_RTX;
 
       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
@@ -1528,7 +1536,8 @@ sms_schedule (void)
       rtx_insn *count_init;
       int mii, rec_mii, stage_count, min_cycle;
       int64_t loop_count = 0;
-      bool opt_sc_p;
+      bool opt_sc_p, adjust_inplace = false;
+      basic_block pre_header;
 
       if (! (g = g_arr[loop->num]))
         continue;
@@ -1569,19 +1578,13 @@ sms_schedule (void)
 	}
 
 
-      /* In case of th loop have doloop register it gets special
-	 handling.  */
-      count_init = NULL;
-      if ((count_reg = doloop_register_get (head, tail)))
-	{
-	  basic_block pre_header;
-
-	  pre_header = loop_preheader_edge (loop)->src;
-	  count_init = const_iteration_count (count_reg, pre_header,
-					      &loop_count);
-	}
+      count_reg = doloop_register_get (head, tail);
       gcc_assert (count_reg);
 
+      pre_header = loop_preheader_edge (loop)->src;
+      count_init = const_iteration_count (count_reg, pre_header, &loop_count,
+					  &adjust_inplace);
+
       if (dump_file && count_init)
         {
           fprintf (dump_file, "SMS const-doloop ");
@@ -1701,9 +1704,20 @@ sms_schedule (void)
 	      print_partial_schedule (ps, dump_file);
 	    }
  
-          /* case the BCT count is not known , Do loop-versioning */
-	  if (count_reg && ! count_init)
+	  if (count_init)
+	    {
+	       if (adjust_inplace)
+		{
+		  /* When possible, set new iteration count of loop kernel in
+		     place.  Otherwise, generate_prolog_epilog creates an insn
+		     to adjust.  */
+		  SET_SRC (single_set (count_init)) = GEN_INT (loop_count
+							    - stage_count + 1);
+		}
+	    }
+	  else
             {
+	      /* case the BCT count is not known , Do loop-versioning */
 	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
 					 gen_int_mode (stage_count,
 						       GET_MODE (count_reg)));
@@ -1713,12 +1727,7 @@ sms_schedule (void)
 	      loop_version (loop, comp_rtx, &condition_bb,
 	  		    prob, prob.invert (),
 			    prob, prob.invert (), true);
-	     }
-
-	  /* Set new iteration count of loop kernel.  */
-          if (count_reg && count_init)
-	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
-						     - stage_count + 1);
+	    }
 
 	  /* Now apply the scheduled kernel to the RTL of the loop.  */
 	  permute_partial_schedule (ps, g->closing_branch->first_note);
@@ -1735,7 +1744,7 @@ sms_schedule (void)
 	  if (dump_file)
 	    print_node_sched_params (dump_file, g->num_nodes, ps);
 	  /* Generate prolog and epilog.  */
-          generate_prolog_epilog (ps, loop, count_reg, count_init);
+	  generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
 	  break;
 	}
 
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
new file mode 100644
index 0000000000..e32fb129f1
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
@@ -0,0 +1,23 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, d, e;
+int *volatile c = &a;
+
+__attribute__((noinline))
+void f(void)
+{
+  for (int g = 2; g >= 0; g--) {
+    d = 0;
+    for (b = 0; b <= 2; b++)
+      ;
+    e = *c;
+  }
+}
+
+int main(void)
+{
+  f();
+  if (b != 3)
+    __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
new file mode 100644
index 0000000000..142bcbcee9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
@@ -0,0 +1,18 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
+
+static int a, b, c;
+int *d = &c;
+int **e = &d;
+int ***f = &e;
+int main()
+{
+  int h;
+  for (a = 2; a; a--)
+    for (h = 0; h <= 2; h++)
+      for (b = 0; b <= 2; b++)
+        ***f = 6;
+
+  if (b != 3)
+    __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
new file mode 100644
index 0000000000..3f1485a4a3
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
@@ -0,0 +1,22 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, c;
+short d;
+void e(void) {
+  unsigned f = 0;
+  for (; f <= 2; f++) {
+    int g[1];
+    int h = (long)g;
+    c = 0;
+    for (; c < 10; c++)
+      g[0] = a = 0;
+    for (; a <= 2; a++)
+      b = d;
+  }
+}
+int main(void) {
+  e();
+  if (a != 3)
+    __builtin_abort();
+}




[-- Attachment #2: pr97421.diff --]
[-- Type: text/x-patch, Size: 8426 bytes --]

    modulo-sched: Carefully process loop counter initialization [PR97421]
    
    Do not allow direct adjustment of pre-header initialization instruction for
    count register if it is read in some instruction below in that basic block.
    
    gcc/ChangeLog:
    
    	PR rtl-optimization/97421
    	* modulo-sched.c (generate_prolog_epilog): Remove forward
    	declaration, adjust last argument name and type.
    	(const_iteration_count): Add bool pointer parameter to return
    	whether count register is read in pre-header after its
    	initialization.
    	(sms_schedule): Fix count register initialization adjustment
    	procedure according to what const_iteration_count said.
    
    gcc/testsuite/ChangeLog:
    
    	PR rtl-optimization/97421
    	* gcc.c-torture/execute/pr97421-1.c: New test.
    	* gcc.c-torture/execute/pr97421-2.c: New test.
    	* gcc.c-torture/execute/pr97421-3.c: New test.

diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 6f699a874e..4568674aa6 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
 static void set_node_sched_params (ddg_ptr);
 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
 static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
-static void generate_prolog_epilog (partial_schedule_ptr, class loop *,
-                                    rtx, rtx);
 static int calculate_stage_count (partial_schedule_ptr, int);
 static void calculate_must_precede_follow (ddg_node_ptr, int, int,
 					   int, int, sbitmap, sbitmap, sbitmap);
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
    this constant.  Otherwise return 0.  */
 static rtx_insn *
 const_iteration_count (rtx count_reg, basic_block pre_header,
-		       int64_t * count)
+		       int64_t *count, bool* adjust_inplace)
 {
   rtx_insn *insn;
   rtx_insn *head, *tail;
 
+  *adjust_inplace = false;
+  bool read_after = false;
+
   if (! pre_header)
     return NULL;
 
   get_ebb_head_tail (pre_header, pre_header, &head, &tail);
 
   for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
-    if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
-	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
+    if (single_set (insn) && rtx_equal_p (count_reg,
+					  SET_DEST (single_set (insn))))
       {
 	rtx pat = single_set (insn);
 
 	if (CONST_INT_P (SET_SRC (pat)))
 	  {
 	    *count = INTVAL (SET_SRC (pat));
+	    *adjust_inplace = !read_after;
 	    return insn;
 	  }
 
 	return NULL;
       }
+    else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
+      {
+	read_after = true;
+	if (reg_set_p (count_reg, insn))
+	   break;
+      }
 
   return NULL;
 }
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 /* Generate the instructions (including reg_moves) for prolog & epilog.  */
 static void
 generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
-                        rtx count_reg, rtx count_init)
+			rtx count_reg, bool adjust_init)
 {
   int i;
   int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
   /* Generate the prolog, inserting its insns on the loop-entry edge.  */
   start_sequence ();
 
-  if (!count_init)
+  if (adjust_init)
     {
       /* Generate instructions at the beginning of the prolog to
-         adjust the loop count by STAGE_COUNT.  If loop count is constant
-         (count_init), this constant is adjusted by STAGE_COUNT in
-         generate_prolog_epilog function.  */
+	 adjust the loop count by STAGE_COUNT.  If loop count is constant
+	 and it is not used anywhere in prologue, this constant is adjusted by
+	 STAGE_COUNT outside of generate_prolog_epilog function.  */
       rtx sub_reg = NULL_RTX;
 
       sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
@@ -1528,7 +1536,8 @@ sms_schedule (void)
       rtx_insn *count_init;
       int mii, rec_mii, stage_count, min_cycle;
       int64_t loop_count = 0;
-      bool opt_sc_p;
+      bool opt_sc_p, adjust_inplace = false;
+      basic_block pre_header;
 
       if (! (g = g_arr[loop->num]))
         continue;
@@ -1569,19 +1578,13 @@ sms_schedule (void)
 	}
 
 
-      /* In case of th loop have doloop register it gets special
-	 handling.  */
-      count_init = NULL;
-      if ((count_reg = doloop_register_get (head, tail)))
-	{
-	  basic_block pre_header;
-
-	  pre_header = loop_preheader_edge (loop)->src;
-	  count_init = const_iteration_count (count_reg, pre_header,
-					      &loop_count);
-	}
+      count_reg = doloop_register_get (head, tail);
       gcc_assert (count_reg);
 
+      pre_header = loop_preheader_edge (loop)->src;
+      count_init = const_iteration_count (count_reg, pre_header, &loop_count,
+					  &adjust_inplace);
+
       if (dump_file && count_init)
         {
           fprintf (dump_file, "SMS const-doloop ");
@@ -1701,9 +1704,20 @@ sms_schedule (void)
 	      print_partial_schedule (ps, dump_file);
 	    }
  
-          /* case the BCT count is not known , Do loop-versioning */
-	  if (count_reg && ! count_init)
+	  if (count_init)
+	    {
+	       if (adjust_inplace)
+		{
+		  /* When possible, set new iteration count of loop kernel in
+		     place.  Otherwise, generate_prolog_epilog creates an insn
+		     to adjust.  */
+		  SET_SRC (single_set (count_init)) = GEN_INT (loop_count
+							    - stage_count + 1);
+		}
+	    }
+	  else
             {
+	      /* case the BCT count is not known , Do loop-versioning */
 	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
 					 gen_int_mode (stage_count,
 						       GET_MODE (count_reg)));
@@ -1713,12 +1727,7 @@ sms_schedule (void)
 	      loop_version (loop, comp_rtx, &condition_bb,
 	  		    prob, prob.invert (),
 			    prob, prob.invert (), true);
-	     }
-
-	  /* Set new iteration count of loop kernel.  */
-          if (count_reg && count_init)
-	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
-						     - stage_count + 1);
+	    }
 
 	  /* Now apply the scheduled kernel to the RTL of the loop.  */
 	  permute_partial_schedule (ps, g->closing_branch->first_note);
@@ -1735,7 +1744,7 @@ sms_schedule (void)
 	  if (dump_file)
 	    print_node_sched_params (dump_file, g->num_nodes, ps);
 	  /* Generate prolog and epilog.  */
-          generate_prolog_epilog (ps, loop, count_reg, count_init);
+	  generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
 	  break;
 	}
 
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
new file mode 100644
index 0000000000..e32fb129f1
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
@@ -0,0 +1,23 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, d, e;
+int *volatile c = &a;
+
+__attribute__((noinline))
+void f(void)
+{
+  for (int g = 2; g >= 0; g--) {
+    d = 0;
+    for (b = 0; b <= 2; b++)
+      ;
+    e = *c;
+  }
+}
+
+int main(void)
+{
+  f();
+  if (b != 3)
+    __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
new file mode 100644
index 0000000000..142bcbcee9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
@@ -0,0 +1,18 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
+
+static int a, b, c;
+int *d = &c;
+int **e = &d;
+int ***f = &e;
+int main()
+{
+  int h;
+  for (a = 2; a; a--)
+    for (h = 0; h <= 2; h++)
+      for (b = 0; b <= 2; b++)
+        ***f = 6;
+
+  if (b != 3)
+    __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
new file mode 100644
index 0000000000..3f1485a4a3
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
@@ -0,0 +1,22 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, c;
+short d;
+void e(void) {
+  unsigned f = 0;
+  for (; f <= 2; f++) {
+    int g[1];
+    int h = (long)g;
+    c = 0;
+    for (; c < 10; c++)
+      g[0] = a = 0;
+    for (; a <= 2; a++)
+      b = d;
+  }
+}
+int main(void) {
+  e();
+  if (a != 3)
+    __builtin_abort();
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-11-29 19:24 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-29 19:24 [PATCH] modulo-sched: Carefully process loop counter initialization [PR97421] Roman Zhuykov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).