public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-2963] rtl-optimization/96812 - remap dependence info on RTL loop unrolling
@ 2020-09-01  9:52 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2020-09-01  9:52 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d6a05b494b4b714e996a5ca09c5a4a1c41dbd648

commit r11-2963-gd6a05b494b4b714e996a5ca09c5a4a1c41dbd648
Author: Richard Biener <rguenther@suse.de>
Date:   Thu Aug 27 14:21:33 2020 +0200

    rtl-optimization/96812 - remap dependence info on RTL loop unrolling
    
    This carries over the PR87609 fix also to RTL loop unrolling.  The
    gcc.dg/torture/pr90328.c testcase otherwise is miscompiled with
    the tree-ssa-address.c hunk (or alternatively with -fno-ivopts
    on master).  I've tried to find the correct abstraction and
    adjusted two other duplicate_insn_chain users for which I do not
    have testcases.  There may be other insn-chain copying routines
    that could be affected but hopefully most appropriately go through
    CFG hooks.
    
    2020-08-27  Richard Biener  <rguenther@suse.de>
    
            PR rtl-optimization/96812
            * tree-ssa-address.c (copy_ref_info): Also copy dependence info.
            * cfgrtl.h (duplicate_insn_chain): Adjust prototype.
            * cfgrtl.c (duplicate_insn_chain): Remap dependence info
            if requested.
            (cfg_layout_duplicate_bb): Make sure we remap dependence info.
            * modulo-sched.c (duplicate_insns_of_cycles): Remap dependence
            info.
            (generate_prolog_epilog): Adjust.
            * config/c6x/c6x.c (hwloop_optimize): Remap dependence info.

Diff:
---
 gcc/cfgrtl.c           | 60 +++++++++++++++++++++++++++++++++++++++++++++-----
 gcc/cfgrtl.h           |  3 ++-
 gcc/config/c6x/c6x.c   |  3 ++-
 gcc/modulo-sched.c     | 10 +++++----
 gcc/tree-ssa-address.c | 10 +++++++++
 5 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index 03fa688fed6..eb5ccd42ed7 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -61,6 +61,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "tree-pass.h"
 #include "print-rtl.h"
+#include "rtl-iter.h"
+#include "gimplify.h"
 
 /* Disable warnings about missing quoting in GCC diagnostics.  */
 #if __GNUC__ >= 10
@@ -4199,7 +4201,8 @@ cfg_layout_can_duplicate_bb_p (const_basic_block bb)
 }
 
 rtx_insn *
-duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
+duplicate_insn_chain (rtx_insn *from, rtx_insn *to,
+		      class loop *loop, copy_bb_data *id)
 {
   rtx_insn *insn, *next, *copy;
   rtx_note *last;
@@ -4228,6 +4231,51 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
 	      && ANY_RETURN_P (JUMP_LABEL (insn)))
 	    JUMP_LABEL (copy) = JUMP_LABEL (insn);
           maybe_copy_prologue_epilogue_insn (insn, copy);
+	  /* If requested remap dependence info of cliques brought in
+	     via inlining.  */
+	  if (id)
+	    {
+	      subrtx_iterator::array_type array;
+	      FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+		if (MEM_P (*iter) && MEM_EXPR (*iter))
+		  {
+		    tree op = MEM_EXPR (*iter);
+		    if (TREE_CODE (op) == WITH_SIZE_EXPR)
+		      op = TREE_OPERAND (op, 0);
+		    while (handled_component_p (op))
+		      op = TREE_OPERAND (op, 0);
+		    if ((TREE_CODE (op) == MEM_REF
+			 || TREE_CODE (op) == TARGET_MEM_REF)
+			&& MR_DEPENDENCE_CLIQUE (op) > 1
+			&& (!loop
+			    || (MR_DEPENDENCE_CLIQUE (op)
+				!= loop->owned_clique)))
+		      {
+			if (!id->dependence_map)
+			  id->dependence_map = new hash_map<dependence_hash,
+			      unsigned short>;
+			bool existed;
+			unsigned short &newc = id->dependence_map->get_or_insert
+					 (MR_DEPENDENCE_CLIQUE (op), &existed);
+			if (!existed)
+			  {
+			    gcc_assert
+			      (MR_DEPENDENCE_CLIQUE (op) <= cfun->last_clique);
+			    newc = ++cfun->last_clique;
+			  }
+			/* We cannot adjust MR_DEPENDENCE_CLIQUE in-place
+			   since MEM_EXPR is shared so make a copy and
+			   walk to the subtree again.  */
+			tree new_expr = unshare_expr (MEM_EXPR (*iter));
+			if (TREE_CODE (new_expr) == WITH_SIZE_EXPR)
+			  new_expr = TREE_OPERAND (new_expr, 0);
+			while (handled_component_p (new_expr))
+			  new_expr = TREE_OPERAND (new_expr, 0);
+			MR_DEPENDENCE_CLIQUE (new_expr) = newc;
+			set_mem_expr (const_cast <rtx> (*iter), new_expr);
+		      }
+		  }
+	    }
 	  break;
 
 	case JUMP_TABLE_DATA:
@@ -4292,12 +4340,14 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to)
 /* Create a duplicate of the basic block BB.  */
 
 static basic_block
-cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
+cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *id)
 {
   rtx_insn *insn;
   basic_block new_bb;
 
-  insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb));
+  class loop *loop = (id && current_loops) ? bb->loop_father : NULL;
+
+  insn = duplicate_insn_chain (BB_HEAD (bb), BB_END (bb), loop, id);
   new_bb = create_basic_block (insn,
 			       insn ? get_last_insn () : NULL,
 			       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
@@ -4308,7 +4358,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
       insn = BB_HEADER (bb);
       while (NEXT_INSN (insn))
 	insn = NEXT_INSN (insn);
-      insn = duplicate_insn_chain (BB_HEADER (bb), insn);
+      insn = duplicate_insn_chain (BB_HEADER (bb), insn, loop, id);
       if (insn)
 	BB_HEADER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
     }
@@ -4318,7 +4368,7 @@ cfg_layout_duplicate_bb (basic_block bb, copy_bb_data *)
       insn = BB_FOOTER (bb);
       while (NEXT_INSN (insn))
 	insn = NEXT_INSN (insn);
-      insn = duplicate_insn_chain (BB_FOOTER (bb), insn);
+      insn = duplicate_insn_chain (BB_FOOTER (bb), insn, loop, id);
       if (insn)
 	BB_FOOTER (new_bb) = unlink_insn_chain (insn, get_last_insn ());
     }
diff --git a/gcc/cfgrtl.h b/gcc/cfgrtl.h
index ae03e82650d..ae62d6cf05c 100644
--- a/gcc/cfgrtl.h
+++ b/gcc/cfgrtl.h
@@ -49,7 +49,8 @@ extern bool purge_all_dead_edges (void);
 extern bool fixup_abnormal_edges (void);
 extern rtx_insn *unlink_insn_chain (rtx_insn *, rtx_insn *);
 extern void relink_block_chain (bool);
-extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *);
+extern rtx_insn *duplicate_insn_chain (rtx_insn *, rtx_insn *,
+				       class loop *, class copy_bb_data *);
 extern void cfg_layout_initialize (int);
 extern void cfg_layout_finalize (void);
 extern void break_superblocks (void);
diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c
index 6bd793bdd21..9aa7ef0620c 100644
--- a/gcc/config/c6x/c6x.c
+++ b/gcc/config/c6x/c6x.c
@@ -5600,7 +5600,8 @@ hwloop_optimize (hwloop_info loop)
       int j;
       rtx_insn *this_iter;
 
-      this_iter = duplicate_insn_chain (head_insn, tail_insn);
+      copy_bb_data id;
+      this_iter = duplicate_insn_chain (head_insn, tail_insn, NULL, &id);
       j = 0;
       while (this_iter)
 	{
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 86701e05550..6f699a874e3 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -1085,10 +1085,11 @@ optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
 
 static void
 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
-			   int to_stage, rtx count_reg)
+			   int to_stage, rtx count_reg, class loop *loop)
 {
   int row;
   ps_insn_ptr ps_ij;
+  copy_bb_data id;
 
   for (row = 0; row < ps->ii; row++)
     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
@@ -1113,7 +1114,8 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
 	if (from_stage <= last_u && to_stage >= first_u)
 	  {
 	    if (u < ps->g->num_nodes)
-	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
+	      duplicate_insn_chain (ps_first_note (ps, u), u_insn,
+				    loop, &id);
 	    else
 	      emit_insn (copy_rtx (PATTERN (u_insn)));
 	  }
@@ -1151,7 +1153,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
     }
 
   for (i = 0; i < last_stage; i++)
-    duplicate_insns_of_cycles (ps, 0, i, count_reg);
+    duplicate_insns_of_cycles (ps, 0, i, count_reg, loop);
 
   /* Put the prolog on the entry edge.  */
   e = loop_preheader_edge (loop);
@@ -1165,7 +1167,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
   start_sequence ();
 
   for (i = 0; i < last_stage; i++)
-    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
+    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg, loop);
 
   /* Put the epilogue on the exit edge.  */
   gcc_assert (single_exit (loop));
diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 93eae192212..baf495a00bc 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -1044,6 +1044,16 @@ copy_ref_info (tree new_ref, tree old_ref)
 	}
     }
 
+  /* We can transfer dependence info.  */
+  if (!MR_DEPENDENCE_CLIQUE (new_ref)
+      && (TREE_CODE (base) == MEM_REF
+	  || TREE_CODE (base) == TARGET_MEM_REF)
+      && MR_DEPENDENCE_CLIQUE (base))
+    {
+      MR_DEPENDENCE_CLIQUE (new_ref) = MR_DEPENDENCE_CLIQUE (base);
+      MR_DEPENDENCE_BASE (new_ref) = MR_DEPENDENCE_BASE (base);
+    }
+
   /* And alignment info.  Note we cannot transfer misalignment info
      since that sits on the SSA name but this is flow-sensitive info
      which we cannot transfer in this generic routine.  */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-09-01  9:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-01  9:52 [gcc r11-2963] rtl-optimization/96812 - remap dependence info on RTL loop unrolling Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).