public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Add late non-iterating FRE with optimize > 1
@ 2019-06-27 11:30 Richard Biener
  2019-07-01  7:53 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Richard Biener @ 2019-06-27 11:30 UTC (permalink / raw)
  To: gcc-patches


This fixes FRE's handling of TARGET_MEM_REF (it didn't consider
&TARGET_MEM_REF) and adds a late FRE pass which has iteration
disabled and runs only at -O[2s]+ to limit the compile-time
impact.

This helps cases where unrolling and vectorization expose
"piecewise" redundancies DOM cannot handle.  For example:

 (vector *)&a = { 1, 2, 3, 4 };
 .. = a[2];

There's still the opposite case that is not handled (PR83518), but
I will see whether I can make it work without too much cost:

 a[0] = 1;
 a[1] = 2;
 a[2] = 3;
 a[3] = 4;
 ... = (vector *)&a;

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I'll commit the TARGET_MEM_REF fix independently.

Any comments?  I'm not sure I like globbing the iteration
parameter and the optimize > 1 check; maybe I should simply
rename it to 'late' ...

The compile-time impact might be non-trivial for those testcases
that run into a large overhead from the alias-stmt walking but
I didn't do any measurements yet.

Thanks,
Richard.

2019-06-27  Richard Biener  <rguenther@suse.de>

	* tree-ssa-sccvn.c (class pass_fre): Add may_iterate
	pass parameter.
	(pass_fre::execute): Honor it.
	* passes.def: Adjust pass_fre invocations to allow iterating,
	add non-iterating pass_fre before late threading/dom.

Index: gcc/tree-ssa-sccvn.c
===================================================================
--- gcc/tree-ssa-sccvn.c	(revision 272742)
+++ gcc/tree-ssa-sccvn.c	(working copy)
@@ -791,39 +791,6 @@ vn_reference_eq (const_vn_reference_t co
 static void
 copy_reference_ops_from_ref (tree ref, vec<vn_reference_op_s> *result)
 {
-  if (TREE_CODE (ref) == TARGET_MEM_REF)
-    {
-      vn_reference_op_s temp;
-
-      result->reserve (3);
-
-      memset (&temp, 0, sizeof (temp));
-      temp.type = TREE_TYPE (ref);
-      temp.opcode = TREE_CODE (ref);
-      temp.op0 = TMR_INDEX (ref);
-      temp.op1 = TMR_STEP (ref);
-      temp.op2 = TMR_OFFSET (ref);
-      temp.off = -1;
-      temp.clique = MR_DEPENDENCE_CLIQUE (ref);
-      temp.base = MR_DEPENDENCE_BASE (ref);
-      result->quick_push (temp);
-
-      memset (&temp, 0, sizeof (temp));
-      temp.type = NULL_TREE;
-      temp.opcode = ERROR_MARK;
-      temp.op0 = TMR_INDEX2 (ref);
-      temp.off = -1;
-      result->quick_push (temp);
-
-      memset (&temp, 0, sizeof (temp));
-      temp.type = NULL_TREE;
-      temp.opcode = TREE_CODE (TMR_BASE (ref));
-      temp.op0 = TMR_BASE (ref);
-      temp.off = -1;
-      result->quick_push (temp);
-      return;
-    }
-
   /* For non-calls, store the information that makes up the address.  */
   tree orig = ref;
   while (ref)
@@ -853,6 +820,20 @@ copy_reference_ops_from_ref (tree ref, v
 	  temp.base = MR_DEPENDENCE_BASE (ref);
 	  temp.reverse = REF_REVERSE_STORAGE_ORDER (ref);
 	  break;
+	case TARGET_MEM_REF:
+	  /* The base address gets its own vn_reference_op_s structure.  */
+	  temp.op0 = TMR_INDEX (ref);
+	  temp.op1 = TMR_STEP (ref);
+	  temp.op2 = TMR_OFFSET (ref);
+	  temp.clique = MR_DEPENDENCE_CLIQUE (ref);
+	  temp.base = MR_DEPENDENCE_BASE (ref);
+	  result->safe_push (temp);
+	  memset (&temp, 0, sizeof (temp));
+	  temp.type = NULL_TREE;
+	  temp.opcode = ERROR_MARK;
+	  temp.op0 = TMR_INDEX2 (ref);
+	  temp.off = -1;
+	  break;
 	case BIT_FIELD_REF:
 	  /* Record bits, position and storage order.  */
 	  temp.op0 = TREE_OPERAND (ref, 1);
@@ -6872,14 +6853,24 @@ class pass_fre : public gimple_opt_pass
 {
 public:
   pass_fre (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_fre, ctxt)
+    : gimple_opt_pass (pass_data_fre, ctxt), may_iterate (true)
   {}
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_fre (m_ctxt); }
-  virtual bool gate (function *) { return flag_tree_fre != 0; }
+  void set_pass_param (unsigned int n, bool param)
+    {
+      gcc_assert (n == 0);
+      may_iterate = param;
+    }
+  virtual bool gate (function *)
+    {
+      return flag_tree_fre != 0 && (may_iterate || optimize > 1);
+    }
   virtual unsigned int execute (function *);
 
+private:
+  bool may_iterate;
 }; // class pass_fre
 
 unsigned int
@@ -6888,15 +6879,16 @@ pass_fre::execute (function *fun)
   unsigned todo = 0;
 
   /* At -O[1g] use the cheap non-iterating mode.  */
+  bool iterate_p = may_iterate && (optimize > 1);
   calculate_dominance_info (CDI_DOMINATORS);
-  if (optimize > 1)
+  if (iterate_p)
     loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
 
   default_vn_walk_kind = VN_WALKREWRITE;
-  todo = do_rpo_vn (fun, NULL, NULL, optimize > 1, true);
+  todo = do_rpo_vn (fun, NULL, NULL, iterate_p, true);
   free_rpo_vn ();
 
-  if (optimize > 1)
+  if (iterate_p)
     loop_optimizer_finalize ();
 
   return todo;
Index: gcc/passes.def
===================================================================
--- gcc/passes.def	(revision 272742)
+++ gcc/passes.def	(working copy)
@@ -83,7 +83,7 @@ along with GCC; see the file COPYING3.
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	     execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
-	  NEXT_PASS (pass_fre);
+	  NEXT_PASS (pass_fre, true /* may_iterate */);
 	  NEXT_PASS (pass_early_vrp);
 	  NEXT_PASS (pass_merge_phi);
           NEXT_PASS (pass_dse);
@@ -117,7 +117,7 @@ along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	      NEXT_PASS (pass_ch);
-	      NEXT_PASS (pass_fre);
+	      NEXT_PASS (pass_fre, true /* may_iterate */);
 	      /* We use pass_lim to rewrite in-memory iteration and reduction
 		 variable accesses in loops into local variables accesses.  */
 	      NEXT_PASS (pass_lim);
@@ -199,7 +199,7 @@ along with GCC; see the file COPYING3.
 	 execute TODO_rebuild_alias at this point.  */
       NEXT_PASS (pass_build_alias);
       NEXT_PASS (pass_return_slot);
-      NEXT_PASS (pass_fre);
+      NEXT_PASS (pass_fre, true /* may_iterate */);
       NEXT_PASS (pass_merge_phi);
       NEXT_PASS (pass_thread_jumps);
       NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */);
@@ -312,6 +312,7 @@ along with GCC; see the file COPYING3.
       NEXT_PASS (pass_strength_reduction);
       NEXT_PASS (pass_split_paths);
       NEXT_PASS (pass_tracer);
+      NEXT_PASS (pass_fre, false /* may_iterate */);
       NEXT_PASS (pass_thread_jumps);
       NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
       NEXT_PASS (pass_strlen);

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-07-01  7:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-27 11:30 [PATCH] Add late non-iterating FRE with optimize > 1 Richard Biener
2019-07-01  7:53 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).