public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Add late non-iterating FRE with optimize > 1
@ 2019-06-27 11:30 Richard Biener
  2019-07-01  7:53 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Richard Biener @ 2019-06-27 11:30 UTC (permalink / raw)
  To: gcc-patches


This fixes FRE's handling of TARGET_MEM_REF (it didn't consider
&TARGET_MEM_REF) and adds a late FRE pass which has iteration
disabled and runs only at -O[2s]+ to limit the compile-time
impact.

This helps cases where unrolling and vectorization expose
"piecewise" redundancies DOM cannot handle.  Thus

 (vector *)&a = { 1, 2, 3, 4 };
 .. = a[2];

there's still the opposite case not handled (PR83518) but
I will see whether I can make it work without too much cost:

 a[0] = 1;
 a[1] = 2;
 a[2] = 3;
 a[3] = 4;
 ... = (vector *)&a;

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I'll commit the TARGET_MEM_REF fix independently.

Any comments?  I'm not sure I like globbing the iteration
parameter and the optimize > 1 check; maybe I should simply
rename it to 'late' ...

The compile-time impact might be non-trivial for those testcases
that run into a large overhead from the alias-stmt walking but
I didn't do any measurements yet.

Thanks,
Richard.

2019-06-27  Richard Biener  <rguenther@suse.de>

	* tree-ssa-sccvn.c (class pass_fre): Add may_iterate
	pass parameter.
	(pass_fre::execute): Honor it.
	* passes.def: Adjust pass_fre invocations to allow iterating,
	add non-iterating pass_fre before late threading/dom.

Index: gcc/tree-ssa-sccvn.c
===================================================================
--- gcc/tree-ssa-sccvn.c	(revision 272742)
+++ gcc/tree-ssa-sccvn.c	(working copy)
@@ -791,39 +791,6 @@ vn_reference_eq (const_vn_reference_t co
 static void
 copy_reference_ops_from_ref (tree ref, vec<vn_reference_op_s> *result)
 {
-  if (TREE_CODE (ref) == TARGET_MEM_REF)
-    {
-      vn_reference_op_s temp;
-
-      result->reserve (3);
-
-      memset (&temp, 0, sizeof (temp));
-      temp.type = TREE_TYPE (ref);
-      temp.opcode = TREE_CODE (ref);
-      temp.op0 = TMR_INDEX (ref);
-      temp.op1 = TMR_STEP (ref);
-      temp.op2 = TMR_OFFSET (ref);
-      temp.off = -1;
-      temp.clique = MR_DEPENDENCE_CLIQUE (ref);
-      temp.base = MR_DEPENDENCE_BASE (ref);
-      result->quick_push (temp);
-
-      memset (&temp, 0, sizeof (temp));
-      temp.type = NULL_TREE;
-      temp.opcode = ERROR_MARK;
-      temp.op0 = TMR_INDEX2 (ref);
-      temp.off = -1;
-      result->quick_push (temp);
-
-      memset (&temp, 0, sizeof (temp));
-      temp.type = NULL_TREE;
-      temp.opcode = TREE_CODE (TMR_BASE (ref));
-      temp.op0 = TMR_BASE (ref);
-      temp.off = -1;
-      result->quick_push (temp);
-      return;
-    }
-
   /* For non-calls, store the information that makes up the address.  */
   tree orig = ref;
   while (ref)
@@ -853,6 +820,20 @@ copy_reference_ops_from_ref (tree ref, v
 	  temp.base = MR_DEPENDENCE_BASE (ref);
 	  temp.reverse = REF_REVERSE_STORAGE_ORDER (ref);
 	  break;
+	case TARGET_MEM_REF:
+	  /* The base address gets its own vn_reference_op_s structure.  */
+	  temp.op0 = TMR_INDEX (ref);
+	  temp.op1 = TMR_STEP (ref);
+	  temp.op2 = TMR_OFFSET (ref);
+	  temp.clique = MR_DEPENDENCE_CLIQUE (ref);
+	  temp.base = MR_DEPENDENCE_BASE (ref);
+	  result->safe_push (temp);
+	  memset (&temp, 0, sizeof (temp));
+	  temp.type = NULL_TREE;
+	  temp.opcode = ERROR_MARK;
+	  temp.op0 = TMR_INDEX2 (ref);
+	  temp.off = -1;
+	  break;
 	case BIT_FIELD_REF:
 	  /* Record bits, position and storage order.  */
 	  temp.op0 = TREE_OPERAND (ref, 1);
@@ -6872,14 +6853,24 @@ class pass_fre : public gimple_opt_pass
 {
 public:
   pass_fre (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_fre, ctxt)
+    : gimple_opt_pass (pass_data_fre, ctxt), may_iterate (true)
   {}
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_fre (m_ctxt); }
-  virtual bool gate (function *) { return flag_tree_fre != 0; }
+  void set_pass_param (unsigned int n, bool param)
+    {
+      gcc_assert (n == 0);
+      may_iterate = param;
+    }
+  virtual bool gate (function *)
+    {
+      return flag_tree_fre != 0 && (may_iterate || optimize > 1);
+    }
   virtual unsigned int execute (function *);
 
+private:
+  bool may_iterate;
 }; // class pass_fre
 
 unsigned int
@@ -6888,15 +6879,16 @@ pass_fre::execute (function *fun)
   unsigned todo = 0;
 
   /* At -O[1g] use the cheap non-iterating mode.  */
+  bool iterate_p = may_iterate && (optimize > 1);
   calculate_dominance_info (CDI_DOMINATORS);
-  if (optimize > 1)
+  if (iterate_p)
     loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
 
   default_vn_walk_kind = VN_WALKREWRITE;
-  todo = do_rpo_vn (fun, NULL, NULL, optimize > 1, true);
+  todo = do_rpo_vn (fun, NULL, NULL, iterate_p, true);
   free_rpo_vn ();
 
-  if (optimize > 1)
+  if (iterate_p)
     loop_optimizer_finalize ();
 
   return todo;
Index: gcc/passes.def
===================================================================
--- gcc/passes.def	(revision 272742)
+++ gcc/passes.def	(working copy)
@@ -83,7 +83,7 @@ along with GCC; see the file COPYING3.
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	     execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
-	  NEXT_PASS (pass_fre);
+	  NEXT_PASS (pass_fre, true /* may_iterate */);
 	  NEXT_PASS (pass_early_vrp);
 	  NEXT_PASS (pass_merge_phi);
           NEXT_PASS (pass_dse);
@@ -117,7 +117,7 @@ along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	      NEXT_PASS (pass_ch);
-	      NEXT_PASS (pass_fre);
+	      NEXT_PASS (pass_fre, true /* may_iterate */);
 	      /* We use pass_lim to rewrite in-memory iteration and reduction
 		 variable accesses in loops into local variables accesses.  */
 	      NEXT_PASS (pass_lim);
@@ -199,7 +199,7 @@ along with GCC; see the file COPYING3.
 	 execute TODO_rebuild_alias at this point.  */
       NEXT_PASS (pass_build_alias);
       NEXT_PASS (pass_return_slot);
-      NEXT_PASS (pass_fre);
+      NEXT_PASS (pass_fre, true /* may_iterate */);
       NEXT_PASS (pass_merge_phi);
       NEXT_PASS (pass_thread_jumps);
       NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */);
@@ -312,6 +312,7 @@ along with GCC; see the file COPYING3.
       NEXT_PASS (pass_strength_reduction);
       NEXT_PASS (pass_split_paths);
       NEXT_PASS (pass_tracer);
+      NEXT_PASS (pass_fre, false /* may_iterate */);
       NEXT_PASS (pass_thread_jumps);
       NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
       NEXT_PASS (pass_strlen);

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Add late non-iterating FRE with optimize > 1
  2019-06-27 11:30 [PATCH] Add late non-iterating FRE with optimize > 1 Richard Biener
@ 2019-07-01  7:53 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2019-07-01  7:53 UTC (permalink / raw)
  To: gcc-patches

On Thu, 27 Jun 2019, Richard Biener wrote:

> 
> This fixes FREs handling of TARGET_MEM_REF (it didn't consider
> &TARGET_MEM_REF) and adds a late FRE pass which has iteration
> disabled and runs only at -O[2s]+ to limit the compile-time
> impact.
> 
> This helps cases where unrolling and vectorization exposes
> "piecewise" redundancies DOM cannot handle.  Thus
> 
>  (vector *)&a = { 1, 2, 3, 4 };
>  .. = a[2];
> 
> there's still the opposite case not handled (PR83518) but
> I will see whether I can make it work without too much cost:
> 
>  a[0] = 1;
>  a[1] = 2;
>  a[2] = 3;
>  a[3] = 4;
>  ... = (vector *)&a;
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> 
> I'll commit the TARGET_MEM_REF fixing indepenently.
> 
> Any comments?  I'm not sure I like globbing the iteration
> parameter and the optimize > 1 check; maybe I should simply
> rename it to 'late' ...
> 
> The compile-time impact might be non-trivial for those testcases
> that run into a large overhead from the alias-stmt walking but
> I didn't do any measurements yet.

Testing went well with only one false-positive fallout of
gcc.dg/tree-ssa/pr77445-2.c where new threading opportunities
arise in thread3 due to late FRE but those opportunities cross
loops and thus are not considered.  But that breaks the testcase's
dump scanning for profile correctness.  I've re-visited PR77445
and applied the obvious change.

I have committed the FRE TARGET_MEM_REF fix separately and now
the patch below.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

Richard.

2019-07-01  Richard Biener  <rguenther@suse.de>

	* tree-ssa-sccvn.c (class pass_fre): Add may_iterate
	pass parameter.
	(pass_fre::execute): Honor it.
	* passes.def: Adjust pass_fre invocations to allow iterating,
	add non-iterating pass_fre before late threading/dom.

	* gcc.dg/tree-ssa/pr77445-2.c: Adjust.

Index: gcc/tree-ssa-sccvn.c
===================================================================
--- gcc/tree-ssa-sccvn.c	(revision 272742)
+++ gcc/tree-ssa-sccvn.c	(working copy)
@@ -6872,14 +6853,24 @@ class pass_fre : public gimple_opt_pass
 {
 public:
   pass_fre (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_fre, ctxt)
+    : gimple_opt_pass (pass_data_fre, ctxt), may_iterate (true)
   {}
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_fre (m_ctxt); }
-  virtual bool gate (function *) { return flag_tree_fre != 0; }
+  void set_pass_param (unsigned int n, bool param)
+    {
+      gcc_assert (n == 0);
+      may_iterate = param;
+    }
+  virtual bool gate (function *)
+    {
+      return flag_tree_fre != 0 && (may_iterate || optimize > 1);
+    }
   virtual unsigned int execute (function *);
 
+private:
+  bool may_iterate;
 }; // class pass_fre
 
 unsigned int
@@ -6888,15 +6879,16 @@ pass_fre::execute (function *fun)
   unsigned todo = 0;
 
   /* At -O[1g] use the cheap non-iterating mode.  */
+  bool iterate_p = may_iterate && (optimize > 1);
   calculate_dominance_info (CDI_DOMINATORS);
-  if (optimize > 1)
+  if (iterate_p)
     loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
 
   default_vn_walk_kind = VN_WALKREWRITE;
-  todo = do_rpo_vn (fun, NULL, NULL, optimize > 1, true);
+  todo = do_rpo_vn (fun, NULL, NULL, iterate_p, true);
   free_rpo_vn ();
 
-  if (optimize > 1)
+  if (iterate_p)
     loop_optimizer_finalize ();
 
   return todo;
Index: gcc/passes.def
===================================================================
--- gcc/passes.def	(revision 272742)
+++ gcc/passes.def	(working copy)
@@ -83,7 +83,7 @@ along with GCC; see the file COPYING3.
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	     execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
-	  NEXT_PASS (pass_fre);
+	  NEXT_PASS (pass_fre, true /* may_iterate */);
 	  NEXT_PASS (pass_early_vrp);
 	  NEXT_PASS (pass_merge_phi);
           NEXT_PASS (pass_dse);
@@ -117,7 +117,7 @@ along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	      NEXT_PASS (pass_ch);
-	      NEXT_PASS (pass_fre);
+	      NEXT_PASS (pass_fre, true /* may_iterate */);
 	      /* We use pass_lim to rewrite in-memory iteration and reduction
 		 variable accesses in loops into local variables accesses.  */
 	      NEXT_PASS (pass_lim);
@@ -199,7 +199,7 @@ along with GCC; see the file COPYING3.
 	 execute TODO_rebuild_alias at this point.  */
       NEXT_PASS (pass_build_alias);
       NEXT_PASS (pass_return_slot);
-      NEXT_PASS (pass_fre);
+      NEXT_PASS (pass_fre, true /* may_iterate */);
       NEXT_PASS (pass_merge_phi);
       NEXT_PASS (pass_thread_jumps);
       NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */);
@@ -312,6 +312,7 @@ along with GCC; see the file COPYING3.
       NEXT_PASS (pass_strength_reduction);
       NEXT_PASS (pass_split_paths);
       NEXT_PASS (pass_tracer);
+      NEXT_PASS (pass_fre, false /* may_iterate */);
       NEXT_PASS (pass_thread_jumps);
       NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
       NEXT_PASS (pass_strlen);
Index: gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c	(revision 272842)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c	(working copy)
@@ -125,7 +125,7 @@ enum STATES FMS( u8 **in , u32 *transiti
    jump threading opportunities.  Skip the later tests on aarch64.  */
 /* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */
 /* { dg-final { scan-tree-dump-times "Invalid sum" 3 "thread1" } } */
-/* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */
-/* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */
-/* { dg-final { scan-tree-dump-not "not considered" "thread3" { target { ! aarch64*-*-* } } } } */
-/* { dg-final { scan-tree-dump-not "not considered" "thread4" { target { ! aarch64*-*-* } } } } */ 
+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread1" } } */
+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread2" } } */
+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread3" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread4" { target { ! aarch64*-*-* } } } } */ 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-07-01  7:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-27 11:30 [PATCH] Add late non-iterating FRE with optimize > 1 Richard Biener
2019-07-01  7:53 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).