public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] Rerun loop-header-copying just before vectorization
@ 2015-06-19 17:37 Alan Lawrence
  2015-06-25  5:04 ` Jeff Law
  0 siblings, 1 reply; 3+ messages in thread
From: Alan Lawrence @ 2015-06-19 17:37 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1097 bytes --]

This is a respin of https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02139.html . 
Changes are:

    * Separate the two passes by descending from a common base class, allowing 
different predicates;
    * Test flag_tree_vectorize, and loop->force_vectorize/dont_vectorize - this 
fixes the test failing before;
    * Simplify the check for "code after exit edge";
    * Revert unnecessary changes to pass_tree_loop_init::execute;
    * Revert change to slp-perm-7 test (following fix by Marc Glisse)

Bootstrapped + check-gcc on aarch64 and x86_64 (linux).

gcc/ChangeLog:

	* tree-pass.h (make_pass_ch_vect): New.
	* passes.def: Add pass_ch_vect just before pass_if_conversion.

	* tree-ssa-loop-ch.c (pass_ch_base, pass_ch_vect, pass_data_ch_vect,
	pass_ch::process_loop_p): New.
	(pass_ch): Extend pass_ch_base.

	(pass_ch::execute): Move all but loop_optimizer_init/finalize to...
	(pass_ch_base::execute): ...here.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/vect-strided-a-u16-i4.c (main1): Narrow scope of x,y,z,w.
	of unsigned
	* gcc.dg/vect/vect-ifcvt-11.c: New.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: rerun-loop-ch-2.patch --]
[-- Type: text/x-patch; name=rerun-loop-ch-2.patch, Size: 7717 bytes --]

diff --git a/gcc/passes.def b/gcc/passes.def
index 4690e23..5755035 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -247,6 +247,7 @@ along with GCC; see the file COPYING3.  If not see
 	  PUSH_INSERT_PASSES_WITHIN (pass_parallelize_loops)
 	      NEXT_PASS (pass_expand_omp_ssa);
 	  POP_INSERT_PASSES ()
+	  NEXT_PASS (pass_ch_vect);
 	  NEXT_PASS (pass_if_conversion);
 	  /* pass_vectorize must immediately follow pass_if_conversion.
 	     Please do not add any other passes in between.  */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c
new file mode 100644
index 0000000..7e32369
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-11.c
@@ -0,0 +1,36 @@
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 16
+
+extern void abort (void);
+
+int A[N] = {36, 39, 42, 45, 43, 32, 21, 12, 23, 34, 45, 56, 67, 78, 81, 11};
+int B[N] = {144,195,210,225,172,128,105,60, 92, 136,225,280,268,390,324,55};
+
+__attribute__((noinline))
+void foo ()
+{
+  for (int i = 0; i < N; i++)
+    {
+      int m = (A[i] & i) ? 5 : 4;
+      A[i] = A[i] * m;
+    }
+}
+
+int main ()
+{
+
+  check_vect ();
+  foo ();
+  /* check results:  */
+  for (int i = 0; i < N; i++)
+    if (A[i] != B[i])
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c
index af33ed4..0be68b3 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c
@@ -21,7 +21,6 @@ main1 ()
   s *ptr = arr;
   s res[N];
   int i;
-  unsigned short x, y, z, w;
 
   for (i = 0; i < N; i++)
     {
@@ -35,6 +34,7 @@ main1 ()
 
   for (i = 0; i < N; i++)
     {
+      unsigned short x, y, z, w;
       x = ptr->b - ptr->a;
       y = ptr->d - ptr->c;
       res[i].c = x + y;
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 172bd82..083e771 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -380,6 +380,7 @@ extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index 6ece78b..bd409ef 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -144,6 +144,17 @@ do_while_loop_p (struct loop *loop)
 
 namespace {
 
+class ch_base : public gimple_opt_pass
+{
+  protected:
+    ch_base (pass_data data, gcc::context *ctxt)
+      : gimple_opt_pass (data, ctxt)
+    {}
+
+  unsigned int copy_headers (function *fun);
+  virtual bool process_loop_p (struct loop *loop) = 0;
+};
+
 const pass_data pass_data_ch =
 {
   GIMPLE_PASS, /* type */
@@ -157,21 +168,61 @@ const pass_data pass_data_ch =
   0, /* todo_flags_finish */
 };
 
-class pass_ch : public gimple_opt_pass
+/* This pass calls loop_optimizer_init before it executes,
+   and loop_optimizer_finalize after.  */
+class pass_ch : public ch_base
 {
 public:
   pass_ch (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_ch, ctxt)
+    : ch_base (pass_data_ch, ctxt)
   {}
 
   /* opt_pass methods: */
   virtual bool gate (function *) { return flag_tree_ch != 0; }
   virtual unsigned int execute (function *);
 
+protected:
+  /* ch_base method: */
+  virtual bool process_loop_p (struct loop *loop);
 }; // class pass_ch
 
+const pass_data pass_data_ch_vect =
+{
+  GIMPLE_PASS, /* type */
+  "ch_vect", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_CH, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+/* This is a more aggressive version, designed to run just before if-conversion
+   and vectorization, to put more loops into their required form.  */
+class pass_ch_vect : public ch_base
+{
+public:
+  pass_ch_vect (gcc::context *ctxt)
+    : ch_base (pass_data_ch_vect, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *fun)
+  {
+    return flag_tree_ch != 0
+	   && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
+  }
+  virtual unsigned int execute (function *);
+
+protected:
+  /* ch_base method: */
+  virtual bool process_loop_p (struct loop *loop);
+}; // class pass_ch_vect
+
 unsigned int
-pass_ch::execute (function *fun)
+ch_base::copy_headers (function *fun)
 {
   struct loop *loop;
   basic_block header;
@@ -181,13 +232,8 @@ pass_ch::execute (function *fun)
   unsigned bbs_size;
   bool changed = false;
 
-  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
-		       | LOOPS_HAVE_SIMPLE_LATCHES);
   if (number_of_loops (fun) <= 1)
-    {
-      loop_optimizer_finalize ();
       return 0;
-    }
 
   bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
   copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
@@ -204,7 +250,7 @@ pass_ch::execute (function *fun)
 	 written as such, or because jump threading transformed it into one),
 	 we might be in fact peeling the first iteration of the loop.  This
 	 in general is not a good idea.  */
-      if (do_while_loop_p (loop))
+      if (!process_loop_p (loop))
 	continue;
 
       /* Iterate the header copying up to limit; this takes care of the cases
@@ -291,17 +337,76 @@ pass_ch::execute (function *fun)
       changed = true;
     }
 
-  update_ssa (TODO_update_ssa);
+  if (changed)
+    update_ssa (TODO_update_ssa);
   free (bbs);
   free (copied_bbs);
 
-  loop_optimizer_finalize ();
   return changed ? TODO_cleanup_cfg : 0;
 }
 
+unsigned int
+pass_ch::execute (function *fun)
+{
+  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
+		       | LOOPS_HAVE_SIMPLE_LATCHES);
+
+  unsigned int res = copy_headers (fun);
+
+  loop_optimizer_finalize ();
+  return res;
+}
+
+unsigned int
+pass_ch_vect::execute (function *fun)
+{
+  return copy_headers (fun);
+}
+
+bool
+pass_ch::process_loop_p (struct loop *loop)
+{
+  return !do_while_loop_p (loop);
+}
+
+bool
+pass_ch_vect::process_loop_p (struct loop *loop)
+{
+  if (!flag_tree_vectorize && !loop->force_vectorize)
+    return false;
+
+  if (loop->dont_vectorize)
+    return false;
+
+  if (!do_while_loop_p (loop))
+    return true;
+
+ /* The vectorizer won't handle anything with multiple exits, so skip.  */
+  edge exit = single_exit (loop);
+  if (!exit)
+    return false;
+
+  /* Apply copying if the exit block looks to have code after it.  */
+  edge_iterator ei;
+  edge e;
+  FOR_EACH_EDGE (e, ei, exit->src->succs)
+    if (!loop_exit_edge_p (loop, e)
+	&& e->dest != loop->header
+	&& e->dest != loop->latch)
+      return true; /* Block with exit edge has code after it.  */
+
+  return false;
+}
+
 } // anon namespace
 
 gimple_opt_pass *
+make_pass_ch_vect (gcc::context *ctxt)
+{
+  return new pass_ch_vect (ctxt);
+}
+
+gimple_opt_pass *
 make_pass_ch (gcc::context *ctxt)
 {
   return new pass_ch (ctxt);

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] Rerun loop-header-copying just before vectorization
  2015-06-19 17:37 [PATCH v2] Rerun loop-header-copying just before vectorization Alan Lawrence
@ 2015-06-25  5:04 ` Jeff Law
  2015-07-02 11:52   ` Alan Lawrence
  0 siblings, 1 reply; 3+ messages in thread
From: Jeff Law @ 2015-06-25  5:04 UTC (permalink / raw)
  To: Alan Lawrence, gcc-patches

On 06/19/2015 11:32 AM, Alan Lawrence wrote:
> This is a respin of
> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02139.html . Changes are:
>
>     * Separate the two passes by descending from a common base class,
> allowing different predicates;
>     * Test flag_tree_vectorize, and loop->force_vectorize/dont_vectorize
> - this fixes the test failing before;
>     * Simplify the check for "code after exit edge";
>     * Revert unnecessary changes to pass_tree_loop_init::execute;
>     * Revert change to slp-perm-7 test (following fix by Marc Glisse)
So FWIW, if you don't want to make this a separate pass, you'd probably 
want the code which allows us to run the phi-only propagator as a 
subroutine to propagate and eliminate those degenerate PHIs.  I posted 
it a year or two ago, but went a different direction to solve whatever 
issue I was looking at.

I'm comfortable with this as a separate pass and relying on cfg cleanups 
to handle this stuff for us as this implementation of your patch 
currently does.


>
> Bootstrapped + check-gcc on aarch64 and x86_64 (linux).
>
> gcc/ChangeLog:
>
>      * tree-pass.h (make_pass_ch_vect): New.
>      * passes.def: Add pass_ch_vect just before pass_if_conversion.
>
>      * tree-ssa-loop-ch.c (pass_ch_base, pass_ch_vect, pass_data_ch_vect,
>      pass_ch::process_loop_p): New.
>      (pass_ch): Extend pass_ch_base.
>
>      (pass_ch::execute): Move all but loop_optimizer_init/finalize to...
>      (pass_ch_base::execute): ...here.
>
> gcc/testsuite/ChangeLog:
>
>      * gcc.dg/vect/vect-strided-a-u16-i4.c (main1): Narrow scope of
> x,y,z,w.
>      of unsigned
>      * gcc.dg/vect/vect-ifcvt-11.c: New.
Can you add a function comment to ch_base::copy_headers.  I know it 
didn't have one before, but it really should have one.

I'd also add a comment to the execute methods.  pass_ch initializes and 
finalizes loop structures while pass_ch_vect::execute assumes the loop 
structures are already initialized and finalization is assumed to be 
handled earlier in the call chain.

I'd also suggest a comment to the process_loop_p method.

> +
> +  /* Apply copying if the exit block looks to have code after it.  */
> +  edge_iterator ei;
> +  edge e;
> +  FOR_EACH_EDGE (e, ei, exit->src->succs)
> +    if (!loop_exit_edge_p (loop, e)
> +	&& e->dest != loop->header
> +	&& e->dest != loop->latch)
> +      return true; /* Block with exit edge has code after it.  */
Don't put comments on the same line as code.  Instead I'd suggest 
describing the CFG pattern your looking for as part of the comment 
before the loop over the edges.


With those comment fixes, this is OK for the trunk.

jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] Rerun loop-header-copying just before vectorization
  2015-06-25  5:04 ` Jeff Law
@ 2015-07-02 11:52   ` Alan Lawrence
  0 siblings, 0 replies; 3+ messages in thread
From: Alan Lawrence @ 2015-07-02 11:52 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches

> With those comment fixes, this is OK for the trunk.
> 
> jeff

Thank you for review - I've pushed r225311 with what I hope are appropriate 
comment fixes.

Cheers, Alan

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-07-02 11:52 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-19 17:37 [PATCH v2] Rerun loop-header-copying just before vectorization Alan Lawrence
2015-06-25  5:04 ` Jeff Law
2015-07-02 11:52   ` Alan Lawrence

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).