public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR81090, properly free niter estimates
@ 2017-06-14 12:56 Richard Biener
  2017-06-16 12:18 ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2017-06-14 12:56 UTC (permalink / raw)
  To: gcc-patches


niter estimates are not kept up-to-date (they reference gimple stmts
and trees) in the keep-loop-stuff infrastructure so similar to the
SCEV cache we rely on people freeing it after passes.

The following brings us a step closer to that by freeing them whenever
SCEV is invalidated (we only compute them when SCEV is active) plus
removing the odd record-bounds pass that just computes them, leaving
scavenging to following passes.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2017-06-14  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81090
	* passes.def (pass_record_bounds): Remove.
	* tree-pass.h (make_pass_record_bounds): Likewise.
	* tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
	make_pass_record_bounds): Likewise.
	* tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
	not free niter estimates.
	* tree-scalar-evolution.c (scev_finalize): Free niter estimates.
	(scev_reset): Likewise.

	* gcc.dg/graphite/pr81090.c: New testcase.

Index: gcc/passes.def
===================================================================
--- gcc/passes.def	(revision 249193)
+++ gcc/passes.def	(working copy)
@@ -276,7 +276,6 @@ along with GCC; see the file COPYING3.
 	  /* All unswitching, final value replacement and splitting can expose
 	     empty loops.  Remove them now.  */
 	  NEXT_PASS (pass_cd_dce);
-	  NEXT_PASS (pass_record_bounds);
 	  NEXT_PASS (pass_iv_canon);
 	  NEXT_PASS (pass_loop_distribution);
 	  NEXT_PASS (pass_copy_prop);
Index: gcc/testsuite/gcc.dg/graphite/pr81090.c
===================================================================
--- gcc/testsuite/gcc.dg/graphite/pr81090.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/graphite/pr81090.c	(working copy)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -floop-nest-optimize" } */
+
+int x3, za;
+int hg[1];
+
+void
+yw (int dq)
+{
+  const int r7 = 2;
+
+  while (dq < 1)
+    {
+      for (x3 = 0; x3 < r7; ++x3)
+	for (za = 0; za < r7; ++za)
+	  hg[1] = 0;
+      ++dq;
+    }
+
+  x3 = 0;
+  while (x3 < r7)
+    {
+      ++x3;
+      if (x3 == 0)
+	break;
+    }
+}
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(revision 249193)
+++ gcc/tree-pass.h	(working copy)
@@ -373,7 +373,6 @@ extern gimple_opt_pass *make_pass_predco
 extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_empty_loop (gcc::context *ctxt);
-extern gimple_opt_pass *make_pass_record_bounds (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
Index: gcc/tree-scalar-evolution.c
===================================================================
--- gcc/tree-scalar-evolution.c	(revision 249193)
+++ gcc/tree-scalar-evolution.c	(working copy)
@@ -3300,14 +3300,8 @@ scev_reset_htab (void)
 void
 scev_reset (void)
 {
-  struct loop *loop;
-
   scev_reset_htab ();
-
-  FOR_EACH_LOOP (loop, 0)
-    {
-      loop->nb_iterations = NULL_TREE;
-    }
+  free_numbers_of_iterations_estimates (cfun);
 }
 
 /* Return true if the IV calculation in TYPE can overflow based on the knowledge
@@ -3636,6 +3630,7 @@ scev_finalize (void)
     return;
   scalar_evolution_info->empty ();
   scalar_evolution_info = NULL;
+  free_numbers_of_iterations_estimates (cfun);
 }
 
 /* Returns true if the expression EXPR is considered to be too expensive
Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c	(revision 249193)
+++ gcc/tree-ssa-loop-ivcanon.c	(working copy)
@@ -1212,7 +1212,6 @@ canonicalize_induction_variables (void)
   bool irred_invalidated = false;
   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
-  free_numbers_of_iterations_estimates (cfun);
   estimate_numbers_of_iterations ();
 
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
Index: gcc/tree-ssa-loop.c
===================================================================
--- gcc/tree-ssa-loop.c	(revision 249193)
+++ gcc/tree-ssa-loop.c	(working copy)
@@ -459,54 +459,6 @@ make_pass_scev_cprop (gcc::context *ctxt
   return new pass_scev_cprop (ctxt);
 }
 
-/* Record bounds on numbers of iterations of loops.  */
-
-namespace {
-
-const pass_data pass_data_record_bounds =
-{
-  GIMPLE_PASS, /* type */
-  "*record_bounds", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
-  TV_TREE_LOOP_BOUNDS, /* tv_id */
-  ( PROP_cfg | PROP_ssa ), /* properties_required */
-  0, /* properties_provided */
-  0, /* properties_destroyed */
-  0, /* todo_flags_start */
-  0, /* todo_flags_finish */
-};
-
-class pass_record_bounds : public gimple_opt_pass
-{
-public:
-  pass_record_bounds (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_record_bounds, ctxt)
-  {}
-
-  /* opt_pass methods: */
-  virtual unsigned int execute (function *);
-
-}; // class pass_record_bounds
-
-unsigned int
-pass_record_bounds::execute (function *fun)
-{
-  if (number_of_loops (fun) <= 1)
-    return 0;
-
-  estimate_numbers_of_iterations ();
-  scev_reset ();
-  return 0;
-}
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_record_bounds (gcc::context *ctxt)
-{
-  return new pass_record_bounds (ctxt);
-}
-
 /* Induction variable optimizations.  */
 
 namespace {

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix PR81090, properly free niter estimates
  2017-06-14 12:56 [PATCH] Fix PR81090, properly free niter estimates Richard Biener
@ 2017-06-16 12:18 ` Richard Biener
  2017-06-19 12:31   ` Christophe Lyon
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2017-06-16 12:18 UTC (permalink / raw)
  To: gcc-patches

On Wed, 14 Jun 2017, Richard Biener wrote:

> 
> niter estimates are not kept up-to-date (they reference gimple stmts
> and trees) in the keep-loop-stuff infrastructure so similar to the
> SCEV cache we rely on people freeing it after passes.
> 
> The following brings us a step closer to that by freeing them whenever
> SCEV is invalidated (we only compute them when SCEV is active) plus
> removing the odd record-bounds pass that just computes them, leaving
> scavenging to following passes.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Some awkward interactions with peeling means I'm installing the
following less aggressive variant.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-06-16  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81090
	* passes.def (pass_record_bounds): Remove.
	* tree-pass.h (make_pass_record_bounds): Likewise.
	* tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
	make_pass_record_bounds): Likewise.
	* tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
	not free niter estimates at the beginning but at the end.
	* tree-scalar-evolution.c (scev_finalize): Free niter estimates.

	* gcc.dg/graphite/pr81090.c: New testcase.

Index: gcc/passes.def
===================================================================
--- gcc/passes.def	(revision 249246)
+++ gcc/passes.def	(working copy)
@@ -276,7 +276,6 @@ along with GCC; see the file COPYING3.
 	  /* All unswitching, final value replacement and splitting can expose
 	     empty loops.  Remove them now.  */
 	  NEXT_PASS (pass_cd_dce);
-	  NEXT_PASS (pass_record_bounds);
 	  NEXT_PASS (pass_iv_canon);
 	  NEXT_PASS (pass_loop_distribution);
 	  NEXT_PASS (pass_copy_prop);
Index: gcc/testsuite/gcc.dg/graphite/pr81090.c
===================================================================
--- gcc/testsuite/gcc.dg/graphite/pr81090.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/graphite/pr81090.c	(working copy)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -floop-nest-optimize" } */
+
+int x3, za;
+int hg[1];
+
+void
+yw (int dq)
+{
+  const int r7 = 2;
+
+  while (dq < 1)
+    {
+      for (x3 = 0; x3 < r7; ++x3)
+	for (za = 0; za < r7; ++za)
+	  hg[1] = 0;
+      ++dq;
+    }
+
+  x3 = 0;
+  while (x3 < r7)
+    {
+      ++x3;
+      if (x3 == 0)
+	break;
+    }
+}
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(revision 249246)
+++ gcc/tree-pass.h	(working copy)
@@ -373,7 +373,6 @@ extern gimple_opt_pass *make_pass_predco
 extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_empty_loop (gcc::context *ctxt);
-extern gimple_opt_pass *make_pass_record_bounds (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
Index: gcc/tree-scalar-evolution.c
===================================================================
--- gcc/tree-scalar-evolution.c	(revision 249246)
+++ gcc/tree-scalar-evolution.c	(working copy)
@@ -3636,6 +3636,7 @@ scev_finalize (void)
     return;
   scalar_evolution_info->empty ();
   scalar_evolution_info = NULL;
+  free_numbers_of_iterations_estimates (cfun);
 }
 
 /* Returns true if the expression EXPR is considered to be too expensive
Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c	(revision 249246)
+++ gcc/tree-ssa-loop-ivcanon.c	(working copy)
@@ -1212,7 +1212,6 @@ canonicalize_induction_variables (void)
   bool irred_invalidated = false;
   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
-  free_numbers_of_iterations_estimates (cfun);
   estimate_numbers_of_iterations ();
 
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
@@ -1230,6 +1229,7 @@ canonicalize_induction_variables (void)
 
   /* Clean up the information about numbers of iterations, since brute force
      evaluation could reveal new information.  */
+  free_numbers_of_iterations_estimates (cfun);
   scev_reset ();
 
   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
Index: gcc/tree-ssa-loop.c
===================================================================
--- gcc/tree-ssa-loop.c	(revision 249246)
+++ gcc/tree-ssa-loop.c	(working copy)
@@ -459,54 +459,6 @@ make_pass_scev_cprop (gcc::context *ctxt
   return new pass_scev_cprop (ctxt);
 }
 
-/* Record bounds on numbers of iterations of loops.  */
-
-namespace {
-
-const pass_data pass_data_record_bounds =
-{
-  GIMPLE_PASS, /* type */
-  "*record_bounds", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
-  TV_TREE_LOOP_BOUNDS, /* tv_id */
-  ( PROP_cfg | PROP_ssa ), /* properties_required */
-  0, /* properties_provided */
-  0, /* properties_destroyed */
-  0, /* todo_flags_start */
-  0, /* todo_flags_finish */
-};
-
-class pass_record_bounds : public gimple_opt_pass
-{
-public:
-  pass_record_bounds (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_record_bounds, ctxt)
-  {}
-
-  /* opt_pass methods: */
-  virtual unsigned int execute (function *);
-
-}; // class pass_record_bounds
-
-unsigned int
-pass_record_bounds::execute (function *fun)
-{
-  if (number_of_loops (fun) <= 1)
-    return 0;
-
-  estimate_numbers_of_iterations ();
-  scev_reset ();
-  return 0;
-}
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_record_bounds (gcc::context *ctxt)
-{
-  return new pass_record_bounds (ctxt);
-}
-
 /* Induction variable optimizations.  */
 
 namespace {

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix PR81090, properly free niter estimates
  2017-06-16 12:18 ` Richard Biener
@ 2017-06-19 12:31   ` Christophe Lyon
  2017-06-19 12:35     ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Christophe Lyon @ 2017-06-19 12:31 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi Richard,

On 16 June 2017 at 14:18, Richard Biener <rguenther@suse.de> wrote:
> On Wed, 14 Jun 2017, Richard Biener wrote:
>
>>
>> niter estimates are not kept up-to-date (they reference gimple stmts
>> and trees) in the keep-loop-stuff infrastructure so similar to the
>> SCEV cache we rely on people freeing it after passes.
>>
>> The following brings us a step closer to that by freeing them whenever
>> SCEV is invalidated (we only compute them when SCEV is active) plus
>> removing the odd record-bounds pass that just computes them, leaving
>> scavenging to following passes.
>>
>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Some awkward interactions with peeling means I'm installing the
> following less aggressive variant.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
>
> Richard.
>
> 2017-06-16  Richard Biener  <rguenther@suse.de>
>
>         PR tree-optimization/81090
>         * passes.def (pass_record_bounds): Remove.
>         * tree-pass.h (make_pass_record_bounds): Likewise.
>         * tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
>         make_pass_record_bounds): Likewise.
>         * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
>         not free niter estimates at the beginning but at the end.
>         * tree-scalar-evolution.c (scev_finalize): Free niter estimates.
>
>         * gcc.dg/graphite/pr81090.c: New testcase.
>

Sorry to bother you again...
With this commit (r249249), I've noticed regressions on aarch64/arm:
FAIL:    gcc.dg/vect/pr65947-9.c -flto -ffat-lto-objects
scan-tree-dump-not vect "LOOP VECTORIZED"
FAIL:    gcc.dg/vect/pr65947-9.c scan-tree-dump-not vect "LOOP VECTORIZED"

Christophe

> Index: gcc/passes.def
> ===================================================================
> --- gcc/passes.def      (revision 249246)
> +++ gcc/passes.def      (working copy)
> @@ -276,7 +276,6 @@ along with GCC; see the file COPYING3.
>           /* All unswitching, final value replacement and splitting can expose
>              empty loops.  Remove them now.  */
>           NEXT_PASS (pass_cd_dce);
> -         NEXT_PASS (pass_record_bounds);
>           NEXT_PASS (pass_iv_canon);
>           NEXT_PASS (pass_loop_distribution);
>           NEXT_PASS (pass_copy_prop);
> Index: gcc/testsuite/gcc.dg/graphite/pr81090.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/graphite/pr81090.c     (nonexistent)
> +++ gcc/testsuite/gcc.dg/graphite/pr81090.c     (working copy)
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -floop-nest-optimize" } */
> +
> +int x3, za;
> +int hg[1];
> +
> +void
> +yw (int dq)
> +{
> +  const int r7 = 2;
> +
> +  while (dq < 1)
> +    {
> +      for (x3 = 0; x3 < r7; ++x3)
> +       for (za = 0; za < r7; ++za)
> +         hg[1] = 0;
> +      ++dq;
> +    }
> +
> +  x3 = 0;
> +  while (x3 < r7)
> +    {
> +      ++x3;
> +      if (x3 == 0)
> +       break;
> +    }
> +}
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h     (revision 249246)
> +++ gcc/tree-pass.h     (working copy)
> @@ -373,7 +373,6 @@ extern gimple_opt_pass *make_pass_predco
>  extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_empty_loop (gcc::context *ctxt);
> -extern gimple_opt_pass *make_pass_record_bounds (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
> Index: gcc/tree-scalar-evolution.c
> ===================================================================
> --- gcc/tree-scalar-evolution.c (revision 249246)
> +++ gcc/tree-scalar-evolution.c (working copy)
> @@ -3636,6 +3636,7 @@ scev_finalize (void)
>      return;
>    scalar_evolution_info->empty ();
>    scalar_evolution_info = NULL;
> +  free_numbers_of_iterations_estimates (cfun);
>  }
>
>  /* Returns true if the expression EXPR is considered to be too expensive
> Index: gcc/tree-ssa-loop-ivcanon.c
> ===================================================================
> --- gcc/tree-ssa-loop-ivcanon.c (revision 249246)
> +++ gcc/tree-ssa-loop-ivcanon.c (working copy)
> @@ -1212,7 +1212,6 @@ canonicalize_induction_variables (void)
>    bool irred_invalidated = false;
>    bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
>
> -  free_numbers_of_iterations_estimates (cfun);
>    estimate_numbers_of_iterations ();
>
>    FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
> @@ -1230,6 +1229,7 @@ canonicalize_induction_variables (void)
>
>    /* Clean up the information about numbers of iterations, since brute force
>       evaluation could reveal new information.  */
> +  free_numbers_of_iterations_estimates (cfun);
>    scev_reset ();
>
>    if (!bitmap_empty_p (loop_closed_ssa_invalidated))
> Index: gcc/tree-ssa-loop.c
> ===================================================================
> --- gcc/tree-ssa-loop.c (revision 249246)
> +++ gcc/tree-ssa-loop.c (working copy)
> @@ -459,54 +459,6 @@ make_pass_scev_cprop (gcc::context *ctxt
>    return new pass_scev_cprop (ctxt);
>  }
>
> -/* Record bounds on numbers of iterations of loops.  */
> -
> -namespace {
> -
> -const pass_data pass_data_record_bounds =
> -{
> -  GIMPLE_PASS, /* type */
> -  "*record_bounds", /* name */
> -  OPTGROUP_NONE, /* optinfo_flags */
> -  TV_TREE_LOOP_BOUNDS, /* tv_id */
> -  ( PROP_cfg | PROP_ssa ), /* properties_required */
> -  0, /* properties_provided */
> -  0, /* properties_destroyed */
> -  0, /* todo_flags_start */
> -  0, /* todo_flags_finish */
> -};
> -
> -class pass_record_bounds : public gimple_opt_pass
> -{
> -public:
> -  pass_record_bounds (gcc::context *ctxt)
> -    : gimple_opt_pass (pass_data_record_bounds, ctxt)
> -  {}
> -
> -  /* opt_pass methods: */
> -  virtual unsigned int execute (function *);
> -
> -}; // class pass_record_bounds
> -
> -unsigned int
> -pass_record_bounds::execute (function *fun)
> -{
> -  if (number_of_loops (fun) <= 1)
> -    return 0;
> -
> -  estimate_numbers_of_iterations ();
> -  scev_reset ();
> -  return 0;
> -}
> -
> -} // anon namespace
> -
> -gimple_opt_pass *
> -make_pass_record_bounds (gcc::context *ctxt)
> -{
> -  return new pass_record_bounds (ctxt);
> -}
> -
>  /* Induction variable optimizations.  */
>
>  namespace {

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix PR81090, properly free niter estimates
  2017-06-19 12:31   ` Christophe Lyon
@ 2017-06-19 12:35     ` Richard Biener
  2017-06-20  8:38       ` Alan Hayward
       [not found]       ` <BACD880D-5189-4AAF-A306-614265C9744A@arm.com>
  0 siblings, 2 replies; 7+ messages in thread
From: Richard Biener @ 2017-06-19 12:35 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches, alan.hayward

On Mon, 19 Jun 2017, Christophe Lyon wrote:

> Hi Richard,
> 
> On 16 June 2017 at 14:18, Richard Biener <rguenther@suse.de> wrote:
> > On Wed, 14 Jun 2017, Richard Biener wrote:
> >
> >>
> >> niter estimates are not kept up-to-date (they reference gimple stmts
> >> and trees) in the keep-loop-stuff infrastructure so similar to the
> >> SCEV cache we rely on people freeing it after passes.
> >>
> >> The following brings us a step closer to that by freeing them whenever
> >> SCEV is invalidated (we only compute them when SCEV is active) plus
> >> removing the odd record-bounds pass that just computes them, leaving
> >> scavenging to following passes.
> >>
> >> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >
> > Some awkward interactions with peeling means I'm installing the
> > following less aggressive variant.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
> >
> > Richard.
> >
> > 2017-06-16  Richard Biener  <rguenther@suse.de>
> >
> >         PR tree-optimization/81090
> >         * passes.def (pass_record_bounds): Remove.
> >         * tree-pass.h (make_pass_record_bounds): Likewise.
> >         * tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
> >         make_pass_record_bounds): Likewise.
> >         * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
> >         not free niter estimates at the beginning but at the end.
> >         * tree-scalar-evolution.c (scev_finalize): Free niter estimates.
> >
> >         * gcc.dg/graphite/pr81090.c: New testcase.
> >
> 
> Sorry to bother you again...
> With this commit (r249249), I've noticed regressions on aarch64/arm:
> FAIL:    gcc.dg/vect/pr65947-9.c -flto -ffat-lto-objects
> scan-tree-dump-not vect "LOOP VECTORIZED"
> FAIL:    gcc.dg/vect/pr65947-9.c scan-tree-dump-not vect "LOOP VECTORIZED"

So the testcase gets vectorized now (for whatever reason) and still passes
execution.  Not sure why the testcase checked for not being vectorized.

Alan?

Richard.

> Christophe
> 
> > Index: gcc/passes.def
> > ===================================================================
> > --- gcc/passes.def      (revision 249246)
> > +++ gcc/passes.def      (working copy)
> > @@ -276,7 +276,6 @@ along with GCC; see the file COPYING3.
> >           /* All unswitching, final value replacement and splitting can expose
> >              empty loops.  Remove them now.  */
> >           NEXT_PASS (pass_cd_dce);
> > -         NEXT_PASS (pass_record_bounds);
> >           NEXT_PASS (pass_iv_canon);
> >           NEXT_PASS (pass_loop_distribution);
> >           NEXT_PASS (pass_copy_prop);
> > Index: gcc/testsuite/gcc.dg/graphite/pr81090.c
> > ===================================================================
> > --- gcc/testsuite/gcc.dg/graphite/pr81090.c     (nonexistent)
> > +++ gcc/testsuite/gcc.dg/graphite/pr81090.c     (working copy)
> > @@ -0,0 +1,27 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -floop-nest-optimize" } */
> > +
> > +int x3, za;
> > +int hg[1];
> > +
> > +void
> > +yw (int dq)
> > +{
> > +  const int r7 = 2;
> > +
> > +  while (dq < 1)
> > +    {
> > +      for (x3 = 0; x3 < r7; ++x3)
> > +       for (za = 0; za < r7; ++za)
> > +         hg[1] = 0;
> > +      ++dq;
> > +    }
> > +
> > +  x3 = 0;
> > +  while (x3 < r7)
> > +    {
> > +      ++x3;
> > +      if (x3 == 0)
> > +       break;
> > +    }
> > +}
> > Index: gcc/tree-pass.h
> > ===================================================================
> > --- gcc/tree-pass.h     (revision 249246)
> > +++ gcc/tree-pass.h     (working copy)
> > @@ -373,7 +373,6 @@ extern gimple_opt_pass *make_pass_predco
> >  extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
> >  extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
> >  extern gimple_opt_pass *make_pass_empty_loop (gcc::context *ctxt);
> > -extern gimple_opt_pass *make_pass_record_bounds (gcc::context *ctxt);
> >  extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
> >  extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
> >  extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
> > Index: gcc/tree-scalar-evolution.c
> > ===================================================================
> > --- gcc/tree-scalar-evolution.c (revision 249246)
> > +++ gcc/tree-scalar-evolution.c (working copy)
> > @@ -3636,6 +3636,7 @@ scev_finalize (void)
> >      return;
> >    scalar_evolution_info->empty ();
> >    scalar_evolution_info = NULL;
> > +  free_numbers_of_iterations_estimates (cfun);
> >  }
> >
> >  /* Returns true if the expression EXPR is considered to be too expensive
> > Index: gcc/tree-ssa-loop-ivcanon.c
> > ===================================================================
> > --- gcc/tree-ssa-loop-ivcanon.c (revision 249246)
> > +++ gcc/tree-ssa-loop-ivcanon.c (working copy)
> > @@ -1212,7 +1212,6 @@ canonicalize_induction_variables (void)
> >    bool irred_invalidated = false;
> >    bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
> >
> > -  free_numbers_of_iterations_estimates (cfun);
> >    estimate_numbers_of_iterations ();
> >
> >    FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
> > @@ -1230,6 +1229,7 @@ canonicalize_induction_variables (void)
> >
> >    /* Clean up the information about numbers of iterations, since brute force
> >       evaluation could reveal new information.  */
> > +  free_numbers_of_iterations_estimates (cfun);
> >    scev_reset ();
> >
> >    if (!bitmap_empty_p (loop_closed_ssa_invalidated))
> > Index: gcc/tree-ssa-loop.c
> > ===================================================================
> > --- gcc/tree-ssa-loop.c (revision 249246)
> > +++ gcc/tree-ssa-loop.c (working copy)
> > @@ -459,54 +459,6 @@ make_pass_scev_cprop (gcc::context *ctxt
> >    return new pass_scev_cprop (ctxt);
> >  }
> >
> > -/* Record bounds on numbers of iterations of loops.  */
> > -
> > -namespace {
> > -
> > -const pass_data pass_data_record_bounds =
> > -{
> > -  GIMPLE_PASS, /* type */
> > -  "*record_bounds", /* name */
> > -  OPTGROUP_NONE, /* optinfo_flags */
> > -  TV_TREE_LOOP_BOUNDS, /* tv_id */
> > -  ( PROP_cfg | PROP_ssa ), /* properties_required */
> > -  0, /* properties_provided */
> > -  0, /* properties_destroyed */
> > -  0, /* todo_flags_start */
> > -  0, /* todo_flags_finish */
> > -};
> > -
> > -class pass_record_bounds : public gimple_opt_pass
> > -{
> > -public:
> > -  pass_record_bounds (gcc::context *ctxt)
> > -    : gimple_opt_pass (pass_data_record_bounds, ctxt)
> > -  {}
> > -
> > -  /* opt_pass methods: */
> > -  virtual unsigned int execute (function *);
> > -
> > -}; // class pass_record_bounds
> > -
> > -unsigned int
> > -pass_record_bounds::execute (function *fun)
> > -{
> > -  if (number_of_loops (fun) <= 1)
> > -    return 0;
> > -
> > -  estimate_numbers_of_iterations ();
> > -  scev_reset ();
> > -  return 0;
> > -}
> > -
> > -} // anon namespace
> > -
> > -gimple_opt_pass *
> > -make_pass_record_bounds (gcc::context *ctxt)
> > -{
> > -  return new pass_record_bounds (ctxt);
> > -}
> > -
> >  /* Induction variable optimizations.  */
> >
> >  namespace {
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix PR81090, properly free niter estimates
  2017-06-19 12:35     ` Richard Biener
@ 2017-06-20  8:38       ` Alan Hayward
       [not found]       ` <BACD880D-5189-4AAF-A306-614265C9744A@arm.com>
  1 sibling, 0 replies; 7+ messages in thread
From: Alan Hayward @ 2017-06-20  8:38 UTC (permalink / raw)
  To: Richard Biener; +Cc: Christophe Lyon, gcc-patches, nd


> On 19 Jun 2017, at 13:35, Richard Biener <rguenther@suse.de> wrote:
> 
> On Mon, 19 Jun 2017, Christophe Lyon wrote:
> 
>> Hi Richard,
>> 
>> On 16 June 2017 at 14:18, Richard Biener <rguenther@suse.de> wrote:
>>> On Wed, 14 Jun 2017, Richard Biener wrote:
>>> 
>>>> 
>>>> niter estimates are not kept up-to-date (they reference gimple stmts
>>>> and trees) in the keep-loop-stuff infrastructure so similar to the
>>>> SCEV cache we rely on people freeing it after passes.
>>>> 
>>>> The following brings us a step closer to that by freeing them whenever
>>>> SCEV is invalidated (we only compute them when SCEV is active) plus
>>>> removing the odd record-bounds pass that just computes them, leaving
>>>> scavenging to following passes.
>>>> 
>>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>>> 
>>> Some awkward interactions with peeling means I'm installing the
>>> following less aggressive variant.
>>> 
>>> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
>>> 
>>> Richard.
>>> 
>>> 2017-06-16  Richard Biener  <rguenther@suse.de>
>>> 
>>>        PR tree-optimization/81090
>>>        * passes.def (pass_record_bounds): Remove.
>>>        * tree-pass.h (make_pass_record_bounds): Likewise.
>>>        * tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
>>>        make_pass_record_bounds): Likewise.
>>>        * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
>>>        not free niter estimates at the beginning but at the end.
>>>        * tree-scalar-evolution.c (scev_finalize): Free niter estimates.
>>> 
>>>        * gcc.dg/graphite/pr81090.c: New testcase.
>>> 
>> 
>> Sorry to bother you again...
>> With this commit (r249249), I've noticed regressions on aarch64/arm:
>> FAIL:    gcc.dg/vect/pr65947-9.c -flto -ffat-lto-objects
>> scan-tree-dump-not vect "LOOP VECTORIZED"
>> FAIL:    gcc.dg/vect/pr65947-9.c scan-tree-dump-not vect "LOOP VECTORIZED"
> 
> So the testcase gets vectorized now (for whatever reason) and still passes
> execution.  Not sure why the testcase checked for not being vectorized.
> 
> Alan?
> 
> Richard.
> 

I’ve not looked at the new patch, but pr65947-9.c was added to test:

+ /* Condition reduction with maximum possible loop size.  Will fail to
+    vectorize because the vectorisation requires a slot for default values.  */

So, in the pr65947-9.c, if nothing passes the IF clause, then LAST needs to be
set to -72.


To vectorise pr65947-9.c the code will do: 

+      /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
+        which is updated with the current index of the loop for every match of
+        the original loop's cond_expr (VEC_STMT).  This results in a vector
+        containing the last time the condition passed for that vector lane.
+        The first match will be a 1 to allow 0 to be used for non-matching
+        indexes.  If there are no matches at all then the vector will be all
+        zeroes.  */

Which will then be used in the following way:

+      /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
+        various data values where the condition matched and another vector
+        (INDUCTION_INDEX) containing all the indexes of those matches.  We
+        need to extract the last matching index (which will be the index with
+        highest value) and use this to index into the data vector.
+        For the case where there were no matches, the data vector will contain
+        all default values and the index vector will be all zeros.  */


However, the INDUCTION_INDEX values will have a max size:

+      /* The additional index will be the same type as the condition.  Check
+        that the loop can fit into this less one (because we'll use up the
+        zero slot for when there are no matches).  */
+      tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
+      if (wi::geu_p (ni, wi::to_widest (max_index)))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "loop size is greater than data size.\n");
+         return false;
+       }
+    }


Going back to pr65947-9.c,  the loop size is 255.

+ #define N 255

Given that the default value takes up value 0 of our index, it means the max
Possible: size of a value in INDUCTION_INDEX will be 256, 

However, the type of INDUCTION_INDEX will match the type of A, which is char:

+char
+condition_reduction (char *a, char min_v)
+{
+  char last = -72;
+
+  for (int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = a[i];
+
+  return last;
+}

So, it’s not safe for the loop to be vectorised, and it should fail to do so
with the message "loop size is greater than data size”.


(All the code snippets are taken from patch 956442b598f3a7241119624edb9af0155a42bf6c
"Support for vectorizing conditional expressions” on 2015-10-23,
It’s probable some of the code may have changed since then.)


Hope that makes sense,

Alan.



>> Christophe
>> 
>>> Index: gcc/passes.def
>>> ===================================================================
>>> --- gcc/passes.def      (revision 249246)
>>> +++ gcc/passes.def      (working copy)
>>> @@ -276,7 +276,6 @@ along with GCC; see the file COPYING3.
>>>          /* All unswitching, final value replacement and splitting can expose
>>>             empty loops.  Remove them now.  */
>>>          NEXT_PASS (pass_cd_dce);
>>> -         NEXT_PASS (pass_record_bounds);
>>>          NEXT_PASS (pass_iv_canon);
>>>          NEXT_PASS (pass_loop_distribution);
>>>          NEXT_PASS (pass_copy_prop);
>>> Index: gcc/testsuite/gcc.dg/graphite/pr81090.c
>>> ===================================================================
>>> --- gcc/testsuite/gcc.dg/graphite/pr81090.c     (nonexistent)
>>> +++ gcc/testsuite/gcc.dg/graphite/pr81090.c     (working copy)
>>> @@ -0,0 +1,27 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2 -floop-nest-optimize" } */
>>> +
>>> +int x3, za;
>>> +int hg[1];
>>> +
>>> +void
>>> +yw (int dq)
>>> +{
>>> +  const int r7 = 2;
>>> +
>>> +  while (dq < 1)
>>> +    {
>>> +      for (x3 = 0; x3 < r7; ++x3)
>>> +       for (za = 0; za < r7; ++za)
>>> +         hg[1] = 0;
>>> +      ++dq;
>>> +    }
>>> +
>>> +  x3 = 0;
>>> +  while (x3 < r7)
>>> +    {
>>> +      ++x3;
>>> +      if (x3 == 0)
>>> +       break;
>>> +    }
>>> +}
>>> Index: gcc/tree-pass.h
>>> ===================================================================
>>> --- gcc/tree-pass.h     (revision 249246)
>>> +++ gcc/tree-pass.h     (working copy)
>>> @@ -373,7 +373,6 @@ extern gimple_opt_pass *make_pass_predco
>>> extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_empty_loop (gcc::context *ctxt);
>>> -extern gimple_opt_pass *make_pass_record_bounds (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
>>> Index: gcc/tree-scalar-evolution.c
>>> ===================================================================
>>> --- gcc/tree-scalar-evolution.c (revision 249246)
>>> +++ gcc/tree-scalar-evolution.c (working copy)
>>> @@ -3636,6 +3636,7 @@ scev_finalize (void)
>>>     return;
>>>   scalar_evolution_info->empty ();
>>>   scalar_evolution_info = NULL;
>>> +  free_numbers_of_iterations_estimates (cfun);
>>> }
>>> 
>>> /* Returns true if the expression EXPR is considered to be too expensive
>>> Index: gcc/tree-ssa-loop-ivcanon.c
>>> ===================================================================
>>> --- gcc/tree-ssa-loop-ivcanon.c (revision 249246)
>>> +++ gcc/tree-ssa-loop-ivcanon.c (working copy)
>>> @@ -1212,7 +1212,6 @@ canonicalize_induction_variables (void)
>>>   bool irred_invalidated = false;
>>>   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
>>> 
>>> -  free_numbers_of_iterations_estimates (cfun);
>>>   estimate_numbers_of_iterations ();
>>> 
>>>   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
>>> @@ -1230,6 +1229,7 @@ canonicalize_induction_variables (void)
>>> 
>>>   /* Clean up the information about numbers of iterations, since brute force
>>>      evaluation could reveal new information.  */
>>> +  free_numbers_of_iterations_estimates (cfun);
>>>   scev_reset ();
>>> 
>>>   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
>>> Index: gcc/tree-ssa-loop.c
>>> ===================================================================
>>> --- gcc/tree-ssa-loop.c (revision 249246)
>>> +++ gcc/tree-ssa-loop.c (working copy)
>>> @@ -459,54 +459,6 @@ make_pass_scev_cprop (gcc::context *ctxt
>>>   return new pass_scev_cprop (ctxt);
>>> }
>>> 
>>> -/* Record bounds on numbers of iterations of loops.  */
>>> -
>>> -namespace {
>>> -
>>> -const pass_data pass_data_record_bounds =
>>> -{
>>> -  GIMPLE_PASS, /* type */
>>> -  "*record_bounds", /* name */
>>> -  OPTGROUP_NONE, /* optinfo_flags */
>>> -  TV_TREE_LOOP_BOUNDS, /* tv_id */
>>> -  ( PROP_cfg | PROP_ssa ), /* properties_required */
>>> -  0, /* properties_provided */
>>> -  0, /* properties_destroyed */
>>> -  0, /* todo_flags_start */
>>> -  0, /* todo_flags_finish */
>>> -};
>>> -
>>> -class pass_record_bounds : public gimple_opt_pass
>>> -{
>>> -public:
>>> -  pass_record_bounds (gcc::context *ctxt)
>>> -    : gimple_opt_pass (pass_data_record_bounds, ctxt)
>>> -  {}
>>> -
>>> -  /* opt_pass methods: */
>>> -  virtual unsigned int execute (function *);
>>> -
>>> -}; // class pass_record_bounds
>>> -
>>> -unsigned int
>>> -pass_record_bounds::execute (function *fun)
>>> -{
>>> -  if (number_of_loops (fun) <= 1)
>>> -    return 0;
>>> -
>>> -  estimate_numbers_of_iterations ();
>>> -  scev_reset ();
>>> -  return 0;
>>> -}
>>> -
>>> -} // anon namespace
>>> -
>>> -gimple_opt_pass *
>>> -make_pass_record_bounds (gcc::context *ctxt)
>>> -{
>>> -  return new pass_record_bounds (ctxt);
>>> -}
>>> -
>>> /* Induction variable optimizations.  */
>>> 
>>> namespace {
>> 
>> 
> 
> -- 
> Richard Biener <rguenther@suse.de>
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix PR81090, properly free niter estimates
       [not found]       ` <BACD880D-5189-4AAF-A306-614265C9744A@arm.com>
@ 2017-06-20  9:45         ` Richard Biener
  2017-06-21 12:47           ` Christophe Lyon
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2017-06-20  9:45 UTC (permalink / raw)
  To: Alan Hayward; +Cc: Christophe Lyon, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5647 bytes --]

On Tue, 20 Jun 2017, Alan Hayward wrote:

> 
> > On 19 Jun 2017, at 13:35, Richard Biener <rguenther@suse.de> wrote:
> >
> > On Mon, 19 Jun 2017, Christophe Lyon wrote:
> >
> >> Hi Richard,
> >>
> >> On 16 June 2017 at 14:18, Richard Biener <rguenther@suse.de> wrote:
> >>> On Wed, 14 Jun 2017, Richard Biener wrote:
> >>>
> >>>>
> >>>> niter estimates are not kept up-to-date (they reference gimple stmts
> >>>> and trees) in the keep-loop-stuff infrastructure so similar to the
> >>>> SCEV cache we rely on people freeing it after passes.
> >>>>
> >>>> The following brings us a step closer to that by freeing them whenever
> >>>> SCEV is invalidated (we only compute them when SCEV is active) plus
> >>>> removing the odd record-bounds pass that just computes them, leaving
> >>>> scavenging to following passes.
> >>>>
> >>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >>>
> >>> Some awkward interactions with peeling means I'm installing the
> >>> following less aggressive variant.
> >>>
> >>> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
> >>>
> >>> Richard.
> >>>
> >>> 2017-06-16  Richard Biener  <rguenther@suse.de>
> >>>
> >>>        PR tree-optimization/81090
> >>>        * passes.def (pass_record_bounds): Remove.
> >>>        * tree-pass.h (make_pass_record_bounds): Likewise.
> >>>        * tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
> >>>        make_pass_record_bounds): Likewise.
> >>>        * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
> >>>        not free niter estimates at the beginning but at the end.
> >>>        * tree-scalar-evolution.c (scev_finalize): Free niter estimates.
> >>>
> >>>        * gcc.dg/graphite/pr81090.c: New testcase.
> >>>
> >>
> >> Sorry to bother you again...
> >> With this commit (r249249), I've noticed regressions on aarch64/arm:
> >> FAIL:    gcc.dg/vect/pr65947-9.c -flto -ffat-lto-objects
> >> scan-tree-dump-not vect "LOOP VECTORIZED"
> >> FAIL:    gcc.dg/vect/pr65947-9.c scan-tree-dump-not vect "LOOP VECTORIZED"
> >
> > So the testcase gets vectorized now (for whatever reason) and still passes
> > execution.  Not sure why the testcase checked for not being vectorized.
> >
> > Alan?
> >
> > Richard.
> 
> I’ve not looked at the new patch, but pr65947-9.c was added to test:
> 
> + /* Condition reduction with maximum possible loop size.  Will fail to
> +    vectorize because the vectorisation requires a slot for default values.  */
> 
> So, in the pr65947-9.c, if nothing passes the IF clause, then LAST needs to be
> set to -72.

So the runtime part of the testcase fails to test this case and we expect
it to FAIL if vectorized?

Index: testsuite/gcc.dg/vect/pr65947-9.c
===================================================================
--- testsuite/gcc.dg/vect/pr65947-9.c   (revision 249145)
+++ testsuite/gcc.dg/vect/pr65947-9.c   (working copy)
@@ -34,9 +34,9 @@ main (void)
 
   check_vect ();
 
-  char ret = condition_reduction (a, 16);
+  char ret = condition_reduction (a, 1);
 
-  if (ret != 10)
+  if (ret != -72)
     abort ();
 
   return 0;

On aarch64 I can reproduce the inline copy in main to be vectorized
(doesn't happen on x86_64).  niter analysis says:

Analyzing # of iterations of loop 1
  exit condition [253, + , 4294967295] != 0
  bounds on difference of bases: -253 ... -253
  result:
    # of iterations 253, bounded by 253
Analyzing # of iterations of loop 1
  exit condition [253, + , 4294967295] != 0
  bounds on difference of bases: -253 ... -253
  result:
    # of iterations 253, bounded by 253
Statement (exit)if (ivtmp_45 != 0)
 is executed at most 253 (bounded by 253) + 1 times in loop 1.

so it fits there.  While the offline copy has

Analyzing # of iterations of loop 1
  exit condition [254, + , 4294967295] != 0
  bounds on difference of bases: -254 ... -254
  result:
    # of iterations 254, bounded by 254
Analyzing # of iterations of loop 1
  exit condition [254, + , 4294967295] != 0
  bounds on difference of bases: -254 ... -254
  result:
    # of iterations 254, bounded by 254
Statement (exit)if (ivtmp_7 != 0)
 is executed at most 254 (bounded by 254) + 1 times in loop 1.

we peeled one iteration (ch_loop does that) so we have the place
left.

Marking the function noinline works as a fix I guess.

Tested on x86_64-unknown-linux-gnu, installed.

Richard.

2017-06-20  Richard Biener  <rguenther@suse.de>

	* gcc.dg/vect/pr65947-9.c: Adjust.

Index: gcc/testsuite/gcc.dg/vect/pr65947-9.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr65947-9.c	(revision 249145)
+++ gcc/testsuite/gcc.dg/vect/pr65947-9.c	(working copy)
@@ -9,10 +9,10 @@ extern void abort (void) __attribute__ (
 /* Condition reduction with maximum possible loop size.  Will fail to
    vectorize because the vectorisation requires a slot for default values.  */
 
-char
+signed char __attribute__((noinline,noclone))
 condition_reduction (char *a, char min_v)
 {
-  char last = -72;
+  signed char last = -72;
 
   for (int i = 0; i < N; i++)
     if (a[i] < min_v)
@@ -21,10 +21,10 @@ condition_reduction (char *a, char min_v
   return last;
 }
 
-char
-main (void)
+int
+main ()
 {
-  char a[N] = {
+  signed char a[N] = {
   11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
   1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
   21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
@@ -34,11 +34,14 @@ main (void)
 
   check_vect ();
 
-  char ret = condition_reduction (a, 16);
-
+  signed char ret = condition_reduction (a, 16);
   if (ret != 10)
     abort ();
 
+  ret = condition_reduction (a, 1);
+  if (ret != -72)
+    abort ();
+
   return 0;
 }
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Fix PR81090, properly free niter estimates
  2017-06-20  9:45         ` Richard Biener
@ 2017-06-21 12:47           ` Christophe Lyon
  0 siblings, 0 replies; 7+ messages in thread
From: Christophe Lyon @ 2017-06-21 12:47 UTC (permalink / raw)
  To: Richard Biener; +Cc: Alan Hayward, gcc-patches

On 20 June 2017 at 11:45, Richard Biener <rguenther@suse.de> wrote:
> On Tue, 20 Jun 2017, Alan Hayward wrote:
>
>>
>> > On 19 Jun 2017, at 13:35, Richard Biener <rguenther@suse.de> wrote:
>> >
>> > On Mon, 19 Jun 2017, Christophe Lyon wrote:
>> >
>> >> Hi Richard,
>> >>
>> >> On 16 June 2017 at 14:18, Richard Biener <rguenther@suse.de> wrote:
>> >>> On Wed, 14 Jun 2017, Richard Biener wrote:
>> >>>
>> >>>>
>> >>>> niter estimates are not kept up-to-date (they reference gimple stmts
>> >>>> and trees) in the keep-loop-stuff infrastructure so similar to the
>> >>>> SCEV cache we rely on people freeing it after passes.
>> >>>>
>> >>>> The following brings us a step closer to that by freeing them whenever
>> >>>> SCEV is invalidated (we only compute them when SCEV is active) plus
>> >>>> removing the odd record-bounds pass that just computes them, leaving
>> >>>> scavenging to following passes.
>> >>>>
>> >>>> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>> >>>
>> >>> Some awkward interactions with peeling means I'm installing the
>> >>> following less aggressive variant.
>> >>>
>> >>> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.
>> >>>
>> >>> Richard.
>> >>>
>> >>> 2017-06-16  Richard Biener  <rguenther@suse.de>
>> >>>
>> >>>        PR tree-optimization/81090
>> >>>        * passes.def (pass_record_bounds): Remove.
>> >>>        * tree-pass.h (make_pass_record_bounds): Likewise.
>> >>>        * tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
>> >>>        make_pass_record_bounds): Likewise.
>> >>>        * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
>> >>>        not free niter estimates at the beginning but at the end.
>> >>>        * tree-scalar-evolution.c (scev_finalize): Free niter estimates.
>> >>>
>> >>>        * gcc.dg/graphite/pr81090.c: New testcase.
>> >>>
>> >>
>> >> Sorry to bother you again...
>> >> With this commit (r249249), I've noticed regressions on aarch64/arm:
>> >> FAIL:    gcc.dg/vect/pr65947-9.c -flto -ffat-lto-objects
>> >> scan-tree-dump-not vect "LOOP VECTORIZED"
>> >> FAIL:    gcc.dg/vect/pr65947-9.c scan-tree-dump-not vect "LOOP VECTORIZED"
>> >
>> > So the testcase gets vectorized now (for whatever reason) and still passes
>> > execution.  Not sure why the testcase checked for not being vectorized.
>> >
>> > Alan?
>> >
>> > Richard.
>>
>> I’ve not looked at the new patch, but pr65947-9.c was added to test:
>>
>> + /* Condition reduction with maximum possible loop size.  Will fail to
>> +    vectorize because the vectorisation requires a slot for default values.  */
>>
>> So, in the pr65947-9.c, if nothing passes the IF clause, then LAST needs to be
>> set to -72.
>
> So the runtime part of the testcase fails to test this case and we expect
> it to FAIL if vectorized?
>
> Index: testsuite/gcc.dg/vect/pr65947-9.c
> ===================================================================
> --- testsuite/gcc.dg/vect/pr65947-9.c   (revision 249145)
> +++ testsuite/gcc.dg/vect/pr65947-9.c   (working copy)
> @@ -34,9 +34,9 @@ main (void)
>
>    check_vect ();
>
> -  char ret = condition_reduction (a, 16);
> +  char ret = condition_reduction (a, 1);
>
> -  if (ret != 10)
> +  if (ret != -72)
>      abort ();
>
>    return 0;
>
> On aarch64 I can reproduce the inline copy in main to be vectorized
> (doesn't happen on x86_64).  niter analysis says:
>
> Analyzing # of iterations of loop 1
>   exit condition [253, + , 4294967295] != 0
>   bounds on difference of bases: -253 ... -253
>   result:
>     # of iterations 253, bounded by 253
> Analyzing # of iterations of loop 1
>   exit condition [253, + , 4294967295] != 0
>   bounds on difference of bases: -253 ... -253
>   result:
>     # of iterations 253, bounded by 253
> Statement (exit)if (ivtmp_45 != 0)
>  is executed at most 253 (bounded by 253) + 1 times in loop 1.
>
> so it fits there.  While the offline copy has
>
> Analyzing # of iterations of loop 1
>   exit condition [254, + , 4294967295] != 0
>   bounds on difference of bases: -254 ... -254
>   result:
>     # of iterations 254, bounded by 254
> Analyzing # of iterations of loop 1
>   exit condition [254, + , 4294967295] != 0
>   bounds on difference of bases: -254 ... -254
>   result:
>     # of iterations 254, bounded by 254
> Statement (exit)if (ivtmp_7 != 0)
>  is executed at most 254 (bounded by 254) + 1 times in loop 1.
>
> we peeled one iteration (ch_loop does that) so we have the place
> left.
>
> Marking the function noinline works as a fix I guess.
>
> Tested on x86_64-unknown-linux-gnu, installed.
>
> Richard.
>
> 2017-06-20  Richard Biener  <rguenther@suse.de>
>
>         * gcc.dg/vect/pr65947-9.c: Adjust.

Hi,

After this change (r249400), the test fails on aarch64/arm:
FAIL:    gcc.dg/vect/pr65947-9.c -flto -ffat-lto-objects
scan-tree-dump vect "loop size is greater than data size"
FAIL:    gcc.dg/vect/pr65947-9.c scan-tree-dump vect "loop size is
greater than data size"

Christophe

>
> Index: gcc/testsuite/gcc.dg/vect/pr65947-9.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr65947-9.c       (revision 249145)
> +++ gcc/testsuite/gcc.dg/vect/pr65947-9.c       (working copy)
> @@ -9,10 +9,10 @@ extern void abort (void) __attribute__ (
>  /* Condition reduction with maximum possible loop size.  Will fail to
>     vectorize because the vectorisation requires a slot for default values.  */
>
> -char
> +signed char __attribute__((noinline,noclone))
>  condition_reduction (char *a, char min_v)
>  {
> -  char last = -72;
> +  signed char last = -72;
>
>    for (int i = 0; i < N; i++)
>      if (a[i] < min_v)
> @@ -21,10 +21,10 @@ condition_reduction (char *a, char min_v
>    return last;
>  }
>
> -char
> -main (void)
> +int
> +main ()
>  {
> -  char a[N] = {
> +  signed char a[N] = {
>    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
>    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
>    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
> @@ -34,11 +34,14 @@ main (void)
>
>    check_vect ();
>
> -  char ret = condition_reduction (a, 16);
> -
> +  signed char ret = condition_reduction (a, 16);
>    if (ret != 10)
>      abort ();
>
> +  ret = condition_reduction (a, 1);
> +  if (ret != -72)
> +    abort ();
> +
>    return 0;
>  }
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-06-21 12:47 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-14 12:56 [PATCH] Fix PR81090, properly free niter estimates Richard Biener
2017-06-16 12:18 ` Richard Biener
2017-06-19 12:31   ` Christophe Lyon
2017-06-19 12:35     ` Richard Biener
2017-06-20  8:38       ` Alan Hayward
     [not found]       ` <BACD880D-5189-4AAF-A306-614265C9744A@arm.com>
2017-06-20  9:45         ` Richard Biener
2017-06-21 12:47           ` Christophe Lyon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).