public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix for PR64081 in RTL loop unroller
@ 2014-12-19 10:24 Zamyatin, Igor
  2015-01-09  6:09 ` Jeff Law
  0 siblings, 1 reply; 9+ messages in thread
From: Zamyatin, Igor @ 2014-12-19 10:24 UTC (permalink / raw)
  To: GCC Patches (gcc-patches@gcc.gnu.org); +Cc: ysrumyan

Hi!

This is an attempt to extend RTL unroller to allow cases like mentioned in the PR - 
namely when loop has duplicated exit blocks and back edges.

Bootstrapped and regtested on x86_64, also checking wide range of benchmarks - spec2K, spec2006, EEMBC
Is it ok for trunk in case if no testing issues?

Thanks,
Igor

Changelog:

Gcc:

2014-12-19  Igor Zamyatin  <igor.zamyatin@intel.com>

	PR rtl-optimization/64081
	* loop-iv.c (def_pred_latch_p): New function.
	(latch_dominating_def): Allow specific cases with non-single
	definitions.
	(iv_get_reaching_def): Likewise.
	(check_complex_exit_p): New function.
	(check_simple_exit): Use check_complex_exit_p to allow certain cases
	with exits not executing on any iteration.


Testsuite:

2014-12-19  Igor Zamyatin  <igor.zamyatin@intel.com>

	PR rtl-optimization/64081
	* gcc.dg/pr64081.c: New test.


diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
index f55cea2..d5d48f1 100644
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
@@ -314,34 +314,67 @@ iv_analysis_loop_init (struct loop *loop)
   check_iv_ref_table_size ();
 }
 
+/* Checks that def is in an immediate predecessor of the latch block.  */
+
+static bool
+def_pred_latch_p (df_ref d_ref)
+{
+  basic_block bb = DF_REF_BB (d_ref);
+  edge_iterator ei;
+  edge e;
+
+  FOR_EACH_EDGE (e, ei, current_loop->latch->preds)
+    {
+      if (e->src == bb)
+	return true;
+    }
+  return false;
+}
+
 /* Finds the definition of REG that dominates loop latch and stores
    it to DEF.  Returns false if there is not a single definition
-   dominating the latch.  If REG has no definition in loop, DEF
+   dominating the latch or all defs are same and they are on different
+   predecessors of loop latch.  If REG has no definition in loop, DEF
    is set to NULL and true is returned.  */
 
 static bool
 latch_dominating_def (rtx reg, df_ref *def)
 {
   df_ref single_rd = NULL, adef;
-  unsigned regno = REGNO (reg);
+  unsigned regno = REGNO (reg), def_num = 0;
   struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (current_loop->latch);
 
   for (adef = DF_REG_DEF_CHAIN (regno); adef; adef = DF_REF_NEXT_REG (adef))
     {
+      /* Initialize this to true for the very first iteration when
+	 SINGLE_RD is NULL.  */
+      bool def_pred_latch = true;
+
       if (!bitmap_bit_p (df->blocks_to_analyze, DF_REF_BBNO (adef))
 	  || !bitmap_bit_p (&bb_info->out, DF_REF_ID (adef)))
 	continue;
 
-      /* More than one reaching definition.  */
+      /* More than one reaching definition is ok in case definitions are
+	 in predecessors of latch block and those definitions are the same.
+	 Probably this could be relaxed and check for sub-dominance instead
+	 predecessor.  */
       if (single_rd)
-	return false;
-
-      if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef)))
-	return false;
+	{
+	  def_num++;
+	  if (!(def_pred_latch = def_pred_latch_p (adef))
+	      || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)),
+			       PATTERN (DF_REF_INSN (adef))))
+	    return false;
+	}
 
       single_rd = adef;
     }
 
+  /* If we have single definition it has to be excuted on each iteration.  */
+  if (!def_num && single_rd
+      && !just_once_each_iteration_p (current_loop, DF_REF_BB (single_rd)))
+    return false;
+
   *def = single_rd;
   return true;
 }
@@ -351,10 +384,10 @@ latch_dominating_def (rtx reg, df_ref *def)
 static enum iv_grd_result
 iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
 {
-  df_ref use, adef;
+  df_ref use, adef = NULL;
   basic_block def_bb, use_bb;
   rtx_insn *def_insn;
-  bool dom_p;
+  bool dom_p, dom_latch_p = false;
 
   *def = NULL;
   if (!simple_reg_p (reg))
@@ -369,11 +402,26 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
   if (!DF_REF_CHAIN (use))
     return GRD_INVARIANT;
 
-  /* More than one reaching def.  */
+  adef = DF_REF_CHAIN (use)->ref;
+  /* Having more than one reaching def is ok once all defs are in blocks which
+     are latch's predecessors.  */
   if (DF_REF_CHAIN (use)->next)
-    return GRD_INVALID;
+    {
+      df_link* defs;
+      unsigned int def_num = 0;
 
-  adef = DF_REF_CHAIN (use)->ref;
+      for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
+	{
+	  if (!def_pred_latch_p (defs->ref))
+	    return GRD_INVALID;
+	  def_num++;
+	}
+      /* Make sure all preds contain definitions.  */
+      if (def_num != EDGE_COUNT (current_loop->latch->preds))
+	return GRD_INVALID;
+
+      dom_latch_p = true;
+    }
 
   /* We do not handle setting only part of the register.  */
   if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE)
@@ -396,8 +444,8 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
 
   /* The definition does not dominate the use.  This is still OK if
      this may be a use of a biv, i.e. if the def_bb dominates loop
-     latch.  */
-  if (just_once_each_iteration_p (current_loop, def_bb))
+     latch or all defs are in latch's predecessors.  */
+  if (dom_latch_p || just_once_each_iteration_p (current_loop, def_bb))
     return GRD_MAYBE_BIV;
 
   return GRD_INVALID;
@@ -2910,6 +2958,49 @@ fail:
   return;
 }
 
+/* Check whether exit is not simple but still good for further analysis.
+   Looks like such loops mostly are a result of jump threading.  */
+
+static bool
+check_complex_exit_p (struct loop* loop, basic_block bb)
+{
+  edge e;
+  basic_block pred, exit;
+
+  if (EDGE_COUNT (bb->preds) > 1)
+    return false;
+
+  e = EDGE_PRED (bb, 0);
+
+  pred = e->src;
+  if (EDGE_COUNT (pred->succs) != 2)
+    return false;
+
+  /* Predecessor must be tested (at least) once during any iteration.  */
+  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, pred))
+    return false;
+
+  if (EDGE_SUCC (pred, 0)->dest == bb)
+    exit = EDGE_SUCC (pred, 1)->dest;
+  else
+    exit = EDGE_SUCC (pred, 0)->dest;
+
+  /* Check that EXIT is really loop exit.  */
+  if (flow_bb_inside_loop_p (loop, exit))
+    {
+      edge_iterator eei;
+      edge ee;
+
+      FOR_EACH_EDGE (ee, eei, exit->succs)
+	{
+	  if (!flow_bb_inside_loop_p (loop, ee->dest))
+	    return true;
+	}
+    }
+  return false;
+
+}
+
 /* Checks whether E is a simple exit from LOOP and stores its description
    into DESC.  */
 
@@ -2929,7 +3020,8 @@ check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc)
     return;
 
   /* It must be tested (at least) once during any iteration.  */
-  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
+  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb)
+      && !check_complex_exit_p (loop, exit_bb))
     return;
 
   /* It must end in a simple conditional jump.  */
diff --git a/gcc/testsuite/gcc.dg/pr64081.c b/gcc/testsuite/gcc.dg/pr64081.c
new file mode 100644
index 0000000..6151d00
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr64081.c
@@ -0,0 +1,41 @@
+/* PR rtl-optimization/64081 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll" } */
+
+long token;
+long *data;
+long *arr1;
+long *arr2;
+
+int main (int argc, const char* argv[])
+{
+  unsigned long i;
+  static long pos, start, count;
+  static int dir;
+
+  pos = 0;
+  dir = 1;
+
+  for (i = 0; i < argc; i++)
+    {
+      start = pos;
+      for (count = 0; count <= 400; count++)
+	{
+	  if (token == data[pos])
+	    break;
+	  if (dir == 1)
+	    pos = arr1[pos];
+	  else
+	    pos = arr2[pos];
+	  if (pos == -1)
+	    {
+	      pos = start;
+	      dir = !dir;
+	    }
+	}
+    }
+  return pos + dir;
+}
+
+/* { dg-final { scan-rtl-dump "loop unrolled 7 times" "loop2_unroll" } } */
+/* { dg-final { cleanup-rtl-dump "loop*" } } */


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Fix for PR64081 in RTL loop unroller
  2014-12-19 10:24 [PATCH] Fix for PR64081 in RTL loop unroller Zamyatin, Igor
@ 2015-01-09  6:09 ` Jeff Law
       [not found]   ` <CAEoMCqTzzMz-qO5x1q=5Htjxe1Von02iB_BpW7umXyQy_nVxtw@mail.gmail.com>
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Law @ 2015-01-09  6:09 UTC (permalink / raw)
  To: Zamyatin, Igor, GCC Patches (gcc-patches@gcc.gnu.org); +Cc: ysrumyan

On 12/19/14 03:20, Zamyatin, Igor wrote:
> Hi!
>
> This is an attempt to extend RTL unroller to allow cases like mentioned in the PR -
> namely when loop has duplicated exit blocks and back edges.
>
> Bootstrapped and regtested on x86_64, also checking wide range of benchmarks - spec2K, spec2006, EEMBC
> Is it ok for trunk in case if no testing issues?
>
> Thanks,
> Igor
>
> Changelog:
>
> Gcc:
>
> 2014-12-19  Igor Zamyatin  <igor.zamyatin@intel.com>
>
> 	PR rtl-optimization/64081
> 	* loop-iv.c (def_pred_latch_p): New function.
> 	(latch_dominating_def): Allow specific cases with non-single
> 	definitions.
> 	(iv_get_reaching_def): Likewise.
> 	(check_complex_exit_p): New function.
> 	(check_simple_exit): Use check_complex_exit_p to allow certain cases
> 	with exits not executing on any iteration.
>
>
> Testsuite:
>
> 2014-12-19  Igor Zamyatin  <igor.zamyatin@intel.com>
>
> 	PR rtl-optimization/64081
> 	* gcc.dg/pr64081.c: New test.
>
>
> diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
> index f55cea2..d5d48f1 100644
> --- a/gcc/loop-iv.c
> +++ b/gcc/loop-iv.c
>   /* Finds the definition of REG that dominates loop latch and stores
>      it to DEF.  Returns false if there is not a single definition
> -   dominating the latch.  If REG has no definition in loop, DEF
> +   dominating the latch or all defs are same and they are on different
> +   predecessors of loop latch.  If REG has no definition in loop, DEF
>      is set to NULL and true is returned.  */
Is it really sufficient here to verify that all the defs are on latch 
predecessors, what about the case where there is a predecessor without a 
def.  How do you guarantee domination in that case?

ISTM that given the structure for the code you're writing that you'd 
want to verify that in the event of multiple definitions that all of 
them appear on immediate predecessors of the latch *and* that each 
immediate predecessor has a definition.


>
>   static bool
>   latch_dominating_def (rtx reg, df_ref *def)
>   {
>     df_ref single_rd = NULL, adef;
> -  unsigned regno = REGNO (reg);
> +  unsigned regno = REGNO (reg), def_num = 0;
>     struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (current_loop->latch);
>
>     for (adef = DF_REG_DEF_CHAIN (regno); adef; adef = DF_REF_NEXT_REG (adef))
>       {
> +      /* Initialize this to true for the very first iteration when
> +	 SINGLE_RD is NULL.  */
> +      bool def_pred_latch = true;
> +
>         if (!bitmap_bit_p (df->blocks_to_analyze, DF_REF_BBNO (adef))
>   	  || !bitmap_bit_p (&bb_info->out, DF_REF_ID (adef)))
>   	continue;
>
> -      /* More than one reaching definition.  */
> +      /* More than one reaching definition is ok in case definitions are
> +	 in predecessors of latch block and those definitions are the same.
> +	 Probably this could be relaxed and check for sub-dominance instead
> +	 predecessor.  */
>         if (single_rd)
> -	return false;
> -
> -      if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef)))
> -	return false;
> +	{
> +	  def_num++;
> +	  if (!(def_pred_latch = def_pred_latch_p (adef))
> +	      || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)),
Whitespace nit here.  Whitespace goes before the open paren for the 
function call, not after.


> @@ -351,10 +384,10 @@ latch_dominating_def (rtx reg, df_ref *def)
>   static enum iv_grd_result
>   iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
And in this routine, you appear to do both checks.  ie, each def is on 
an immediate predecessor and each immediate predecessor has a def.  Is 
there some reason why iv_get_reaching_def has the stronger check while 
latch_dominating_def does not?

jeff


^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] Fix for PR64081 in RTL loop unroller
       [not found]     ` <CAKdSQZnrxfXcUMXwNW=c0PPaFgFe6rUKhkW+2T1U8BKop8K8=A@mail.gmail.com>
@ 2015-01-13 18:38       ` Zamyatin, Igor
  2015-01-13 19:06         ` Jeff Law
  0 siblings, 1 reply; 9+ messages in thread
From: Zamyatin, Igor @ 2015-01-13 18:38 UTC (permalink / raw)
  To: GCC Patches (gcc-patches@gcc.gnu.org); +Cc: Jeff Law (law@redhat.com), ysrumyan

> 
> Is it really sufficient here to verify that all the defs are on latch predecessors,
> what about the case where there is a predecessor without a def.  How do
> you guarantee domination in that case?
> 
> ISTM that given the structure for the code you're writing that you'd want to
> verify that in the event of multiple definitions that all of them appear on
> immediate predecessors of the latch *and* that each immediate
> predecessor has a definition.

Yes, do you think it's better to check exactly immediate predecessors?

> > -
> > -      if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef)))
> > -       return false;
> > +       {
> > +         def_num++;
> > +         if (!(def_pred_latch = def_pred_latch_p (adef))
> > +             || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)),
> 
> Whitespace nit here.  Whitespace goes before the open paren for the
> function call, not after.

Thanks for catching this!

> 
> 
> > @@ -351,10 +384,10 @@ latch_dominating_def (rtx reg, df_ref *def)
> >   static enum iv_grd_result
> >   iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
> 
> And in this routine, you appear to do both checks.  ie, each def is on an
> immediate predecessor and each immediate predecessor has a def.  Is there
> some reason why iv_get_reaching_def has the stronger check while
> latch_dominating_def does not?

Looks like I was sure that latch_dominating_def always goes after iv_get_reaching_def but now I see it is not true. Will add another check in latch_dominating_def.

Thanks,
Igor

> 
> jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Fix for PR64081 in RTL loop unroller
  2015-01-13 18:38       ` Zamyatin, Igor
@ 2015-01-13 19:06         ` Jeff Law
  2015-01-15 16:55           ` Zamyatin, Igor
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Law @ 2015-01-13 19:06 UTC (permalink / raw)
  To: Zamyatin, Igor, GCC Patches (gcc-patches@gcc.gnu.org); +Cc: ysrumyan

On 01/13/15 11:01, Zamyatin, Igor wrote:
>>
>> Is it really sufficient here to verify that all the defs are on latch predecessors,
>> what about the case where there is a predecessor without a def.  How do
>> you guarantee domination in that case?
>>
>> ISTM that given the structure for the code you're writing that you'd want to
>> verify that in the event of multiple definitions that all of them appear on
>> immediate predecessors of the latch *and* that each immediate
>> predecessor has a definition.
>
> Yes, do you think it's better to check exactly immediate predecessors?
I'd use the same structure that you have in iv_get_reaching_def.  If 
there was a reasonable way to factor that test into a single function 
and call it from both places that would be even better.

Jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] Fix for PR64081 in RTL loop unroller
  2015-01-13 19:06         ` Jeff Law
@ 2015-01-15 16:55           ` Zamyatin, Igor
  2015-01-15 17:11             ` Jeff Law
  0 siblings, 1 reply; 9+ messages in thread
From: Zamyatin, Igor @ 2015-01-15 16:55 UTC (permalink / raw)
  To: Jeff Law, GCC Patches (gcc-patches@gcc.gnu.org); +Cc: ysrumyan

> 
> On 01/13/15 11:01, Zamyatin, Igor wrote:
> >>
> >> Is it really sufficient here to verify that all the defs are on latch
> >> predecessors, what about the case where there is a predecessor
> >> without a def.  How do you guarantee domination in that case?
> >>
> >> ISTM that given the structure for the code you're writing that you'd
> >> want to verify that in the event of multiple definitions that all of
> >> them appear on immediate predecessors of the latch *and* that each
> >> immediate predecessor has a definition.
> >
> > Yes, do you think it's better to check exactly immediate predecessors?
> I'd use the same structure that you have in iv_get_reaching_def.  If there
> was a reasonable way to factor that test into a single function and call it from
> both places that would be even better.

Not sure it's possible to merge DF_REG_DEF_CHAIN walk and DF_REF_CHAIN walk...

Thanks,
Igor

> 
> Jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Fix for PR64081 in RTL loop unroller
  2015-01-15 16:55           ` Zamyatin, Igor
@ 2015-01-15 17:11             ` Jeff Law
  2015-01-16 15:34               ` Zamyatin, Igor
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Law @ 2015-01-15 17:11 UTC (permalink / raw)
  To: Zamyatin, Igor, GCC Patches (gcc-patches@gcc.gnu.org); +Cc: ysrumyan

On 01/15/15 09:36, Zamyatin, Igor wrote:
>>
>> On 01/13/15 11:01, Zamyatin, Igor wrote:
>>>>
>>>> Is it really sufficient here to verify that all the defs are on latch
>>>> predecessors, what about the case where there is a predecessor
>>>> without a def.  How do you guarantee domination in that case?
>>>>
>>>> ISTM that given the structure for the code you're writing that you'd
>>>> want to verify that in the event of multiple definitions that all of
>>>> them appear on immediate predecessors of the latch *and* that each
>>>> immediate predecessor has a definition.
>>>
>>> Yes, do you think it's better to check exactly immediate predecessors?
>> I'd use the same structure that you have in iv_get_reaching_def.  If there
>> was a reasonable way to factor that test into a single function and call it from
>> both places that would be even better.
>
> Not sure it's possible to merge DF_REG_DEF_CHAIN walk and DF_REF_CHAIN walk...
OK.  Just use the same overall structure if we can't pull the test out 
into a single function that could be called from both places.

jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] Fix for PR64081 in RTL loop unroller
  2015-01-15 17:11             ` Jeff Law
@ 2015-01-16 15:34               ` Zamyatin, Igor
  2015-01-16 16:54                 ` Jeff Law
  0 siblings, 1 reply; 9+ messages in thread
From: Zamyatin, Igor @ 2015-01-16 15:34 UTC (permalink / raw)
  To: Jeff Law, GCC Patches (gcc-patches@gcc.gnu.org); +Cc: ysrumyan

> > Not sure it's possible to merge DF_REG_DEF_CHAIN walk and
> DF_REF_CHAIN walk...
> OK.  Just use the same overall structure if we can't pull the test out into a
> single function that could be called from both places.
> 

Thanks, is updated patch ok for trunk?

Igor


Changelog:

gcc

2015-01-16  Igor Zamyatin  <igor.zamyatin@intel.com>

	PR rtl-optimization/64081
	* loop-iv.c (def_pred_latch_p): New function.
	(latch_dominating_def): Allow specific cases with non-single
	definitions.
	(iv_get_reaching_def): Likewise.
	(check_complex_exit_p): New function.
	(check_simple_exit): Use check_complex_exit_p to allow certain cases
	with exits not executing on any iteration.

testsuite

2014-01-16  Igor Zamyatin  <igor.zamyatin@intel.com>

	PR rtl-optimization/64081
	* gcc.dg/pr64081.c: New test.


diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
index f55cea2..674bd0b 100644
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
@@ -314,34 +314,71 @@ iv_analysis_loop_init (struct loop *loop)
   check_iv_ref_table_size ();
 }
 
+/* Checks that def is in an immediate predecessor of the latch block.  */
+
+static bool
+def_pred_latch_p (df_ref d_ref)
+{
+  basic_block bb = DF_REF_BB (d_ref);
+  edge_iterator ei;
+  edge e;
+
+  FOR_EACH_EDGE (e, ei, current_loop->latch->preds)
+    {
+      if (e->src == bb)
+	return true;
+    }
+  return false;
+}
+
 /* Finds the definition of REG that dominates loop latch and stores
    it to DEF.  Returns false if there is not a single definition
-   dominating the latch.  If REG has no definition in loop, DEF
+   dominating the latch or all defs are same and they are on different
+   predecessors of loop latch.  If REG has no definition in loop, DEF
    is set to NULL and true is returned.  */
 
 static bool
 latch_dominating_def (rtx reg, df_ref *def)
 {
   df_ref single_rd = NULL, adef;
-  unsigned regno = REGNO (reg);
+  unsigned regno = REGNO (reg), def_num = 0;
   struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (current_loop->latch);
 
   for (adef = DF_REG_DEF_CHAIN (regno); adef; adef = DF_REF_NEXT_REG (adef))
     {
+      /* Initialize this to true for the very first iteration when
+	 SINGLE_RD is NULL.  */
+      bool def_pred_latch = true;
+
       if (!bitmap_bit_p (df->blocks_to_analyze, DF_REF_BBNO (adef))
 	  || !bitmap_bit_p (&bb_info->out, DF_REF_ID (adef)))
 	continue;
 
-      /* More than one reaching definition.  */
+      /* More than one reaching definition is ok in case definitions are
+	 in predecessors of latch block and those definitions are the same.
+	 Probably this could be relaxed and check for sub-dominance instead
+	 predecessor.  */
+      def_num++;
       if (single_rd)
-	return false;
-
-      if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef)))
-	return false;
+	{
+	  if (!(def_pred_latch = def_pred_latch_p (adef))
+	      || !rtx_equal_p (PATTERN (DF_REF_INSN (single_rd)),
+			       PATTERN (DF_REF_INSN (adef))))
+	    return false;
+	}
 
       single_rd = adef;
     }
 
+  /* If we have single definition it has to be excuted on each iteration.  */
+  if ((def_num == 1) && single_rd
+      && !just_once_each_iteration_p (current_loop, DF_REF_BB (single_rd)))
+    return false;
+
+  /* Make sure all preds contain definitions.  */
+  if (def_num != EDGE_COUNT (current_loop->latch->preds))
+    return false;
+
   *def = single_rd;
   return true;
 }
@@ -351,10 +388,10 @@ latch_dominating_def (rtx reg, df_ref *def)
 static enum iv_grd_result
 iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
 {
-  df_ref use, adef;
+  df_ref use, adef = NULL;
   basic_block def_bb, use_bb;
   rtx_insn *def_insn;
-  bool dom_p;
+  bool dom_p, dom_latch_p = false;
 
   *def = NULL;
   if (!simple_reg_p (reg))
@@ -369,11 +406,26 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
   if (!DF_REF_CHAIN (use))
     return GRD_INVARIANT;
 
-  /* More than one reaching def.  */
+  adef = DF_REF_CHAIN (use)->ref;
+  /* Having more than one reaching def is ok once all defs are in blocks which
+     are latch's predecessors.  */
   if (DF_REF_CHAIN (use)->next)
-    return GRD_INVALID;
+    {
+      df_link* defs;
+      unsigned int def_num = 0;
 
-  adef = DF_REF_CHAIN (use)->ref;
+      for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
+	{
+	  if (!def_pred_latch_p (defs->ref))
+	    return GRD_INVALID;
+	  def_num++;
+	}
+      /* Make sure all preds contain definitions.  */
+      if (def_num != EDGE_COUNT (current_loop->latch->preds))
+	return GRD_INVALID;
+
+      dom_latch_p = true;
+    }
 
   /* We do not handle setting only part of the register.  */
   if (DF_REF_FLAGS (adef) & DF_REF_READ_WRITE)
@@ -396,8 +448,8 @@ iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def)
 
   /* The definition does not dominate the use.  This is still OK if
      this may be a use of a biv, i.e. if the def_bb dominates loop
-     latch.  */
-  if (just_once_each_iteration_p (current_loop, def_bb))
+     latch or all defs are in latch's predecessors.  */
+  if (dom_latch_p || just_once_each_iteration_p (current_loop, def_bb))
     return GRD_MAYBE_BIV;
 
   return GRD_INVALID;
@@ -2910,6 +2962,49 @@ fail:
   return;
 }
 
+/* Check whether exit is not simple but still good for further analysis.
+   Looks like such loops mostly are a result of jump threading.  */
+
+static bool
+check_complex_exit_p (struct loop* loop, basic_block bb)
+{
+  edge e;
+  basic_block pred, exit;
+
+  if (EDGE_COUNT (bb->preds) > 1)
+    return false;
+
+  e = EDGE_PRED (bb, 0);
+
+  pred = e->src;
+  if (EDGE_COUNT (pred->succs) != 2)
+    return false;
+
+  /* Predecessor must be tested (at least) once during any iteration.  */
+  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, pred))
+    return false;
+
+  if (EDGE_SUCC (pred, 0)->dest == bb)
+    exit = EDGE_SUCC (pred, 1)->dest;
+  else
+    exit = EDGE_SUCC (pred, 0)->dest;
+
+  /* Check that EXIT is really loop exit.  */
+  if (flow_bb_inside_loop_p (loop, exit))
+    {
+      edge_iterator eei;
+      edge ee;
+
+      FOR_EACH_EDGE (ee, eei, exit->succs)
+	{
+	  if (!flow_bb_inside_loop_p (loop, ee->dest))
+	    return true;
+	}
+    }
+  return false;
+
+}
+
 /* Checks whether E is a simple exit from LOOP and stores its description
    into DESC.  */
 
@@ -2929,7 +3024,8 @@ check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc)
     return;
 
   /* It must be tested (at least) once during any iteration.  */
-  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
+  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb)
+      && !check_complex_exit_p (loop, exit_bb))
     return;
 
   /* It must end in a simple conditional jump.  */
diff --git a/gcc/testsuite/gcc.dg/pr64081.c b/gcc/testsuite/gcc.dg/pr64081.c
new file mode 100644
index 0000000..6151d00
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr64081.c
@@ -0,0 +1,41 @@
+/* PR rtl-optimization/64081 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-loops -fdump-rtl-loop2_unroll" } */
+
+long token;
+long *data;
+long *arr1;
+long *arr2;
+
+int main (int argc, const char* argv[])
+{
+  unsigned long i;
+  static long pos, start, count;
+  static int dir;
+
+  pos = 0;
+  dir = 1;
+
+  for (i = 0; i < argc; i++)
+    {
+      start = pos;
+      for (count = 0; count <= 400; count++)
+	{
+	  if (token == data[pos])
+	    break;
+	  if (dir == 1)
+	    pos = arr1[pos];
+	  else
+	    pos = arr2[pos];
+	  if (pos == -1)
+	    {
+	      pos = start;
+	      dir = !dir;
+	    }
+	}
+    }
+  return pos + dir;
+}
+
+/* { dg-final { scan-rtl-dump "loop unrolled 7 times" "loop2_unroll" } } */
+/* { dg-final { cleanup-rtl-dump "loop*" } } */



> jeff


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Fix for PR64081 in RTL loop unroller
  2015-01-16 15:34               ` Zamyatin, Igor
@ 2015-01-16 16:54                 ` Jeff Law
  0 siblings, 0 replies; 9+ messages in thread
From: Jeff Law @ 2015-01-16 16:54 UTC (permalink / raw)
  To: Zamyatin, Igor, GCC Patches (gcc-patches@gcc.gnu.org); +Cc: ysrumyan

On 01/16/15 08:16, Zamyatin, Igor wrote:
>>> Not sure it's possible to merge DF_REG_DEF_CHAIN walk and
>> DF_REF_CHAIN walk...
>> OK.  Just use the same overall structure if we can't pull the test out into a
>> single function that could be called from both places.
>>
>
> Thanks, is updated patch ok for trunk?
>
> Igor
>
>
> Changelog:
>
> gcc
>
> 2015-01-16  Igor Zamyatin  <igor.zamyatin@intel.com>
>
> 	PR rtl-optimization/64081
> 	* loop-iv.c (def_pred_latch_p): New function.
> 	(latch_dominating_def): Allow specific cases with non-single
> 	definitions.
> 	(iv_get_reaching_def): Likewise.
> 	(check_complex_exit_p): New function.
> 	(check_simple_exit): Use check_complex_exit_p to allow certain cases
> 	with exits not executing on any iteration.
>
> testsuite
>
> 2014-01-16  Igor Zamyatin  <igor.zamyatin@intel.com>
>
> 	PR rtl-optimization/64081
> 	* gcc.dg/pr64081.c: New test.
Just a few nits below.  Approved with the nits fixed.


> +/* Checks that def is in an immediate predecessor of the latch block.  */
> +
> +static bool
> +def_pred_latch_p (df_ref d_ref)
Use the actual parameter name in the comment and put it in caps, mention 
the return values.  Something like this:

/* Return true if D_REF is defined in an immediate predecessor of the
    current loop's latch block.  Otherwise return false.  */

>
> +  /* If we have single definition it has to be excuted on each iteration.  */
s/excuted/executed/

> +/* Check whether exit is not simple but still good for further analysis.
> +   Looks like such loops mostly are a result of jump threading.  */
> +
> +static bool
> +check_complex_exit_p (struct loop* loop, basic_block bb)
/* Return true if LOOP has a complex exit, but is still good for further
    analysis.  Return false otherwise.  BB is LOOP's exit block.  */


With those comment fixes, this is OK for the trunk.

Thanks,
Jeff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] Fix for PR64081 in RTL loop unroller
@ 2015-01-19 22:38 David Edelsohn
  0 siblings, 0 replies; 9+ messages in thread
From: David Edelsohn @ 2015-01-19 22:38 UTC (permalink / raw)
  To: Zamyatin, Igor; +Cc: GCC Patches, Jeffrey Law, ysrumyan

Unfortunately this patch broke bootstrap on AIX (PPC32)

PR bootstrap/64676

- David

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-01-19 21:58 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-19 10:24 [PATCH] Fix for PR64081 in RTL loop unroller Zamyatin, Igor
2015-01-09  6:09 ` Jeff Law
     [not found]   ` <CAEoMCqTzzMz-qO5x1q=5Htjxe1Von02iB_BpW7umXyQy_nVxtw@mail.gmail.com>
     [not found]     ` <CAKdSQZnrxfXcUMXwNW=c0PPaFgFe6rUKhkW+2T1U8BKop8K8=A@mail.gmail.com>
2015-01-13 18:38       ` Zamyatin, Igor
2015-01-13 19:06         ` Jeff Law
2015-01-15 16:55           ` Zamyatin, Igor
2015-01-15 17:11             ` Jeff Law
2015-01-16 15:34               ` Zamyatin, Igor
2015-01-16 16:54                 ` Jeff Law
2015-01-19 22:38 David Edelsohn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).