public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, RFC] PR49749 biased reassociation for accumulator patterns
@ 2011-07-27 15:58 William J. Schmidt
  2011-07-27 16:20 ` Michael Matz
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: William J. Schmidt @ 2011-07-27 15:58 UTC (permalink / raw)
  To: gcc-patches; +Cc: bergner

This is a draft patch that biases the reassociation machinery so that
each iteration of an accumulator pattern in a loop is independent of the
other iterations.  This addresses a problem identified as an accidental
side effect of the bug observed in PR tree-optimization/49749.  This
patch reverses a substantial performance loss to 410.bwaves in cpu2006.

I've restricted the bias to take place only for phi results that are
identified as true accumulators within innermost loops.  Currently there
is no restriction on the size or complexity of the loop, otherwise.

I've bootstrapped and regression-tested this on powerpc64-linux with no
new failures.  I'm still doing performance runs to assess the results,
and may still need to tweak this.  It's close, though, and since I have
upcoming vacation, I wanted to post this for comments now in hopes of
wrapping this up by the end of the week.  Please let me know what you
think.

Thanks,
Bill


2011-07-27  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/49749
	* tree-ssa-reassoc.c (get_rank): Add forward declaration.
	(PHI_LOOP_BIAS): New macro.
	(phi_rank): New function.
	(phi_propagation_rank): Likewise.
	(propagate_rank): Likewise.
	(get_rank): Add calls to phi_rank and propagate_rank.
	
Index: gcc/tree-ssa-reassoc.c
===================================================================
--- gcc/tree-ssa-reassoc.c	(revision 176585)
+++ gcc/tree-ssa-reassoc.c	(working copy)
@@ -190,7 +190,118 @@ static long *bb_rank;
 /* Operand->rank hashtable.  */
 static struct pointer_map_t *operand_rank;
 
+/* Forward decls.  */
+static long get_rank (tree);
 
+
+/* Bias amount for loop-carried phis.  We want this to be larger than
+   the depth of any reassociation tree we can see, but not larger than
+   the rank difference between two blocks.  */
+#define PHI_LOOP_BIAS (1 << 15)
+
+/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
+   an innermost loop, and the phi has only a single use which is inside
+   the loop, then the rank is the block rank of the loop latch plus an
+   extra bias for the loop-carried dependence.  This causes expressions
+   calculated into an accumulator variable to be independent for each
+   iteration of the loop.  If STMT is some other phi, the rank is the
+   block rank of its containing block.  */
+static long
+phi_rank (gimple stmt)
+{
+  basic_block bb = gimple_bb (stmt);
+  struct loop *father = bb->loop_father;
+  tree res;
+  unsigned i;
+  use_operand_p use;
+  gimple use_stmt;
+
+  /* We only care about real loops (those with a latch).  */
+  if (!father->latch)
+    return bb_rank[bb->index];
+
+  /* Interesting phis must be in headers of innermost loops.  */
+  if (bb != father->header
+      || father->inner)
+    return bb_rank[bb->index];
+
+  /* Ignore virtual SSA_NAMEs.  */
+  res = gimple_phi_result (stmt);
+  if (!is_gimple_reg (SSA_NAME_VAR (res)))
+    return bb_rank[bb->index];
+
+  /* The phi definition must have a single use, and that use must be
+     within the loop.  Otherwise this isn't an accumulator pattern.  */
+  if (!single_imm_use (res, &use, &use_stmt)
+      || gimple_bb (use_stmt)->loop_father != father)
+    return bb_rank[bb->index];
+
+  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
+  for (i = 0; i < gimple_phi_num_args (stmt); i++)
+    {
+      tree arg = gimple_phi_arg_def (stmt, i);
+      if (TREE_CODE (arg) == SSA_NAME
+	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
+	{
+	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
+	  if (gimple_bb (def_stmt)->loop_father == father)
+	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
+	}
+    }
+
+  /* Must be an uninteresting phi.  */
+  return bb_rank[bb->index];
+}
+
+/* If EXP is an SSA_NAME defined by a PHI statement that represents a
+   loop-carried dependence of an innermost loop, return the block rank
+   of the defining PHI statement.  Otherwise return zero.
+
+   The motivation for this is that we can't propagate the biased rank
+   of the loop-carried phi, as this defeats the purpose of the bias.
+   However, the rank of a value that depends on the result of a loop-
+   carried phi should still be higher than the rank of a value that
+   depends on values from more distant blocks.  */
+static long
+phi_propagation_rank (tree exp)
+{
+  gimple phi_stmt;
+  long block_rank;
+
+  if (TREE_CODE (exp) != SSA_NAME
+      || SSA_NAME_IS_DEFAULT_DEF (exp))
+    return 0;
+
+  phi_stmt = SSA_NAME_DEF_STMT (exp);
+
+  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
+    return 0;
+
+  /* Non-loop-carried phis have block rank.  Loop-carried phis have
+     an additional bias added in.  If this phi doesn't have block rank,
+     it's biased and should not be propagated.  */
+  block_rank = bb_rank[gimple_bb (phi_stmt)->index];
+
+  if (phi_rank (phi_stmt) != block_rank)
+    return block_rank;
+
+  return 0;
+}
+
+/* Return the maximum of RANK and the rank that should be propagated
+   from expression OP.  For most operands, this is just the rank of OP.
+   For loop-carried phis, the value is obtained from phi_propagation_rank.  */
+static long
+propagate_rank (long rank, tree op)
+{
+  long phi_prop_rank = phi_propagation_rank (op);
+
+  if (phi_prop_rank)
+    return MAX (rank, phi_prop_rank);
+
+  return MAX (rank, get_rank (op));
+}
+
 /* Look up the operand rank structure for expression E.  */
 
 static inline long
@@ -232,11 +343,38 @@ get_rank (tree e)
      I make no claims that this is optimal, however, it gives good
      results.  */
 
+  /* We make an exception to the normal ranking system to break
+     dependences of accumulator variables in loops.  Suppose we
+     have a simple one-block loop containing:
+
+       x_1 = phi(x_0, x_2)
+       b = a + x_1
+       c = b + d
+       x_2 = c + e
+
+     As shown, each iteration of the calculation into x is fully
+     dependent upon the iteration before it.  We would prefer to
+     see this in the form:
+
+       x_1 = phi(x_0, x_2)
+       b = a + d
+       c = b + e
+       x_2 = c + x_1
+
+     If the loop is unrolled, the calculations of b and c from
+     different iterations can be interleaved.
+
+     To obtain this result during reassociation, we bias the rank
+     of the phi definition x_1 upward, when it is recognized as an
+     accumulator pattern.  The artificial rank causes it to be 
+     added last, providing the desired independence.  */
+
   if (TREE_CODE (e) == SSA_NAME)
     {
       gimple stmt;
       long rank;
       int i, n;
+      tree op;
 
       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
 	  && SSA_NAME_IS_DEFAULT_DEF (e))
@@ -246,6 +384,9 @@ get_rank (tree e)
       if (gimple_bb (stmt) == NULL)
 	return 0;
 
+      if (gimple_code (stmt) == GIMPLE_PHI)
+	return phi_rank (stmt);
+
       if (!is_gimple_assign (stmt)
 	  || gimple_vdef (stmt))
 	return bb_rank[gimple_bb (stmt)->index];
@@ -255,20 +396,25 @@ get_rank (tree e)
       if (rank != -1)
 	return rank;
 
-      /* Otherwise, find the maximum rank for the operands, or the bb
-	 rank, whichever is less.   */
+      /* Otherwise, find the maximum rank for the operands.  As an
+	 exception, remove the bias from loop-carried phis when propagating
+	 the rank so that dependent operations are not also biased.  */
       rank = 0;
       if (gimple_assign_single_p (stmt))
 	{
 	  tree rhs = gimple_assign_rhs1 (stmt);
 	  n = TREE_OPERAND_LENGTH (rhs);
 	  if (n == 0)
-	    rank = MAX (rank, get_rank (rhs));
+	    rank = propagate_rank (rank, rhs);
 	  else
 	    {
 	      for (i = 0; i < n; i++)
-		if (TREE_OPERAND (rhs, i))
-		  rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
+		{
+		  op = TREE_OPERAND (rhs, i);
+
+		  if (op != NULL_TREE)
+		    rank = propagate_rank (rank, op);
+		}
 	    }
 	}
       else
@@ -276,8 +422,9 @@ get_rank (tree e)
 	  n = gimple_num_ops (stmt);
 	  for (i = 1; i < n; i++)
 	    {
-	      gcc_assert (gimple_op (stmt, i));
-	      rank = MAX(rank, get_rank (gimple_op (stmt, i)));
+	      op = gimple_op (stmt, i);
+	      gcc_assert (op);
+	      rank = propagate_rank (rank, op);
 	    }
 	}
 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, RFC] PR49749 biased reassociation for accumulator patterns
  2011-07-27 15:58 [PATCH, RFC] PR49749 biased reassociation for accumulator patterns William J. Schmidt
@ 2011-07-27 16:20 ` Michael Matz
  2011-07-28 11:05 ` Richard Guenther
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Michael Matz @ 2011-07-27 16:20 UTC (permalink / raw)
  To: William J. Schmidt; +Cc: gcc-patches, bergner

Hi,

On Wed, 27 Jul 2011, William J. Schmidt wrote:

> +static long
> +propagate_rank (long rank, tree op)
> +{
> +  long phi_prop_rank = phi_propagation_rank (op);
> +
> +  if (phi_prop_rank)
> +    return MAX (rank, phi_prop_rank);
> +
> +  return MAX (rank, get_rank (op));
> +}

I know it's pre-existing code, but as you're touching it anyway: function 
calls in min/max macros usually are a bad idea due to multiple 
evaluations.  If nothing else it's just wasted work.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, RFC] PR49749 biased reassociation for accumulator patterns
  2011-07-27 15:58 [PATCH, RFC] PR49749 biased reassociation for accumulator patterns William J. Schmidt
  2011-07-27 16:20 ` Michael Matz
@ 2011-07-28 11:05 ` Richard Guenther
  2011-07-29 15:27 ` William J. Schmidt
  2011-07-29 17:27 ` William J. Schmidt
  3 siblings, 0 replies; 6+ messages in thread
From: Richard Guenther @ 2011-07-28 11:05 UTC (permalink / raw)
  To: William J. Schmidt; +Cc: gcc-patches, bergner

On Wed, Jul 27, 2011 at 5:11 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This is a draft patch that biases the reassociation machinery so that
> each iteration of an accumulator pattern in a loop is independent of the
> other iterations.  This addresses a problem identified as an accidental
> side effect of the bug observed in PR tree-optimization/49749.  This
> patch reverses a substantial performance loss to 410.bwaves in cpu2006.
>
> I've restricted the bias to take place only for phi results that are
> identified as true accumulators within innermost loops.  Currently there
> is no restriction on the size or complexity of the loop, otherwise.
>
> I've bootstrapped and regression-tested this on powerpc64-linux with no
> new failures.  I'm still doing performance runs to assess the results,
> and may still need to tweak this.  It's close, though, and since I have
> upcoming vacation, I wanted to post this for comments now in hopes of
> wrapping this up by the end of the week.  Please let me know what you
> think.

The patch looks sensible to me, so if it shows good results in performance
testing it's ok for trunk with Michas comments implemneted.

Thanks,
Richard.

> Thanks,
> Bill
>
>
> 2011-07-27  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/49749
>        * tree-ssa-reassoc.c (get_rank): Add forward declaration.
>        (PHI_LOOP_BIAS): New macro.
>        (phi_rank): New function.
>        (phi_propagation_rank): Likewise.
>        (propagate_rank): Likewise.
>        (get_rank): Add calls to phi_rank and propagate_rank.
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c      (revision 176585)
> +++ gcc/tree-ssa-reassoc.c      (working copy)
> @@ -190,7 +190,118 @@ static long *bb_rank;
>  /* Operand->rank hashtable.  */
>  static struct pointer_map_t *operand_rank;
>
> +/* Forward decls.  */
> +static long get_rank (tree);
>
> +
> +/* Bias amount for loop-carried phis.  We want this to be larger than
> +   the depth of any reassociation tree we can see, but not larger than
> +   the rank difference between two blocks.  */
> +#define PHI_LOOP_BIAS (1 << 15)
> +
> +/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
> +   an innermost loop, and the phi has only a single use which is inside
> +   the loop, then the rank is the block rank of the loop latch plus an
> +   extra bias for the loop-carried dependence.  This causes expressions
> +   calculated into an accumulator variable to be independent for each
> +   iteration of the loop.  If STMT is some other phi, the rank is the
> +   block rank of its containing block.  */
> +static long
> +phi_rank (gimple stmt)
> +{
> +  basic_block bb = gimple_bb (stmt);
> +  struct loop *father = bb->loop_father;
> +  tree res;
> +  unsigned i;
> +  use_operand_p use;
> +  gimple use_stmt;
> +
> +  /* We only care about real loops (those with a latch).  */
> +  if (!father->latch)
> +    return bb_rank[bb->index];
> +
> +  /* Interesting phis must be in headers of innermost loops.  */
> +  if (bb != father->header
> +      || father->inner)
> +    return bb_rank[bb->index];
> +
> +  /* Ignore virtual SSA_NAMEs.  */
> +  res = gimple_phi_result (stmt);
> +  if (!is_gimple_reg (SSA_NAME_VAR (res)))
> +    return bb_rank[bb->index];
> +
> +  /* The phi definition must have a single use, and that use must be
> +     within the loop.  Otherwise this isn't an accumulator pattern.  */
> +  if (!single_imm_use (res, &use, &use_stmt)
> +      || gimple_bb (use_stmt)->loop_father != father)
> +    return bb_rank[bb->index];
> +
> +  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
> +  for (i = 0; i < gimple_phi_num_args (stmt); i++)
> +    {
> +      tree arg = gimple_phi_arg_def (stmt, i);
> +      if (TREE_CODE (arg) == SSA_NAME
> +         && !SSA_NAME_IS_DEFAULT_DEF (arg))
> +       {
> +         gimple def_stmt = SSA_NAME_DEF_STMT (arg);
> +         if (gimple_bb (def_stmt)->loop_father == father)
> +           return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
> +       }
> +    }
> +
> +  /* Must be an uninteresting phi.  */
> +  return bb_rank[bb->index];
> +}
> +
> +/* If EXP is an SSA_NAME defined by a PHI statement that represents a
> +   loop-carried dependence of an innermost loop, return the block rank
> +   of the defining PHI statement.  Otherwise return zero.
> +
> +   The motivation for this is that we can't propagate the biased rank
> +   of the loop-carried phi, as this defeats the purpose of the bias.
> +   However, the rank of a value that depends on the result of a loop-
> +   carried phi should still be higher than the rank of a value that
> +   depends on values from more distant blocks.  */
> +static long
> +phi_propagation_rank (tree exp)
> +{
> +  gimple phi_stmt;
> +  long block_rank;
> +
> +  if (TREE_CODE (exp) != SSA_NAME
> +      || SSA_NAME_IS_DEFAULT_DEF (exp))
> +    return 0;
> +
> +  phi_stmt = SSA_NAME_DEF_STMT (exp);
> +
> +  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
> +    return 0;
> +
> +  /* Non-loop-carried phis have block rank.  Loop-carried phis have
> +     an additional bias added in.  If this phi doesn't have block rank,
> +     it's biased and should not be propagated.  */
> +  block_rank = bb_rank[gimple_bb (phi_stmt)->index];
> +
> +  if (phi_rank (phi_stmt) != block_rank)
> +    return block_rank;
> +
> +  return 0;
> +}
> +
> +/* Return the maximum of RANK and the rank that should be propagated
> +   from expression OP.  For most operands, this is just the rank of OP.
> +   For loop-carried phis, the value is obtained from phi_propagation_rank.  */
> +static long
> +propagate_rank (long rank, tree op)
> +{
> +  long phi_prop_rank = phi_propagation_rank (op);
> +
> +  if (phi_prop_rank)
> +    return MAX (rank, phi_prop_rank);
> +
> +  return MAX (rank, get_rank (op));
> +}
> +
>  /* Look up the operand rank structure for expression E.  */
>
>  static inline long
> @@ -232,11 +343,38 @@ get_rank (tree e)
>      I make no claims that this is optimal, however, it gives good
>      results.  */
>
> +  /* We make an exception to the normal ranking system to break
> +     dependences of accumulator variables in loops.  Suppose we
> +     have a simple one-block loop containing:
> +
> +       x_1 = phi(x_0, x_2)
> +       b = a + x_1
> +       c = b + d
> +       x_2 = c + e
> +
> +     As shown, each iteration of the calculation into x is fully
> +     dependent upon the iteration before it.  We would prefer to
> +     see this in the form:
> +
> +       x_1 = phi(x_0, x_2)
> +       b = a + d
> +       c = b + e
> +       x_2 = c + x_1
> +
> +     If the loop is unrolled, the calculations of b and c from
> +     different iterations can be interleaved.
> +
> +     To obtain this result during reassociation, we bias the rank
> +     of the phi definition x_1 upward, when it is recognized as an
> +     accumulator pattern.  The artificial rank causes it to be
> +     added last, providing the desired independence.  */
> +
>   if (TREE_CODE (e) == SSA_NAME)
>     {
>       gimple stmt;
>       long rank;
>       int i, n;
> +      tree op;
>
>       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
>          && SSA_NAME_IS_DEFAULT_DEF (e))
> @@ -246,6 +384,9 @@ get_rank (tree e)
>       if (gimple_bb (stmt) == NULL)
>        return 0;
>
> +      if (gimple_code (stmt) == GIMPLE_PHI)
> +       return phi_rank (stmt);
> +
>       if (!is_gimple_assign (stmt)
>          || gimple_vdef (stmt))
>        return bb_rank[gimple_bb (stmt)->index];
> @@ -255,20 +396,25 @@ get_rank (tree e)
>       if (rank != -1)
>        return rank;
>
> -      /* Otherwise, find the maximum rank for the operands, or the bb
> -        rank, whichever is less.   */
> +      /* Otherwise, find the maximum rank for the operands.  As an
> +        exception, remove the bias from loop-carried phis when propagating
> +        the rank so that dependent operations are not also biased.  */
>       rank = 0;
>       if (gimple_assign_single_p (stmt))
>        {
>          tree rhs = gimple_assign_rhs1 (stmt);
>          n = TREE_OPERAND_LENGTH (rhs);
>          if (n == 0)
> -           rank = MAX (rank, get_rank (rhs));
> +           rank = propagate_rank (rank, rhs);
>          else
>            {
>              for (i = 0; i < n; i++)
> -               if (TREE_OPERAND (rhs, i))
> -                 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
> +               {
> +                 op = TREE_OPERAND (rhs, i);
> +
> +                 if (op != NULL_TREE)
> +                   rank = propagate_rank (rank, op);
> +               }
>            }
>        }
>       else
> @@ -276,8 +422,9 @@ get_rank (tree e)
>          n = gimple_num_ops (stmt);
>          for (i = 1; i < n; i++)
>            {
> -             gcc_assert (gimple_op (stmt, i));
> -             rank = MAX(rank, get_rank (gimple_op (stmt, i)));
> +             op = gimple_op (stmt, i);
> +             gcc_assert (op);
> +             rank = propagate_rank (rank, op);
>            }
>        }
>
>
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, RFC] PR49749 biased reassociation for accumulator patterns
  2011-07-27 15:58 [PATCH, RFC] PR49749 biased reassociation for accumulator patterns William J. Schmidt
  2011-07-27 16:20 ` Michael Matz
  2011-07-28 11:05 ` Richard Guenther
@ 2011-07-29 15:27 ` William J. Schmidt
  2011-07-29 17:27 ` William J. Schmidt
  3 siblings, 0 replies; 6+ messages in thread
From: William J. Schmidt @ 2011-07-29 15:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: bergner

I found a handful of degradations with this patch from an earlier test
version, which demonstrate the incorrectness of this comment:

On Wed, 2011-07-27 at 10:11 -0500, William J. Schmidt wrote:

> +   However, the rank of a value that depends on the result of a loop-
> +   carried phi should still be higher than the rank of a value that
> +   depends on values from more distant blocks.  */

On further review, it was smarter to treat the phi's rank as zero for
propagation purposes.  (Among other things, an expression of the form
"phi + constant" gets a ludicrously high rank otherwise.)

Otherwise, things looked good.  I'm testing a revised version of the
patch with the change in propagation rules.  I hope to post it shortly,
assuming the numbers come out as I expect.

Thanks,
Bill

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, RFC] PR49749 biased reassociation for accumulator patterns
  2011-07-27 15:58 [PATCH, RFC] PR49749 biased reassociation for accumulator patterns William J. Schmidt
                   ` (2 preceding siblings ...)
  2011-07-29 15:27 ` William J. Schmidt
@ 2011-07-29 17:27 ` William J. Schmidt
  2011-07-30 13:17   ` Richard Guenther
  3 siblings, 1 reply; 6+ messages in thread
From: William J. Schmidt @ 2011-07-29 17:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: bergner

Here is the final version of the reassociation patch.  There are two
differences from the version I published on 7/27.  I removed the
function call from within the MAX macro per Michael's comment, and I
changed the propagation of the rank of loop-carried phis to be zero.
This involved a small change to propagate_rank, and re-casting
phi_propagation_rank to a predicate function loop_carried_phi.

Performance numbers look good, with some nice gains and no significant
regressions for CPU2006 on powerpc64-linux.  Bootstrapped and regression
tested on powerpc64-linux with no regressions.

Ok for trunk?

Thanks,
Bill


2011-07-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/49749
	* tree-ssa-reassoc.c (get_rank): New forward declaration.
	(PHI_LOOP_BIAS): New macro.
	(phi_rank): New function.
	(loop_carried_phi): Likewise.
	(propagate_rank): Likewise.
	(get_rank): Add calls to phi_rank and propagate_rank.
	
Index: gcc/tree-ssa-reassoc.c
===================================================================
--- gcc/tree-ssa-reassoc.c	(revision 176585)
+++ gcc/tree-ssa-reassoc.c	(working copy)
@@ -190,7 +190,115 @@ static long *bb_rank;
 /* Operand->rank hashtable.  */
 static struct pointer_map_t *operand_rank;
 
+/* Forward decls.  */
+static long get_rank (tree);
 
+
+/* Bias amount for loop-carried phis.  We want this to be larger than
+   the depth of any reassociation tree we can see, but not larger than
+   the rank difference between two blocks.  */
+#define PHI_LOOP_BIAS (1 << 15)
+
+/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
+   an innermost loop, and the phi has only a single use which is inside
+   the loop, then the rank is the block rank of the loop latch plus an
+   extra bias for the loop-carried dependence.  This causes expressions
+   calculated into an accumulator variable to be independent for each
+   iteration of the loop.  If STMT is some other phi, the rank is the
+   block rank of its containing block.  */
+static long
+phi_rank (gimple stmt)
+{
+  basic_block bb = gimple_bb (stmt);
+  struct loop *father = bb->loop_father;
+  tree res;
+  unsigned i;
+  use_operand_p use;
+  gimple use_stmt;
+
+  /* We only care about real loops (those with a latch).  */
+  if (!father->latch)
+    return bb_rank[bb->index];
+
+  /* Interesting phis must be in headers of innermost loops.  */
+  if (bb != father->header
+      || father->inner)
+    return bb_rank[bb->index];
+
+  /* Ignore virtual SSA_NAMEs.  */
+  res = gimple_phi_result (stmt);
+  if (!is_gimple_reg (SSA_NAME_VAR (res)))
+    return bb_rank[bb->index];
+
+  /* The phi definition must have a single use, and that use must be
+     within the loop.  Otherwise this isn't an accumulator pattern.  */
+  if (!single_imm_use (res, &use, &use_stmt)
+      || gimple_bb (use_stmt)->loop_father != father)
+    return bb_rank[bb->index];
+
+  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
+  for (i = 0; i < gimple_phi_num_args (stmt); i++)
+    {
+      tree arg = gimple_phi_arg_def (stmt, i);
+      if (TREE_CODE (arg) == SSA_NAME
+	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
+	{
+	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
+	  if (gimple_bb (def_stmt)->loop_father == father)
+	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
+	}
+    }
+
+  /* Must be an uninteresting phi.  */
+  return bb_rank[bb->index];
+}
+
+/* If EXP is an SSA_NAME defined by a PHI statement that represents a
+   loop-carried dependence of an innermost loop, return TRUE; else
+   return FALSE.  */
+static bool
+loop_carried_phi (tree exp)
+{
+  gimple phi_stmt;
+  long block_rank;
+
+  if (TREE_CODE (exp) != SSA_NAME
+      || SSA_NAME_IS_DEFAULT_DEF (exp))
+    return false;
+
+  phi_stmt = SSA_NAME_DEF_STMT (exp);
+
+  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
+    return false;
+
+  /* Non-loop-carried phis have block rank.  Loop-carried phis have
+     an additional bias added in.  If this phi doesn't have block rank,
+     it's biased and should not be propagated.  */
+  block_rank = bb_rank[gimple_bb (phi_stmt)->index];
+
+  if (phi_rank (phi_stmt) != block_rank)
+    return true;
+
+  return false;
+}
+
+/* Return the maximum of RANK and the rank that should be propagated
+   from expression OP.  For most operands, this is just the rank of OP.
+   For loop-carried phis, the value is zero to avoid undoing the bias
+   in favor of the phi.  */
+static long
+propagate_rank (long rank, tree op)
+{
+  long op_rank;
+
+  if (loop_carried_phi (op))
+    return rank;
+
+  op_rank = get_rank (op);
+
+  return MAX (rank, op_rank);
+}
+
 /* Look up the operand rank structure for expression E.  */
 
 static inline long
@@ -232,11 +340,38 @@ get_rank (tree e)
      I make no claims that this is optimal, however, it gives good
      results.  */
 
+  /* We make an exception to the normal ranking system to break
+     dependences of accumulator variables in loops.  Suppose we
+     have a simple one-block loop containing:
+
+       x_1 = phi(x_0, x_2)
+       b = a + x_1
+       c = b + d
+       x_2 = c + e
+
+     As shown, each iteration of the calculation into x is fully
+     dependent upon the iteration before it.  We would prefer to
+     see this in the form:
+
+       x_1 = phi(x_0, x_2)
+       b = a + d
+       c = b + e
+       x_2 = c + x_1
+
+     If the loop is unrolled, the calculations of b and c from
+     different iterations can be interleaved.
+
+     To obtain this result during reassociation, we bias the rank
+     of the phi definition x_1 upward, when it is recognized as an
+     accumulator pattern.  The artificial rank causes it to be 
+     added last, providing the desired independence.  */
+
   if (TREE_CODE (e) == SSA_NAME)
     {
       gimple stmt;
       long rank;
       int i, n;
+      tree op;
 
       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
 	  && SSA_NAME_IS_DEFAULT_DEF (e))
@@ -246,6 +381,9 @@ get_rank (tree e)
       if (gimple_bb (stmt) == NULL)
 	return 0;
 
+      if (gimple_code (stmt) == GIMPLE_PHI)
+	return phi_rank (stmt);
+
       if (!is_gimple_assign (stmt)
 	  || gimple_vdef (stmt))
 	return bb_rank[gimple_bb (stmt)->index];
@@ -255,20 +393,25 @@ get_rank (tree e)
       if (rank != -1)
 	return rank;
 
-      /* Otherwise, find the maximum rank for the operands, or the bb
-	 rank, whichever is less.   */
+      /* Otherwise, find the maximum rank for the operands.  As an
+	 exception, remove the bias from loop-carried phis when propagating
+	 the rank so that dependent operations are not also biased.  */
       rank = 0;
       if (gimple_assign_single_p (stmt))
 	{
 	  tree rhs = gimple_assign_rhs1 (stmt);
 	  n = TREE_OPERAND_LENGTH (rhs);
 	  if (n == 0)
-	    rank = MAX (rank, get_rank (rhs));
+	    rank = propagate_rank (rank, rhs);
 	  else
 	    {
 	      for (i = 0; i < n; i++)
-		if (TREE_OPERAND (rhs, i))
-		  rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
+		{
+		  op = TREE_OPERAND (rhs, i);
+
+		  if (op != NULL_TREE)
+		    rank = propagate_rank (rank, op);
+		}
 	    }
 	}
       else
@@ -276,8 +419,9 @@ get_rank (tree e)
 	  n = gimple_num_ops (stmt);
 	  for (i = 1; i < n; i++)
 	    {
-	      gcc_assert (gimple_op (stmt, i));
-	      rank = MAX(rank, get_rank (gimple_op (stmt, i)));
+	      op = gimple_op (stmt, i);
+	      gcc_assert (op);
+	      rank = propagate_rank (rank, op);
 	    }
 	}
 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, RFC] PR49749 biased reassociation for accumulator patterns
  2011-07-29 17:27 ` William J. Schmidt
@ 2011-07-30 13:17   ` Richard Guenther
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Guenther @ 2011-07-30 13:17 UTC (permalink / raw)
  To: William J. Schmidt; +Cc: gcc-patches, bergner

On Fri, Jul 29, 2011 at 7:11 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Here is the final version of the reassociation patch.  There are two
> differences from the version I published on 7/27.  I removed the
> function call from within the MAX macro per Michael's comment, and I
> changed the propagation of the rank of loop-carried phis to be zero.
> This involved a small change to propagate_rank, and re-casting
> phi_propagation_rank to a predicate function loop_carried_phi.
>
> Performance numbers look good, with some nice gains and no significant
> regressions for CPU2006 on powerpc64-linux.  Bootstrapped and regression
> tested on powerpc64-linux with no regressions.
>
> Ok for trunk?

Ok.

Thanks,
Richard.

> Thanks,
> Bill
>
>
> 2011-07-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/49749
>        * tree-ssa-reassoc.c (get_rank): New forward declaration.
>        (PHI_LOOP_BIAS): New macro.
>        (phi_rank): New function.
>        (loop_carried_phi): Likewise.
>        (propagate_rank): Likewise.
>        (get_rank): Add calls to phi_rank and propagate_rank.
>
> Index: gcc/tree-ssa-reassoc.c
> ===================================================================
> --- gcc/tree-ssa-reassoc.c      (revision 176585)
> +++ gcc/tree-ssa-reassoc.c      (working copy)
> @@ -190,7 +190,115 @@ static long *bb_rank;
>  /* Operand->rank hashtable.  */
>  static struct pointer_map_t *operand_rank;
>
> +/* Forward decls.  */
> +static long get_rank (tree);
>
> +
> +/* Bias amount for loop-carried phis.  We want this to be larger than
> +   the depth of any reassociation tree we can see, but not larger than
> +   the rank difference between two blocks.  */
> +#define PHI_LOOP_BIAS (1 << 15)
> +
> +/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
> +   an innermost loop, and the phi has only a single use which is inside
> +   the loop, then the rank is the block rank of the loop latch plus an
> +   extra bias for the loop-carried dependence.  This causes expressions
> +   calculated into an accumulator variable to be independent for each
> +   iteration of the loop.  If STMT is some other phi, the rank is the
> +   block rank of its containing block.  */
> +static long
> +phi_rank (gimple stmt)
> +{
> +  basic_block bb = gimple_bb (stmt);
> +  struct loop *father = bb->loop_father;
> +  tree res;
> +  unsigned i;
> +  use_operand_p use;
> +  gimple use_stmt;
> +
> +  /* We only care about real loops (those with a latch).  */
> +  if (!father->latch)
> +    return bb_rank[bb->index];
> +
> +  /* Interesting phis must be in headers of innermost loops.  */
> +  if (bb != father->header
> +      || father->inner)
> +    return bb_rank[bb->index];
> +
> +  /* Ignore virtual SSA_NAMEs.  */
> +  res = gimple_phi_result (stmt);
> +  if (!is_gimple_reg (SSA_NAME_VAR (res)))
> +    return bb_rank[bb->index];
> +
> +  /* The phi definition must have a single use, and that use must be
> +     within the loop.  Otherwise this isn't an accumulator pattern.  */
> +  if (!single_imm_use (res, &use, &use_stmt)
> +      || gimple_bb (use_stmt)->loop_father != father)
> +    return bb_rank[bb->index];
> +
> +  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
> +  for (i = 0; i < gimple_phi_num_args (stmt); i++)
> +    {
> +      tree arg = gimple_phi_arg_def (stmt, i);
> +      if (TREE_CODE (arg) == SSA_NAME
> +         && !SSA_NAME_IS_DEFAULT_DEF (arg))
> +       {
> +         gimple def_stmt = SSA_NAME_DEF_STMT (arg);
> +         if (gimple_bb (def_stmt)->loop_father == father)
> +           return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
> +       }
> +    }
> +
> +  /* Must be an uninteresting phi.  */
> +  return bb_rank[bb->index];
> +}
> +
> +/* If EXP is an SSA_NAME defined by a PHI statement that represents a
> +   loop-carried dependence of an innermost loop, return TRUE; else
> +   return FALSE.  */
> +static bool
> +loop_carried_phi (tree exp)
> +{
> +  gimple phi_stmt;
> +  long block_rank;
> +
> +  if (TREE_CODE (exp) != SSA_NAME
> +      || SSA_NAME_IS_DEFAULT_DEF (exp))
> +    return false;
> +
> +  phi_stmt = SSA_NAME_DEF_STMT (exp);
> +
> +  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
> +    return false;
> +
> +  /* Non-loop-carried phis have block rank.  Loop-carried phis have
> +     an additional bias added in.  If this phi doesn't have block rank,
> +     it's biased and should not be propagated.  */
> +  block_rank = bb_rank[gimple_bb (phi_stmt)->index];
> +
> +  if (phi_rank (phi_stmt) != block_rank)
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return the maximum of RANK and the rank that should be propagated
> +   from expression OP.  For most operands, this is just the rank of OP.
> +   For loop-carried phis, the value is zero to avoid undoing the bias
> +   in favor of the phi.  */
> +static long
> +propagate_rank (long rank, tree op)
> +{
> +  long op_rank;
> +
> +  if (loop_carried_phi (op))
> +    return rank;
> +
> +  op_rank = get_rank (op);
> +
> +  return MAX (rank, op_rank);
> +}
> +
>  /* Look up the operand rank structure for expression E.  */
>
>  static inline long
> @@ -232,11 +340,38 @@ get_rank (tree e)
>      I make no claims that this is optimal, however, it gives good
>      results.  */
>
> +  /* We make an exception to the normal ranking system to break
> +     dependences of accumulator variables in loops.  Suppose we
> +     have a simple one-block loop containing:
> +
> +       x_1 = phi(x_0, x_2)
> +       b = a + x_1
> +       c = b + d
> +       x_2 = c + e
> +
> +     As shown, each iteration of the calculation into x is fully
> +     dependent upon the iteration before it.  We would prefer to
> +     see this in the form:
> +
> +       x_1 = phi(x_0, x_2)
> +       b = a + d
> +       c = b + e
> +       x_2 = c + x_1
> +
> +     If the loop is unrolled, the calculations of b and c from
> +     different iterations can be interleaved.
> +
> +     To obtain this result during reassociation, we bias the rank
> +     of the phi definition x_1 upward, when it is recognized as an
> +     accumulator pattern.  The artificial rank causes it to be
> +     added last, providing the desired independence.  */
> +
>   if (TREE_CODE (e) == SSA_NAME)
>     {
>       gimple stmt;
>       long rank;
>       int i, n;
> +      tree op;
>
>       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
>          && SSA_NAME_IS_DEFAULT_DEF (e))
> @@ -246,6 +381,9 @@ get_rank (tree e)
>       if (gimple_bb (stmt) == NULL)
>        return 0;
>
> +      if (gimple_code (stmt) == GIMPLE_PHI)
> +       return phi_rank (stmt);
> +
>       if (!is_gimple_assign (stmt)
>          || gimple_vdef (stmt))
>        return bb_rank[gimple_bb (stmt)->index];
> @@ -255,20 +393,25 @@ get_rank (tree e)
>       if (rank != -1)
>        return rank;
>
> -      /* Otherwise, find the maximum rank for the operands, or the bb
> -        rank, whichever is less.   */
> +      /* Otherwise, find the maximum rank for the operands.  As an
> +        exception, remove the bias from loop-carried phis when propagating
> +        the rank so that dependent operations are not also biased.  */
>       rank = 0;
>       if (gimple_assign_single_p (stmt))
>        {
>          tree rhs = gimple_assign_rhs1 (stmt);
>          n = TREE_OPERAND_LENGTH (rhs);
>          if (n == 0)
> -           rank = MAX (rank, get_rank (rhs));
> +           rank = propagate_rank (rank, rhs);
>          else
>            {
>              for (i = 0; i < n; i++)
> -               if (TREE_OPERAND (rhs, i))
> -                 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
> +               {
> +                 op = TREE_OPERAND (rhs, i);
> +
> +                 if (op != NULL_TREE)
> +                   rank = propagate_rank (rank, op);
> +               }
>            }
>        }
>       else
> @@ -276,8 +419,9 @@ get_rank (tree e)
>          n = gimple_num_ops (stmt);
>          for (i = 1; i < n; i++)
>            {
> -             gcc_assert (gimple_op (stmt, i));
> -             rank = MAX(rank, get_rank (gimple_op (stmt, i)));
> +             op = gimple_op (stmt, i);
> +             gcc_assert (op);
> +             rank = propagate_rank (rank, op);
>            }
>        }
>
>
>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-07-30  9:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-27 15:58 [PATCH, RFC] PR49749 biased reassociation for accumulator patterns William J. Schmidt
2011-07-27 16:20 ` Michael Matz
2011-07-28 11:05 ` Richard Guenther
2011-07-29 15:27 ` William J. Schmidt
2011-07-29 17:27 ` William J. Schmidt
2011-07-30 13:17   ` Richard Guenther

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).