public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "Cui, Lili" <lili.cui@intel.com>
To: Richard Biener <richard.guenther@gmail.com>
Cc: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: RE: [PATCH] PR gcc/98350:Handle FMA friendly in reassoc pass
Date: Wed, 24 May 2023 23:43:29 +0000	[thread overview]
Message-ID: <SJ0PR11MB56008551792AD8C7D8FEA9309E419@SJ0PR11MB5600.namprd11.prod.outlook.com> (raw)
In-Reply-To: <CAFiYyc2tkFdzY5e-YU+S0qWVa+1vmKCbNH1b2_qs5wi0CBuxhw@mail.gmail.com>

> > +rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
> > +                                        const vec<operand_entry *>
> > +&ops)
> >  {
> >    enum tree_code opcode = gimple_assign_rhs_code (stmt);
> >    int op_num = ops.length ();
> > @@ -5483,10 +5494,11 @@ rewrite_expr_tree_parallel (gassign *stmt, int
> width,
> >    int stmt_num = op_num - 1;
> >    gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
> >    int op_index = op_num - 1;
> > -  int stmt_index = 0;
> > -  int ready_stmts_end = 0;
> > -  int i = 0;
> > -  gimple *stmt1 = NULL, *stmt2 = NULL;
> > +  int width_count = width;
> > +  int i = 0, j = 0;
> > +  tree tmp_op[2], op1;
> > +  operand_entry *oe;
> > +  gimple *stmt1 = NULL;
> >    tree last_rhs1 = gimple_assign_rhs1 (stmt);
> >
> >    /* We start expression rewriting from the top statements.
> > @@ -5496,91 +5508,84 @@ rewrite_expr_tree_parallel (gassign *stmt, int
> width,
> >    for (i = stmt_num - 2; i >= 0; i--)
> >      stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
> >
> > -  for (i = 0; i < stmt_num; i++)
> > +  /* Build parallel dependency chain according to width.  */  for (i
> > + = 0; i < width; i++)
> >      {
> > -      tree op1, op2;
> > -
> > -      /* Determine whether we should use results of
> > -        already handled statements or not.  */
> > -      if (ready_stmts_end == 0
> > -         && (i - stmt_index >= width || op_index < 1))
> > -       ready_stmts_end = i;
> > -
> > -      /* Now we choose operands for the next statement.  Non zero
> > -        value in ready_stmts_end means here that we should use
> > -        the result of already generated statements as new operand.  */
> > -      if (ready_stmts_end > 0)
> > -       {
> > -         op1 = gimple_assign_lhs (stmts[stmt_index++]);
> > -         if (ready_stmts_end > stmt_index)
> > -           op2 = gimple_assign_lhs (stmts[stmt_index++]);
> > -         else if (op_index >= 0)
> > -           {
> > -             operand_entry *oe = ops[op_index--];
> > -             stmt2 = oe->stmt_to_insert;
> > -             op2 = oe->op;
> > -           }
> > -         else
> > -           {
> > -             gcc_assert (stmt_index < i);
> > -             op2 = gimple_assign_lhs (stmts[stmt_index++]);
> > -           }
> > +      /*   */
> 
> empty comment?

Added it, thanks.

> 
> > +      if (op_index > 1 && !has_fma)
> > +       swap_ops_for_binary_stmt (ops, op_index - 2);
> >
> > -         if (stmt_index >= ready_stmts_end)
> > -           ready_stmts_end = 0;
> > -       }
> > -      else
> > +      for (j = 0; j < 2; j++)
> >         {
> > -         if (op_index > 1)
> > -           swap_ops_for_binary_stmt (ops, op_index - 2);
> > -         operand_entry *oe2 = ops[op_index--];
> > -         operand_entry *oe1 = ops[op_index--];
> > -         op2 = oe2->op;
> > -         stmt2 = oe2->stmt_to_insert;
> > -         op1 = oe1->op;
> > -         stmt1 = oe1->stmt_to_insert;
> > +         gcc_assert (op_index >= 0);
> > +         oe = ops[op_index--];
> > +         tmp_op[j] = oe->op;
> > +         /* If the stmt that defines operand has to be inserted, insert it
> > +            before the use.  */
> > +         stmt1 = oe->stmt_to_insert;
> > +         if (stmt1)
> > +           insert_stmt_before_use (stmts[i], stmt1);
> > +         stmt1 = NULL;
> >         }
> > -
> > -      /* If we emit the last statement then we should put
> > -        operands into the last statement.  It will also
> > -        break the loop.  */
> > -      if (op_index < 0 && stmt_index == i)
> > -       i = stmt_num - 1;
> > +      stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), tmp_op[1],
> tmp_op[0], opcode);
> > +      gimple_set_visited (stmts[i], true);
> >
> >        if (dump_file && (dump_flags & TDF_DETAILS))
> >         {
> > -         fprintf (dump_file, "Transforming ");
> > +         fprintf (dump_file, " into ");
> >           print_gimple_stmt (dump_file, stmts[i], 0);
> >         }
> > +    }
> >
> > -      /* If the stmt that defines operand has to be inserted, insert it
> > -        before the use.  */
> > -      if (stmt1)
> > -       insert_stmt_before_use (stmts[i], stmt1);
> > -      if (stmt2)
> > -       insert_stmt_before_use (stmts[i], stmt2);
> > -      stmt1 = stmt2 = NULL;
> > -
> > -      /* We keep original statement only for the last one.  All
> > -        others are recreated.  */
> > -      if (i == stmt_num - 1)
> > +  for (i = width; i < stmt_num; i++)
> > +    {
> > +      /* We keep original statement only for the last one.  All others are
> > +        recreated.  */
> > +      if ( op_index < 0)
> >         {
> > -         gimple_assign_set_rhs1 (stmts[i], op1);
> > -         gimple_assign_set_rhs2 (stmts[i], op2);
> > -         update_stmt (stmts[i]);
> > +         if (width_count == 2)
> > +           {
> > +
> > +             /* We keep original statement only for the last one.  All
> > +                others are recreated.  */
> > +             gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs (stmts[i-1]));
> > +             gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs (stmts[i-2]));
> > +             update_stmt (stmts[i]);
> > +           }
> > +         else
> > +           {
> > +
> > +             stmts[i] =
> > +               build_and_add_sum (TREE_TYPE (last_rhs1),
> > +                                  gimple_assign_lhs (stmts[i-width_count]),
> > +                                  gimple_assign_lhs (stmts[i-width_count+1]),
> > +                                  opcode);
> > +             gimple_set_visited (stmts[i], true);
> > +             width_count--;
> > +           }
> >         }
> >        else
> >         {
> > -         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2,
> opcode);
> > +         /* Attach the rest of the ops to the parallel dependency chain.  */
> > +         oe = ops[op_index--];
> > +         op1 = oe->op;
> > +         stmt1 = oe->stmt_to_insert;
> > +         if (stmt1)
> > +           insert_stmt_before_use (stmts[i], stmt1);
> > +         stmt1 = NULL;
> > +         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
> > +                                       gimple_assign_lhs (stmts[i-width]),
> > +                                       op1,
> > +                                       opcode);
> >           gimple_set_visited (stmts[i], true);
> >         }
> > +
> >        if (dump_file && (dump_flags & TDF_DETAILS))
> >         {
> >           fprintf (dump_file, " into ");
> >           print_gimple_stmt (dump_file, stmts[i], 0);
> >         }
> >      }
> > -
> 
> I've looked three times but didn't find a use of 'has_fma'?

It's located after your first comment, If the chain has FAM, we do not swap two operands

  if (op_index > 1 && !has_fma)
    swap_ops_for_binary_stmt (ops, op_index - 2);

> >    remove_visited_stmt_chain (last_rhs1);  }
> >
> > @@ -6649,6 +6654,76 @@ transform_stmt_to_multiply
> (gimple_stmt_iterator *gsi, gimple *stmt,
> >      }
> >  }
> >
> > +/* Rearrange ops to generate more FMA when the chain may has more
> than 2 fmas.
> 
> may have
> 
Done.

> > +   Put no-mult ops and mult ops alternately at the end of the queue, which
> is
> > +   conducive to generating more fma and reducing the loss of FMA when
> breaking
> > +   the chain.
> > +   E.g.
> > +   a * b + c * d + e generates:
> > +
> > +   _4  = c_9(D) * d_10(D);
> > +   _12 = .FMA (a_7(D), b_8(D), _4);
> > +   _11 = e_6(D) + _12;
> > +
> > +   Rtearrange ops to -> e + a * b + c * d generates:
> 
> Rearrange
> 
Done.

> > +
> > +   _4  = .FMA (c_7(D), d_8(D), _3);
> > +   _11 = .FMA (a_5(D), b_6(D), _4);
> > + */
> > +static bool
> > +rank_ops_for_fma (vec<operand_entry *> *ops) {
> > +  operand_entry *oe;
> > +  unsigned int i;
> > +  unsigned int ops_length = ops->length ();
> > +  auto_vec<operand_entry *> ops_mult;
> > +  auto_vec<operand_entry *> ops_others;
> > +
> > +  FOR_EACH_VEC_ELT (*ops, i, oe)
> > +    {
> > +      if (TREE_CODE (oe->op) == SSA_NAME)
> > +       {
> > +         gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
> > +         if (is_gimple_assign (def_stmt)
> > +             && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
> > +           ops_mult.safe_push (oe);
> > +         else
> > +           ops_others.safe_push (oe);
> > +       }
> > +      else
> > +       ops_others.safe_push (oe);
> > +    }
> > +  /* When ops_mult.length == 2, like the following case,
> > +
> > +     a * b + c * d + e.
> > +
> > +     we need to rearrange the ops.
> > +
> > +     Putting ops that not def from mult in front can generate more
> > + fmas.  */  if (ops_mult.length () >= 2)
> > +    {
> > +      /* If all ops are defined with mult, we don't need to rearrange them.
> */
> > +      if (ops_mult.length () != ops_length)
> 
> use && with the previous condition.
> 
Done.

> > +       {
> > +         /* Put no-mult ops and mult ops alternately at the end of the
> > +            queue, which is conducive to generating more fma and reducing
> the
> > +            loss of FMA when breaking the chain.  */
> > +         ops->truncate (0);
> > +         ops->splice (ops_mult);
> > +         int j, opindex = ops->length ();
> > +         int others_length = ops_others.length();
> > +         for (j = 0; j < others_length; j++)
> > +           {
> > +             oe = ops_others.pop ();
> > +             ops->safe_insert (opindex, oe);
> 
> that's quadratic as it needs to move ops.  As said previously we know that
> 'ops' has enough room and you can use the quick_ (or non-safe_) variants of
> the APIs on it.
> 
Done, thanks.

Regards,
Lili.

> Otherwise looks good to me.
> 
> Thanks,
> Richard.
> 

      reply	other threads:[~2023-05-24 23:43 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-17 13:02 Cui, Lili
2023-05-18 16:56 ` Cui, Lili
2023-05-22 13:29 ` Richard Biener
2023-05-24 23:43   ` Cui, Lili [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=SJ0PR11MB56008551792AD8C7D8FEA9309E419@SJ0PR11MB5600.namprd11.prod.outlook.com \
    --to=lili.cui@intel.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=richard.guenther@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).