public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Biener <richard.guenther@gmail.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: Andrew Pinski <pinskia@gmail.com>, GCC Patches <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH] Optimize vector init constructor
Date: Mon, 04 Mar 2019 11:55:00 -0000	[thread overview]
Message-ID: <CAFiYyc17ECEUCXzV4-rVupBjwWhCuVQfTRGgmGtt1uThh12Prg@mail.gmail.com> (raw)
In-Reply-To: <20190303211325.GA24057@gmail.com>

On Sun, Mar 3, 2019 at 10:13 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Mar 03, 2019 at 06:40:09AM -0800, Andrew Pinski wrote:
> > )
> > ,On Sun, Mar 3, 2019 at 6:32 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > For vector init constructor:
> > >
> > > ---
> > > typedef float __v4sf __attribute__ ((__vector_size__ (16)));
> > >
> > > __v4sf
> > > foo (__v4sf x, float f)
> > > {
> > >   __v4sf y = { f, x[1], x[2], x[3] };
> > >   return y;
> > > }
> > > ---
> > >
> > > we can optimize vector init constructor with vector copy or permute
> > > followed by a single scalar insert:
> > >
> > >   __v4sf D.1912;
> > >   __v4sf D.1913;
> > >   __v4sf D.1914;
> > >   __v4sf y;
> > >
> > >   x.0_1 = x;
> > >   D.1912 = x.0_1;
> > >   _2 = D.1912;
> > >   D.1913 = _2;
> > >   BIT_FIELD_REF <D.1913, 32, 0> = f;
> > >   y = D.1913;
> > >   D.1914 = y;
> > >   return D.1914;
> > >
> > > instead of
> > >
> > >   __v4sf D.1962;
> > >   __v4sf y;
> > >
> > >   _1 = BIT_FIELD_REF <x, 32, 32>;
> > >   _2 = BIT_FIELD_REF <x, 32, 64>;
> > >   _3 = BIT_FIELD_REF <x, 32, 96>;
> > >   y = {f, _1, _2, _3};
> > >   D.1962 = y;
> > >   return D.1962;
> > >
> > > gcc/
> > >
> > >         PR tree-optimization/88828
> > >         * gimplify.c (gimplify_init_constructor): Optimize vector init
> > >         constructor with vector copy or permute followed by a single
> > >         scalar insert.
> >
> >
> > Doing this here does not catch things like:
> > typedef float __v4sf __attribute__ ((__vector_size__ (16)));
> >
> >
> > __v4sf
> > vector_init (float f0,float f1, float f2,float f3)
> > {
> >   __v4sf y = { f, x[1], x[2], x[3] };
> >    return y;
> > }
> >
> > __v4sf
> > foo (__v4sf x, float f)
> > {
> >   return vector_init (f, x[1], x[2], x[3]) ;
> > }
> >
>
> Here is a patch for simplify_vector_constructor to optimize vector init
> constructor with vector copy or permute followed by a single scalar
> insert.

That's the correct place to fix this indeed.

  But this doesn't work correcly:
>
> [hjl@gnu-cfl-2 pr88828]$ cat bar.i
> typedef float __v4sf __attribute__ ((__vector_size__ (16)));
>
> static __v4sf
> vector_init (float f0,float f1, float f2,float f3)
> {
>   __v4sf y = { f0, f1, f2, f3 };
>    return y;
> }
>
> __v4sf
> foo (__v4sf x, float f)
> {
>   return vector_init (f, x[1], x[2], x[3]) ;
> }
> [hjl@gnu-cfl-2 pr88828]$ make bar.s
> /export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -O2 -S bar.i
> [hjl@gnu-cfl-2 pr88828]$ cat bar.s
>         .file   "bar.i"
>         .text
>         .p2align 4
>         .globl  foo
>         .type   foo, @function
> foo:
> .LFB1:
>         .cfi_startproc
>         ret
>         .cfi_endproc
> .LFE1:
>         .size   foo, .-foo
>         .ident  "GCC: (GNU) 9.0.1 20190303 (experimental)"
>         .section        .note.GNU-stack,"",@progbits
> [hjl@gnu-cfl-2 pr88828]$
>
> Scalar insert is missing.
> ---
>  gcc/tree-ssa-forwprop.c | 77 ++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 69 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> index eeb6281c652..b10cfccf7b8 100644
> --- a/gcc/tree-ssa-forwprop.c
> +++ b/gcc/tree-ssa-forwprop.c
> @@ -2008,7 +2008,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>    unsigned elem_size, i;
>    unsigned HOST_WIDE_INT nelts;
>    enum tree_code code, conv_code;
> -  constructor_elt *elt;
> +  constructor_elt *ce;
>    bool maybe_ident;
>
>    gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR);
> @@ -2027,18 +2027,41 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>    orig[1] = NULL;
>    conv_code = ERROR_MARK;
>    maybe_ident = true;
> -  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
> +
> +  tree rhs_vector = NULL;
> +  /* The single scalar element.  */
> +  tree scalar_element = NULL;
> +  unsigned int scalar_idx = 0;
> +  bool insert = false;
> +  unsigned int nscalars = 0;
> +  unsigned int nvectors = 0;
> +  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, ce)
>      {
>        tree ref, op1;
>
>        if (i >= nelts)
>         return false;
>
> -      if (TREE_CODE (elt->value) != SSA_NAME)
> +      if (TREE_CODE (ce->value) != SSA_NAME)
>         return false;
> -      def_stmt = get_prop_source_stmt (elt->value, false, NULL);
> +      def_stmt = get_prop_source_stmt (ce->value, false, NULL);
>        if (!def_stmt)
> -       return false;
> +       {
> +         if ( gimple_nop_p (SSA_NAME_DEF_STMT (ce->value)))
> +           {
> +             /* Only allow one single scalar insert.  */
> +             if (nscalars != 0)
> +               return false;
> +
> +             nscalars = 1;
> +             insert = true;
> +             scalar_idx = i;
> +             scalar_element = ce->value;
> +             continue;
> +           }
> +         else
> +           return false;
> +       }
>        code = gimple_assign_rhs_code (def_stmt);
>        if (code == FLOAT_EXPR
>           || code == FIX_TRUNC_EXPR)
> @@ -2046,7 +2069,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>           op1 = gimple_assign_rhs1 (def_stmt);
>           if (conv_code == ERROR_MARK)
>             {
> -             if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (elt->value))),
> +             if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (ce->value))),
>                             GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1)))))
>                 return false;
>               conv_code = code;
> @@ -2095,6 +2118,18 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>         elt += nelts;
>        if (elt != i)
>         maybe_ident = false;
> +
> +       if (type == TREE_TYPE (ref))
> +        {
> +          /* The RHS vector has the same type as LHS.  */
> +          if (rhs_vector == NULL)
> +            rhs_vector = ref;
> +          /* Check if all RHS vector elements come fome the same
> +             vector.  */
> +          if (rhs_vector == ref)
> +            nvectors++;
> +        }
> +
>        sel.quick_push (elt);
>      }
>    if (i < nelts)
> @@ -2113,6 +2148,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>           || conv_code == CALL_EXPR))
>      return false;
>
> +  /* Replace the scalar element with the vector element.  */
> +  if (insert
> +      && (TYPE_VECTOR_SUBPARTS (type).to_constant ()
> +         == (nscalars + nvectors)))
> +    sel.quick_push (scalar_idx);
> +
>    if (maybe_ident)
>      {
>        if (conv_code == ERROR_MARK)
> @@ -2127,14 +2168,22 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>
>        vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts);
>        if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
> -       return false;
> +       {
> +         if (insert)
> +           gcc_unreachable ();
> +         return false;
> +       }
>        mask_type
>         = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
>                              nelts);
>        if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
>           || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)),
>                        GET_MODE_SIZE (TYPE_MODE (type))))
> -       return false;
> +       {
> +         if (insert)
> +           gcc_unreachable ();
> +         return false;
> +       }
>        op2 = vec_perm_indices_to_tree (mask_type, indices);
>        if (!orig[1])
>         orig[1] = orig[0];
> @@ -2153,6 +2202,18 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
>         }
>      }
>    update_stmt (gsi_stmt (*gsi));
> +  if (insert)
> +    {
> +      /* Generate a single scalar insert.  */
> +      /* FIXME: This doesn't work correctly.  */
> +      tree lhs = gimple_assign_lhs (stmt);
> +      tree bitfield = build3 (BIT_FIELD_REF, elem_type, lhs,
> +                             bitsize_int (elem_size),
> +                             bitsize_int (scalar_idx * elem_size));
> +      gimple *new_stmt = gimple_build_assign (bitfield, scalar_element);

I think you want to generate from the original

    _1 = { .... };

the new

    _2 = copy or permute to _new_ LHS SSA name
    _1 = BIT_INSERT_EXPR <_2, scalar_element, scalar_idx * elem_size>;

> +      gsi_insert_after (gsi, new_stmt, GSI_SAME_STMT);

and you want to advance to the _1 = BIT_INSERT_EXPR here.  The easiest way
is to emit a new stmt for _2 = copy ...; and do the set_rhs with the
BIT_INSERT_EXPR.

> +      update_stmt (gsi_stmt (*gsi));
> +    }
>    return true;
>  }
>
> --
> 2.20.1
>

  reply	other threads:[~2019-03-04 11:55 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-03 14:32 H.J. Lu
2019-03-03 14:40 ` Andrew Pinski
2019-03-03 21:13   ` H.J. Lu
2019-03-04 11:55     ` Richard Biener [this message]
2019-03-04 17:46       ` V2 [PATCH] Optimize vector constructor H.J. Lu
2019-03-06  7:54         ` V3 " H.J. Lu
2019-03-06 13:39           ` Richard Biener
2019-03-07  7:12             ` V4 " H.J. Lu
2019-03-08  9:56               ` V5 " H.J. Lu
2019-03-08 11:23                 ` Richard Biener
2019-03-11  7:58                   ` H.J. Lu
2019-05-02 14:54                     ` Richard Biener
2019-05-02 14:55                       ` Richard Biener
2019-05-02 17:53                         ` H.J. Lu
2019-05-03 16:54                           ` V6 " H.J. Lu
2019-05-08 12:04                             ` Richard Biener
2019-05-14  9:13                               ` Richard Biener

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAFiYyc17ECEUCXzV4-rVupBjwWhCuVQfTRGgmGtt1uThh12Prg@mail.gmail.com \
    --to=richard.guenther@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hjl.tools@gmail.com \
    --cc=pinskia@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).