public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Biener <richard.guenther@gmail.com>
To: "Kewen.Lin" <linkw@linux.ibm.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>,
	Richard Sandiford <richard.sandiford@arm.com>,
	 Segher Boessenkool <segher@kernel.crashing.org>,
	Peter Bergner <bergner@linux.ibm.com>
Subject: Re: [PATCH] vect: Fold LEN_{LOAD,STORE} if it's for the whole vector [PR107412]
Date: Sat, 5 Nov 2022 12:40:16 +0100	[thread overview]
Message-ID: <CAFiYyc2Uf_yd_ztF8_c_f0BT0T_Xu2ZrCD5+XbK66fztDL0PMg@mail.gmail.com> (raw)
In-Reply-To: <94ac390b-a770-c868-051b-75319eb7f81d@linux.ibm.com>

On Wed, Nov 2, 2022 at 8:59 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>
> Hi,
>
> As the test case in PR107412 shows, we can fold IFN .LEN_{LOAD,
> STORE} into normal vector load/store if the given length is known
> to be equal to the length of the whole vector.  It would help to
> improve overall cycles as normally the latency of vector access
> with length in bytes is bigger than normal vector access, and it
> also saves the preparation for length if constant length cannot
> be encoded into instruction (such as on power).
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?

OK.

>
> BR,
> Kewen
> -----
>         PR tree-optimization/107412
>
> gcc/ChangeLog:
>
>         * gimple-fold.cc (gimple_fold_mask_load_store_mem_ref): Rename to ...
>         (gimple_fold_partial_load_store_mem_ref): ... this, add one parameter
>         mask_p indicating it's for mask or length, and add some handling for
>         IFN LEN_{LOAD,STORE}.
>         (gimple_fold_mask_load): Rename to ...
>         (gimple_fold_partial_load): ... this, add one parameter mask_p.
>         (gimple_fold_mask_store): Rename to ...
>         (gimple_fold_partial_store): ... this, add one parameter mask_p.
>         (gimple_fold_call): Add the handling for IFN LEN_{LOAD,STORE},
>         and adjust calls on gimple_fold_mask_load_store_mem_ref to
>         gimple_fold_partial_load_store_mem_ref.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/powerpc/pr107412.c: New test.
>         * gcc.target/powerpc/p9-vec-length-epil-8.c: Adjust scan times for
>         folded LEN_LOAD.
> ---
>  gcc/gimple-fold.cc                            | 57 ++++++++++++++-----
>  .../gcc.target/powerpc/p9-vec-length-epil-8.c |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr107412.c   | 19 +++++++
>  3 files changed, 64 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr107412.c
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index a1704784bc9..e3a087defa6 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -5370,19 +5370,39 @@ arith_overflowed_p (enum tree_code code, const_tree type,
>    return wi::min_precision (wres, sign) > TYPE_PRECISION (type);
>  }
>
> -/* If IFN_MASK_LOAD/STORE call CALL is unconditional, return a MEM_REF
> +/* If IFN_{MASK,LEN}_LOAD/STORE call CALL is unconditional, return a MEM_REF
>     for the memory it references, otherwise return null.  VECTYPE is the
> -   type of the memory vector.  */
> +   type of the memory vector.  MASK_P indicates it's for MASK if true,
> +   otherwise it's for LEN.  */
>
>  static tree
> -gimple_fold_mask_load_store_mem_ref (gcall *call, tree vectype)
> +gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
>  {
>    tree ptr = gimple_call_arg (call, 0);
>    tree alias_align = gimple_call_arg (call, 1);
> -  tree mask = gimple_call_arg (call, 2);
> -  if (!tree_fits_uhwi_p (alias_align) || !integer_all_onesp (mask))
> +  if (!tree_fits_uhwi_p (alias_align))
>      return NULL_TREE;
>
> +  if (mask_p)
> +    {
> +      tree mask = gimple_call_arg (call, 2);
> +      if (!integer_all_onesp (mask))
> +       return NULL_TREE;
> +    } else {
> +      tree basic_len = gimple_call_arg (call, 2);
> +      if (!tree_fits_uhwi_p (basic_len))
> +       return NULL_TREE;
> +      unsigned int nargs = gimple_call_num_args (call);
> +      tree bias = gimple_call_arg (call, nargs - 1);
> +      gcc_assert (tree_fits_uhwi_p (bias));
> +      tree biased_len = int_const_binop (MINUS_EXPR, basic_len, bias);
> +      unsigned int len = tree_to_uhwi (biased_len);
> +      unsigned int vect_len
> +       = GET_MODE_SIZE (TYPE_MODE (vectype)).to_constant ();
> +      if (vect_len != len)
> +       return NULL_TREE;
> +    }
> +
>    unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align);
>    if (TYPE_ALIGN (vectype) != align)
>      vectype = build_aligned_type (vectype, align);
> @@ -5390,16 +5410,18 @@ gimple_fold_mask_load_store_mem_ref (gcall *call, tree vectype)
>    return fold_build2 (MEM_REF, vectype, ptr, offset);
>  }
>
> -/* Try to fold IFN_MASK_LOAD call CALL.  Return true on success.  */
> +/* Try to fold IFN_{MASK,LEN}_LOAD call CALL.  Return true on success.
> +   MASK_P indicates it's for MASK if true, otherwise it's for LEN.  */
>
>  static bool
> -gimple_fold_mask_load (gimple_stmt_iterator *gsi, gcall *call)
> +gimple_fold_partial_load (gimple_stmt_iterator *gsi, gcall *call, bool mask_p)
>  {
>    tree lhs = gimple_call_lhs (call);
>    if (!lhs)
>      return false;
>
> -  if (tree rhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (lhs)))
> +  if (tree rhs
> +      = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (lhs), mask_p))
>      {
>        gassign *new_stmt = gimple_build_assign (lhs, rhs);
>        gimple_set_location (new_stmt, gimple_location (call));
> @@ -5410,13 +5432,16 @@ gimple_fold_mask_load (gimple_stmt_iterator *gsi, gcall *call)
>    return false;
>  }
>
> -/* Try to fold IFN_MASK_STORE call CALL.  Return true on success.  */
> +/* Try to fold IFN_{MASK,LEN}_STORE call CALL.  Return true on success.
> +   MASK_P indicates it's for MASK if true, otherwise it's for LEN.  */
>
>  static bool
> -gimple_fold_mask_store (gimple_stmt_iterator *gsi, gcall *call)
> +gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call,
> +                          bool mask_p)
>  {
>    tree rhs = gimple_call_arg (call, 3);
> -  if (tree lhs = gimple_fold_mask_load_store_mem_ref (call, TREE_TYPE (rhs)))
> +  if (tree lhs
> +      = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs), mask_p))
>      {
>        gassign *new_stmt = gimple_build_assign (lhs, rhs);
>        gimple_set_location (new_stmt, gimple_location (call));
> @@ -5634,10 +5659,16 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace)
>           cplx_result = true;
>           break;
>         case IFN_MASK_LOAD:
> -         changed |= gimple_fold_mask_load (gsi, stmt);
> +         changed |= gimple_fold_partial_load (gsi, stmt, true);
>           break;
>         case IFN_MASK_STORE:
> -         changed |= gimple_fold_mask_store (gsi, stmt);
> +         changed |= gimple_fold_partial_store (gsi, stmt, true);
> +         break;
> +       case IFN_LEN_LOAD:
> +         changed |= gimple_fold_partial_load (gsi, stmt, false);
> +         break;
> +       case IFN_LEN_STORE:
> +         changed |= gimple_fold_partial_store (gsi, stmt, false);
>           break;
>         default:
>           break;
> diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> index 961df0d5646..8b9c9107814 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
> @@ -8,5 +8,5 @@
>
>  #include "p9-vec-length-8.h"
>
> -/* { dg-final { scan-assembler-times {\mlxvl\M} 21 } } */
> +/* { dg-final { scan-assembler-times {\mlxvl\M} 16 } } */
>  /* { dg-final { scan-assembler-times {\mstxvl\M} 7 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr107412.c b/gcc/testsuite/gcc.target/powerpc/pr107412.c
> new file mode 100644
> index 00000000000..4526ea8639d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr107412.c
> @@ -0,0 +1,19 @@
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -ftree-vectorize -fno-vect-cost-model -funroll-loops -fno-tree-loop-distribute-patterns --param vect-partial-vector-usage=2 -fdump-tree-optimized" } */
> +
> +/* Verify there is exactly one IFN call for LEN_LOAD and one for LEN_STORE.  */
> +
> +#define N 16
> +int src[N];
> +int dest[N];
> +
> +void
> +foo ()
> +{
> +  for (int i = 0; i < (N - 1); i++)
> +    dest[i] = src[i];
> +}
> +
> +/* { dg-final { scan-tree-dump-times {\mLEN_LOAD\M} 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times {\mLEN_STORE\M} 1 "optimized" } } */
> --
> 2.27.0

  reply	other threads:[~2022-11-05 11:40 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-02  7:59 Kewen.Lin
2022-11-05 11:40 ` Richard Biener [this message]
2022-11-24  9:24 ` Richard Sandiford
2022-11-28  2:57   ` Kewen.Lin
2022-12-01 11:28     ` Richard Sandiford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAFiYyc2Uf_yd_ztF8_c_f0BT0T_Xu2ZrCD5+XbK66fztDL0PMg@mail.gmail.com \
    --to=richard.guenther@gmail.com \
    --cc=bergner@linux.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=linkw@linux.ibm.com \
    --cc=richard.sandiford@arm.com \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).