public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Christophe Lyon <christophe.lyon@linaro.org>
To: Richard Biener <rguenther@suse.de>
Cc: gcc-patches@gcc.gnu.org,
	 "richard.sandiford@arm.com" <richard.sandiford@arm.com>
Subject: Re: [PATCH] tree-optimization/109849 - missed code hoisting
Date: Wed, 24 May 2023 12:07:48 +0200	[thread overview]
Message-ID: <CAPS5khbo9a19byiXLhKyNL2-EY7FjDnRvqbPtWCriUBdnNY9_w@mail.gmail.com> (raw)
In-Reply-To: <20230523095533.B1C0713588@imap2.suse-dmz.suse.de>

[-- Attachment #1: Type: text/plain, Size: 6660 bytes --]

Hi Richard,

On Tue, 23 May 2023 at 11:55, Richard Biener via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> The following fixes code hoisting to properly consider ANTIC_OUT instead
> of ANTIC_IN.  That's a bit expensive to re-compute but since we no
> longer iterate we're doing this only once per BB which should be
> acceptable.  This avoids missing hoistings to the end of blocks where
> something in the block clobbers the hoisted value.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
>
>         PR tree-optimization/109849
>         * tree-ssa-pre.cc (do_hoist_insertion): Compute ANTIC_OUT
>         and use that to determine what to hoist.
>
>         * gcc.dg/tree-ssa/ssa-hoist-8.c: New testcase.
>

This patch causes a regression on aarch64:
gcc.target/aarch64/sve/fmla_2.c: \\tst1d found 3 times
 FAIL: gcc.target/aarch64/sve/fmla_2.c scan-assembler-times \\tst1d 2


We used to generate:
        mov     x6, 0
        mov     w7, 55
        whilelo p7.d, wzr, w7
        .p2align 3,,7
.L2:
        ld1d    z30.d, p7/z, [x5, x6, lsl 3]
        ld1d    z31.d, p7/z, [x4, x6, lsl 3]
        cmpne   p6.d, p7/z, z30.d, #0
        ld1d    z30.d, p7/z, [x3, x6, lsl 3]
        ld1d    z29.d, p6/z, [x2, x6, lsl 3]
        movprfx z28, z30
        fmla    z28.d, p6/m, z31.d, z29.d
        fmla    z31.d, p6/m, z30.d, z29.d
        st1d    z28.d, p7, [x1, x6, lsl 3]
        st1d    z31.d, p7, [x0, x6, lsl 3]
        incd    x6
        whilelo p7.d, w6, w7
        b.any   .L2


But now:
        mov     x6, 0
        mov     w7, 55
        ptrue   p4.b, all
        whilelo p7.d, wzr, w7
        .p2align 3,,7
.L2:
        ld1d    z30.d, p7/z, [x5, x6, lsl 3]
        ld1d    z31.d, p7/z, [x4, x6, lsl 3]
        cmpne   p6.d, p7/z, z30.d, #0
        cmpeq   p5.d, p7/z, z30.d, #0
        ld1d    z29.d, p6/z, [x2, x6, lsl 3]
        ld1d    z28.d, p6/z, [x3, x6, lsl 3]
        ld1d    z30.d, p5/z, [x3, x6, lsl 3]
        movprfx z27, z31
        fmla    z27.d, p4/m, z29.d, z28.d
        movprfx z30.d, p6/m, z28.d
        fmla    z30.d, p6/m, z31.d, z29.d
        st1d    z27.d, p6, [x0, x6, lsl 3]
        st1d    z30.d, p7, [x1, x6, lsl 3]
        st1d    z31.d, p5, [x0, x6, lsl 3]
        incd    x6
        whilelo p7.d, w6, w7
        b.any   .L2


Christophe

---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c | 22 ++++++++++
>  gcc/tree-ssa-pre.cc                         | 48 ++++++++++++++++++---
>  2 files changed, 64 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
> new file mode 100644
> index 00000000000..66bb48e0dc1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-8.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-pre-stats" } */
> +
> +int mem;
> +void foo ();
> +int bar (int flag)
> +{
> +  int res;
> +  foo ();
> +  /* Hoist the load of mem here even though foo () clobbers it.  */
> +  if (flag)
> +    res = mem;
> +  else
> +    {
> +      res = mem;
> +      mem = 2;
> +    }
> +  return res;
> +}
> +
> +/* { dg-final { scan-tree-dump "HOIST inserted: 1" "pre" } } */
> +/* { dg-final { scan-tree-dump-times " = mem;" 1 "pre" } } */
> diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
> index 1f7eea93c16..d56431b4145 100644
> --- a/gcc/tree-ssa-pre.cc
> +++ b/gcc/tree-ssa-pre.cc
> @@ -3622,19 +3622,51 @@ do_hoist_insertion (basic_block block)
>        && stmt_ends_bb_p (gsi_stmt (last)))
>      return false;
>
> -  /* Compute the set of hoistable expressions from ANTIC_IN.  First
> compute
> +  /* We have multiple successors, compute ANTIC_OUT by taking the
> intersection
> +     of all of ANTIC_IN translating through PHI nodes.  Note we do not
> have to
> +     worry about iteration stability here so just intersect the
> expression sets
> +     as well.  This is a simplification of what we do in
> compute_antic_aux.  */
> +  bitmap_set_t ANTIC_OUT = bitmap_set_new ();
> +  bool first = true;
> +  FOR_EACH_EDGE (e, ei, block->succs)
> +    {
> +      if (first)
> +       {
> +         phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
> +         first = false;
> +       }
> +      else if (!gimple_seq_empty_p (phi_nodes (e->dest)))
> +       {
> +         bitmap_set_t tmp = bitmap_set_new ();
> +         phi_translate_set (tmp, ANTIC_IN (e->dest), e);
> +         bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
> +         bitmap_and_into (&ANTIC_OUT->expressions, &tmp->expressions);
> +         bitmap_set_free (tmp);
> +       }
> +      else
> +       {
> +         bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN
> (e->dest)->values);
> +         bitmap_and_into (&ANTIC_OUT->expressions,
> +                          &ANTIC_IN (e->dest)->expressions);
> +       }
> +    }
> +
> +  /* Compute the set of hoistable expressions from ANTIC_OUT.  First
> compute
>       hoistable values.  */
>    bitmap_set hoistable_set;
>
> -  /* A hoistable value must be in ANTIC_IN(block)
> +  /* A hoistable value must be in ANTIC_OUT(block)
>       but not in AVAIL_OUT(BLOCK).  */
>    bitmap_initialize (&hoistable_set.values, &grand_bitmap_obstack);
>    bitmap_and_compl (&hoistable_set.values,
> -                   &ANTIC_IN (block)->values, &AVAIL_OUT (block)->values);
> +                   &ANTIC_OUT->values, &AVAIL_OUT (block)->values);
>
>    /* Short-cut for a common case: hoistable_set is empty.  */
>    if (bitmap_empty_p (&hoistable_set.values))
> -    return false;
> +    {
> +      bitmap_set_free (ANTIC_OUT);
> +      return false;
> +    }
>
>    /* Compute which of the hoistable values is in AVAIL_OUT of
>       at least one of the successors of BLOCK.  */
> @@ -3652,11 +3684,14 @@ do_hoist_insertion (basic_block block)
>
>    /* Short-cut for a common case: availout_in_some is empty.  */
>    if (bitmap_empty_p (&availout_in_some))
> -    return false;
> +    {
> +      bitmap_set_free (ANTIC_OUT);
> +      return false;
> +    }
>
>    /* Hack hoitable_set in-place so we can use
> sorted_array_from_bitmap_set.  */
>    bitmap_move (&hoistable_set.values, &availout_in_some);
> -  hoistable_set.expressions = ANTIC_IN (block)->expressions;
> +  hoistable_set.expressions = ANTIC_OUT->expressions;
>
>    /* Now finally construct the topological-ordered expression set.  */
>    vec<pre_expr> exprs = sorted_array_from_bitmap_set (&hoistable_set);
> @@ -3731,6 +3766,7 @@ do_hoist_insertion (basic_block block)
>      }
>
>    exprs.release ();
> +  bitmap_set_free (ANTIC_OUT);
>
>    return new_stuff;
>  }
> --
> 2.35.3
>

  reply	other threads:[~2023-05-24 10:07 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-23  9:55 Richard Biener
2023-05-24 10:07 ` Christophe Lyon [this message]
2023-05-24 10:41   ` Richard Biener
2023-05-24 12:45     ` Christophe Lyon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAPS5khbo9a19byiXLhKyNL2-EY7FjDnRvqbPtWCriUBdnNY9_w@mail.gmail.com \
    --to=christophe.lyon@linaro.org \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=rguenther@suse.de \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).