public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Christophe Lyon <christophe.lyon@linaro.org>
To: Richard Biener <rguenther@suse.de>
Cc: gcc-patches@gcc.gnu.org,
	 "richard.sandiford@arm.com" <richard.sandiford@arm.com>
Subject: Re: [PATCH] tree-optimization/109849 - missed code hoisting
Date: Wed, 24 May 2023 14:45:40 +0200	[thread overview]
Message-ID: <CAPS5khacUeEnwa7YabHv0VzO46DmZBPhQcE=gkfCzdtzgDuX0w@mail.gmail.com> (raw)
In-Reply-To: <nycvar.YFH.7.77.849.2305241040570.4723@jbgna.fhfr.qr>

[-- Attachment #1: Type: text/plain, Size: 6414 bytes --]

On Wed, 24 May 2023 at 12:41, Richard Biener <rguenther@suse.de> wrote:

> On Wed, 24 May 2023, Christophe Lyon wrote:
>
> > Hi Richard,
> >
> > On Tue, 23 May 2023 at 11:55, Richard Biener via Gcc-patches <
> > gcc-patches@gcc.gnu.org> wrote:
> >
> > > The following fixes code hoisting to properly consider ANTIC_OUT
> instead
> > > of ANTIC_IN.  That's a bit expensive to re-compute but since we no
> > > longer iterate we're doing this only once per BB which should be
> > > acceptable.  This avoids missing hoistings to the end of blocks where
> > > something in the block clobbers the hoisted value.
> > >
> > > Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
> > >
> > >         PR tree-optimization/109849
> > >         * tree-ssa-pre.cc (do_hoist_insertion): Compute ANTIC_OUT
> > >         and use that to determine what to hoist.
> > >
> > >         * gcc.dg/tree-ssa/ssa-hoist-8.c: New testcase.
> > >
> >
> > This patch causes a regression on aarch64:
> > gcc.target/aarch64/sve/fmla_2.c: \\tst1d found 3 times
> >  FAIL: gcc.target/aarch64/sve/fmla_2.c scan-assembler-times \\tst1d 2
> >
> >
> > We used to generate:
> >         mov     x6, 0
> >         mov     w7, 55
> >         whilelo p7.d, wzr, w7
> >         .p2align 3,,7
> > .L2:
> >         ld1d    z30.d, p7/z, [x5, x6, lsl 3]
> >         ld1d    z31.d, p7/z, [x4, x6, lsl 3]
> >         cmpne   p6.d, p7/z, z30.d, #0
> >         ld1d    z30.d, p7/z, [x3, x6, lsl 3]
> >         ld1d    z29.d, p6/z, [x2, x6, lsl 3]
> >         movprfx z28, z30
> >         fmla    z28.d, p6/m, z31.d, z29.d
> >         fmla    z31.d, p6/m, z30.d, z29.d
> >         st1d    z28.d, p7, [x1, x6, lsl 3]
> >         st1d    z31.d, p7, [x0, x6, lsl 3]
> >         incd    x6
> >         whilelo p7.d, w6, w7
> >         b.any   .L2
> >
> >
> > But now:
> >         mov     x6, 0
> >         mov     w7, 55
> >         ptrue   p4.b, all
> >         whilelo p7.d, wzr, w7
> >         .p2align 3,,7
> > .L2:
> >         ld1d    z30.d, p7/z, [x5, x6, lsl 3]
> >         ld1d    z31.d, p7/z, [x4, x6, lsl 3]
> >         cmpne   p6.d, p7/z, z30.d, #0
> >         cmpeq   p5.d, p7/z, z30.d, #0
> >         ld1d    z29.d, p6/z, [x2, x6, lsl 3]
> >         ld1d    z28.d, p6/z, [x3, x6, lsl 3]
> >         ld1d    z30.d, p5/z, [x3, x6, lsl 3]
> >         movprfx z27, z31
> >         fmla    z27.d, p4/m, z29.d, z28.d
> >         movprfx z30.d, p6/m, z28.d
> >         fmla    z30.d, p6/m, z31.d, z29.d
> >         st1d    z27.d, p6, [x0, x6, lsl 3]
> >         st1d    z30.d, p7, [x1, x6, lsl 3]
> >         st1d    z31.d, p5, [x0, x6, lsl 3]
> >         incd    x6
> >         whilelo p7.d, w6, w7
> >         b.any   .L2
>
> Thanks for reporting.  I'm testing the following together with
> a testcase that's not architecture specific.
>
> Richard.
>
> From 2340df60dd9192b30b02de5b34f9cb7c16806430 Mon Sep 17 00:00:00 2001
> From: Richard Biener <rguenther@suse.de>
> Date: Wed, 24 May 2023 12:36:28 +0200
> Subject: [PATCH] tree-optimization/109849 - fix fallout of PRE hoisting
> change
> To: gcc-patches@gcc.gnu.org
>
> The PR109849 fix made us no longer hoist some memory loads because
> of the expression set intersection.  We can still avoid to compute
> the union by simply taking the first sets expressions and leave
> the pruning of expressions with values not suitable for hoisting
> to sorted_array_from_bitmap_set.
>
>         PR tree-optimization/109849
>         * tree-ssa-pre.cc (do_hoist_insertion): Do not intersect
>         expressions but take the first sets.
>
>         * gcc.dg/tree-ssa/ssa-hoist-9.c: New testcase.
>

Thanks, I confirm that this fixes the problem I reported on aarch64.

Christophe


> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-9.c | 20 ++++++++++++++++++++
>  gcc/tree-ssa-pre.cc                         | 12 ++++--------
>  2 files changed, 24 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-9.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-9.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-9.c
> new file mode 100644
> index 00000000000..388f79fd80f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-9.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-pre-stats" } */
> +
> +int foo (int flag, int * __restrict a, int * __restrict b)
> +{
> +  int res;
> +  if (flag)
> +    res = *a + *b;
> +  else
> +    {
> +      res = *a;
> +      *a = 1;
> +      res += *b;
> +    }
> +  return res;
> +}
> +
> +/* { dg-final { scan-tree-dump "HOIST inserted: 3" "pre" } } */
> +/* { dg-final { scan-tree-dump-times " = \\\*" 2 "pre" } } */
> +/* { dg-final { scan-tree-dump-times " = \[^\r\n\]* \\\+ \[^\r\n\]*;" 1
> "pre" } } */
> diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
> index b1ceea90a8e..7bbfa5ac43d 100644
> --- a/gcc/tree-ssa-pre.cc
> +++ b/gcc/tree-ssa-pre.cc
> @@ -3625,8 +3625,9 @@ do_hoist_insertion (basic_block block)
>
>    /* We have multiple successors, compute ANTIC_OUT by taking the
> intersection
>       of all of ANTIC_IN translating through PHI nodes.  Note we do not
> have to
> -     worry about iteration stability here so just intersect the
> expression sets
> -     as well.  This is a simplification of what we do in
> compute_antic_aux.  */
> +     worry about iteration stability here so just use the expression set
> +     from the first set and prune that by sorted_array_from_bitmap_set.
> +     This is a simplification of what we do in compute_antic_aux.  */
>    bitmap_set_t ANTIC_OUT = bitmap_set_new ();
>    bool first = true;
>    FOR_EACH_EDGE (e, ei, block->succs)
> @@ -3641,15 +3642,10 @@ do_hoist_insertion (basic_block block)
>           bitmap_set_t tmp = bitmap_set_new ();
>           phi_translate_set (tmp, ANTIC_IN (e->dest), e);
>           bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
> -         bitmap_and_into (&ANTIC_OUT->expressions, &tmp->expressions);
>           bitmap_set_free (tmp);
>         }
>        else
> -       {
> -         bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN
> (e->dest)->values);
> -         bitmap_and_into (&ANTIC_OUT->expressions,
> -                          &ANTIC_IN (e->dest)->expressions);
> -       }
> +       bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values);
>      }
>
>    /* Compute the set of hoistable expressions from ANTIC_OUT.  First
> compute
> --
> 2.35.3
>
>

      reply	other threads:[~2023-05-24 12:45 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-23  9:55 Richard Biener
2023-05-24 10:07 ` Christophe Lyon
2023-05-24 10:41   ` Richard Biener
2023-05-24 12:45     ` Christophe Lyon [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAPS5khacUeEnwa7YabHv0VzO46DmZBPhQcE=gkfCzdtzgDuX0w@mail.gmail.com' \
    --to=christophe.lyon@linaro.org \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=rguenther@suse.de \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).