public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: liuhongt <hongtao.liu@intel.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>, Uros Bizjak <ubizjak@gmail.com>
Subject: Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
Date: Mon, 28 Feb 2022 18:26:55 -0800	[thread overview]
Message-ID: <CAMe9rOrnA=mYtqCcAP4OJsPctJvyJb1By1M5vMZanQO3UU9x7g@mail.gmail.com> (raw)
In-Reply-To: <20220301020312.8827-1-hongtao.liu@intel.com>

On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> .. in ix86_expand_vector_move and
> ix86_convert_const_wide_int_to_broadcast(called by the former).
>
> ix86_expand_vector_move is called by emit_move_insn, which is used by
> many pre-reload passes; ix86_gen_scratch_sse_rtx will break data flow
> when there's explicit usage of xmm7/xmm15/xmm31.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,},
> both with and without --with-cpu=native --with-arch=native.
>
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/104704
>         * config/i386/i386-expand.cc
>         (ix86_convert_const_wide_int_to_broadcast): Replace
>         ix86_gen_scratch_sse_rtx with gen_reg_rtx.
>         (ix86_expand_vector_move): Ditto.
>         * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
>         corresponding splitter after it.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
>         * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovdqa64.
>         * gcc.target/i386/pr100865-12b.c: Ditto.
>         * gcc.target/i386/pr100865-8b.c: Ditto.
>         * gcc.target/i386/pr100865-9b.c: Ditto.
>         * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
>         * gcc.target/i386/pr82942-1.c: Ditto.
>         * gcc.target/i386/pr82990-1.c: Ditto.
>         * gcc.target/i386/pr82990-3.c: Ditto.
>         * gcc.target/i386/pr82990-5.c: Ditto.
> ---
>  gcc/config/i386/i386-expand.cc               |  6 +--
>  gcc/config/i386/sse.md                       | 41 +++++++++++++++-----
>  gcc/testsuite/gcc.target/i386/incoming-11.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-11b.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-12b.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-8b.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pr100865-9b.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/pr82941-1.c    |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82942-1.c    |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82990-1.c    |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82990-3.c    |  3 +-
>  gcc/testsuite/gcc.target/i386/pr82990-5.c    |  3 +-
>  12 files changed, 45 insertions(+), 27 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index faa0191c6dd..75a28cdd89d 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
>    machine_mode vector_mode;
>    if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
>      gcc_unreachable ();
> -  rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> +  rtx target = gen_reg_rtx (vector_mode);

I think ix86_gen_scratch_sse_rtx should check
currently_expanding_gimple_stmt == NULL
to return gen_reg_rtx (vector_mode) instead.

>    bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
>                                                target,
>                                                GEN_INT (val_broadcast));
> @@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
>        if (!register_operand (op0, mode)
>           && !register_operand (op1, mode))
>         {
> -         rtx scratch = ix86_gen_scratch_sse_rtx (mode);
> +         rtx scratch = gen_reg_rtx (mode);
>           emit_move_insn (scratch, op1);
>           op1 = scratch;
>         }
> @@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
>        && !register_operand (op0, mode)
>        && !register_operand (op1, mode))
>      {
> -      rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
> +      rtx tmp = gen_reg_rtx (GET_MODE (op0));
>        emit_move_insn (tmp, op1);
>        emit_move_insn (op0, tmp);
>        return;
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3066ea3734a..d124545aa5d 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
>     (set_attr "mode" "V4SF")])
>
>  (define_insn "*vec_dupv4si"
> -  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
> +  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x,v")
>         (vec_duplicate:V4SI
> -         (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> +         (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
>    "TARGET_SSE"
>    "@
>     %vpshufd\t{$0, %1, %0|%0, %1, 0}
>     vbroadcastss\t{%1, %0|%0, %1}
> -   shufps\t{$0, %0, %0|%0, %0, 0}"
> -  [(set_attr "isa" "sse2,avx,noavx")
> -   (set_attr "type" "sselog1,ssemov,sselog1")
> -   (set_attr "length_immediate" "1,0,1")
> -   (set_attr "prefix_extra" "0,1,*")
> -   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> -   (set_attr "mode" "TI,V4SF,V4SF")])
> +   shufps\t{$0, %0, %0|%0, %0, 0}
> +   #"
> +  [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
> +   (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
> +   (set_attr "length_immediate" "1,0,1,1")
> +   (set_attr "prefix_extra" "0,1,*,0")
> +   (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
> +   (set_attr "mode" "TI,V4SF,V4SF,TI")
> +   (set (attr "preferred_for_speed")
> +     (cond [(eq_attr "alternative" "3")
> +             (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
> +          ]
> +          (symbol_ref "true")))])
> +
> +(define_split
> +  [(set (match_operand:V4SI 0 "sse_reg_operand")
> +       (vec_duplicate:V4SI
> +         (match_operand:SI 1 "general_reg_operand")))]
> +  "TARGET_SSE && reload_completed
> +   /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
> +      available, because then we can broadcast from GPRs directly.  */
> +   && !TARGET_AVX512VL"
> +  [(const_int 0)]
> +{
> +  emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
> +                               CONST0_RTX (V4SImode),
> +                               gen_lowpart (SImode, operands[1])));
> +  emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
> +  DONE;
> +})
>
>  (define_insn "*vec_dupv2di"
>    [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,x")
> diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
> index 4b822684b88..a830c96f7d1 100644
> --- a/gcc/testsuite/gcc.target/i386/incoming-11.c
> +++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
> @@ -15,4 +15,4 @@ void f()
>         for (i = 0; i < 100; i++) q[i] = 1;
>  }
>
> -/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> index 7e458e85cdd..fe7736c318c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> @@ -5,4 +5,4 @@
>
>  /* { dg-final { scan-assembler-times "movabsq" 1 } } */
>  /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> index dee0cfb016a..c9acfc7088f 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> @@ -5,4 +5,4 @@
>
>  /* { dg-final { scan-assembler-times "movabsq" 1 } } */
>  /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> index 4b7dd7cee3e..fa474c98a37 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> @@ -4,4 +4,4 @@
>  #include "pr100865-8a.c"
>
>  /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> index a315dde7c52..0714c3c9d6a 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> @@ -4,4 +4,4 @@
>  #include "pr100865-9a.c"
>
>  /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> index c3be2f5b797..d7e530d5116 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> @@ -11,5 +11,4 @@ pr82941 ()
>    z = y;
>  }
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> index 29ead049a67..9cdf81a9d60 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> @@ -3,5 +3,4 @@
>
>  #include "pr82941-1.c"
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> index bbf580fea77..ff1d6d40eb2 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> @@ -11,5 +11,4 @@ pr82941 ()
>    z = y;
>  }
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> index 89ddb20adb3..201fa98d8d4 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> @@ -3,5 +3,4 @@
>
>  #include "pr82941-1.c"
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> index b9da0e706b1..008217af0b8 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> @@ -11,5 +11,4 @@ pr82941 ()
>    z = y;
>  }
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1  } } */
> --
> 2.18.1
>


-- 
H.J.

  reply	other threads:[~2022-03-01  2:27 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-01  2:03 liuhongt
2022-03-01  2:26 ` H.J. Lu [this message]
2022-03-01  2:38   ` H.J. Lu
2022-03-01  5:45     ` Hongtao Liu
2022-03-01 15:06       ` H.J. Lu
2022-03-01 22:48         ` H.J. Lu
2022-03-02  1:32           ` Hongtao Liu
2022-03-02  2:01             ` H.J. Lu
2022-03-01  3:01   ` Hongtao Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAMe9rOrnA=mYtqCcAP4OJsPctJvyJb1By1M5vMZanQO3UU9x7g@mail.gmail.com' \
    --to=hjl.tools@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hongtao.liu@intel.com \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).