* [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
@ 2022-03-01 2:03 liuhongt
2022-03-01 2:26 ` H.J. Lu
0 siblings, 1 reply; 9+ messages in thread
From: liuhongt @ 2022-03-01 2:03 UTC (permalink / raw)
To: gcc-patches
.. in ix86_expand_vector_move and
ix86_convert_const_wide_int_to_broadcast(called by the former).
ix86_expand_vector_move is called by emit_move_insn which is used by
many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
when there's explicit usage of xmm7/xmm15/xmm31.
Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
for both with and without --with-cpu=native --with-arch=native.
Ok for trunk?
gcc/ChangeLog:
PR target/104704
* config/i386/i386-expand.cc
(ix86_convert_const_wide_int_to_broadcast): Replace
ix86_gen_scratch_sse_rtx with gen_reg_rtx.
(ix86_expand_vector_move): Ditto.
* config/i386/sse.md (*vec_dupv4si): Add alternative $r and
corresponding splitter after it.
gcc/testsuite/ChangeLog:
* gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
* gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovdqa64.
* gcc.target/i386/pr100865-12b.c: Ditto.
* gcc.target/i386/pr100865-8b.c: Ditto.
* gcc.target/i386/pr100865-9b.c: Ditto.
* gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
* gcc.target/i386/pr82942-1.c: Ditto.
* gcc.target/i386/pr82990-1.c: Ditto.
* gcc.target/i386/pr82990-3.c: Ditto.
* gcc.target/i386/pr82990-5.c: Ditto.
---
gcc/config/i386/i386-expand.cc | 6 +--
gcc/config/i386/sse.md | 41 +++++++++++++++-----
gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
12 files changed, 45 insertions(+), 27 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index faa0191c6dd..75a28cdd89d 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
machine_mode vector_mode;
if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
gcc_unreachable ();
- rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
+ rtx target = gen_reg_rtx (vector_mode);
bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
target,
GEN_INT (val_broadcast));
@@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
if (!register_operand (op0, mode)
&& !register_operand (op1, mode))
{
- rtx scratch = ix86_gen_scratch_sse_rtx (mode);
+ rtx scratch = gen_reg_rtx (mode);
emit_move_insn (scratch, op1);
op1 = scratch;
}
@@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
&& !register_operand (op0, mode)
&& !register_operand (op1, mode))
{
- rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
+ rtx tmp = gen_reg_rtx (GET_MODE (op0));
emit_move_insn (tmp, op1);
emit_move_insn (op0, tmp);
return;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3066ea3734a..d124545aa5d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
(set_attr "mode" "V4SF")])
(define_insn "*vec_dupv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,v")
(vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
+ (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
"TARGET_SSE"
"@
%vpshufd\t{$0, %1, %0|%0, %1, 0}
vbroadcastss\t{%1, %0|%0, %1}
- shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "isa" "sse2,avx,noavx")
- (set_attr "type" "sselog1,ssemov,sselog1")
- (set_attr "length_immediate" "1,0,1")
- (set_attr "prefix_extra" "0,1,*")
- (set_attr "prefix" "maybe_vex,maybe_evex,orig")
- (set_attr "mode" "TI,V4SF,V4SF")])
+ shufps\t{$0, %0, %0|%0, %0, 0}
+ #"
+ [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
+ (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
+ (set_attr "length_immediate" "1,0,1,1")
+ (set_attr "prefix_extra" "0,1,*,0")
+ (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
+ (set_attr "mode" "TI,V4SF,V4SF,TI")
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "3")
+ (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+ ]
+ (symbol_ref "true")))])
+
+(define_split
+ [(set (match_operand:V4SI 0 "sse_reg_operand")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "general_reg_operand")))]
+ "TARGET_SSE && reload_completed
+ /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
+ available, because then we can broadcast from GPRs directly. */
+ && !TARGET_AVX512VL"
+ [(const_int 0)]
+{
+ emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
+ CONST0_RTX (V4SImode),
+ gen_lowpart (SImode, operands[1])));
+ emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
+ DONE;
+})
(define_insn "*vec_dupv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
index 4b822684b88..a830c96f7d1 100644
--- a/gcc/testsuite/gcc.target/i386/incoming-11.c
+++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
@@ -15,4 +15,4 @@ void f()
for (i = 0; i < 100; i++) q[i] = 1;
}
-/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
+/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
index 7e458e85cdd..fe7736c318c 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
@@ -5,4 +5,4 @@
/* { dg-final { scan-assembler-times "movabsq" 1 } } */
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
index dee0cfb016a..c9acfc7088f 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
@@ -5,4 +5,4 @@
/* { dg-final { scan-assembler-times "movabsq" 1 } } */
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
index 4b7dd7cee3e..fa474c98a37 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
@@ -4,4 +4,4 @@
#include "pr100865-8a.c"
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
index a315dde7c52..0714c3c9d6a 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
@@ -4,4 +4,4 @@
#include "pr100865-9a.c"
/* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
index c3be2f5b797..d7e530d5116 100644
--- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
@@ -11,5 +11,4 @@ pr82941 ()
z = y;
}
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
index 29ead049a67..9cdf81a9d60 100644
--- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
@@ -3,5 +3,4 @@
#include "pr82941-1.c"
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
index bbf580fea77..ff1d6d40eb2 100644
--- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
@@ -11,5 +11,4 @@ pr82941 ()
z = y;
}
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
index 89ddb20adb3..201fa98d8d4 100644
--- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
@@ -3,5 +3,4 @@
#include "pr82941-1.c"
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
index b9da0e706b1..008217af0b8 100644
--- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
@@ -11,5 +11,4 @@ pr82941 ()
z = y;
}
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
--
2.18.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-01 2:03 [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx liuhongt
@ 2022-03-01 2:26 ` H.J. Lu
2022-03-01 2:38 ` H.J. Lu
2022-03-01 3:01 ` Hongtao Liu
0 siblings, 2 replies; 9+ messages in thread
From: H.J. Lu @ 2022-03-01 2:26 UTC (permalink / raw)
To: liuhongt; +Cc: GCC Patches, Uros Bizjak
On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> .. in ix86_expand_vector_move and
> ix86_convert_const_wide_int_to_broadcast(called by the former).
>
> ix86_expand_vector_move is called by emit_move_insn which is used by
> many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> when there's explicit usage of xmm7/xmm15/xmm31.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> for both w/and w/o --with-cpu=native --with-arch=native.
>
> Ok for trunk?
>
> gcc/ChangeLog:
>
> PR target/104704
> * config/i386/i386-expand.cc
> (ix86_convert_const_wide_int_to_broadcast): Replace
> ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> (ix86_expand_vector_move): Ditto.
> * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> corresponding splitter after it.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovdqa64.
> * gcc.target/i386/pr100865-12b.c: Ditto.
> * gcc.target/i386/pr100865-8b.c: Ditto.
> * gcc.target/i386/pr100865-9b.c: Ditto.
> * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> * gcc.target/i386/pr82942-1.c: Ditto.
> * gcc.target/i386/pr82990-1.c: Ditto.
> * gcc.target/i386/pr82990-3.c: Ditto.
> * gcc.target/i386/pr82990-5.c: Ditto.
> ---
> gcc/config/i386/i386-expand.cc | 6 +--
> gcc/config/i386/sse.md | 41 +++++++++++++++-----
> gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> 12 files changed, 45 insertions(+), 27 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index faa0191c6dd..75a28cdd89d 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> machine_mode vector_mode;
> if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> gcc_unreachable ();
> - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> + rtx target = gen_reg_rtx (vector_mode);
I think ix86_gen_scratch_sse_rtx should check
currently_expanding_gimple_stmt == NULL
to return gen_reg_rtx (vector_mode) instead.
> bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
> target,
> GEN_INT (val_broadcast));
> @@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> if (!register_operand (op0, mode)
> && !register_operand (op1, mode))
> {
> - rtx scratch = ix86_gen_scratch_sse_rtx (mode);
> + rtx scratch = gen_reg_rtx (mode);
> emit_move_insn (scratch, op1);
> op1 = scratch;
> }
> @@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> && !register_operand (op0, mode)
> && !register_operand (op1, mode))
> {
> - rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
> + rtx tmp = gen_reg_rtx (GET_MODE (op0));
> emit_move_insn (tmp, op1);
> emit_move_insn (op0, tmp);
> return;
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3066ea3734a..d124545aa5d 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
> (set_attr "mode" "V4SF")])
>
> (define_insn "*vec_dupv4si"
> - [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
> + [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,v")
> (vec_duplicate:V4SI
> - (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> + (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
> "TARGET_SSE"
> "@
> %vpshufd\t{$0, %1, %0|%0, %1, 0}
> vbroadcastss\t{%1, %0|%0, %1}
> - shufps\t{$0, %0, %0|%0, %0, 0}"
> - [(set_attr "isa" "sse2,avx,noavx")
> - (set_attr "type" "sselog1,ssemov,sselog1")
> - (set_attr "length_immediate" "1,0,1")
> - (set_attr "prefix_extra" "0,1,*")
> - (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> - (set_attr "mode" "TI,V4SF,V4SF")])
> + shufps\t{$0, %0, %0|%0, %0, 0}
> + #"
> + [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
> + (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
> + (set_attr "length_immediate" "1,0,1,1")
> + (set_attr "prefix_extra" "0,1,*,0")
> + (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
> + (set_attr "mode" "TI,V4SF,V4SF,TI")
> + (set (attr "preferred_for_speed")
> + (cond [(eq_attr "alternative" "3")
> + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
> + ]
> + (symbol_ref "true")))])
> +
> +(define_split
> + [(set (match_operand:V4SI 0 "sse_reg_operand")
> + (vec_duplicate:V4SI
> + (match_operand:SI 1 "general_reg_operand")))]
> + "TARGET_SSE && reload_completed
> + /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
> + available, because then we can broadcast from GPRs directly. */
> + && !TARGET_AVX512VL"
> + [(const_int 0)]
> +{
> + emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
> + CONST0_RTX (V4SImode),
> + gen_lowpart (SImode, operands[1])));
> + emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
> + DONE;
> +})
>
> (define_insn "*vec_dupv2di"
> [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
> diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
> index 4b822684b88..a830c96f7d1 100644
> --- a/gcc/testsuite/gcc.target/i386/incoming-11.c
> +++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
> @@ -15,4 +15,4 @@ void f()
> for (i = 0; i < 100; i++) q[i] = 1;
> }
>
> -/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> index 7e458e85cdd..fe7736c318c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> @@ -5,4 +5,4 @@
>
> /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> index dee0cfb016a..c9acfc7088f 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> @@ -5,4 +5,4 @@
>
> /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> index 4b7dd7cee3e..fa474c98a37 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> @@ -4,4 +4,4 @@
> #include "pr100865-8a.c"
>
> /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> index a315dde7c52..0714c3c9d6a 100644
> --- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> @@ -4,4 +4,4 @@
> #include "pr100865-9a.c"
>
> /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> index c3be2f5b797..d7e530d5116 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> @@ -11,5 +11,4 @@ pr82941 ()
> z = y;
> }
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> index 29ead049a67..9cdf81a9d60 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> @@ -3,5 +3,4 @@
>
> #include "pr82941-1.c"
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> index bbf580fea77..ff1d6d40eb2 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> @@ -11,5 +11,4 @@ pr82941 ()
> z = y;
> }
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> index 89ddb20adb3..201fa98d8d4 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> @@ -3,5 +3,4 @@
>
> #include "pr82941-1.c"
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> index b9da0e706b1..008217af0b8 100644
> --- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
> +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> @@ -11,5 +11,4 @@ pr82941 ()
> z = y;
> }
>
> -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> --
> 2.18.1
>
--
H.J.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-01 2:26 ` H.J. Lu
@ 2022-03-01 2:38 ` H.J. Lu
2022-03-01 5:45 ` Hongtao Liu
2022-03-01 3:01 ` Hongtao Liu
1 sibling, 1 reply; 9+ messages in thread
From: H.J. Lu @ 2022-03-01 2:38 UTC (permalink / raw)
To: liuhongt; +Cc: GCC Patches, Uros Bizjak
On Mon, Feb 28, 2022 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > .. in ix86_expand_vector_move and
> > ix86_convert_const_wide_int_to_broadcast(called by the former).
> >
> > ix86_expand_vector_move is called by emit_move_insn which is used by
> > many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> > when there's explicit usage of xmm7/xmm15/xmm31.
> >
> > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> > for both w/and w/o --with-cpu=native --with-arch=native.
> >
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > PR target/104704
> > * config/i386/i386-expand.cc
> > (ix86_convert_const_wide_int_to_broadcast): Replace
> > ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> > (ix86_expand_vector_move): Ditto.
> > * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> > corresponding splitter after it.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> > * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovdqa64.
> > * gcc.target/i386/pr100865-12b.c: Ditto.
> > * gcc.target/i386/pr100865-8b.c: Ditto.
> > * gcc.target/i386/pr100865-9b.c: Ditto.
> > * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> > * gcc.target/i386/pr82942-1.c: Ditto.
> > * gcc.target/i386/pr82990-1.c: Ditto.
> > * gcc.target/i386/pr82990-3.c: Ditto.
> > * gcc.target/i386/pr82990-5.c: Ditto.
> > ---
> > gcc/config/i386/i386-expand.cc | 6 +--
> > gcc/config/i386/sse.md | 41 +++++++++++++++-----
> > gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> > 12 files changed, 45 insertions(+), 27 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > index faa0191c6dd..75a28cdd89d 100644
> > --- a/gcc/config/i386/i386-expand.cc
> > +++ b/gcc/config/i386/i386-expand.cc
> > @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> > machine_mode vector_mode;
> > if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> > gcc_unreachable ();
> > - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> > + rtx target = gen_reg_rtx (vector_mode);
>
> I think ix86_gen_scratch_sse_rtx should check
> currently_expanding_gimple_stmt == NULL
> to return gen_reg_rtx (vector_mode) instead.
Like this:
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b2bf90576d5..6c0e4929914 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23786,7 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode
mode1, machine_mode,
rtx
ix86_gen_scratch_sse_rtx (machine_mode mode)
{
- if (TARGET_SSE && !lra_in_progress)
+ if (TARGET_SSE && currently_expanding_gimple_stmt)
{
unsigned int regno;
if (TARGET_64BIT)
(END)
> > bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
> > target,
> > GEN_INT (val_broadcast));
> > @@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > if (!register_operand (op0, mode)
> > && !register_operand (op1, mode))
> > {
> > - rtx scratch = ix86_gen_scratch_sse_rtx (mode);
> > + rtx scratch = gen_reg_rtx (mode);
> > emit_move_insn (scratch, op1);
> > op1 = scratch;
> > }
> > @@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > && !register_operand (op0, mode)
> > && !register_operand (op1, mode))
> > {
> > - rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
> > + rtx tmp = gen_reg_rtx (GET_MODE (op0));
> > emit_move_insn (tmp, op1);
> > emit_move_insn (op0, tmp);
> > return;
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 3066ea3734a..d124545aa5d 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
> > (set_attr "mode" "V4SF")])
> >
> > (define_insn "*vec_dupv4si"
> > - [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
> > + [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,v")
> > (vec_duplicate:V4SI
> > - (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> > + (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
> > "TARGET_SSE"
> > "@
> > %vpshufd\t{$0, %1, %0|%0, %1, 0}
> > vbroadcastss\t{%1, %0|%0, %1}
> > - shufps\t{$0, %0, %0|%0, %0, 0}"
> > - [(set_attr "isa" "sse2,avx,noavx")
> > - (set_attr "type" "sselog1,ssemov,sselog1")
> > - (set_attr "length_immediate" "1,0,1")
> > - (set_attr "prefix_extra" "0,1,*")
> > - (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> > - (set_attr "mode" "TI,V4SF,V4SF")])
> > + shufps\t{$0, %0, %0|%0, %0, 0}
> > + #"
> > + [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
> > + (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
> > + (set_attr "length_immediate" "1,0,1,1")
> > + (set_attr "prefix_extra" "0,1,*,0")
> > + (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
> > + (set_attr "mode" "TI,V4SF,V4SF,TI")
> > + (set (attr "preferred_for_speed")
> > + (cond [(eq_attr "alternative" "3")
> > + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
> > + ]
> > + (symbol_ref "true")))])
> > +
> > +(define_split
> > + [(set (match_operand:V4SI 0 "sse_reg_operand")
> > + (vec_duplicate:V4SI
> > + (match_operand:SI 1 "general_reg_operand")))]
> > + "TARGET_SSE && reload_completed
> > + /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
> > + available, because then we can broadcast from GPRs directly. */
> > + && !TARGET_AVX512VL"
> > + [(const_int 0)]
> > +{
> > + emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
> > + CONST0_RTX (V4SImode),
> > + gen_lowpart (SImode, operands[1])));
> > + emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
> > + DONE;
> > +})
> >
> > (define_insn "*vec_dupv2di"
> > [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
> > diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > index 4b822684b88..a830c96f7d1 100644
> > --- a/gcc/testsuite/gcc.target/i386/incoming-11.c
> > +++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > @@ -15,4 +15,4 @@ void f()
> > for (i = 0; i < 100; i++) q[i] = 1;
> > }
> >
> > -/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > index 7e458e85cdd..fe7736c318c 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > @@ -5,4 +5,4 @@
> >
> > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > index dee0cfb016a..c9acfc7088f 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > @@ -5,4 +5,4 @@
> >
> > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > index 4b7dd7cee3e..fa474c98a37 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > @@ -4,4 +4,4 @@
> > #include "pr100865-8a.c"
> >
> > /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > index a315dde7c52..0714c3c9d6a 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > @@ -4,4 +4,4 @@
> > #include "pr100865-9a.c"
> >
> > /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > index c3be2f5b797..d7e530d5116 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > @@ -11,5 +11,4 @@ pr82941 ()
> > z = y;
> > }
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > index 29ead049a67..9cdf81a9d60 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > @@ -3,5 +3,4 @@
> >
> > #include "pr82941-1.c"
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > index bbf580fea77..ff1d6d40eb2 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > @@ -11,5 +11,4 @@ pr82941 ()
> > z = y;
> > }
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > index 89ddb20adb3..201fa98d8d4 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > @@ -3,5 +3,4 @@
> >
> > #include "pr82941-1.c"
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > index b9da0e706b1..008217af0b8 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > @@ -11,5 +11,4 @@ pr82941 ()
> > z = y;
> > }
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > --
> > 2.18.1
> >
>
>
> --
> H.J.
--
H.J.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-01 2:26 ` H.J. Lu
2022-03-01 2:38 ` H.J. Lu
@ 2022-03-01 3:01 ` Hongtao Liu
1 sibling, 0 replies; 9+ messages in thread
From: Hongtao Liu @ 2022-03-01 3:01 UTC (permalink / raw)
To: H.J. Lu; +Cc: liuhongt, GCC Patches
On Tue, Mar 1, 2022 at 10:27 AM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > .. in ix86_expand_vector_move and
> > ix86_convert_const_wide_int_to_broadcast(called by the former).
> >
> > ix86_expand_vector_move is called by emit_move_insn which is used by
> > many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> > when there's explicit usage of xmm7/xmm15/xmm31.
> >
> > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> > for both w/and w/o --with-cpu=native --with-arch=native.
> >
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > PR target/104704
> > * config/i386/i386-expand.cc
> > (ix86_convert_const_wide_int_to_broadcast): Replace
> > ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> > (ix86_expand_vector_move): Ditto.
> > * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> > corresponding splitter after it.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> > * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovdqa64.
> > * gcc.target/i386/pr100865-12b.c: Ditto.
> > * gcc.target/i386/pr100865-8b.c: Ditto.
> > * gcc.target/i386/pr100865-9b.c: Ditto.
> > * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> > * gcc.target/i386/pr82942-1.c: Ditto.
> > * gcc.target/i386/pr82990-1.c: Ditto.
> > * gcc.target/i386/pr82990-3.c: Ditto.
> > * gcc.target/i386/pr82990-5.c: Ditto.
> > ---
> > gcc/config/i386/i386-expand.cc | 6 +--
> > gcc/config/i386/sse.md | 41 +++++++++++++++-----
> > gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> > gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> > gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> > 12 files changed, 45 insertions(+), 27 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > index faa0191c6dd..75a28cdd89d 100644
> > --- a/gcc/config/i386/i386-expand.cc
> > +++ b/gcc/config/i386/i386-expand.cc
> > @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> > machine_mode vector_mode;
> > if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> > gcc_unreachable ();
> > - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> > + rtx target = gen_reg_rtx (vector_mode);
>
> I think ix86_gen_scratch_sse_rtx should check
> currently_expanding_gimple_stmt == NULL
> to return gen_reg_rtx (vector_mode) instead.
>
I'm a bit worried about continuing to use the hard register, even if
only at the expand stage: if there is a recursive call to
expand_vector_move, we will still mess up the data flow.
For example, there's an emit_move_insn in ix86_expand_vector_init_duplicate.
> > bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
> > target,
> > GEN_INT (val_broadcast));
> > @@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > if (!register_operand (op0, mode)
> > && !register_operand (op1, mode))
> > {
> > - rtx scratch = ix86_gen_scratch_sse_rtx (mode);
> > + rtx scratch = gen_reg_rtx (mode);
> > emit_move_insn (scratch, op1);
> > op1 = scratch;
> > }
> > @@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > && !register_operand (op0, mode)
> > && !register_operand (op1, mode))
> > {
> > - rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
> > + rtx tmp = gen_reg_rtx (GET_MODE (op0));
> > emit_move_insn (tmp, op1);
> > emit_move_insn (op0, tmp);
> > return;
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 3066ea3734a..d124545aa5d 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
> > (set_attr "mode" "V4SF")])
> >
> > (define_insn "*vec_dupv4si"
> > - [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
> > + [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,v")
> > (vec_duplicate:V4SI
> > - (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> > + (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
> > "TARGET_SSE"
> > "@
> > %vpshufd\t{$0, %1, %0|%0, %1, 0}
> > vbroadcastss\t{%1, %0|%0, %1}
> > - shufps\t{$0, %0, %0|%0, %0, 0}"
> > - [(set_attr "isa" "sse2,avx,noavx")
> > - (set_attr "type" "sselog1,ssemov,sselog1")
> > - (set_attr "length_immediate" "1,0,1")
> > - (set_attr "prefix_extra" "0,1,*")
> > - (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> > - (set_attr "mode" "TI,V4SF,V4SF")])
> > + shufps\t{$0, %0, %0|%0, %0, 0}
> > + #"
> > + [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
> > + (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
> > + (set_attr "length_immediate" "1,0,1,1")
> > + (set_attr "prefix_extra" "0,1,*,0")
> > + (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
> > + (set_attr "mode" "TI,V4SF,V4SF,TI")
> > + (set (attr "preferred_for_speed")
> > + (cond [(eq_attr "alternative" "3")
> > + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
> > + ]
> > + (symbol_ref "true")))])
> > +
> > +(define_split
> > + [(set (match_operand:V4SI 0 "sse_reg_operand")
> > + (vec_duplicate:V4SI
> > + (match_operand:SI 1 "general_reg_operand")))]
> > + "TARGET_SSE && reload_completed
> > + /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
> > + available, because then we can broadcast from GPRs directly. */
> > + && !TARGET_AVX512VL"
> > + [(const_int 0)]
> > +{
> > + emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
> > + CONST0_RTX (V4SImode),
> > + gen_lowpart (SImode, operands[1])));
> > + emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
> > + DONE;
> > +})
> >
> > (define_insn "*vec_dupv2di"
> > [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
> > diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > index 4b822684b88..a830c96f7d1 100644
> > --- a/gcc/testsuite/gcc.target/i386/incoming-11.c
> > +++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > @@ -15,4 +15,4 @@ void f()
> > for (i = 0; i < 100; i++) q[i] = 1;
> > }
> >
> > -/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > index 7e458e85cdd..fe7736c318c 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > @@ -5,4 +5,4 @@
> >
> > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > index dee0cfb016a..c9acfc7088f 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > @@ -5,4 +5,4 @@
> >
> > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > index 4b7dd7cee3e..fa474c98a37 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > @@ -4,4 +4,4 @@
> > #include "pr100865-8a.c"
> >
> > /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > index a315dde7c52..0714c3c9d6a 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > @@ -4,4 +4,4 @@
> > #include "pr100865-9a.c"
> >
> > /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > index c3be2f5b797..d7e530d5116 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > @@ -11,5 +11,4 @@ pr82941 ()
> > z = y;
> > }
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > index 29ead049a67..9cdf81a9d60 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > @@ -3,5 +3,4 @@
> >
> > #include "pr82941-1.c"
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > index bbf580fea77..ff1d6d40eb2 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > @@ -11,5 +11,4 @@ pr82941 ()
> > z = y;
> > }
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > index 89ddb20adb3..201fa98d8d4 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > @@ -3,5 +3,4 @@
> >
> > #include "pr82941-1.c"
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > index b9da0e706b1..008217af0b8 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > @@ -11,5 +11,4 @@ pr82941 ()
> > z = y;
> > }
> >
> > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > --
> > 2.18.1
> >
>
>
> --
> H.J.
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-01 2:38 ` H.J. Lu
@ 2022-03-01 5:45 ` Hongtao Liu
2022-03-01 15:06 ` H.J. Lu
0 siblings, 1 reply; 9+ messages in thread
From: Hongtao Liu @ 2022-03-01 5:45 UTC (permalink / raw)
To: H.J. Lu; +Cc: liuhongt, GCC Patches
On Tue, Mar 1, 2022 at 10:39 AM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Mon, Feb 28, 2022 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > .. in ix86_expand_vector_move and
> > > ix86_convert_const_wide_int_to_broadcast(called by the former).
> > >
> > > ix86_expand_vector_move is called by emit_move_insn which is used by
> > > many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> > > when there's explicit usage of xmm7/xmm15/xmm31.
> > >
> > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> > > for both w/and w/o --with-cpu=native --with-arch=native.
> > >
> > > Ok for trunk?
> > >
> > > gcc/ChangeLog:
> > >
> > > PR target/104704
> > > * config/i386/i386-expand.cc
> > > (ix86_convert_const_wide_int_to_broadcast): Replace
> > > ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> > > (ix86_expand_vector_move): Ditto.
> > > * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> > > corresponding splitter after it.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> > > * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovdqa64.
> > > * gcc.target/i386/pr100865-12b.c: Ditto.
> > > * gcc.target/i386/pr100865-8b.c: Ditto.
> > > * gcc.target/i386/pr100865-9b.c: Ditto.
> > > * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> > > * gcc.target/i386/pr82942-1.c: Ditto.
> > > * gcc.target/i386/pr82990-1.c: Ditto.
> > > * gcc.target/i386/pr82990-3.c: Ditto.
> > > * gcc.target/i386/pr82990-5.c: Ditto.
> > > ---
> > > gcc/config/i386/i386-expand.cc | 6 +--
> > > gcc/config/i386/sse.md | 41 +++++++++++++++-----
> > > gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> > > gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> > > gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> > > gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> > > gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> > > gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> > > 12 files changed, 45 insertions(+), 27 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > index faa0191c6dd..75a28cdd89d 100644
> > > --- a/gcc/config/i386/i386-expand.cc
> > > +++ b/gcc/config/i386/i386-expand.cc
> > > @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> > > machine_mode vector_mode;
> > > if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> > > gcc_unreachable ();
> > > - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> > > + rtx target = gen_reg_rtx (vector_mode);
> >
> > I think ix86_gen_scratch_sse_rtx should check
> > currently_expanding_gimple_stmt == NULL
> > to return gen_reg_rtx (vector_mode) instead.
>
> Like this:
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b2bf90576d5..6c0e4929914 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -23786,7 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode
> mode1, machine_mode,
> rtx
> ix86_gen_scratch_sse_rtx (machine_mode mode)
> {
> - if (TARGET_SSE && !lra_in_progress)
> + if (TARGET_SSE && currently_expanding_gimple_stmt)
> {
> unsigned int regno;
> if (TARGET_64BIT)
> (END)
Looks like it relies on PR104721.
>
> > > bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
> > > target,
> > > GEN_INT (val_broadcast));
> > > @@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > > if (!register_operand (op0, mode)
> > > && !register_operand (op1, mode))
> > > {
> > > - rtx scratch = ix86_gen_scratch_sse_rtx (mode);
> > > + rtx scratch = gen_reg_rtx (mode);
> > > emit_move_insn (scratch, op1);
> > > op1 = scratch;
> > > }
> > > @@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > > && !register_operand (op0, mode)
> > > && !register_operand (op1, mode))
> > > {
> > > - rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
> > > + rtx tmp = gen_reg_rtx (GET_MODE (op0));
> > > emit_move_insn (tmp, op1);
> > > emit_move_insn (op0, tmp);
> > > return;
> > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > > index 3066ea3734a..d124545aa5d 100644
> > > --- a/gcc/config/i386/sse.md
> > > +++ b/gcc/config/i386/sse.md
> > > @@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
> > > (set_attr "mode" "V4SF")])
> > >
> > > (define_insn "*vec_dupv4si"
> > > - [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
> > > + [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,v")
> > > (vec_duplicate:V4SI
> > > - (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> > > + (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
> > > "TARGET_SSE"
> > > "@
> > > %vpshufd\t{$0, %1, %0|%0, %1, 0}
> > > vbroadcastss\t{%1, %0|%0, %1}
> > > - shufps\t{$0, %0, %0|%0, %0, 0}"
> > > - [(set_attr "isa" "sse2,avx,noavx")
> > > - (set_attr "type" "sselog1,ssemov,sselog1")
> > > - (set_attr "length_immediate" "1,0,1")
> > > - (set_attr "prefix_extra" "0,1,*")
> > > - (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> > > - (set_attr "mode" "TI,V4SF,V4SF")])
> > > + shufps\t{$0, %0, %0|%0, %0, 0}
> > > + #"
> > > + [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
> > > + (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
> > > + (set_attr "length_immediate" "1,0,1,1")
> > > + (set_attr "prefix_extra" "0,1,*,0")
> > > + (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
> > > + (set_attr "mode" "TI,V4SF,V4SF,TI")
> > > + (set (attr "preferred_for_speed")
> > > + (cond [(eq_attr "alternative" "3")
> > > + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
> > > + ]
> > > + (symbol_ref "true")))])
> > > +
> > > +(define_split
> > > + [(set (match_operand:V4SI 0 "sse_reg_operand")
> > > + (vec_duplicate:V4SI
> > > + (match_operand:SI 1 "general_reg_operand")))]
> > > + "TARGET_SSE && reload_completed
> > > + /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
> > > + available, because then we can broadcast from GPRs directly. */
> > > + && !TARGET_AVX512VL"
> > > + [(const_int 0)]
> > > +{
> > > + emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
> > > + CONST0_RTX (V4SImode),
> > > + gen_lowpart (SImode, operands[1])));
> > > + emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
> > > + DONE;
> > > +})
> > >
> > > (define_insn "*vec_dupv2di"
> > > [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
> > > diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > > index 4b822684b88..a830c96f7d1 100644
> > > --- a/gcc/testsuite/gcc.target/i386/incoming-11.c
> > > +++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > > @@ -15,4 +15,4 @@ void f()
> > > for (i = 0; i < 100; i++) q[i] = 1;
> > > }
> > >
> > > -/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > > +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > > index 7e458e85cdd..fe7736c318c 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > > @@ -5,4 +5,4 @@
> > >
> > > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > > index dee0cfb016a..c9acfc7088f 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > > @@ -5,4 +5,4 @@
> > >
> > > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > > index 4b7dd7cee3e..fa474c98a37 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > > @@ -4,4 +4,4 @@
> > > #include "pr100865-8a.c"
> > >
> > > /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > > index a315dde7c52..0714c3c9d6a 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > > @@ -4,4 +4,4 @@
> > > #include "pr100865-9a.c"
> > >
> > > /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > > index c3be2f5b797..d7e530d5116 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > > @@ -11,5 +11,4 @@ pr82941 ()
> > > z = y;
> > > }
> > >
> > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > > index 29ead049a67..9cdf81a9d60 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > > @@ -3,5 +3,4 @@
> > >
> > > #include "pr82941-1.c"
> > >
> > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > > index bbf580fea77..ff1d6d40eb2 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > > @@ -11,5 +11,4 @@ pr82941 ()
> > > z = y;
> > > }
> > >
> > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > > index 89ddb20adb3..201fa98d8d4 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > > @@ -3,5 +3,4 @@
> > >
> > > #include "pr82941-1.c"
> > >
> > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > > index b9da0e706b1..008217af0b8 100644
> > > --- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > > +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > > @@ -11,5 +11,4 @@ pr82941 ()
> > > z = y;
> > > }
> > >
> > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > --
> > > 2.18.1
> > >
> >
> >
> > --
> > H.J.
>
>
>
> --
> H.J.
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-01 5:45 ` Hongtao Liu
@ 2022-03-01 15:06 ` H.J. Lu
2022-03-01 22:48 ` H.J. Lu
0 siblings, 1 reply; 9+ messages in thread
From: H.J. Lu @ 2022-03-01 15:06 UTC (permalink / raw)
To: Hongtao Liu; +Cc: liuhongt, GCC Patches
On Mon, Feb 28, 2022 at 9:36 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Mar 1, 2022 at 10:39 AM H.J. Lu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Mon, Feb 28, 2022 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > > .. in ix86_expand_vector_move and
> > > > ix86_convert_const_wide_int_to_broadcast(called by the former).
> > > >
> > > > ix86_expand_vector_move is called by emit_move_insn which is used by
> > > > many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> > > > when there's explicit usage of xmm7/xmm15/xmm31.
> > > >
> > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> > > > for both w/and w/o --with-cpu=native --with-arch=native.
> > > >
> > > > Ok for trunk?
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > PR target/104704
> > > > * config/i386/i386-expand.cc
> > > > (ix86_convert_const_wide_int_to_broadcast): Replace
> > > > ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> > > > (ix86_expand_vector_move): Ditto.
> > > > * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> > > > corresponding splitter after it.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> > > > * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovdqa64.
> > > > * gcc.target/i386/pr100865-12b.c: Ditto.
> > > > * gcc.target/i386/pr100865-8b.c: Ditto.
> > > > * gcc.target/i386/pr100865-9b.c: Ditto.
> > > > * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> > > > * gcc.target/i386/pr82942-1.c: Ditto.
> > > > * gcc.target/i386/pr82990-1.c: Ditto.
> > > > * gcc.target/i386/pr82990-3.c: Ditto.
> > > > * gcc.target/i386/pr82990-5.c: Ditto.
> > > > ---
> > > > gcc/config/i386/i386-expand.cc | 6 +--
> > > > gcc/config/i386/sse.md | 41 +++++++++++++++-----
> > > > gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> > > > gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> > > > gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> > > > gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> > > > gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> > > > gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> > > > gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> > > > gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> > > > gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> > > > gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> > > > 12 files changed, 45 insertions(+), 27 deletions(-)
> > > >
> > > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > > index faa0191c6dd..75a28cdd89d 100644
> > > > --- a/gcc/config/i386/i386-expand.cc
> > > > +++ b/gcc/config/i386/i386-expand.cc
> > > > @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> > > > machine_mode vector_mode;
> > > > if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> > > > gcc_unreachable ();
> > > > - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> > > > + rtx target = gen_reg_rtx (vector_mode);
> > >
> > > I think ix86_gen_scratch_sse_rtx should check
> > > currently_expanding_gimple_stmt == NULL
> > > to return gen_reg_rtx (vector_mode) instead.
> >
> > Like this:
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index b2bf90576d5..6c0e4929914 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -23786,7 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode
> > mode1, machine_mode,
> > rtx
> > ix86_gen_scratch_sse_rtx (machine_mode mode)
> > {
> > - if (TARGET_SSE && !lra_in_progress)
> > + if (TARGET_SSE && currently_expanding_gimple_stmt)
> > {
> > unsigned int regno;
> > if (TARGET_64BIT)
> > (END)
> Looks like it relies on PR104721.
I have checked in the fix for PR104721.
> >
> > > > bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
> > > > target,
> > > > GEN_INT (val_broadcast));
> > > > @@ -605,7 +605,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > > > if (!register_operand (op0, mode)
> > > > && !register_operand (op1, mode))
> > > > {
> > > > - rtx scratch = ix86_gen_scratch_sse_rtx (mode);
> > > > + rtx scratch = gen_reg_rtx (mode);
> > > > emit_move_insn (scratch, op1);
> > > > op1 = scratch;
> > > > }
> > > > @@ -647,7 +647,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
> > > > && !register_operand (op0, mode)
> > > > && !register_operand (op1, mode))
> > > > {
> > > > - rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
> > > > + rtx tmp = gen_reg_rtx (GET_MODE (op0));
> > > > emit_move_insn (tmp, op1);
> > > > emit_move_insn (op0, tmp);
> > > > return;
> > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > > > index 3066ea3734a..d124545aa5d 100644
> > > > --- a/gcc/config/i386/sse.md
> > > > +++ b/gcc/config/i386/sse.md
> > > > @@ -25121,20 +25121,43 @@ (define_insn "vec_dupv4sf"
> > > > (set_attr "mode" "V4SF")])
> > > >
> > > > (define_insn "*vec_dupv4si"
> > > > - [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
> > > > + [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,v")
> > > > (vec_duplicate:V4SI
> > > > - (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> > > > + (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0,$r")))]
> > > > "TARGET_SSE"
> > > > "@
> > > > %vpshufd\t{$0, %1, %0|%0, %1, 0}
> > > > vbroadcastss\t{%1, %0|%0, %1}
> > > > - shufps\t{$0, %0, %0|%0, %0, 0}"
> > > > - [(set_attr "isa" "sse2,avx,noavx")
> > > > - (set_attr "type" "sselog1,ssemov,sselog1")
> > > > - (set_attr "length_immediate" "1,0,1")
> > > > - (set_attr "prefix_extra" "0,1,*")
> > > > - (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> > > > - (set_attr "mode" "TI,V4SF,V4SF")])
> > > > + shufps\t{$0, %0, %0|%0, %0, 0}
> > > > + #"
> > > > + [(set_attr "isa" "sse2,avx,noavx,noavx512vl")
> > > > + (set_attr "type" "sselog1,ssemov,sselog1,sselog1")
> > > > + (set_attr "length_immediate" "1,0,1,1")
> > > > + (set_attr "prefix_extra" "0,1,*,0")
> > > > + (set_attr "prefix" "maybe_vex,maybe_evex,orig,maybe_vex")
> > > > + (set_attr "mode" "TI,V4SF,V4SF,TI")
> > > > + (set (attr "preferred_for_speed")
> > > > + (cond [(eq_attr "alternative" "3")
> > > > + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
> > > > + ]
> > > > + (symbol_ref "true")))])
> > > > +
> > > > +(define_split
> > > > + [(set (match_operand:V4SI 0 "sse_reg_operand")
> > > > + (vec_duplicate:V4SI
> > > > + (match_operand:SI 1 "general_reg_operand")))]
> > > > + "TARGET_SSE && reload_completed
> > > > + /* Disable this splitter if avx512vl_vec_dup_gprv4si insn is
> > > > + available, because then we can broadcast from GPRs directly. */
> > > > + && !TARGET_AVX512VL"
> > > > + [(const_int 0)]
> > > > +{
> > > > + emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
> > > > + CONST0_RTX (V4SImode),
> > > > + gen_lowpart (SImode, operands[1])));
> > > > + emit_insn (gen_vec_duplicatev4si (operands[0], operands[0]));
> > > > + DONE;
> > > > +})
> > > >
> > > > (define_insn "*vec_dupv2di"
> > > > [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
> > > > diff --git a/gcc/testsuite/gcc.target/i386/incoming-11.c b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > > > index 4b822684b88..a830c96f7d1 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/incoming-11.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/incoming-11.c
> > > > @@ -15,4 +15,4 @@ void f()
> > > > for (i = 0; i < 100; i++) q[i] = 1;
> > > > }
> > > >
> > > > -/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > > > +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-11b.c b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > > > index 7e458e85cdd..fe7736c318c 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-11b.c
> > > > @@ -5,4 +5,4 @@
> > > >
> > > > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > > > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-12b.c b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > > > index dee0cfb016a..c9acfc7088f 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-12b.c
> > > > @@ -5,4 +5,4 @@
> > > >
> > > > /* { dg-final { scan-assembler-times "movabsq" 1 } } */
> > > > /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8b.c b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > > > index 4b7dd7cee3e..fa474c98a37 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-8b.c
> > > > @@ -4,4 +4,4 @@
> > > > #include "pr100865-8a.c"
> > > >
> > > > /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9b.c b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > > > index a315dde7c52..0714c3c9d6a 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr100865-9b.c
> > > > @@ -4,4 +4,4 @@
> > > > #include "pr100865-9a.c"
> > > >
> > > > /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
> > > > -/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > +/* { dg-final { scan-assembler-times "vmovdqa(?:64|)\[\\t \]%xmm\[0-9\]+, " 16 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr82941-1.c b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > > > index c3be2f5b797..d7e530d5116 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr82941-1.c
> > > > @@ -11,5 +11,4 @@ pr82941 ()
> > > > z = y;
> > > > }
> > > >
> > > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > > > index 29ead049a67..9cdf81a9d60 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c
> > > > @@ -3,5 +3,4 @@
> > > >
> > > > #include "pr82941-1.c"
> > > >
> > > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > > > index bbf580fea77..ff1d6d40eb2 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
> > > > @@ -11,5 +11,4 @@ pr82941 ()
> > > > z = y;
> > > > }
> > > >
> > > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > > > index 89ddb20adb3..201fa98d8d4 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
> > > > @@ -3,5 +3,4 @@
> > > >
> > > > #include "pr82941-1.c"
> > > >
> > > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > > > index b9da0e706b1..008217af0b8 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
> > > > @@ -11,5 +11,4 @@ pr82941 ()
> > > > z = y;
> > > > }
> > > >
> > > > -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
> > > > -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> > > > +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
> > > > --
> > > > 2.18.1
> > > >
> > >
> > >
> > > --
> > > H.J.
> >
> >
> >
> > --
> > H.J.
>
>
>
> --
> BR,
> Hongtao
--
H.J.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-01 15:06 ` H.J. Lu
@ 2022-03-01 22:48 ` H.J. Lu
2022-03-02 1:32 ` Hongtao Liu
0 siblings, 1 reply; 9+ messages in thread
From: H.J. Lu @ 2022-03-01 22:48 UTC (permalink / raw)
To: Hongtao Liu; +Cc: liuhongt, GCC Patches
On Tue, Mar 1, 2022 at 7:06 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Feb 28, 2022 at 9:36 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Tue, Mar 1, 2022 at 10:39 AM H.J. Lu via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Mon, Feb 28, 2022 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
> > > > >
> > > > > .. in ix86_expand_vector_move and
> > > > > ix86_convert_const_wide_int_to_broadcast(called by the former).
> > > > >
> > > > > ix86_expand_vector_move is called by emit_move_insn which is used by
> > > > > many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> > > > > when there's explict usage of xmm7/xmm15/xmm31.
> > > > >
> > > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> > > > > for both w/and w/o --with-cpu=native --with-arch=native.
> > > > >
> > > > > Ok for trunk?
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > > PR target/104704
> > > > > * config/i386/i386-expand.cc
> > > > > (ix86_convert_const_wide_int_to_broadcast): Replace
> > > > > ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> > > > > (ix86_expand_vector_move): Ditto.
> > > > > * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> > > > > corresponding splitter after it.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > > * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> > > > > * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovda64.
> > > > > * gcc.target/i386/pr100865-12b.c: Ditto.
> > > > > * gcc.target/i386/pr100865-8b.c: Ditto.
> > > > > * gcc.target/i386/pr100865-9b.c: Ditto.
> > > > > * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> > > > > * gcc.target/i386/pr82942-1.c: Ditto.
> > > > > * gcc.target/i386/pr82990-1.c: Ditto.
> > > > > * gcc.target/i386/pr82990-3.c: Ditto.
> > > > > * gcc.target/i386/pr82990-5.c: Ditto.
> > > > > ---
> > > > > gcc/config/i386/i386-expand.cc | 6 +--
> > > > > gcc/config/i386/sse.md | 41 +++++++++++++++-----
> > > > > gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> > > > > gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> > > > > gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> > > > > gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> > > > > gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> > > > > gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> > > > > gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> > > > > gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> > > > > gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> > > > > gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> > > > > 12 files changed, 45 insertions(+), 27 deletions(-)
> > > > >
> > > > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > > > index faa0191c6dd..75a28cdd89d 100644
> > > > > --- a/gcc/config/i386/i386-expand.cc
> > > > > +++ b/gcc/config/i386/i386-expand.cc
> > > > > @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> > > > > machine_mode vector_mode;
> > > > > if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> > > > > gcc_unreachable ();
> > > > > - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> > > > > + rtx target = gen_reg_rtx (vector_mode);
> > > >
> > > > I think ix86_gen_scratch_sse_rtx should check
> > > > currently_expanding_gimple_stmt == NULL
> > > > to return gen_reg_rtx (vector_mode) instead.
> > >
> > > Like this:
> > >
> > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > index b2bf90576d5..6c0e4929914 100644
> > > --- a/gcc/config/i386/i386.cc
> > > +++ b/gcc/config/i386/i386.cc
> > > @@ -23786,7 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode
> > > mode1, machine_mode,
> > > rtx
> > > ix86_gen_scratch_sse_rtx (machine_mode mode)
> > > {
> > > - if (TARGET_SSE && !lra_in_progress)
> > > + if (TARGET_SSE && currently_expanding_gimple_stmt)
> > > {
> > > unsigned int regno;
> > > if (TARGET_64BIT)
> > > (END)
> > Looks like it relies on PR104721.
>
> I have checked the fix for PR104721.
>
The proposed patch doesn't fix the testcase in:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104704
I am testing:
https://gitlab.com/x86-gcc/gcc/-/merge_requests/28
--
H.J.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-01 22:48 ` H.J. Lu
@ 2022-03-02 1:32 ` Hongtao Liu
2022-03-02 2:01 ` H.J. Lu
0 siblings, 1 reply; 9+ messages in thread
From: Hongtao Liu @ 2022-03-02 1:32 UTC (permalink / raw)
To: H.J. Lu; +Cc: liuhongt, GCC Patches
On Wed, Mar 2, 2022 at 6:49 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Mar 1, 2022 at 7:06 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Mon, Feb 28, 2022 at 9:36 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > On Tue, Mar 1, 2022 at 10:39 AM H.J. Lu via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > On Mon, Feb 28, 2022 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
> > > > > >
> > > > > > .. in ix86_expand_vector_move and
> > > > > > ix86_convert_const_wide_int_to_broadcast(called by the former).
> > > > > >
> > > > > > ix86_expand_vector_move is called by emit_move_insn which is used by
> > > > > > many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> > > > > > when there's explict usage of xmm7/xmm15/xmm31.
> > > > > >
> > > > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> > > > > > for both w/and w/o --with-cpu=native --with-arch=native.
> > > > > >
> > > > > > Ok for trunk?
> > > > > >
> > > > > > gcc/ChangeLog:
> > > > > >
> > > > > > PR target/104704
> > > > > > * config/i386/i386-expand.cc
> > > > > > (ix86_convert_const_wide_int_to_broadcast): Replace
> > > > > > ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> > > > > > (ix86_expand_vector_move): Ditto.
> > > > > > * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> > > > > > corresponding splitter after it.
> > > > > >
> > > > > > gcc/testsuite/ChangeLog:
> > > > > >
> > > > > > * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> > > > > > * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovda64.
> > > > > > * gcc.target/i386/pr100865-12b.c: Ditto.
> > > > > > * gcc.target/i386/pr100865-8b.c: Ditto.
> > > > > > * gcc.target/i386/pr100865-9b.c: Ditto.
> > > > > > * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> > > > > > * gcc.target/i386/pr82942-1.c: Ditto.
> > > > > > * gcc.target/i386/pr82990-1.c: Ditto.
> > > > > > * gcc.target/i386/pr82990-3.c: Ditto.
> > > > > > * gcc.target/i386/pr82990-5.c: Ditto.
> > > > > > ---
> > > > > > gcc/config/i386/i386-expand.cc | 6 +--
> > > > > > gcc/config/i386/sse.md | 41 +++++++++++++++-----
> > > > > > gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> > > > > > gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> > > > > > 12 files changed, 45 insertions(+), 27 deletions(-)
> > > > > >
> > > > > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > > > > index faa0191c6dd..75a28cdd89d 100644
> > > > > > --- a/gcc/config/i386/i386-expand.cc
> > > > > > +++ b/gcc/config/i386/i386-expand.cc
> > > > > > @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> > > > > > machine_mode vector_mode;
> > > > > > if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> > > > > > gcc_unreachable ();
> > > > > > - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> > > > > > + rtx target = gen_reg_rtx (vector_mode);
> > > > >
> > > > > I think ix86_gen_scratch_sse_rtx should check
> > > > > currently_expanding_gimple_stmt == NULL
> > > > > to return gen_reg_rtx (vector_mode) instead.
> > > >
> > > > Like this:
> > > >
> > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > > index b2bf90576d5..6c0e4929914 100644
> > > > --- a/gcc/config/i386/i386.cc
> > > > +++ b/gcc/config/i386/i386.cc
> > > > @@ -23786,7 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode
> > > > mode1, machine_mode,
> > > > rtx
> > > > ix86_gen_scratch_sse_rtx (machine_mode mode)
> > > > {
> > > > - if (TARGET_SSE && !lra_in_progress)
> > > > + if (TARGET_SSE && currently_expanding_gimple_stmt)
> > > > {
> > > > unsigned int regno;
> > > > if (TARGET_64BIT)
> > > > (END)
> > > Looks like it relies on PR104721.
> >
> > I have checked the fix for PR104721.
> >
>
> The proposed patch doesn't fix the testcase in:
>
The original patch does fix it, so I prefer my patch to the
currently_expanding_gimple_stmt approach.
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104704
>
> I am testing:
>
> https://gitlab.com/x86-gcc/gcc/-/merge_requests/28
>
> --
> H.J.
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx.
2022-03-02 1:32 ` Hongtao Liu
@ 2022-03-02 2:01 ` H.J. Lu
0 siblings, 0 replies; 9+ messages in thread
From: H.J. Lu @ 2022-03-02 2:01 UTC (permalink / raw)
To: Hongtao Liu; +Cc: liuhongt, GCC Patches
On Tue, Mar 1, 2022 at 5:23 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Wed, Mar 2, 2022 at 6:49 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Mar 1, 2022 at 7:06 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Mon, Feb 28, 2022 at 9:36 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > > >
> > > > On Tue, Mar 1, 2022 at 10:39 AM H.J. Lu via Gcc-patches
> > > > <gcc-patches@gcc.gnu.org> wrote:
> > > > >
> > > > > On Mon, Feb 28, 2022 at 6:26 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > On Mon, Feb 28, 2022 at 6:03 PM liuhongt <hongtao.liu@intel.com> wrote:
> > > > > > >
> > > > > > > .. in ix86_expand_vector_move and
> > > > > > > ix86_convert_const_wide_int_to_broadcast(called by the former).
> > > > > > >
> > > > > > > ix86_expand_vector_move is called by emit_move_insn which is used by
> > > > > > > many pre_reload passes, ix86_gen_scratch_sse_rtx will break data flow
> > > > > > > when there's explict usage of xmm7/xmm15/xmm31.
> > > > > > >
> > > > > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
> > > > > > > for both w/and w/o --with-cpu=native --with-arch=native.
> > > > > > >
> > > > > > > Ok for trunk?
> > > > > > >
> > > > > > > gcc/ChangeLog:
> > > > > > >
> > > > > > > PR target/104704
> > > > > > > * config/i386/i386-expand.cc
> > > > > > > (ix86_convert_const_wide_int_to_broadcast): Replace
> > > > > > > ix86_gen_scratch_sse_rtx with gen_reg_rtx.
> > > > > > > (ix86_expand_vector_move): Ditto.
> > > > > > > * config/i386/sse.md (*vec_dupv4si): Add alternative $r and
> > > > > > > corresponding splitter after it.
> > > > > > >
> > > > > > > gcc/testsuite/ChangeLog:
> > > > > > >
> > > > > > > * gcc.target/i386/incoming-11.c: Revert r12-2665-g7f4c3943f795fd.
> > > > > > > * gcc.target/i386/pr100865-11b.c: Expect vmovdqa or vmovda64.
> > > > > > > * gcc.target/i386/pr100865-12b.c: Ditto.
> > > > > > > * gcc.target/i386/pr100865-8b.c: Ditto.
> > > > > > > * gcc.target/i386/pr100865-9b.c: Ditto.
> > > > > > > * gcc.target/i386/pr82941-1.c: Expect vzeroupper for ! ia32.
> > > > > > > * gcc.target/i386/pr82942-1.c: Ditto.
> > > > > > > * gcc.target/i386/pr82990-1.c: Ditto.
> > > > > > > * gcc.target/i386/pr82990-3.c: Ditto.
> > > > > > > * gcc.target/i386/pr82990-5.c: Ditto.
> > > > > > > ---
> > > > > > > gcc/config/i386/i386-expand.cc | 6 +--
> > > > > > > gcc/config/i386/sse.md | 41 +++++++++++++++-----
> > > > > > > gcc/testsuite/gcc.target/i386/incoming-11.c | 2 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr100865-11b.c | 2 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr100865-12b.c | 2 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr100865-8b.c | 2 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr100865-9b.c | 2 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr82941-1.c | 3 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr82942-1.c | 3 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr82990-1.c | 3 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr82990-3.c | 3 +-
> > > > > > > gcc/testsuite/gcc.target/i386/pr82990-5.c | 3 +-
> > > > > > > 12 files changed, 45 insertions(+), 27 deletions(-)
> > > > > > >
> > > > > > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > > > > > index faa0191c6dd..75a28cdd89d 100644
> > > > > > > --- a/gcc/config/i386/i386-expand.cc
> > > > > > > +++ b/gcc/config/i386/i386-expand.cc
> > > > > > > @@ -257,7 +257,7 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
> > > > > > > machine_mode vector_mode;
> > > > > > > if (!mode_for_vector (broadcast_mode, nunits).exists (&vector_mode))
> > > > > > > gcc_unreachable ();
> > > > > > > - rtx target = ix86_gen_scratch_sse_rtx (vector_mode);
> > > > > > > + rtx target = gen_reg_rtx (vector_mode);
> > > > > >
> > > > > > I think ix86_gen_scratch_sse_rtx should check
> > > > > > currently_expanding_gimple_stmt == NULL
> > > > > > to return gen_reg_rtx (vector_mode) instead.
> > > > >
> > > > > Like this:
> > > > >
> > > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > > > index b2bf90576d5..6c0e4929914 100644
> > > > > --- a/gcc/config/i386/i386.cc
> > > > > +++ b/gcc/config/i386/i386.cc
> > > > > @@ -23786,7 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode
> > > > > mode1, machine_mode,
> > > > > rtx
> > > > > ix86_gen_scratch_sse_rtx (machine_mode mode)
> > > > > {
> > > > > - if (TARGET_SSE && !lra_in_progress)
> > > > > + if (TARGET_SSE && currently_expanding_gimple_stmt)
> > > > > {
> > > > > unsigned int regno;
> > > > > if (TARGET_64BIT)
> > > > > (END)
> > > > Looks like it relies on PR104721.
> > >
> > > I have checked the fix for PR104721.
> > >
> >
> > The proposed patch doesn't fix the testcase in:
> >
> The original patch can, then i prefer my patch to
> currently_expanding_gimple_stmt.
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104704
> >
> > I am testing:
> >
> > https://gitlab.com/x86-gcc/gcc/-/merge_requests/28
> >
> > --
> > H.J.
There are 2 kinds of issues in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104704
1.
/* Reproducer 1: a __m512d copy (z = y) sits between two explicit uses of
   the hard register xmm31.  The value loaded from i into xmm31 is read
   back after the copy, so anything that overwrites xmm31 in between
   changes the return value.  */
__m512d y, z;
int i;
int
do_test (void)
{
/* Pin the variable to xmm31 and initialize it from i; the empty asm
   with the read-write "+v" operand keeps the register variable in use.  */
register int xmm31 __asm ("xmm31") = i;
asm volatile ("" : "+v" (xmm31));
/* Vector move placed between the two xmm31 uses.  */
z = y;
/* Copy the value from xmm31 into xmm2 and return it.
   NOTE(review): presumably the expected result is i -- confirm against
   the bug report.  */
register int xmm2 __asm ("xmm2") = xmm31;
asm volatile ("" : "+v" (xmm2));
return xmm2;
}
2.
/* Reproducer 2: same shape as a register-variable data-flow test, but
   the operation between the two explicit xmm31 uses is a 128-byte
   __builtin_memset of z instead of a vector copy.  */
char z[128];
int i;
/* noipa is applied to do_test (presumably so the test body is compiled
   as written, without interprocedural optimization -- confirm).  */
__attribute__((noipa))
int
do_test (void)
{
/* Pin the variable to xmm31 and initialize it from i; the empty asm
   with the read-write "+v" operand keeps the register variable in use.  */
register int xmm31 __asm ("xmm31") = i;
asm volatile ("" : "+v" (xmm31));
/* Zero all of z between the two xmm31 uses.  */
__builtin_memset (&z, 0, sizeof (z));
/* Copy the value from xmm31 into xmm2 and return it.
   NOTE(review): presumably the expected result is i -- confirm against
   the bug report.  */
register int xmm2 __asm ("xmm2") = xmm31;
asm volatile ("" : "+v" (xmm2));
return xmm2;
}
Your patch fixes #1. I don't think it fixes #2.
--
H.J.
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-03-02 2:01 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-01 2:03 [PATCH] [i386] Replace ix86_gen_scratch_sse_rtx with gen_reg_rtx liuhongt
2022-03-01 2:26 ` H.J. Lu
2022-03-01 2:38 ` H.J. Lu
2022-03-01 5:45 ` Hongtao Liu
2022-03-01 15:06 ` H.J. Lu
2022-03-01 22:48 ` H.J. Lu
2022-03-02 1:32 ` Hongtao Liu
2022-03-02 2:01 ` H.J. Lu
2022-03-01 3:01 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).