public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si, di}_internal.
@ 2022-06-07  7:41 liuhongt
  2022-06-07  7:42 ` Hongtao Liu
  2022-06-07  8:13 ` [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si,di}_internal Uros Bizjak
  0 siblings, 2 replies; 3+ messages in thread
From: liuhongt @ 2022-06-07  7:41 UTC (permalink / raw)
  To: gcc-patches

So that alternative v won't be ignored in record_reg_classes.

Similar for *r alternatives in some vector patterns.

It helps the testcase in the PR; RA also now makes better decisions for
gcc.target/i386/extract-insert-combining.c

        movd    %esi, %xmm0
        movd    %edi, %xmm1
-       movl    %esi, -12(%rsp)
        paddd   %xmm0, %xmm1
        pinsrd  $0, %esi, %xmm0
        paddd   %xmm1, %xmm0

The patch has no big impact on SPEC2017 for both -O2 and -Ofast
-march=native runs.

And I noticed there are some changes in SPEC2017:

Before:
mov mem, %eax
vmovd %eax, %xmm0
..
mov %eax, 64(%rsp)

After:
vmovd mem, %xmm0
..
vmovd %xmm0, 64(%rsp)

Which should be exactly what we want?

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for trunk?

gcc/ChangeLog:

	* config/i386/i386.md (*movsi_internal): Change alternative
	from *v to ?v.
	(*movdi_internal): Ditto.
	* config/i386/sse.md (vec_set<mode>_0): Change alternative *r
	to ?r.
	(*vec_extractv4sf_mem): Ditto.
	(*vec_extracthf): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr105513-1.c: New test.
	* gcc.target/i386/extract-insert-combining.c: Add new
	scan-assembler-not for spill.
---
 gcc/config/i386/i386.md                          |  8 ++++----
 gcc/config/i386/sse.md                           |  8 ++++----
 .../gcc.target/i386/extract-insert-combining.c   |  1 +
 gcc/testsuite/gcc.target/i386/pr105513-1.c       | 16 ++++++++++++++++
 4 files changed, 25 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 48a98e1b68b..5b538413942 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2251,9 +2251,9 @@ (define_split
 
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k  ,*r,*m,*k")
+    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r ,?*Yd,?r,?v,?*y,?*x,*k,*k  ,*r,*m,*k")
 	(match_operand:DI 1 "general_operand"
-    "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,v,*Yd,r   ,*v,r  ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
+    "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,v,*Yd,r   ,?v,r  ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
 {
@@ -2472,9 +2472,9 @@ (define_peephole2
 
 (define_insn "*movsi_internal"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k  ,*rm,*k")
+    "=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k  ,*rm,*k")
 	(match_operand:SI 1 "general_operand"
-    "g ,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,*v,r  ,*r,*kBk,*k ,CBC"))]
+    "g ,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,?v,r  ,*r,*kBk,*k ,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
 {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 62688f8e29d..d41ce2e1a9b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10590,11 +10590,11 @@ (define_insn "*vec_concatv4sf_0"
 ;; see comment above inline_secondary_memory_needed function in i386.cc
 (define_insn "vec_set<mode>_0"
   [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
-	  "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
+	  "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
 	(vec_merge:VI4F_128
 	  (vec_duplicate:VI4F_128
 	    (match_operand:<ssescalarmode> 2 "general_operand"
-	  " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
+	  " Yr,*x,v,m,r ,m,x,v,?rm,?rm,?rm,!x,?re,!*fF"))
 	  (match_operand:VI4F_128 1 "nonimm_or_0_operand"
 	  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
 	  (const_int 1)))]
@@ -11056,7 +11056,7 @@ (define_insn_and_split "*sse4_1_extractps"
    (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
 
 (define_insn_and_split "*vec_extractv4sf_mem"
-  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
+  [(set (match_operand:SF 0 "register_operand" "=v,?r,f")
 	(vec_select:SF
 	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
 	  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
@@ -11933,7 +11933,7 @@ (define_insn_and_split "*vec_extract<mode>_0"
   "operands[1] = gen_lowpart (HFmode, operands[1]);")
 
 (define_insn "*vec_extracthf"
-  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=*r,m,x,v")
+  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
 	(vec_select:HF
 	  (match_operand:V8HF 1 "register_operand" "v,v,0,v")
 	  (parallel
diff --git a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
index 32d951e6832..5a53d4cbf06 100644
--- a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
+++ b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
@@ -4,6 +4,7 @@
 /* { dg-final { scan-assembler-times "(?:vpaddd|paddd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 2 } } */
 /* { dg-final { scan-assembler-times "(?:vpinsrd|pinsrd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
 /* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not {(?n)mov.*(%rsp)} { target { ! ia32 } } } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/pr105513-1.c b/gcc/testsuite/gcc.target/i386/pr105513-1.c
new file mode 100644
index 00000000000..530f5292252
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105513-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mtune=skylake -mfpmath=sse" } */
+/* { dg-final { scan-assembler-not "\\(%rsp\\)" } } */
+
+static int as_int(float x)
+{
+    return (union{float x; int i;}){x}.i;
+}
+
+float f(double y, float x)
+{
+    int i = as_int(x);
+    if (__builtin_expect(i > 99, 0)) return 0;
+    if (i*2u < 77) if (i==2) return 0;
+    return y*x;
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si, di}_internal.
  2022-06-07  7:41 [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si, di}_internal liuhongt
@ 2022-06-07  7:42 ` Hongtao Liu
  2022-06-07  8:13 ` [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si,di}_internal Uros Bizjak
  1 sibling, 0 replies; 3+ messages in thread
From: Hongtao Liu @ 2022-06-07  7:42 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

On Tue, Jun 7, 2022 at 3:41 PM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> So alternative v won't be igored in record_reg_classess.
>
> Similar for *r alternatives in some vector patterns.
>
> It helps testcase in the PR, also RA now makes better decisions for
> gcc.target/i386/extract-insert-combining.c
>
>         movd    %esi, %xmm0
>         movd    %edi, %xmm1
> -       movl    %esi, -12(%rsp)
>         paddd   %xmm0, %xmm1
>         pinsrd  $0, %esi, %xmm0
>         paddd   %xmm1, %xmm0
>
> The patch has no big impact on SPEC2017 for both O2 and Ofast
> march=native run.
>
> And I noticed there's some changes in SPEC2017
>
> Before:
> mov mem, %eax
> vmovd %eax, %xmm0
> ..
> mov %eax, 64(%rsp)
>
> After:
> vmovd mem, %xmm0
> ..
> vmovd %xmm0, 64(%rsp)
>
> Which should be exactly what we want?
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         * config/i386/i386.md (*movsi_internal): Change alternative
>         from *v to ?v.
>         (*movdi_internal): Ditto.
>         * config/i386/sse.md (vec_set<mode>_0): Change alternative *r
>         to ?r.
>         (*vec_extractv4sf_mem): Ditto.
>         (*vec_extracthf): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr105513-1.c: New test.
>         * gcc.target/i386/extract-insert-combining.c: Add new
>         scan-assembler-not for spill.
> ---
>  gcc/config/i386/i386.md                          |  8 ++++----
>  gcc/config/i386/sse.md                           |  8 ++++----
>  .../gcc.target/i386/extract-insert-combining.c   |  1 +
>  gcc/testsuite/gcc.target/i386/pr105513-1.c       | 16 ++++++++++++++++
>  4 files changed, 25 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 48a98e1b68b..5b538413942 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2251,9 +2251,9 @@ (define_split
>
>  (define_insn "*movdi_internal"
>    [(set (match_operand:DI 0 "nonimmediate_operand"
> -    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k  ,*r,*m,*k")
> +    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r ,?*Yd,?r,?v,?*y,?*x,*k,*k  ,*r,*m,*k")
>         (match_operand:DI 1 "general_operand"
> -    "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,v,*Yd,r   ,*v,r  ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
> +    "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,v,*Yd,r   ,?v,r  ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))
>     && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> @@ -2472,9 +2472,9 @@ (define_peephole2
>
>  (define_insn "*movsi_internal"
>    [(set (match_operand:SI 0 "nonimmediate_operand"
> -    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k  ,*rm,*k")
> +    "=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k  ,*rm,*k")
>         (match_operand:SI 1 "general_operand"
> -    "g ,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,*v,r  ,*r,*kBk,*k ,CBC"))]
> +    "g ,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,?v,r  ,*r,*kBk,*k ,CBC"))]
>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))
>     && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 62688f8e29d..d41ce2e1a9b 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -10590,11 +10590,11 @@ (define_insn "*vec_concatv4sf_0"
>  ;; see comment above inline_secondary_memory_needed function in i386.cc
>  (define_insn "vec_set<mode>_0"
>    [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
> -         "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
> +         "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
>         (vec_merge:VI4F_128
>           (vec_duplicate:VI4F_128
>             (match_operand:<ssescalarmode> 2 "general_operand"
> -         " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
> +         " Yr,*x,v,m,r ,m,x,v,?rm,?rm,?rm,!x,?re,!*fF"))
>           (match_operand:VI4F_128 1 "nonimm_or_0_operand"
>           " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
>           (const_int 1)))]
> @@ -11056,7 +11056,7 @@ (define_insn_and_split "*sse4_1_extractps"
>     (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
>
>  (define_insn_and_split "*vec_extractv4sf_mem"
> -  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
> +  [(set (match_operand:SF 0 "register_operand" "=v,?r,f")
>         (vec_select:SF
>           (match_operand:V4SF 1 "memory_operand" "o,o,o")
>           (parallel [(match_operand 2 "const_0_to_3_operand")])))]
> @@ -11933,7 +11933,7 @@ (define_insn_and_split "*vec_extract<mode>_0"
>    "operands[1] = gen_lowpart (HFmode, operands[1]);")
>
>  (define_insn "*vec_extracthf"
> -  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=*r,m,x,v")
> +  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
>         (vec_select:HF
>           (match_operand:V8HF 1 "register_operand" "v,v,0,v")
>           (parallel
> diff --git a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
> index 32d951e6832..5a53d4cbf06 100644
> --- a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
> +++ b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
> @@ -4,6 +4,7 @@
>  /* { dg-final { scan-assembler-times "(?:vpaddd|paddd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 2 } } */
>  /* { dg-final { scan-assembler-times "(?:vpinsrd|pinsrd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
>  /* { dg-final { scan-assembler-not "vmovss" } } */
> +/* { dg-final { scan-assembler-not {(?n)mov.*(%rsp)} { target { ! ia32 } } } } */
>
>  #include <immintrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr105513-1.c b/gcc/testsuite/gcc.target/i386/pr105513-1.c
> new file mode 100644
> index 00000000000..530f5292252
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105513-1.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mtune=skylake -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-not "\\(%rsp\\)" } } */
> +
> +static int as_int(float x)
> +{
> +    return (union{float x; int i;}){x}.i;
> +}
> +
> +float f(double y, float x)
> +{
> +    int i = as_int(x);
> +    if (__builtin_expect(i > 99, 0)) return 0;
> +    if (i*2u < 77) if (i==2) return 0;
> +    return y*x;
> +}
> --
> 2.18.1
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si,di}_internal.
  2022-06-07  7:41 [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si, di}_internal liuhongt
  2022-06-07  7:42 ` Hongtao Liu
@ 2022-06-07  8:13 ` Uros Bizjak
  1 sibling, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2022-06-07  8:13 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches

On Tue, Jun 7, 2022 at 9:41 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> So alternative v won't be igored in record_reg_classess.
>
> Similar for *r alternatives in some vector patterns.
>
> It helps testcase in the PR, also RA now makes better decisions for
> gcc.target/i386/extract-insert-combining.c
>
>         movd    %esi, %xmm0
>         movd    %edi, %xmm1
> -       movl    %esi, -12(%rsp)
>         paddd   %xmm0, %xmm1
>         pinsrd  $0, %esi, %xmm0
>         paddd   %xmm1, %xmm0
>
> The patch has no big impact on SPEC2017 for both O2 and Ofast
> march=native run.
>
> And I noticed there's some changes in SPEC2017
>
> Before:
> mov mem, %eax
> vmovd %eax, %xmm0
> ..
> mov %eax, 64(%rsp)
>
> After:
> vmovd mem, %xmm0
> ..
> vmovd %xmm0, 64(%rsp)
>
> Which should be exactly what we want?
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         * config/i386/i386.md (*movsi_internal): Change alternative
>         from *v to ?v.
>         (*movdi_internal): Ditto.
>         * config/i386/sse.md (vec_set<mode>_0): Change alternative *r
>         to ?r.
>         (*vec_extractv4sf_mem): Ditto.
>         (*vec_extracthf): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr105513-1.c: New test.
>         * gcc.target/i386/extract-insert-combining.c: Add new
>         scan-assembler-not for spill.

Let's experiment with this approach. The above is also
better for TUNE_INTER_UNIT_MOVES_{TO,FROM}_VEC, since moves between
%eax and %xmm will again go through memory (I'm not sure how much we
care about these targets anyway).

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.md                          |  8 ++++----
>  gcc/config/i386/sse.md                           |  8 ++++----
>  .../gcc.target/i386/extract-insert-combining.c   |  1 +
>  gcc/testsuite/gcc.target/i386/pr105513-1.c       | 16 ++++++++++++++++
>  4 files changed, 25 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105513-1.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 48a98e1b68b..5b538413942 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2251,9 +2251,9 @@ (define_split
>
>  (define_insn "*movdi_internal"
>    [(set (match_operand:DI 0 "nonimmediate_operand"
> -    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,m,?r ,?*Yd,?r,?*v,?*y,?*x,*k,*k  ,*r,*m,*k")
> +    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r ,?*Yd,?r,?v,?*y,?*x,*k,*k  ,*r,*m,*k")
>         (match_operand:DI 1 "general_operand"
> -    "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,v,*Yd,r   ,*v,r  ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
> +    "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,v,*Yd,r   ,?v,r  ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))
>     && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> @@ -2472,9 +2472,9 @@ (define_peephole2
>
>  (define_insn "*movsi_internal"
>    [(set (match_operand:SI 0 "nonimmediate_operand"
> -    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k  ,*rm,*k")
> +    "=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k  ,*rm,*k")
>         (match_operand:SI 1 "general_operand"
> -    "g ,re,C ,*y,Bk ,*y,*y,r  ,C ,*v,Bk,*v,*v,r  ,*r,*kBk,*k ,CBC"))]
> +    "g ,re,C ,*y,Bk ,*y,*y,r  ,C ,?v,Bk,?v,?v,r  ,*r,*kBk,*k ,CBC"))]
>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))
>     && ix86_hardreg_mov_ok (operands[0], operands[1])"
>  {
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 62688f8e29d..d41ce2e1a9b 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -10590,11 +10590,11 @@ (define_insn "*vec_concatv4sf_0"
>  ;; see comment above inline_secondary_memory_needed function in i386.cc
>  (define_insn "vec_set<mode>_0"
>    [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
> -         "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
> +         "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x  ,m ,m   ,m")
>         (vec_merge:VI4F_128
>           (vec_duplicate:VI4F_128
>             (match_operand:<ssescalarmode> 2 "general_operand"
> -         " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
> +         " Yr,*x,v,m,r ,m,x,v,?rm,?rm,?rm,!x,?re,!*fF"))
>           (match_operand:VI4F_128 1 "nonimm_or_0_operand"
>           " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
>           (const_int 1)))]
> @@ -11056,7 +11056,7 @@ (define_insn_and_split "*sse4_1_extractps"
>     (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
>
>  (define_insn_and_split "*vec_extractv4sf_mem"
> -  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
> +  [(set (match_operand:SF 0 "register_operand" "=v,?r,f")
>         (vec_select:SF
>           (match_operand:V4SF 1 "memory_operand" "o,o,o")
>           (parallel [(match_operand 2 "const_0_to_3_operand")])))]
> @@ -11933,7 +11933,7 @@ (define_insn_and_split "*vec_extract<mode>_0"
>    "operands[1] = gen_lowpart (HFmode, operands[1]);")
>
>  (define_insn "*vec_extracthf"
> -  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=*r,m,x,v")
> +  [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
>         (vec_select:HF
>           (match_operand:V8HF 1 "register_operand" "v,v,0,v")
>           (parallel
> diff --git a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
> index 32d951e6832..5a53d4cbf06 100644
> --- a/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
> +++ b/gcc/testsuite/gcc.target/i386/extract-insert-combining.c
> @@ -4,6 +4,7 @@
>  /* { dg-final { scan-assembler-times "(?:vpaddd|paddd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 2 } } */
>  /* { dg-final { scan-assembler-times "(?:vpinsrd|pinsrd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */
>  /* { dg-final { scan-assembler-not "vmovss" } } */
> +/* { dg-final { scan-assembler-not {(?n)mov.*(%rsp)} { target { ! ia32 } } } } */
>
>  #include <immintrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr105513-1.c b/gcc/testsuite/gcc.target/i386/pr105513-1.c
> new file mode 100644
> index 00000000000..530f5292252
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105513-1.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -msse2 -mtune=skylake -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-not "\\(%rsp\\)" } } */
> +
> +static int as_int(float x)
> +{
> +    return (union{float x; int i;}){x}.i;
> +}
> +
> +float f(double y, float x)
> +{
> +    int i = as_int(x);
> +    if (__builtin_expect(i > 99, 0)) return 0;
> +    if (i*2u < 77) if (i==2) return 0;
> +    return y*x;
> +}
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-06-07  8:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-07  7:41 [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si, di}_internal liuhongt
2022-06-07  7:42 ` Hongtao Liu
2022-06-07  8:13 ` [PATCH] Disparages SSE_REGS alternatives slightly with ?v instead of *v in *mov{si,di}_internal Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).