public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v3] x86: make better use of VBROADCASTSS / VPBROADCASTD
@ 2023-07-11  6:03 Jan Beulich
  2023-07-11  6:33 ` Liu, Hongtao
  0 siblings, 1 reply; 2+ messages in thread
From: Jan Beulich @ 2023-07-11  6:03 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kirill Yukhin, Hongtao Liu

... in vec_dupv4sf / *vec_dupv4si. The respective broadcast insns are
never longer (yet sometimes shorter) than the corresponding VSHUFPS /
VPSHUFD, due to the immediate operand of the shuffle insns balancing the
(uniform) need for VEX3 in the broadcast ones. When EVEX encoding is
required, the broadcast insns are always shorter.

Add new alternatives to cover the AVX2 and AVX512 cases as appropriate.

While touching this anyway, switch to consistently using "sseshuf1" in
the "type" attributes for all shuffle forms.

gcc/

	* config/i386/sse.md (vec_dupv4sf): Make first alternative use
	vbroadcastss for AVX2. New AVX512F alternative.
	(*vec_dupv4si): New AVX2 and AVX512F alternatives using
	vpbroadcastd. Replace sselog1 by sseshuf1 in "type" attribute.

gcc/testsuite/

	* gcc.target/i386/avx2-dupv4sf.c: New test.
	* gcc.target/i386/avx2-dupv4si.c: Likewise.
	* gcc.target/i386/avx512f-dupv4sf.c: Likewise.
	* gcc.target/i386/avx512f-dupv4si.c: Likewise.
---
Note that unlike originally intended, "prefix_extra" isn't dropped:
"length_vex" uses it to determine whether 2-byte VEX encoding is
possible (which it isn't for VBROADCASTSS / VPBROADCASTD). "length"
itself specifically does not use it for VEX/EVEX encoded insns.

Especially with the added "enabled" attribute I didn't really see how to
(further) fold alternatives 0 and 1. Instead *vec_dupv4si might benefit
from using sse2_noavx2 instead of sse2 for alternative 2, except that
there is no sse2_noavx2, only sse2_noavx.

I'm working from the assumption that the isa attributes of the original
1st and 2nd alternatives don't need further restricting (to sse2_noavx2
or avx_noavx2 as applicable), as the new earlier alternatives cover all
operand forms already when at least AVX2 is enabled.
---
v3: Testcases for new alternatives. "type" and "prefix_extra"
    adjustments.
v2: Correct operand constraints. Respect -mprefer-vector-width=. Fold
    two alternatives of vec_dupv4sf.

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25969,41 +25969,64 @@
 	(const_int 1)))])
 
 (define_insn "vec_dupv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
+  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v,x")
 	(vec_duplicate:V4SF
-	  (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
+	  (match_operand:SF 1 "nonimmediate_operand" "Yv,v,m,0")))]
   "TARGET_SSE"
   "@
-   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
+   * return TARGET_AVX2 ? \"vbroadcastss\t{%1, %0|%0, %1}\" : \"vshufps\t{$0, %d1, %0|%0, %d1, 0}\";
+   vbroadcastss\t{%1, %g0|%g0, %1}
    vbroadcastss\t{%1, %0|%0, %1}
    shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "avx,avx,noavx")
-   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
-   (set_attr "mode" "V4SF")])
+  [(set_attr "isa" "avx,*,avx,noavx")
+   (set (attr "type")
+	(cond [(and (eq_attr "alternative" "0")
+		    (match_test "!TARGET_AVX2"))
+		 (const_string "sseshuf1")
+	       (eq_attr "alternative" "3")
+		 (const_string "sseshuf1")
+	      ]
+	      (const_string "ssemov")))
+   (set (attr "length_immediate")
+	(if_then_else (eq_attr "type" "sseshuf1")
+		      (const_string "1")
+		      (const_string "0")))
+   (set_attr "prefix_extra" "0,1,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_evex,orig")
+   (set_attr "mode" "V4SF,V16SF,V4SF,V4SF")
+   (set (attr "enabled")
+	(if_then_else (eq_attr "alternative" "1")
+		      (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
+				   && !TARGET_PREFER_AVX256")
+		      (const_string "*")))])
 
 (define_insn "*vec_dupv4si"
-  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
+  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,v,v,x")
 	(vec_duplicate:V4SI
-	  (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
+	  (match_operand:SI 1 "nonimmediate_operand" "Yvm,v,Yv,m,0")))]
   "TARGET_SSE"
   "@
+   vpbroadcastd\t{%1, %0|%0, %1}
+   vpbroadcastd\t{%1, %g0|%g0, %1}
    %vpshufd\t{$0, %1, %0|%0, %1, 0}
    vbroadcastss\t{%1, %0|%0, %1}
    shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "sse2,avx,noavx")
-   (set_attr "type" "sselog1,ssemov,sselog1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")
+  [(set_attr "isa" "avx2,*,sse2,avx,noavx")
+   (set_attr "type" "ssemov,ssemov,sseshuf1,ssemov,sseshuf1")
+   (set_attr "length_immediate" "0,0,1,0,1")
+   (set_attr "prefix_extra" "1,1,0,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_vex,maybe_evex,orig")
+   (set_attr "mode" "TI,XI,TI,V4SF,V4SF")
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "1")
+     (cond [(eq_attr "alternative" "3")
 	      (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
 	   ]
-	   (symbol_ref "true")))])
+	   (symbol_ref "true")))
+   (set (attr "enabled")
+	(if_then_else (eq_attr "alternative" "1")
+		      (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
+				   && !TARGET_PREFER_AVX256")
+		      (const_string "*")))])
 
 (define_insn "*vec_dupv2di"
   [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,v,x")
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4sf.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
+
+typedef float __attribute__ ((vector_size (16))) v4sf;
+
+v4sf bcst_reg (float f)
+{
+  register float x asm ("xmm7") = f;
+
+  asm ("" : "+v" (x));
+  return (v4sf) {x, x, x, x};
+}
+
+v4sf bcst_mem (const float *f)
+{
+  return (v4sf) {*f, *f, *f, *f};
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4si.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastd" 2 } } */
+
+typedef int __attribute__ ((vector_size (16))) v4si;
+
+v4si bcst_reg (int i)
+{
+  register int x asm ("xmm7") = i;
+
+  asm ("" : "+v" (x));
+  return (v4si) {x, x, x, x};
+}
+
+v4si bcst_mem (const int *i)
+{
+  return (v4si) {*i, *i, *i, *i};
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4sf.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler "vbroadcastss\[^\n\]*%xmm17, *%zmm" } } */
+
+typedef float __attribute__ ((vector_size (16))) v4sf;
+
+v4sf bcst (float f)
+{
+  register float x asm ("xmm17") = f;
+
+  asm ("" : "+v" (x));
+  return (v4sf) {x, x, x, x};
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4si.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\]*%xmm17, *%zmm" } } */
+
+typedef int __attribute__ ((vector_size (16))) v4si;
+
+v4si bcst (int i)
+{
+  register int x asm ("xmm17") = i;
+
+  asm ("" : "+v" (x));
+  return (v4si) {x, x, x, x};
+}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* RE: [PATCH v3] x86: make better use of VBROADCASTSS / VPBROADCASTD
  2023-07-11  6:03 [PATCH v3] x86: make better use of VBROADCASTSS / VPBROADCASTD Jan Beulich
@ 2023-07-11  6:33 ` Liu, Hongtao
  0 siblings, 0 replies; 2+ messages in thread
From: Liu, Hongtao @ 2023-07-11  6:33 UTC (permalink / raw)
  To: Beulich, Jan, gcc-patches; +Cc: Kirill Yukhin



> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, July 11, 2023 2:04 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Kirill Yukhin <kirill.yukhin@gmail.com>; Liu, Hongtao
> <hongtao.liu@intel.com>
> Subject: [PATCH v3] x86: make better use of VBROADCASTSS /
> VPBROADCASTD
> 
> ... in vec_dupv4sf / *vec_dupv4si. The respective broadcast insns are never
> longer (yet sometimes shorter) than the corresponding VSHUFPS / VPSHUFD,
> due to the immediate operand of the shuffle insns balancing the
> (uniform) need for VEX3 in the broadcast ones. When EVEX encoding is
> respective the broadcast insns are always shorter.
> 
> Add new alternatives to cover the AVX2 and AVX512 cases as appropriate.
> 
> While touching this anyway, switch to consistently using "sseshuf1" in the
> "type" attributes for all shuffle forms.
> 
> gcc/
> 
> 	* config/i386/sse.md (vec_dupv4sf): Make first alternative use
> 	vbroadcastss for AVX2. New AVX512F alternative.
> 	(*vec_dupv4si): New AVX2 and AVX512F alternatives using
> 	vpbroadcastd. Replace sselog1 by sseshuf1 in "type" attribute.
> 
> gcc/testsuite/
> 
> 	* gcc.target/i386/avx2-dupv4sf.c: New test.
> 	* gcc.target/i386/avx2-dupv4si.c: Likewise.
> 	* gcc.target/i386/avx512f-dupv4sf.c: Likewise.
> 	* gcc.target/i386/avx512f-dupv4si.c: Likewise.
> ---
> Note that unlike originally intended, "prefix_extra" isn't dropped:
> "length_vex" uses it to determine whether 2-byte VEX encoding is possible
> (which it isn't for VBROADCASTSS / VPBROADCASTD). "length"
> itself specifically does not use it for VEX/EVEX encoded insns.
> 
> Especially with the added "enabled" attribute I didn't really see how to
> (further) fold alternatives 0 and 1. Instead *vec_dupv4si might benefit from
> using sse2_noavx2 instead of sse2 for alternative 2, except that there is no
> sse2_noavx2, only sse2_noavx.
> 
> I'm working from the assumption that the isa attributes to the original 1st and
> 2nd alternatives don't need further restricting (to sse2_noavx2 or
> avx_noavx2 as applicable), as the new earlier alternatives cover all operand
> forms already when at least AVX2 is enabled.
Yes, the patch LGTM.
> ---
> v3: Testcases for new alternatives. "type" and "prefix_extra"
>     adjustments.
> v2: Correct operand constraints. Respect -mprefer-vector-width=. Fold
>     two alternatives of vec_dupv4sf.
> 
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -25969,41 +25969,64 @@
>  	(const_int 1)))])
> 
>  (define_insn "vec_dupv4sf"
> -  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
> +  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v,x")
>  	(vec_duplicate:V4SF
> -	  (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
> +	  (match_operand:SF 1 "nonimmediate_operand" "Yv,v,m,0")))]
>    "TARGET_SSE"
>    "@
> -   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
> +   * return TARGET_AVX2 ? \"vbroadcastss\t{%1, %0|%0, %1}\" :
> \"vshufps\t{$0, %d1, %0|%0, %d1, 0}\";
> +   vbroadcastss\t{%1, %g0|%g0, %1}
>     vbroadcastss\t{%1, %0|%0, %1}
>     shufps\t{$0, %0, %0|%0, %0, 0}"
> -  [(set_attr "isa" "avx,avx,noavx")
> -   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
> -   (set_attr "length_immediate" "1,0,1")
> -   (set_attr "prefix_extra" "0,1,*")
> -   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
> -   (set_attr "mode" "V4SF")])
> +  [(set_attr "isa" "avx,*,avx,noavx")
> +   (set (attr "type")
> +	(cond [(and (eq_attr "alternative" "0")
> +		    (match_test "!TARGET_AVX2"))
> +		 (const_string "sseshuf1")
> +	       (eq_attr "alternative" "3")
> +		 (const_string "sseshuf1")
> +	      ]
> +	      (const_string "ssemov")))
> +   (set (attr "length_immediate")
> +	(if_then_else (eq_attr "type" "sseshuf1")
> +		      (const_string "1")
> +		      (const_string "0")))
> +   (set_attr "prefix_extra" "0,1,1,*")
> +   (set_attr "prefix" "maybe_evex,evex,maybe_evex,orig")
> +   (set_attr "mode" "V4SF,V16SF,V4SF,V4SF")
> +   (set (attr "enabled")
> +	(if_then_else (eq_attr "alternative" "1")
> +		      (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
> +				   && !TARGET_PREFER_AVX256")
> +		      (const_string "*")))])
> 
>  (define_insn "*vec_dupv4si"
> -  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
> +  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,v,v,x")
>  	(vec_duplicate:V4SI
> -	  (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
> +	  (match_operand:SI 1 "nonimmediate_operand" "Yvm,v,Yv,m,0")))]
>    "TARGET_SSE"
>    "@
> +   vpbroadcastd\t{%1, %0|%0, %1}
> +   vpbroadcastd\t{%1, %g0|%g0, %1}
>     %vpshufd\t{$0, %1, %0|%0, %1, 0}
>     vbroadcastss\t{%1, %0|%0, %1}
>     shufps\t{$0, %0, %0|%0, %0, 0}"
> -  [(set_attr "isa" "sse2,avx,noavx")
> -   (set_attr "type" "sselog1,ssemov,sselog1")
> -   (set_attr "length_immediate" "1,0,1")
> -   (set_attr "prefix_extra" "0,1,*")
> -   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
> -   (set_attr "mode" "TI,V4SF,V4SF")
> +  [(set_attr "isa" "avx2,*,sse2,avx,noavx")
> +   (set_attr "type" "ssemov,ssemov,sseshuf1,ssemov,sseshuf1")
> +   (set_attr "length_immediate" "0,0,1,0,1")
> +   (set_attr "prefix_extra" "1,1,0,1,*")
> +   (set_attr "prefix" "maybe_evex,evex,maybe_vex,maybe_evex,orig")
> +   (set_attr "mode" "TI,XI,TI,V4SF,V4SF")
>     (set (attr "preferred_for_speed")
> -     (cond [(eq_attr "alternative" "1")
> +     (cond [(eq_attr "alternative" "3")
>  	      (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
>  	   ]
> -	   (symbol_ref "true")))])
> +	   (symbol_ref "true")))
> +   (set (attr "enabled")
> +	(if_then_else (eq_attr "alternative" "1")
> +		      (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
> +				   && !TARGET_PREFER_AVX256")
> +		      (const_string "*")))])
> 
>  (define_insn "*vec_dupv2di"
>    [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,v,x")
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4sf.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
> +
> +typedef float __attribute__ ((vector_size (16))) v4sf;
> +
> +v4sf bcst_reg (float f)
> +{
> +  register float x asm ("xmm7") = f;
> +
> +  asm ("" : "+v" (x));
> +  return (v4sf) {x, x, x, x};
> +}
> +
> +v4sf bcst_mem (const float *f)
> +{
> +  return (v4sf) {*f, *f, *f, *f};
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx2-dupv4si.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpbroadcastd" 2 } } */
> +
> +typedef int __attribute__ ((vector_size (16))) v4si;
> +
> +v4si bcst_reg (int i)
> +{
> +  register int x asm ("xmm7") = i;
> +
> +  asm ("" : "+v" (x));
> +  return (v4si) {x, x, x, x};
> +}
> +
> +v4si bcst_mem (const int *i)
> +{
> +  return (v4si) {*i, *i, *i, *i};
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4sf.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2"
> +} */
> +/* { dg-final { scan-assembler "vbroadcastss\[^\n\]*%xmm17, *%zmm" } }
> +*/
> +
> +typedef float __attribute__ ((vector_size (16))) v4sf;
> +
> +v4sf bcst (float f)
> +{
> +  register float x asm ("xmm17") = f;
> +
> +  asm ("" : "+v" (x));
> +  return (v4sf) {x, x, x, x};
> +}
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-dupv4si.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2"
> +} */
> +/* { dg-final { scan-assembler "vpbroadcastd\[^\n\]*%xmm17, *%zmm" } }
> +*/
> +
> +typedef int __attribute__ ((vector_size (16))) v4si;
> +
> +v4si bcst (int i)
> +{
> +  register int x asm ("xmm17") = i;
> +
> +  asm ("" : "+v" (x));
> +  return (v4si) {x, x, x, x};
> +}

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-07-11  6:33 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-11  6:03 [PATCH v3] x86: make better use of VBROADCASTSS / VPBROADCASTD Jan Beulich
2023-07-11  6:33 ` Liu, Hongtao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).