public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] [x86] Adjust alternative *k to ?k for avx512 mask in zero_extend patterns
@ 2024-04-28  5:45 liuhongt
  2024-04-28  8:18 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: liuhongt @ 2024-04-28  5:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak

When both the source operand and the destination operand require avx512
MASK_REGS, RA can allocate a MASK_REGS register instead of a GPR, which
avoids reloading the value from GPR to MASK_REGS.
This is similar to what was done for the logic patterns.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	* config/i386/i386.md (*zero_extendsidi2): Adjust
	alternative *k to ?k.
	(zero_extend<mode>di2): Ditto.
	(*zero_extend<mode>si2): Ditto.
	(*zero_extendqihi2): Ditto.
---
 gcc/config/i386/i386.md                       | 16 +++----
 .../gcc.target/i386/zero_extendkmask.c        | 43 +++++++++++++++++++
 2 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/zero_extendkmask.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d4ce3809e6d..f2ab7fdcd58 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4567,10 +4567,10 @@ (define_expand "zero_extendsidi2"
 
 (define_insn "*zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-		"=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
+		"=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,?r,?k")
 	(zero_extend:DI
 	 (match_operand:SI 1 "x86_64_zext_operand"
-	        "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,*k,*km")))]
+	        "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,?k,?km")))]
   ""
 {
   switch (get_attr_type (insn))
@@ -4703,9 +4703,9 @@ (define_mode_attr kmov_isa
   [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
 
 (define_insn "zero_extend<mode>di2"
-  [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
+  [(set (match_operand:DI 0 "register_operand" "=r,?r,?k")
 	(zero_extend:DI
-	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
+	 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
   "TARGET_64BIT"
   "@
    movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
@@ -4758,9 +4758,9 @@ (define_insn_and_split "zero_extend<mode>si2_and"
    (set_attr "mode" "SI")])
 
 (define_insn "*zero_extend<mode>si2"
-  [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
+  [(set (match_operand:SI 0 "register_operand" "=r,?r,?k")
 	(zero_extend:SI
-	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
+	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
   "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
   "@
    movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
@@ -4813,8 +4813,8 @@ (define_insn_and_split "zero_extendqihi2_and"
 
 ; zero extend to SImode to avoid partial register stalls
 (define_insn "*zero_extendqihi2"
-  [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
-	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
+  [(set (match_operand:HI 0 "register_operand" "=r,?r,?k")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,?k,?km")))]
   "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
   "@
    movz{bl|x}\t{%1, %k0|%k0, %1}
diff --git a/gcc/testsuite/gcc.target/i386/zero_extendkmask.c b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c
new file mode 100644
index 00000000000..6b18980bbd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not {(?n)shr[bwl]} } } */
+/* { dg-final { scan-assembler-not {(?n)movz[bw]} } } */
+
+#include<immintrin.h>
+
+__m512
+foo (__m512d a, __m512d b, __m512 c, __m512 d)
+{
+  return _mm512_mask_mov_ps (c, (__mmask16) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
+}
+
+
+__m512i
+foo1 (__m512d a, __m512d b, __m512i c, __m512i d)
+{
+  return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo2 (__m512d a, __m512d b, __m512i c, __m512i d)
+{
+  return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo3 (__m512 a, __m512 b, __m512i c, __m512i d)
+{
+  return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_ps_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo4 (__m512 a, __m512 b, __m512i c, __m512i d)
+{
+  return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_ps_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo5 (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmp_epi16_mask (a, b, 5) >> 1), d);
+}
-- 
2.31.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] [x86] Adjust alternative *k to ?k for avx512 mask in zero_extend patterns
  2024-04-28  5:45 [PATCH] [x86] Adjust alternative *k to ?k for avx512 mask in zero_extend patterns liuhongt
@ 2024-04-28  8:18 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2024-04-28  8:18 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches

On Sun, Apr 28, 2024 at 7:47 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> When both the source operand and the destination operand require avx512
> MASK_REGS, RA can allocate a MASK_REGS register instead of a GPR, which
> avoids reloading the value from GPR to MASK_REGS.
> This is similar to what was done for the logic patterns.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         * config/i386/i386.md (*zero_extendsidi2): Adjust
>         alternative *k to ?k.
>         (zero_extend<mode>di2): Ditto.
>         (*zero_extend<mode>si2): Ditto.
>         (*zero_extendqihi2): Ditto.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.md                       | 16 +++----
>  .../gcc.target/i386/zero_extendkmask.c        | 43 +++++++++++++++++++
>  2 files changed, 51 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/zero_extendkmask.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index d4ce3809e6d..f2ab7fdcd58 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4567,10 +4567,10 @@ (define_expand "zero_extendsidi2"
>
>  (define_insn "*zero_extendsidi2"
>    [(set (match_operand:DI 0 "nonimmediate_operand"
> -               "=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
> +               "=r,?r,?o,r   ,o,?*y,?!*y,$r,$v,$x,*x,*v,?r,?k")
>         (zero_extend:DI
>          (match_operand:SI 1 "x86_64_zext_operand"
> -               "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,*k,*km")))]
> +               "0 ,rm,r ,rmWz,0,r  ,m   ,v ,r ,m ,*x,*v,?k,?km")))]
>    ""
>  {
>    switch (get_attr_type (insn))
> @@ -4703,9 +4703,9 @@ (define_mode_attr kmov_isa
>    [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
>
>  (define_insn "zero_extend<mode>di2"
> -  [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
> +  [(set (match_operand:DI 0 "register_operand" "=r,?r,?k")
>         (zero_extend:DI
> -        (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
> +        (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
>    "TARGET_64BIT"
>    "@
>     movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
> @@ -4758,9 +4758,9 @@ (define_insn_and_split "zero_extend<mode>si2_and"
>     (set_attr "mode" "SI")])
>
>  (define_insn "*zero_extend<mode>si2"
> -  [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
> +  [(set (match_operand:SI 0 "register_operand" "=r,?r,?k")
>         (zero_extend:SI
> -         (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
> +         (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
>    "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
>    "@
>     movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
> @@ -4813,8 +4813,8 @@ (define_insn_and_split "zero_extendqihi2_and"
>
>  ; zero extend to SImode to avoid partial register stalls
>  (define_insn "*zero_extendqihi2"
> -  [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
> -       (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
> +  [(set (match_operand:HI 0 "register_operand" "=r,?r,?k")
> +       (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,?k,?km")))]
>    "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
>    "@
>     movz{bl|x}\t{%1, %k0|%k0, %1}
> diff --git a/gcc/testsuite/gcc.target/i386/zero_extendkmask.c b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c
> new file mode 100644
> index 00000000000..6b18980bbd1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-march=x86-64-v4 -O2" } */
> +/* { dg-final { scan-assembler-not {(?n)shr[bwl]} } } */
> +/* { dg-final { scan-assembler-not {(?n)movz[bw]} } } */
> +
> +#include<immintrin.h>
> +
> +__m512
> +foo (__m512d a, __m512d b, __m512 c, __m512 d)
> +{
> +  return _mm512_mask_mov_ps (c, (__mmask16) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
> +}
> +
> +
> +__m512i
> +foo1 (__m512d a, __m512d b, __m512i c, __m512i d)
> +{
> +  return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
> +}
> +
> +__m512i
> +foo2 (__m512d a, __m512d b, __m512i c, __m512i d)
> +{
> +  return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
> +}
> +
> +__m512i
> +foo3 (__m512 a, __m512 b, __m512i c, __m512i d)
> +{
> +  return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_ps_mask (a, b) >> 1), d);
> +}
> +
> +__m512i
> +foo4 (__m512 a, __m512 b, __m512i c, __m512i d)
> +{
> +  return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_ps_mask (a, b) >> 1), d);
> +}
> +
> +__m512i
> +foo5 (__m512i a, __m512i b, __m512i c, __m512i d)
> +{
> +  return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmp_epi16_mask (a, b, 5) >> 1), d);
> +}
> --
> 2.31.1
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-04-28  8:19 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-28  5:45 [PATCH] [x86] Adjust alternative *k to ?k for avx512 mask in zero_extend patterns liuhongt
2024-04-28  8:18 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).