public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] [x86] Refine maskstore patterns with UNSPEC_MASKMOV.
@ 2023-06-27  5:38 liuhongt
  2023-06-27  7:20 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: liuhongt @ 2023-06-27  5:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.guenther

At the rtl level, we cannot guarantee that the maskstore is not optimized
to other full-memory accesses, as the current implementations are equivalent
in terms of pattern. To solve this potential problem, this patch refines
the pattern of the maskstore and the intrinsics with unspec.

One thing I'm not sure is VCOND_EXPR, should VCOND_EXPR also expect
fault suppression for masked-out elements?

Currently we're still using vec_merge for both AVX2 and AVX512 target.

------------------------
Similar to r14-2070-gc79476da46728e

If mem_addr points to a memory region with less than whole vector size
bytes of accessible memory and k is a mask that would prevent reading
the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent
it from being transformed into any other whole-memory-access instruction.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

	PR rtl-optimization/110237
	* config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
	UNSPEC_MASKMOV.
	(maskstore<mode><avx512fmaskmodelower>): Ditto.
	(*<avx512>_store<mode>_mask): New define_insn, it's renamed
	from original <avx512>_store<mode>_mask.
---
 gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 57 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3b50c7117f8..812cfca4b92 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1608,7 +1608,7 @@ (define_insn "<avx512>_blendm<mode>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<avx512>_store<mode>_mask"
+(define_insn "*<avx512>_store<mode>_mask"
   [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
 	(vec_merge:V48_AVX512VL
 	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
@@ -1636,7 +1636,7 @@ (define_insn "<avx512>_store<mode>_mask"
    (set_attr "memory" "store")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<avx512>_store<mode>_mask"
+(define_insn "*<avx512>_store<mode>_mask"
   [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
 	(vec_merge:VI12HFBF_AVX512VL
 	  (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
@@ -27008,21 +27008,66 @@ (define_expand "maskstore<mode><sseintvecmodelower>"
   "TARGET_AVX")
 
 (define_expand "maskstore<mode><avx512fmaskmodelower>"
-  [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
-	(vec_merge:V48H_AVX512VL
-	  (match_operand:V48H_AVX512VL 1 "register_operand")
-	  (match_dup 0)
-	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
+	(unspec:V48_AVX512VL
+	  [(match_operand:V48_AVX512VL 1 "register_operand")
+	   (match_dup 0)
+	   (match_operand:<avx512fmaskmode> 2 "register_operand")]
+	  UNSPEC_MASKMOV))]
   "TARGET_AVX512F")
 
 (define_expand "maskstore<mode><avx512fmaskmodelower>"
-  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
-	(vec_merge:VI12_AVX512VL
-	  (match_operand:VI12_AVX512VL 1 "register_operand")
-	  (match_dup 0)
-	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand")
+	(unspec:VI12HFBF_AVX512VL
+	  [(match_operand:VI12HFBF_AVX512VL 1 "register_operand")
+	   (match_dup 0)
+	   (match_operand:<avx512fmaskmode> 2 "register_operand")]
+	  UNSPEC_MASKMOV))]
   "TARGET_AVX512BW")
 
+(define_insn "<avx512>_store<mode>_mask"
+  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
+	(unspec:V48_AVX512VL
+	  [(match_operand:V48_AVX512VL 1 "register_operand" "v")
+	   (match_dup 0)
+	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
+	  UNSPEC_MASKMOV))]
+  "TARGET_AVX512F"
+{
+  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
+    {
+      if (misaligned_operand (operands[0], <MODE>mode))
+	return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      else
+	return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+    }
+  else
+    {
+      if (misaligned_operand (operands[0], <MODE>mode))
+	return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      else
+	return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+    }
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_store<mode>_mask"
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
+	(unspec:VI12HFBF_AVX512VL
+	  [(match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
+	   (match_dup 0)
+	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
+	   UNSPEC_MASKMOV))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_expand "cbranch<mode>4"
   [(set (reg:CC FLAGS_REG)
 	(compare:CC (match_operand:VI48_AVX 1 "register_operand")
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] [x86] Refine maskstore patterns with UNSPEC_MASKMOV.
  2023-06-27  5:38 [PATCH] [x86] Refine maskstore patterns with UNSPEC_MASKMOV liuhongt
@ 2023-06-27  7:20 ` Richard Biener
  2023-06-27  7:28   ` Hongtao Liu
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2023-06-27  7:20 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches

On Tue, Jun 27, 2023 at 7:38 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> At the rtl level, we cannot guarantee that the maskstore is not optimized
> to other full-memory accesses, as the current implementations are equivalent
> in terms of pattern, to solve this potential problem, this patch refines
> the pattern of the maskstore and the intrinsics with unspec.
>
> One thing I'm not sure is VCOND_EXPR, should VCOND_EXPR also expect
> fault suppression for masked-out elements?

You mean the vcond and vcond_eq optabs?  No, those do not expect
fault suppression.

>
> Currently we're still using vec_merge for both AVX2 and AVX512 target.
>
> ------------------------
> Similar like r14-2070-gc79476da46728e
>
> If mem_addr points to a memory region with less than whole vector size
> bytes of accessible memory and k is a mask that would prevent reading
> the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent
> it to be transformed to any other whole memory access instructions.
>
> Bootstrapped and regtested on x86_64-pc-linu-gnu{-m32,}.
> Ready to push to trunk.
>
> gcc/ChangeLog:
>
>         PR rtl-optimization/110237
>         * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
>         UNSPEC_MASKMOV.
>         (maskstore<mode><avx512fmaskmodelower): Ditto.
>         (*<avx512>_store<mode>_mask): New define_insn, it's renamed
>         from original <avx512>_store<mode>_mask.
> ---
>  gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
>  1 file changed, 57 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3b50c7117f8..812cfca4b92 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -1608,7 +1608,7 @@ (define_insn "<avx512>_blendm<mode>"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<avx512>_store<mode>_mask"
> +(define_insn "*<avx512>_store<mode>_mask"
>    [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
>         (vec_merge:V48_AVX512VL
>           (match_operand:V48_AVX512VL 1 "register_operand" "v")
> @@ -1636,7 +1636,7 @@ (define_insn "<avx512>_store<mode>_mask"
>     (set_attr "memory" "store")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<avx512>_store<mode>_mask"
> +(define_insn "*<avx512>_store<mode>_mask"
>    [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
>         (vec_merge:VI12HFBF_AVX512VL
>           (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> @@ -27008,21 +27008,66 @@ (define_expand "maskstore<mode><sseintvecmodelower>"
>    "TARGET_AVX")
>
>  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> -  [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
> -       (vec_merge:V48H_AVX512VL
> -         (match_operand:V48H_AVX512VL 1 "register_operand")
> -         (match_dup 0)
> -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> +  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
> +       (unspec:V48_AVX512VL
> +         [(match_operand:V48_AVX512VL 1 "register_operand")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> +         UNSPEC_MASKMOV))]
>    "TARGET_AVX512F")
>
>  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> -  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
> -       (vec_merge:VI12_AVX512VL
> -         (match_operand:VI12_AVX512VL 1 "register_operand")
> -         (match_dup 0)
> -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand")
> +       (unspec:VI12HFBF_AVX512VL
> +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> +         UNSPEC_MASKMOV))]
>    "TARGET_AVX512BW")
>
> +(define_insn "<avx512>_store<mode>_mask"
> +  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
> +       (unspec:V48_AVX512VL
> +         [(match_operand:V48_AVX512VL 1 "register_operand" "v")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> +         UNSPEC_MASKMOV))]
> +  "TARGET_AVX512F"
> +{
> +  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
> +    {
> +      if (misaligned_operand (operands[0], <MODE>mode))
> +       return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +      else
> +       return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +    }
> +  else
> +    {
> +      if (misaligned_operand (operands[0], <MODE>mode))
> +       return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +      else
> +       return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> +    }
> +}
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "memory" "store")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "<avx512>_store<mode>_mask"
> +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
> +       (unspec:VI12HFBF_AVX512VL
> +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> +          (match_dup 0)
> +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> +          UNSPEC_MASKMOV))]
> +  "TARGET_AVX512BW"
> +  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "memory" "store")
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_expand "cbranch<mode>4"
>    [(set (reg:CC FLAGS_REG)
>         (compare:CC (match_operand:VI48_AVX 1 "register_operand")
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] [x86] Refine maskstore patterns with UNSPEC_MASKMOV.
  2023-06-27  7:20 ` Richard Biener
@ 2023-06-27  7:28   ` Hongtao Liu
  2023-06-27  7:46     ` Hongtao Liu
  0 siblings, 1 reply; 4+ messages in thread
From: Hongtao Liu @ 2023-06-27  7:28 UTC (permalink / raw)
  To: Richard Biener; +Cc: liuhongt, gcc-patches

On Tue, Jun 27, 2023 at 3:20 PM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Tue, Jun 27, 2023 at 7:38 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > At the rtl level, we cannot guarantee that the maskstore is not optimized
> > to other full-memory accesses, as the current implementations are equivalent
> > in terms of pattern, to solve this potential problem, this patch refines
> > the pattern of the maskstore and the intrinsics with unspec.
> >
> > One thing I'm not sure is VCOND_EXPR, should VCOND_EXPR also expect
> > fault suppression for masked-out elements?
>
> You mean the vcond and vcond_eq optabs?  No, those do not expect
> fault suppression.
Yes, vcond/vcond_eq, thanks for clarifying.
>
> >
> > Currently we're still using vec_merge for both AVX2 and AVX512 target.
> >
> > ------------------------
> > Similar like r14-2070-gc79476da46728e
> >
> > If mem_addr points to a memory region with less than whole vector size
> > bytes of accessible memory and k is a mask that would prevent reading
> > the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent
> > it to be transformed to any other whole memory access instructions.
> >
> > Bootstrapped and regtested on x86_64-pc-linu-gnu{-m32,}.
> > Ready to push to trunk.
> >
> > gcc/ChangeLog:
> >
> >         PR rtl-optimization/110237
> >         * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
> >         UNSPEC_MASKMOV.
> >         (maskstore<mode><avx512fmaskmodelower): Ditto.
> >         (*<avx512>_store<mode>_mask): New define_insn, it's renamed
> >         from original <avx512>_store<mode>_mask.
> > ---
> >  gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 57 insertions(+), 12 deletions(-)
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 3b50c7117f8..812cfca4b92 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -1608,7 +1608,7 @@ (define_insn "<avx512>_blendm<mode>"
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<sseinsnmode>")])
> >
> > -(define_insn "<avx512>_store<mode>_mask"
> > +(define_insn "*<avx512>_store<mode>_mask"
> >    [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
> >         (vec_merge:V48_AVX512VL
> >           (match_operand:V48_AVX512VL 1 "register_operand" "v")
> > @@ -1636,7 +1636,7 @@ (define_insn "<avx512>_store<mode>_mask"
> >     (set_attr "memory" "store")
> >     (set_attr "mode" "<sseinsnmode>")])
> >
> > -(define_insn "<avx512>_store<mode>_mask"
> > +(define_insn "*<avx512>_store<mode>_mask"
> >    [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
> >         (vec_merge:VI12HFBF_AVX512VL
> >           (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> > @@ -27008,21 +27008,66 @@ (define_expand "maskstore<mode><sseintvecmodelower>"
> >    "TARGET_AVX")
> >
> >  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> > -  [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
> > -       (vec_merge:V48H_AVX512VL
> > -         (match_operand:V48H_AVX512VL 1 "register_operand")
> > -         (match_dup 0)
> > -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> > +  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
> > +       (unspec:V48_AVX512VL
> > +         [(match_operand:V48_AVX512VL 1 "register_operand")
> > +          (match_dup 0)
> > +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> > +         UNSPEC_MASKMOV))]
> >    "TARGET_AVX512F")
> >
> >  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> > -  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
> > -       (vec_merge:VI12_AVX512VL
> > -         (match_operand:VI12_AVX512VL 1 "register_operand")
> > -         (match_dup 0)
> > -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> > +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand")
> > +       (unspec:VI12HFBF_AVX512VL
> > +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand")
> > +          (match_dup 0)
> > +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> > +         UNSPEC_MASKMOV))]
> >    "TARGET_AVX512BW")
> >
> > +(define_insn "<avx512>_store<mode>_mask"
> > +  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
> > +       (unspec:V48_AVX512VL
> > +         [(match_operand:V48_AVX512VL 1 "register_operand" "v")
> > +          (match_dup 0)
> > +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> > +         UNSPEC_MASKMOV))]
> > +  "TARGET_AVX512F"
> > +{
> > +  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
> > +    {
> > +      if (misaligned_operand (operands[0], <MODE>mode))
> > +       return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > +      else
> > +       return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > +    }
> > +  else
> > +    {
> > +      if (misaligned_operand (operands[0], <MODE>mode))
> > +       return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > +      else
> > +       return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > +    }
> > +}
> > +  [(set_attr "type" "ssemov")
> > +   (set_attr "prefix" "evex")
> > +   (set_attr "memory" "store")
> > +   (set_attr "mode" "<sseinsnmode>")])
> > +
> > +(define_insn "<avx512>_store<mode>_mask"
> > +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
> > +       (unspec:VI12HFBF_AVX512VL
> > +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> > +          (match_dup 0)
> > +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> > +          UNSPEC_MASKMOV))]
> > +  "TARGET_AVX512BW"
> > +  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
> > +  [(set_attr "type" "ssemov")
> > +   (set_attr "prefix" "evex")
> > +   (set_attr "memory" "store")
> > +   (set_attr "mode" "<sseinsnmode>")])
> > +
> >  (define_expand "cbranch<mode>4"
> >    [(set (reg:CC FLAGS_REG)
> >         (compare:CC (match_operand:VI48_AVX 1 "register_operand")
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] [x86] Refine maskstore patterns with UNSPEC_MASKMOV.
  2023-06-27  7:28   ` Hongtao Liu
@ 2023-06-27  7:46     ` Hongtao Liu
  0 siblings, 0 replies; 4+ messages in thread
From: Hongtao Liu @ 2023-06-27  7:46 UTC (permalink / raw)
  To: Richard Biener; +Cc: liuhongt, gcc-patches

On Tue, Jun 27, 2023 at 3:28 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Jun 27, 2023 at 3:20 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Tue, Jun 27, 2023 at 7:38 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > At the rtl level, we cannot guarantee that the maskstore is not optimized
> > > to other full-memory accesses, as the current implementations are equivalent
> > > in terms of pattern, to solve this potential problem, this patch refines
> > > the pattern of the maskstore and the intrinsics with unspec.
> > >
> > > One thing I'm not sure is VCOND_EXPR, should VCOND_EXPR also expect
> > > fault suppression for masked-out elements?
> >
> > You mean the vcond and vcond_eq optabs?  No, those do not expect
> > fault suppression.
> Yes, vcond/vcond_eq, thanks for clarifying.
> >
> > >
> > > Currently we're still using vec_merge for both AVX2 and AVX512 target.
> > >
> > > ------------------------
> > > Similar like r14-2070-gc79476da46728e
> > >
> > > If mem_addr points to a memory region with less than whole vector size
> > > bytes of accessible memory and k is a mask that would prevent reading
> > > the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent
> > > it to be transformed to any other whole memory access instructions.
> > >
> > > Bootstrapped and regtested on x86_64-pc-linu-gnu{-m32,}.
> > > Ready to push to trunk.
I'm going to backport this patch and the maskload one[1] to GCC11/GCC12/GCC13

[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622410.html
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR rtl-optimization/110237
> > >         * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
> > >         UNSPEC_MASKMOV.
> > >         (maskstore<mode><avx512fmaskmodelower): Ditto.
> > >         (*<avx512>_store<mode>_mask): New define_insn, it's renamed
> > >         from original <avx512>_store<mode>_mask.
> > > ---
> > >  gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
> > >  1 file changed, 57 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > > index 3b50c7117f8..812cfca4b92 100644
> > > --- a/gcc/config/i386/sse.md
> > > +++ b/gcc/config/i386/sse.md
> > > @@ -1608,7 +1608,7 @@ (define_insn "<avx512>_blendm<mode>"
> > >     (set_attr "prefix" "evex")
> > >     (set_attr "mode" "<sseinsnmode>")])
> > >
> > > -(define_insn "<avx512>_store<mode>_mask"
> > > +(define_insn "*<avx512>_store<mode>_mask"
> > >    [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
> > >         (vec_merge:V48_AVX512VL
> > >           (match_operand:V48_AVX512VL 1 "register_operand" "v")
> > > @@ -1636,7 +1636,7 @@ (define_insn "<avx512>_store<mode>_mask"
> > >     (set_attr "memory" "store")
> > >     (set_attr "mode" "<sseinsnmode>")])
> > >
> > > -(define_insn "<avx512>_store<mode>_mask"
> > > +(define_insn "*<avx512>_store<mode>_mask"
> > >    [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
> > >         (vec_merge:VI12HFBF_AVX512VL
> > >           (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> > > @@ -27008,21 +27008,66 @@ (define_expand "maskstore<mode><sseintvecmodelower>"
> > >    "TARGET_AVX")
> > >
> > >  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> > > -  [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
> > > -       (vec_merge:V48H_AVX512VL
> > > -         (match_operand:V48H_AVX512VL 1 "register_operand")
> > > -         (match_dup 0)
> > > -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> > > +  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
> > > +       (unspec:V48_AVX512VL
> > > +         [(match_operand:V48_AVX512VL 1 "register_operand")
> > > +          (match_dup 0)
> > > +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> > > +         UNSPEC_MASKMOV))]
> > >    "TARGET_AVX512F")
> > >
> > >  (define_expand "maskstore<mode><avx512fmaskmodelower>"
> > > -  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
> > > -       (vec_merge:VI12_AVX512VL
> > > -         (match_operand:VI12_AVX512VL 1 "register_operand")
> > > -         (match_dup 0)
> > > -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> > > +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand")
> > > +       (unspec:VI12HFBF_AVX512VL
> > > +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand")
> > > +          (match_dup 0)
> > > +          (match_operand:<avx512fmaskmode> 2 "register_operand")]
> > > +         UNSPEC_MASKMOV))]
> > >    "TARGET_AVX512BW")
> > >
> > > +(define_insn "<avx512>_store<mode>_mask"
> > > +  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
> > > +       (unspec:V48_AVX512VL
> > > +         [(match_operand:V48_AVX512VL 1 "register_operand" "v")
> > > +          (match_dup 0)
> > > +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> > > +         UNSPEC_MASKMOV))]
> > > +  "TARGET_AVX512F"
> > > +{
> > > +  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
> > > +    {
> > > +      if (misaligned_operand (operands[0], <MODE>mode))
> > > +       return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > > +      else
> > > +       return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > > +    }
> > > +  else
> > > +    {
> > > +      if (misaligned_operand (operands[0], <MODE>mode))
> > > +       return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > > +      else
> > > +       return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
> > > +    }
> > > +}
> > > +  [(set_attr "type" "ssemov")
> > > +   (set_attr "prefix" "evex")
> > > +   (set_attr "memory" "store")
> > > +   (set_attr "mode" "<sseinsnmode>")])
> > > +
> > > +(define_insn "<avx512>_store<mode>_mask"
> > > +  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
> > > +       (unspec:VI12HFBF_AVX512VL
> > > +         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
> > > +          (match_dup 0)
> > > +          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
> > > +          UNSPEC_MASKMOV))]
> > > +  "TARGET_AVX512BW"
> > > +  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
> > > +  [(set_attr "type" "ssemov")
> > > +   (set_attr "prefix" "evex")
> > > +   (set_attr "memory" "store")
> > > +   (set_attr "mode" "<sseinsnmode>")])
> > > +
> > >  (define_expand "cbranch<mode>4"
> > >    [(set (reg:CC FLAGS_REG)
> > >         (compare:CC (match_operand:VI48_AVX 1 "register_operand")
> > > --
> > > 2.39.1.388.g2fc9e9ca3c
> > >
>
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-06-27  7:46 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-27  5:38 [PATCH] [x86] Refine maskstore patterns with UNSPEC_MASKMOV liuhongt
2023-06-27  7:20 ` Richard Biener
2023-06-27  7:28   ` Hongtao Liu
2023-06-27  7:46     ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).