public inbox for gcc-patches@gcc.gnu.org
* [PATCH i386 AVX512] [22/n] Extend unaligned loads & stores.
@ 2014-08-22 11:51 Kirill Yukhin
  2014-08-23  7:44 ` Uros Bizjak
  0 siblings, 1 reply; 4+ messages in thread
From: Kirill Yukhin @ 2014-08-22 11:51 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, Richard Henderson, GCC Patches, kirill.yukhin

Hello,
This patch extends the unaligned load and store patterns.

I've refactored the original patch (stored on the SVN branch)
to reduce the complexity of the conditions in
   define_insn "<avx512>_storedqu<mode>_mask"

It seems like such a trick won't work for:
   <sse2_avx_avx512f>_loaddqu<mode><mask_name>
The problem is the V[32|16]QI modes, which are enabled for SSE/AVX
w/o masking and for AVX-512BW & AVX-512VL when masking is
on.

Of course, I can split the define_insn & define_expand
into 3 patterns w/ mode iterators of:
  1. V16QI, V32QI - baseline is SSE2, masks enabled for AVX-512BW&VL
  2. V64QI, V8HI, V16HI, V32HI - baseline is AVX-512BW, masks enabled
     for AVX-512VL
  3. V8DI, V4DI, V2DI, V16SI, V8SI, V4SI - baseline is AVX-512F, masks
     enabled for AVX-512VL.

But such an approach would lead to 6 patterns instead of 2 (with non-trivial
asm emit). I doubt it is useful...
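
For illustration, a rough sketch of what such a split could look like
(the iterator names here are only placeholders for this sketch; VI1
already exists in sse.md):

  ;; 1. baseline SSE2, V32QI with AVX; masking restricted to
  ;;    AVX-512BW & AVX-512VL in the insn condition
  (define_mode_iterator VI1
    [(V32QI "TARGET_AVX") V16QI])

  ;; 2. baseline AVX-512BW; 128/256-bit modes also need AVX-512VL
  (define_mode_iterator VI_ULOADSTORE_AVX512BW
    [V64QI
     V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])

  ;; 3. baseline AVX-512F; 128/256-bit modes also need AVX-512VL
  (define_mode_iterator VI_ULOADSTORE_AVX512F
    [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
     V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])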


The current patch passes bootstrap and shows no regressions under the
simulator.

What do you think?

gcc/
	* config/i386/sse.md
	(define_mode_iterator VI48_AVX512VL): New.
	(define_mode_iterator VI_UNALIGNED_LOADSTORE): Add V64QI, V32HI, V16HI,
	V8HI, V4SI, V4DI, V2DI modes.
	(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"): Update
	condition.
	(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"): Update
	condition, handle new modes.
	(define_insn "<sse2_avx_avx512f>_storedqu<mode>"): Handle new modes.
	(define_insn "avx512f_storedqu<mode>_mask"): Delete.
	(define_insn "<avx512>_storedqu<mode>_mask" with
	VI48_AVX512VL): New.
	(define_insn "<avx512>_storedqu<mode>_mask" with
	VI12_AVX512VL): Ditto.

--
Thanks, K


diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index cd0c08e..51cfada 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -235,6 +235,10 @@
 (define_mode_iterator VF_512
   [V16SF V8DF])
 
+(define_mode_iterator VI48_AVX512VL
+  [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
+   V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
+
 (define_mode_iterator VF2_AVX512VL
   [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
@@ -260,8 +264,12 @@
   [(V32QI "TARGET_AVX") V16QI])
 
 (define_mode_iterator VI_UNALIGNED_LOADSTORE
-  [(V32QI "TARGET_AVX") V16QI
-   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+   (V32HI "TARGET_AVX512BW")
+   (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
 
 ;; All DImode vector integer modes
 (define_mode_iterator VI8
@@ -1172,7 +1180,10 @@
 	(unspec:VI_UNALIGNED_LOADSTORE
 	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2 
+   && (!<mask_applied>
+       || (TARGET_AVX512BW && TARGET_AVX512VL)
+       || (<MODE>mode != V32QImode && (<MODE>mode != V16QImode)))"
 {
   /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
      just fine if misaligned_operand is true, and without the UNSPEC it can
@@ -1197,20 +1208,27 @@
 	(unspec:VI_UNALIGNED_LOADSTORE
 	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2
+   && (!<mask_applied>
+       || (TARGET_AVX512BW && TARGET_AVX512VL)
+       || (<MODE>mode != V32QImode && (<MODE>mode != V16QImode)))"
 {
   switch (get_attr_mode (insn))
     {
+    case MODE_V16SF:
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-      else
-	return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      switch (<MODE>mode)
+      {
+      case V32QImode:
+      case V16QImode:
+	if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	  return "%vmovdqu\t{%1, %0|%0, %1}";
+      default:
+	return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+      }
     }
 }
   [(set_attr "type" "ssemov")
@@ -1246,13 +1264,16 @@
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqu32\t{%1, %0|%0, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      switch (<MODE>mode)
+      {
+      case V32QImode:
+      case V16QImode:
+	if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	  return "%vmovdqu\t{%1, %0|%0, %1}";
+      default:
+	  return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
+      }
     }
 }
   [(set_attr "type" "ssemov")
@@ -1276,21 +1297,32 @@
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
-(define_insn "avx512f_storedqu<mode>_mask"
-  [(set (match_operand:VI48_512 0 "memory_operand" "=m")
-	(vec_merge:VI48_512
-	  (unspec:VI48_512
-	    [(match_operand:VI48_512 1 "register_operand" "v")]
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI48_AVX512VL
+	  (unspec:VI48_AVX512VL
+	    [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
 	    UNSPEC_STOREU)
 	  (match_dup 0)
 	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
   "TARGET_AVX512F"
-{
-  if (<MODE>mode == V8DImode)
-    return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-  else
-    return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-}
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "memory" "store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI12_AVX512VL
+	  (unspec:VI12_AVX512VL
+	    [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
+	    UNSPEC_STOREU)
+	  (match_dup 0)
+	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "memory" "store")


* Re: [PATCH i386 AVX512] [22/n] Extend unaligned loads & stores.
  2014-08-22 11:51 [PATCH i386 AVX512] [22/n] Extend unaligned loads & stores Kirill Yukhin
@ 2014-08-23  7:44 ` Uros Bizjak
  2014-08-26 19:56   ` Kirill Yukhin
  0 siblings, 1 reply; 4+ messages in thread
From: Uros Bizjak @ 2014-08-23  7:44 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: Jakub Jelinek, Richard Henderson, GCC Patches

On Fri, Aug 22, 2014 at 1:51 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:

> This patch extends the unaligned load and store patterns.
>
> I've refactored the original patch (stored on the SVN branch)
> to reduce the complexity of the conditions in
>    define_insn "<avx512>_storedqu<mode>_mask"
>
> It seems like such a trick won't work for:
>    <sse2_avx_avx512f>_loaddqu<mode><mask_name>
> The problem is the V[32|16]QI modes, which are enabled for SSE/AVX
> w/o masking and for AVX-512BW & AVX-512VL when masking is
> on.
>
> Of course, I can split the define_insn & define_expand
> into 3 patterns w/ mode iterators of:
>   1. V16QI, V32QI - baseline is SSE2, masks enabled for AVX-512BW&VL
>   2. V64QI, V8HI, V16HI, V32HI - baseline is AVX-512BW, masks enabled
>      for AVX-512VL
>   3. V8DI, V4DI, V2DI, V16SI, V8SI, V4SI - baseline is AVX-512F, masks
>      enabled for AVX-512VL.
>
> But such an approach would lead to 6 patterns instead of 2 (with non-trivial
> asm emit). I doubt it is useful...

At this stage, I'd still prefer simple constraints (the solution
proposed above), even at the price of additional patterns. Looking at
the patterns, it is quite hard to calculate the final condition for a
particular mode/target combo, even without the enable attribute and
conditional operand constraints/predicates. With the solution above,
the complexity is conveniently pushed to the mask define_subst attribute.
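
For context, the "mask" define_subst lives in gcc/config/i386/subst.md;
sketched from memory here (the exact predicates and constraints may
differ), it rewrites a plain set into a vec_merge against a mask
register, which is how the masking machinery stays out of the
individual patterns:

  (define_subst "mask"
    [(set (match_operand:SUBST_V 0)
          (match_operand:SUBST_V 1))]
    "TARGET_AVX512F"
    [(set (match_dup 0)
          (vec_merge:SUBST_V
            (match_dup 1)
            (match_operand:SUBST_V 2 "vector_move_operand" "0C")
            (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))])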

Uros.


* Re: [PATCH i386 AVX512] [22/n] Extend unaligned loads & stores.
  2014-08-23  7:44 ` Uros Bizjak
@ 2014-08-26 19:56   ` Kirill Yukhin
       [not found]     ` <CAFULd4Z+FSQCHSKrVp-XMr1UJDhHHQwnKjPa6izqhDVp_igGVA@mail.gmail.com>
  0 siblings, 1 reply; 4+ messages in thread
From: Kirill Yukhin @ 2014-08-26 19:56 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, Richard Henderson, GCC Patches

Hello Uroš,
On 23 Aug 09:44, Uros Bizjak wrote:
> On Fri, Aug 22, 2014 at 1:51 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> 
> > This patch extends the unaligned load and store patterns.
> At this stage, I'd still prefer simple constraints (the solution
> proposed above), even at the price of additional patterns. Looking at
> the patterns, it is quite hard to calculate the final condition for a
> particular mode/target combo, even without the enable attribute and
> conditional operand constraints/predicates. With the solution above,
> the complexity is conveniently pushed to the mask define_subst attribute.
Below is the patch, which splits the unaligned ld/st patterns.
Bootstrapped and avx512-regtested on the simulator.

gcc/
        * config/i386/sse.md
        (define_mode_iterator VI48_AVX512VL): New.
        (define_mode_iterator VI_UNALIGNED_LOADSTORE): Delete.
        (define_mode_iterator VI_ULOADSTORE_AVX512BW): New.
        (define_mode_iterator VI_ULOADSTORE_AVX512F): Ditto.
        (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
        with VI1): Change mode iterator.
        (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
        with VI_ULOADSTORE_AVX512BW): New.
        (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
        with VI_ULOADSTORE_AVX512F): Ditto.
        (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
        with VI1): Change mode iterator.
        (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
        with VI_ULOADSTORE_AVX512BW): New.
        (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
        with VI_ULOADSTORE_AVX512F): Ditto.
        (define_insn "<sse2_avx_avx512f>_storedqu<mode>
        with VI1): Change mode iterator.
        (define_insn "<sse2_avx_avx512f>_storedqu<mode>
        with VI_ULOADSTORE_AVX512BW): New.
        (define_insn "<sse2_avx_avx512f>_storedqu<mode>
        with VI_ULOADSTORE_AVX512F): Ditto.
        (define_insn "avx512f_storedqu<mode>_mask"): Delete.
        (define_insn "<avx512>_storedqu<mode>_mask" with
        VI48_AVX512VL): New.
        (define_insn "<avx512>_storedqu<mode>_mask" with
        VI12_AVX512VL): Ditto.

Is it ok for trunk?

--
Thanks, K


diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0624582..0245ec4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -235,6 +235,10 @@
 (define_mode_iterator VF_512
   [V16SF V8DF])
 
+(define_mode_iterator VI48_AVX512VL
+  [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
+   V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
+
 (define_mode_iterator VF2_AVX512VL
   [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
@@ -259,9 +263,13 @@
 (define_mode_iterator VI1
   [(V32QI "TARGET_AVX") V16QI])
 
-(define_mode_iterator VI_UNALIGNED_LOADSTORE
-  [(V32QI "TARGET_AVX") V16QI
-   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+(define_mode_iterator VI_ULOADSTORE_AVX512BW
+  [V64QI
+   V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI_ULOADSTORE_AVX512F
+  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
 
 ;; All DImode vector integer modes
 (define_mode_iterator VI8
@@ -1172,18 +1180,18 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
+   just fine if misaligned_operand is true, and without the UNSPEC it can
+   be combined with arithmetic instructions.  If misaligned_operand is
+   false, still emit UNSPEC_LOADU insn to honor user's request for
+   misaligned load.  */
 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
-	(unspec:VI_UNALIGNED_LOADSTORE
-	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
+  [(set (match_operand:VI1 0 "register_operand")
+	(unspec:VI1
+	  [(match_operand:VI1 1 "nonimmediate_operand")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
 {
-  /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
-     just fine if misaligned_operand is true, and without the UNSPEC it can
-     be combined with arithmetic instructions.  If misaligned_operand is
-     false, still emit UNSPEC_LOADU insn to honor user's request for
-     misaligned load.  */
   if (TARGET_AVX
       && misaligned_operand (operands[1], <MODE>mode))
     {
@@ -1197,25 +1205,61 @@
     }
 })
 
+(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512BW 0 "register_operand")
+	(unspec:VI_ULOADSTORE_AVX512BW
+	  [(match_operand:VI_ULOADSTORE_AVX512BW 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512BW"
+{
+  if (misaligned_operand (operands[1], <MODE>mode))
+    {
+      rtx src = operands[1];
+      if (<mask_applied>)
+	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
+				 operands[2 * <mask_applied>],
+				 operands[3 * <mask_applied>]);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
+      DONE;
+    }
+})
+
+(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512F 0 "register_operand")
+	(unspec:VI_ULOADSTORE_AVX512F
+	  [(match_operand:VI_ULOADSTORE_AVX512F 1 "nonimmediate_operand")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512F"
+{
+  if (misaligned_operand (operands[1], <MODE>mode))
+    {
+      rtx src = operands[1];
+      if (<mask_applied>)
+	src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
+				 operands[2 * <mask_applied>],
+				 operands[3 * <mask_applied>]);
+      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
+      DONE;
+    }
+})
+
 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
-  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
-	(unspec:VI_UNALIGNED_LOADSTORE
-	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
+  [(set (match_operand:VI1 0 "register_operand" "=v")
+	(unspec:VI1
+	  [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
 {
   switch (get_attr_mode (insn))
     {
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-      else
-	return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	return "%vmovdqu\t{%1, %0|%0, %1}";
+      else
+	return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
     }
 }
   [(set_attr "type" "ssemov")
@@ -1238,10 +1282,34 @@
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
+(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512BW 0 "register_operand" "=v")
+	(unspec:VI_ULOADSTORE_AVX512BW
+	  [(match_operand:VI_ULOADSTORE_AVX512BW 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_evex")])
+
+(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512F 0 "register_operand" "=v")
+	(unspec:VI_ULOADSTORE_AVX512F
+	  [(match_operand:VI_ULOADSTORE_AVX512F 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_LOADU))]
+  "TARGET_AVX512F"
+  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_evex")])
+
 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
-  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
-	(unspec:VI_UNALIGNED_LOADSTORE
-	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
+  [(set (match_operand:VI1 0 "memory_operand" "=m")
+	(unspec:VI1
+	  [(match_operand:VI1 1 "register_operand" "v")]
 	  UNSPEC_STOREU))]
   "TARGET_SSE2"
 {
@@ -1251,13 +1319,16 @@
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqu32\t{%1, %0|%0, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      switch (<MODE>mode)
+      {
+      case V32QImode:
+      case V16QImode:
+	if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	  return "%vmovdqu\t{%1, %0|%0, %1}";
+      default:
+	  return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
+      }
     }
 }
   [(set_attr "type" "ssemov")
@@ -1281,21 +1352,56 @@
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
-(define_insn "avx512f_storedqu<mode>_mask"
-  [(set (match_operand:VI48_512 0 "memory_operand" "=m")
-	(vec_merge:VI48_512
-	  (unspec:VI48_512
-	    [(match_operand:VI48_512 1 "register_operand" "v")]
+(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512BW 0 "memory_operand" "=m")
+	(unspec:VI_ULOADSTORE_AVX512BW
+	  [(match_operand:VI_ULOADSTORE_AVX512BW 1 "register_operand" "v")]
+	  UNSPEC_STOREU))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_evex")])
+
+(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
+  [(set (match_operand:VI_ULOADSTORE_AVX512F 0 "memory_operand" "=m")
+	(unspec:VI_ULOADSTORE_AVX512F
+	  [(match_operand:VI_ULOADSTORE_AVX512F 1 "register_operand" "v")]
+	  UNSPEC_STOREU))]
+  "TARGET_AVX512F"
+  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "ssememalign" "8")
+   (set_attr "prefix" "maybe_vex")])
+
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI48_AVX512VL
+	  (unspec:VI48_AVX512VL
+	    [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
 	    UNSPEC_STOREU)
 	  (match_dup 0)
 	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
   "TARGET_AVX512F"
-{
-  if (<MODE>mode == V8DImode)
-    return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-  else
-    return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-}
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "memory" "store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI12_AVX512VL
+	  (unspec:VI12_AVX512VL
+	    [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
+	    UNSPEC_STOREU)
+	  (match_dup 0)
+	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "memory" "store")


* Fwd: [PATCH i386 AVX512] [22/n] Extend unaligned loads & stores.
       [not found]     ` <CAFULd4Z+FSQCHSKrVp-XMr1UJDhHHQwnKjPa6izqhDVp_igGVA@mail.gmail.com>
@ 2014-08-26 20:34       ` Uros Bizjak
  0 siblings, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2014-08-26 20:34 UTC (permalink / raw)
  To: gcc-patches

On Tue, Aug 26, 2014 at 9:55 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello Uroš,
> On 23 Aug 09:44, Uros Bizjak wrote:
>> On Fri, Aug 22, 2014 at 1:51 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
>>
>> > This patch extends the unaligned load and store patterns.
>> At this stage, I'd still prefer simple constraints (the solution
>> proposed above), even at the price of additional patterns. Looking at
>> the patterns, it is quite hard to calculate the final condition for a
>> particular mode/target combo, even without the enable attribute and
>> conditional operand constraints/predicates. With the solution above,
>> the complexity is conveniently pushed to the mask define_subst attribute.
> Below is the patch, which splits the unaligned ld/st patterns.
> Bootstrapped and avx512-regtested on the simulator.
>
> gcc/
>         * config/i386/sse.md
>         (define_mode_iterator VI48_AVX512VL): New.
>         (define_mode_iterator VI_UNALIGNED_LOADSTORE): Delete.
>         (define_mode_iterator VI_ULOADSTORE_AVX512BW): New.
>         (define_mode_iterator VI_ULOADSTORE_AVX512F): Ditto.
>         (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
>         with VI1): Change mode iterator.
>         (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
>         with VI_ULOADSTORE_AVX512BW): New.
>         (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
>         with VI_ULOADSTORE_AVX512F): Ditto.
>         (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
>         with VI1): Change mode iterator.
>         (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
>         with VI_ULOADSTORE_AVX512BW): New.
>         (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
>         with VI_ULOADSTORE_AVX512F): Ditto.
>         (define_insn "<sse2_avx_avx512f>_storedqu<mode>
>         with VI1): Change mode iterator.
>         (define_insn "<sse2_avx_avx512f>_storedqu<mode>
>         with VI_ULOADSTORE_AVX512BW): New.
>         (define_insn "<sse2_avx_avx512f>_storedqu<mode>
>         with VI_ULOADSTORE_AVX512F): Ditto.
>         (define_insn "avx512f_storedqu<mode>_mask"): Delete.
>         (define_insn "<avx512>_storedqu<mode>_mask" with
>         VI48_AVX512VL): New.
>         (define_insn "<avx512>_storedqu<mode>_mask" with
>         VI12_AVX512VL): Ditto.
>
> Is it ok for trunk?

OK with the mode iterators renamed as suggested below.

Thanks,
Uros.


> -(define_mode_iterator VI_UNALIGNED_LOADSTORE
> -  [(V32QI "TARGET_AVX") V16QI
> -   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
> +(define_mode_iterator VI_ULOADSTORE_AVX512BW
> +  [V64QI
> +   V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
> +
> +(define_mode_iterator VI_ULOADSTORE_AVX512F
> +  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
> +   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])

Please name these two VI_ULOADSTORE_BW_AVX512VL and
VI_ULOADSTORE_F_AVX512VL, to be consistent with the other names, and
put them near the loadstore patterns.
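
With that renaming, the two iterators from the patch would read as
follows (bodies unchanged, only the names differ):

  (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
    [V64QI
     V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])

  (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
    [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
     V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])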
