public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3563] AVX512FP16: Add vmovw/vmovsh.
@ 2021-09-16  5:37 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-09-16  5:37 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:c4d423c74abd470d0161ec2a428fd01de5e1ed76

commit r12-3563-gc4d423c74abd470d0161ec2a428fd01de5e1ed76
Author: liuhongt <hongtao.liu@intel.com>
Date:   Thu Feb 28 11:43:30 2019 -0800

    AVX512FP16: Add vmovw/vmovsh.
    
    gcc/ChangeLog:
    
            * config/i386/avx512fp16intrin.h: (_mm_cvtsi16_si128):
            New intrinsic.
            (_mm_cvtsi128_si16): Likewise.
            (_mm_mask_load_sh): Likewise.
            (_mm_maskz_load_sh): Likewise.
            (_mm_mask_store_sh): Likewise.
            (_mm_move_sh): Likewise.
            (_mm_mask_move_sh): Likewise.
            (_mm_maskz_move_sh): Likewise.
            * config/i386/i386-builtin-types.def: Add corresponding builtin types.
            * config/i386/i386-builtin.def: Add corresponding new builtins.
            * config/i386/i386-expand.c
            (ix86_expand_special_args_builtin): Handle new builtin types.
            (ix86_expand_vector_init_one_nonzero): Adjust for FP16 target.
            * config/i386/sse.md (VI2F): New mode iterator.
            (vec_set<mode>_0): Use new mode iterator.
            (avx512f_mov<ssescalarmodelower>_mask): Adjust for HF vector mode.
            (avx512f_store<mode>_mask): Ditto.

Diff:
---
 gcc/config/i386/avx512fp16intrin.h     | 59 ++++++++++++++++++++++++++++++++++
 gcc/config/i386/i386-builtin-types.def |  3 ++
 gcc/config/i386/i386-builtin.def       |  5 +++
 gcc/config/i386/i386-expand.c          | 11 +++++++
 gcc/config/i386/sse.md                 | 33 ++++++++++---------
 5 files changed, 95 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 5d66ca5c820..baa5be485e9 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -2453,6 +2453,65 @@ _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vmovw.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi16_si128 (short __A)
+{
+  return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, __A);
+}
+
+extern __inline short
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si16 (__m128i __A)
+{
+  return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, 0);
+}
+
+/* Intrinsics vmovsh.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sh (__m128h __A, __mmask8 __B, _Float16 const* __C)
+{
+  return __builtin_ia32_loadsh_mask (__C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sh (__mmask8 __A, _Float16 const* __B)
+{
+  return __builtin_ia32_loadsh_mask (__B, _mm_setzero_ph (), __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sh (_Float16 const* __A, __mmask8 __B, __m128h __C)
+{
+  __builtin_ia32_storesh_mask (__A,  __C, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_sh (__m128h __A, __m128h  __B)
+{
+  __A[0] = __B[0];
+  return __A;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sh (__m128h __A, __mmask8 __B, __m128h  __C, __m128h __D)
+{
+  return __builtin_ia32_vmovsh_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sh (__mmask8 __A, __m128h  __B, __m128h __C)
+{
+  return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
+}
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 78127fab3c0..126cc0c45ce 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -134,6 +134,7 @@ DEF_POINTER_TYPE (PCVOID, VOID, CONST)
 DEF_POINTER_TYPE (PVOID, VOID)
 DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
 DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PCFLOAT16, FLOAT16, CONST)
 DEF_POINTER_TYPE (PSHORT, SHORT)
 DEF_POINTER_TYPE (PUSHORT, USHORT)
 DEF_POINTER_TYPE (PINT, INT)
@@ -1308,6 +1309,8 @@ DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI)
 DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI)
 DEF_FUNCTION_TYPE (SI, V32HF, INT, USI)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
+DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 0292059c068..b8f782b2750 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -393,6 +393,10 @@ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mas
 BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
 BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
 
+/* AVX512FP16 */
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_storehf_mask, "__builtin_ia32_storesh_mask", IX86_BUILTIN_STORESH_MASK, UNKNOWN, (int) VOID_FTYPE_PCFLOAT16_V8HF_UQI)
+
 /* RDPKRU and WRPKRU.  */
 BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru,  "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
 BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru,  "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
@@ -2826,6 +2830,7 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getmantv16hf_mask, "__builtin_ia32_getmantph256_mask", IX86_BUILTIN_GETMANTPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getmantv8hf_mask, "__builtin_ia32_getmantph128_mask", IX86_BUILTIN_GETMANTPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_movhf_mask, "__builtin_ia32_vmovsh_mask", IX86_BUILTIN_VMOVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
 
 /* Builtins with rounding support.  */
 BDESC_END (ARGS, ROUND_ARGS)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 84acaa22151..7e830873691 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -11085,6 +11085,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
     case VOID_FTYPE_PFLOAT_V16SF_UHI:
     case VOID_FTYPE_PFLOAT_V8SF_UQI:
     case VOID_FTYPE_PFLOAT_V4SF_UQI:
+    case VOID_FTYPE_PCFLOAT16_V8HF_UQI:
     case VOID_FTYPE_PV32QI_V32HI_USI:
     case VOID_FTYPE_PV16QI_V16HI_UHI:
     case VOID_FTYPE_PUDI_V8HI_UQI:
@@ -11157,6 +11158,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
     case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
     case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
     case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
+    case V8HF_FTYPE_PCFLOAT16_V8HF_UQI:
       nargs = 3;
       klass = load;
       memory = 0;
@@ -14194,6 +14196,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
       break;
     case E_V8HImode:
       use_vector_set = TARGET_SSE2;
+      gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0
+	? gen_vec_setv8hi_0 : NULL;
       break;
     case E_V8QImode:
       use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
@@ -14205,8 +14209,12 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
       use_vector_set = TARGET_SSE4_1;
       break;
     case E_V32QImode:
+      use_vector_set = TARGET_AVX;
+      break;
     case E_V16HImode:
       use_vector_set = TARGET_AVX;
+      gen_vec_set_0 = TARGET_AVX512FP16 && one_var == 0
+	? gen_vec_setv16hi_0 : NULL;
       break;
     case E_V8SImode:
       use_vector_set = TARGET_AVX;
@@ -14254,6 +14262,9 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
       use_vector_set = TARGET_AVX512FP16 && one_var == 0;
       gen_vec_set_0 = gen_vec_setv32hf_0;
       break;
+    case E_V32HImode:
+      use_vector_set = TARGET_AVX512FP16 && one_var == 0;
+      gen_vec_set_0 = gen_vec_setv32hi_0;
     default:
       break;
     }
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a1d419292d1..5dbbed0c09d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -779,6 +779,7 @@
   (V32HF "TARGET_AVX512BW")])
 
 ;; Int-float size matches
+(define_mode_iterator VI2F [V8HI V16HI V32HI V8HF V16HF V32HF])
 (define_mode_iterator VI4F_128 [V4SI V4SF])
 (define_mode_iterator VI8F_128 [V2DI V2DF])
 (define_mode_iterator VI4F_256 [V8SI V8SF])
@@ -1361,13 +1362,13 @@
   [(set (match_dup 0) (match_dup 1))])
 
 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
-  [(set (match_operand:VF_128 0 "register_operand" "=v")
-	(vec_merge:VF_128
-	  (vec_merge:VF_128
-	    (match_operand:VF_128 2 "register_operand" "v")
-	    (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
+  [(set (match_operand:VFH_128 0 "register_operand" "=v")
+	(vec_merge:VFH_128
+	  (vec_merge:VFH_128
+	    (match_operand:VFH_128 2 "register_operand" "v")
+	    (match_operand:VFH_128 3 "nonimm_or_0_operand" "0C")
 	    (match_operand:QI 4 "register_operand" "Yk"))
-	  (match_operand:VF_128 1 "register_operand" "v")
+	  (match_operand:VFH_128 1 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512F"
   "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
@@ -1380,7 +1381,7 @@
 	(vec_merge:<ssevecmode>
 	  (vec_merge:<ssevecmode>
 	    (vec_duplicate:<ssevecmode>
-	      (match_operand:MODEF 1 "memory_operand"))
+	      (match_operand:MODEFH 1 "memory_operand"))
 	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
 	    (match_operand:QI 3 "register_operand"))
 	  (match_dup 4)
@@ -1393,7 +1394,7 @@
 	(vec_merge:<ssevecmode>
 	  (vec_merge:<ssevecmode>
 	    (vec_duplicate:<ssevecmode>
-	      (match_operand:MODEF 1 "memory_operand" "m"))
+	      (match_operand:MODEFH 1 "memory_operand" "m"))
 	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
 	    (match_operand:QI 3 "register_operand" "Yk"))
 	  (match_operand:<ssevecmode> 4 "const0_operand" "C")
@@ -1406,11 +1407,11 @@
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512f_store<mode>_mask"
-  [(set (match_operand:MODEF 0 "memory_operand" "=m")
-	(if_then_else:MODEF
+  [(set (match_operand:MODEFH 0 "memory_operand" "=m")
+	(if_then_else:MODEFH
 	  (and:QI (match_operand:QI 2 "register_operand" "Yk")
 		 (const_int 1))
-	  (vec_select:MODEF
+	  (vec_select:MODEFH
 	    (match_operand:<ssevecmode> 1 "register_operand" "v")
 	    (parallel [(const_int 0)]))
 	  (match_dup 0)))]
@@ -8818,11 +8819,11 @@
 
 ;; vmovw clears also the higer bits
 (define_insn "vec_set<mode>_0"
-  [(set (match_operand:VF_AVX512FP16 0 "register_operand" "=v,v")
-	(vec_merge:VF_AVX512FP16
-	  (vec_duplicate:VF_AVX512FP16
-	    (match_operand:HF 2 "nonimmediate_operand" "r,m"))
-	  (match_operand:VF_AVX512FP16 1 "const0_operand" "C,C")
+  [(set (match_operand:VI2F 0 "register_operand" "=v,v")
+	(vec_merge:VI2F
+	  (vec_duplicate:VI2F
+	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m"))
+	  (match_operand:VI2F 1 "const0_operand" "C,C")
 	  (const_int 1)))]
   "TARGET_AVX512FP16"
   "@


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-16  5:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-16  5:37 [gcc r12-3563] AVX512FP16: Add vmovw/vmovsh hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).