public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: liuhongt <hongtao.liu@intel.com>
To: gcc-patches@gcc.gnu.org
Cc: crazylht@gmail.com, hjl.tools@gmail.com, ubizjak@gmail.com,
	jakub@redhat.com
Subject: [PATCH 37/62] AVX512FP16: Add vcvtsh2ss/vcvtsh2sd/vcvtss2sh/vcvtsd2sh.
Date: Thu,  1 Jul 2021 14:16:23 +0800	[thread overview]
Message-ID: <20210701061648.9447-38-hongtao.liu@intel.com> (raw)
In-Reply-To: <20210701061648.9447-1-hongtao.liu@intel.com>

gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h (_mm_cvtsh_ss):
	New intrinsic.
	(_mm_mask_cvtsh_ss): Likewise.
	(_mm_maskz_cvtsh_ss): Likewise.
	(_mm_cvtsh_sd): Likewise.
	(_mm_mask_cvtsh_sd): Likewise.
	(_mm_maskz_cvtsh_sd): Likewise.
	(_mm_cvt_roundsh_ss): Likewise.
	(_mm_mask_cvt_roundsh_ss): Likewise.
	(_mm_maskz_cvt_roundsh_ss): Likewise.
	(_mm_cvt_roundsh_sd): Likewise.
	(_mm_mask_cvt_roundsh_sd): Likewise.
	(_mm_maskz_cvt_roundsh_sd): Likewise.
	(_mm_cvtss_sh): Likewise.
	(_mm_mask_cvtss_sh): Likewise.
	(_mm_maskz_cvtss_sh): Likewise.
	(_mm_cvtsd_sh): Likewise.
	(_mm_mask_cvtsd_sh): Likewise.
	(_mm_maskz_cvtsd_sh): Likewise.
	(_mm_cvt_roundss_sh): Likewise.
	(_mm_mask_cvt_roundss_sh): Likewise.
	(_mm_maskz_cvt_roundss_sh): Likewise.
	(_mm_cvt_roundsd_sh): Likewise.
	(_mm_mask_cvt_roundsd_sh): Likewise.
	(_mm_maskz_cvt_roundsd_sh): Likewise.
	* config/i386/i386-builtin-types.def
	(V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT,
	V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT,
	V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT,
	V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT): Add new builtin types.
	* config/i386/i386-builtin.def: Add corresponding new builtins.
	* config/i386/i386-expand.c: Handle new builtin types.
	* config/i386/sse.md (VF48_128): New mode iterator.
	(avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name><round_saeonly_scalar_name>):
	New.
	(avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name><round_scalar_name>):
	Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add test for new builtins.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
---
 gcc/config/i386/avx512fp16intrin.h     | 280 +++++++++++++++++++++++++
 gcc/config/i386/i386-builtin-types.def |   4 +
 gcc/config/i386/i386-builtin.def       |   4 +
 gcc/config/i386/i386-expand.c          |   4 +
 gcc/config/i386/sse.md                 |  36 ++++
 gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
 gcc/testsuite/gcc.target/i386/sse-13.c |   4 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  12 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
 10 files changed, 364 insertions(+)

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 5a6a0ba83a9..05efbc5777b 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -4604,6 +4604,286 @@ _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vcvtsh2ss, vcvtsh2sd.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_ss (__m128 __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+					      _mm_setzero_ps (),
+					      (__mmask8) -1,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+			 __m128h __D)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
+			  __m128h __C)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+					      _mm_setzero_ps (),
+					      __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_sd (__m128d __A, __m128h __B)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+					      _mm_setzero_pd (),
+					      (__mmask8) -1,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+			 __m128h __D)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+					      _mm_setzero_pd (),
+					      __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+					      _mm_setzero_ps (),
+					      (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+			 __m128h __D, const int __R)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
+			  __m128h __C, const int __R)
+{
+  return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+					      _mm_setzero_ps (),
+					      __A, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+					      _mm_setzero_pd (),
+					      (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+			 __m128h __D, const int __R)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
+{
+  return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+					      _mm_setzero_pd (),
+					      __A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_ss(A, B, R)				\
+  (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A),		\
+					_mm_setzero_ps (),	\
+					(__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R)				\
+  (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsh_ss(A, B, C, R)			\
+  (__builtin_ia32_vcvtsh2ss_mask_round((C), (B),		\
+				       _mm_setzero_ps (),	\
+				       (A), (R)))
+
+#define _mm_cvt_roundsh_sd(A, B, R)				\
+  (__builtin_ia32_vcvtsh2sd_mask_round((B), (A),		\
+				       _mm_setzero_pd (),	\
+				       (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R)				\
+  (__builtin_ia32_vcvtsh2sd_mask_round((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsh_sd(A, B, C, R)			\
+  (__builtin_ia32_vcvtsh2sd_mask_round((C), (B),		\
+				       _mm_setzero_pd (),	\
+				       (A), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vcvtss2sh, vcvtsd2sh.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_sh (__m128h __A, __m128 __B)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+					      _mm_setzero_ph (),
+					      __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_sh (__m128h __A, __m128d __B)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
+					      _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+					      _mm_setzero_ph (),
+					      __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
+			 const int __R)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
+			  const int __R)
+{
+  return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+					      _mm_setzero_ph (),
+					      __A, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
+			 const int __R)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
+			  const int __R)
+{
+  return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+					      _mm_setzero_ph (),
+					      __A, __R);
+}
+
+#else
+#define _mm_cvt_roundss_sh(A, B, R)				\
+  (__builtin_ia32_vcvtss2sh_mask_round ((B), (A),		\
+					_mm_setzero_ph (),	\
+					(__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundss_sh(A, B, C, D, R)				\
+  (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundss_sh(A, B, C, R)			\
+  (__builtin_ia32_vcvtss2sh_mask_round ((C), (B),		\
+					_mm_setzero_ph (),	\
+					(A), (R)))
+
+#define _mm_cvt_roundsd_sh(A, B, R)				\
+  (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A),		\
+					_mm_setzero_ph (),	\
+					(__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsd_sh(A, B, C, D, R)				\
+  (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsd_sh(A, B, C, R)			\
+  (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B),		\
+					_mm_setzero_ph (),	\
+					(A), (R)))
+
+#endif /* __OPTIMIZE__ */
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 4123e66f7cd..0cdbf1bc0c0 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1348,6 +1348,10 @@ DEF_FUNCTION_TYPE (V8DF, V8HF, V8DF, UQI, INT)
 DEF_FUNCTION_TYPE (V8HF, V8DI, V8HF, UQI, INT)
 DEF_FUNCTION_TYPE (V8HF, V8DF, V8HF, UQI, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT)
+DEF_FUNCTION_TYPE (V8HF, V2DF, V8HF, V8HF, UQI, INT)
+DEF_FUNCTION_TYPE (V8HF, V4SF, V8HF, V8HF, UQI, INT)
+DEF_FUNCTION_TYPE (V2DF, V8HF, V2DF, V2DF, UQI, INT)
+DEF_FUNCTION_TYPE (V4SF, V8HF, V4SF, V4SF, UQI, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT)
 DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF)
 DEF_FUNCTION_TYPE (V16HI, V16HF, V16HI, UHI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 2992bd0383d..4bb48bc21dc 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3136,6 +3136,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2ps_v16sf_mask_round", IX86_BUILTIN_VCVTPH2PS_V16SF_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph_v8df_mask_round", IX86_BUILTIN_VCVTPD2PH_V8DF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2ph_v16sf_mask_round", IX86_BUILTIN_VCVTPS2PH_V16SF_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT)
 
 BDESC_END (ROUND_ARGS, MULTI_ARG)
 
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index a216f6f2bf3..9233c6cd1e8 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -10565,8 +10565,10 @@ ix86_expand_round_builtin (const struct builtin_description *d,
     case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
     case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
     case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
+    case V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT:
     case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
     case V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT:
+    case V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT:
     case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
     case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
     case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT:
@@ -10574,6 +10576,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
     case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
     case V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT:
     case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT:
+    case V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT:
+    case V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT:
       nargs = 5;
       break;
     case V32HF_FTYPE_V32HF_INT_V32HF_USI_INT:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7447d6b75b5..95f4a82c9cd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -788,6 +788,10 @@ (define_mode_iterator VF48_I1248
   [V16SI V16SF V8DI V8DF V32HI V64QI])
 (define_mode_iterator VF48H_AVX512VL
   [V8DF V16SF (V8SF "TARGET_AVX512VL")])
+
+(define_mode_iterator VF48_128
+  [V2DF V4SF])
+
 (define_mode_iterator VI48F
   [V16SI V16SF V8DI V8DF
    (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
@@ -5869,6 +5873,38 @@ (define_insn "*avx512fp16_vcvtpd2ph_v2df_mask_1"
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name><round_saeonly_scalar_name>"
+  [(set (match_operand:VF48_128 0 "register_operand" "=v")
+     (vec_merge:VF48_128
+       (vec_duplicate:VF48_128
+         (float_extend:<ssescalarmode>
+           (vec_select:HF
+             (match_operand:V8HF 1 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
+	     (parallel [(const_int 0)]))))
+       (match_operand:VF48_128 2 "register_operand" "v")
+       (const_int 1)))]
+  "TARGET_AVX512FP16"
+  "vcvtsh2<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_saeonly_scalar_mask_op3>}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
+(define_insn "avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name><round_scalar_name>"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+     (vec_merge:V8HF
+       (vec_duplicate:V8HF
+         (float_truncate:HF
+           (vec_select:<ssescalarmode>
+             (match_operand:VF48_128 1 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")
+	     (parallel [(const_int 0)]))))
+       (match_operand:V8HF 2 "register_operand" "v")
+       (const_int 1)))]
+  "TARGET_AVX512FP16"
+  "vcvt<ssescalarmodesuffix>2sh\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index f186f8c40f3..deb25098f25 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -753,6 +753,10 @@
 #define __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 0e88174e636..dbe206bd1bb 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -770,6 +770,10 @@
 #define __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 5c3e370d4a7..e64321d8afa 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -766,6 +766,10 @@ test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
 test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
 test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8)
 test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8)
+test_2 (_mm_cvt_roundsh_ss, __m128, __m128, __m128h, 8)
+test_2 (_mm_cvt_roundsh_sd, __m128d, __m128d, __m128h, 8)
+test_2 (_mm_cvt_roundss_sh, __m128h, __m128h, __m128, 8)
+test_2 (_mm_cvt_roundsd_sh, __m128h, __m128h, __m128d, 8)
 test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
 test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
@@ -828,6 +832,10 @@ test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
 test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
 test_3 (_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8)
 test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8)
+test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8)
+test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
@@ -856,6 +864,10 @@ test_4 (_mm_mask_scalef_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h,
 test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
 test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
 test_4 (_mm_mask_getexp_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8)
+test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8)
+test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8)
 test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 5bf94d56ce3..d92898fdd11 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -872,6 +872,10 @@ test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8)
 test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8)
 test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
 test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
+test_2 (_mm_cvt_roundsh_ss, __m128, __m128, __m128h, 8)
+test_2 (_mm_cvt_roundsh_sd, __m128d, __m128d, __m128h, 8)
+test_2 (_mm_cvt_roundss_sh, __m128h, __m128h, __m128, 8)
+test_2 (_mm_cvt_roundsd_sh, __m128h, __m128h, __m128d, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
 test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
 test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -931,6 +935,10 @@ test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
 test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
 test_3 (_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8)
 test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8)
+test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8)
+test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
@@ -958,6 +966,10 @@ test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m51
 test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
 test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
 test_4 (_mm_mask_getexp_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8)
+test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8)
+test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8)
 test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
 test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 2f27d9a1e87..2f5027ba36f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -771,6 +771,10 @@
 #define __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
-- 
2.18.1


  parent reply	other threads:[~2021-07-01  6:17 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-01  6:15 [PATCH 00/62] Support all AVX512FP16 intrinsics liuhongt
2021-07-01  6:15 ` [PATCH 01/62] AVX512FP16: Support vector init/broadcast for FP16 liuhongt
2021-07-01  6:15 ` [PATCH 02/62] AVX512FP16: Add testcase for vector init and broadcast intrinsics liuhongt
2021-07-01  6:15 ` [PATCH 03/62] AVX512FP16: Fix HF vector passing in variable arguments liuhongt
2021-07-01  6:15 ` [PATCH 04/62] AVX512FP16: Add ABI tests for xmm liuhongt
2021-07-01  6:15 ` [PATCH 05/62] AVX512FP16: Add ABI test for ymm liuhongt
2021-07-01  6:15 ` [PATCH 06/62] AVX512FP16: Add abi test for zmm liuhongt
2021-07-01  6:15 ` [PATCH 07/62] AVX512FP16: Add vaddph/vsubph/vdivph/vmulph liuhongt
2021-09-09  7:48   ` Hongtao Liu
2021-07-01  6:15 ` [PATCH 08/62] AVX512FP16: Add testcase for vaddph/vsubph/vmulph/vdivph liuhongt
2021-07-01  6:15 ` [PATCH 09/62] AVX512FP16: Enable _Float16 autovectorization liuhongt
2021-09-10  7:03   ` Hongtao Liu
2021-07-01  6:15 ` [PATCH 10/62] AVX512FP16: Add vaddsh/vsubsh/vmulsh/vdivsh liuhongt
2021-07-01  6:15 ` [PATCH 11/62] AVX512FP16: Add testcase for vaddsh/vsubsh/vmulsh/vdivsh liuhongt
2021-07-01  6:15 ` [PATCH 12/62] AVX512FP16: Add vmaxph/vminph/vmaxsh/vminsh liuhongt
2021-07-01  6:15 ` [PATCH 13/62] AVX512FP16: Add testcase for vmaxph/vmaxsh/vminph/vminsh liuhongt
2021-07-01  6:16 ` [PATCH 14/62] AVX512FP16: Add vcmpph/vcmpsh/vcomish/vucomish liuhongt
2021-07-01  6:16 ` [PATCH 15/62] AVX512FP16: Add testcase for vcmpph/vcmpsh/vcomish/vucomish liuhongt
2021-07-01  6:16 ` [PATCH 16/62] AVX512FP16: Add vsqrtph/vrsqrtph/vsqrtsh/vrsqrtsh liuhongt
2021-09-14  3:50   ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 17/62] AVX512FP16: Add testcase for vsqrtph/vsqrtsh/vrsqrtph/vrsqrtsh liuhongt
2021-07-01  6:16 ` [PATCH 18/62] AVX512FP16: Add vrcpph/vrcpsh/vscalefph/vscalefsh liuhongt
2021-07-01  6:16 ` [PATCH 19/62] AVX512FP16: Add testcase for vrcpph/vrcpsh/vscalefph/vscalefsh liuhongt
2021-07-01  6:16 ` [PATCH 20/62] AVX512FP16: Add vreduceph/vreducesh/vrndscaleph/vrndscalesh liuhongt
2021-07-01  6:16 ` [PATCH 21/62] AVX512FP16: Add testcase for vreduceph/vreducesh/vrndscaleph/vrndscalesh liuhongt
2021-07-01  6:16 ` [PATCH 22/62] AVX512FP16: Add fpclass/getexp/getmant instructions liuhongt
2021-07-01  6:16 ` [PATCH 23/62] AVX512FP16: Add testcase for fpclass/getmant/getexp instructions liuhongt
2021-07-01  6:16 ` [PATCH 24/62] AVX512FP16: Add vmovw/vmovsh liuhongt
2021-09-16  5:08   ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 25/62] AVX512FP16: Add testcase for vmovsh/vmovw liuhongt
2021-07-01  6:16 ` [PATCH 26/62] AVX512FP16: Add vcvtph2dq/vcvtph2qq/vcvtph2w/vcvtph2uw/vcvtph2uqq/vcvtph2udq liuhongt
2021-07-01  6:16 ` [PATCH 27/62] AVX512FP16: Add testcase for vcvtph2w/vcvtph2uw/vcvtph2dq/vcvtph2udq/vcvtph2qq/vcvtph2uqq liuhongt
2021-07-01  6:16 ` [PATCH 28/62] AVX512FP16: Add vcvtuw2ph/vcvtw2ph/vcvtdq2ph/vcvtudq2ph/vcvtqq2ph/vcvtuqq2ph liuhongt
2021-07-01  6:16 ` [PATCH 29/62] AVX512FP16: Add testcase for vcvtw2ph/vcvtuw2ph/vcvtdq2ph/vcvtudq2ph/vcvtqq2ph/vcvtuqq2ph liuhongt
2021-07-01  6:16 ` [PATCH 30/62] AVX512FP16: Add vcvtsh2si/vcvtsh2usi/vcvtsi2sh/vcvtusi2sh liuhongt
2021-09-17  8:07   ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 31/62] AVX512FP16: Add testcase for vcvtsh2si/vcvtsh2usi/vcvtsi2sh/vcvtusi2sh liuhongt
2021-07-01  6:16 ` [PATCH 32/62] AVX512FP16: Add vcvttph2w/vcvttph2uw/vcvttph2dq/vcvttph2qq/vcvttph2udq/vcvttph2uqq liuhongt
2021-07-01  6:16 ` [PATCH 33/62] AVX512FP16: Add testcase for vcvttph2w/vcvttph2uw/vcvttph2dq/vcvttph2udq/vcvttph2qq/vcvttph2uqq liuhongt
2021-07-01  6:16 ` [PATCH 34/62] AVX512FP16: Add vcvttsh2si/vcvttsh2usi liuhongt
2021-07-01  6:16 ` [PATCH 35/62] AVX512FP16: Add vcvtph2pd/vcvtph2psx/vcvtpd2ph/vcvtps2phx liuhongt
2021-07-01  6:16 ` [PATCH 36/62] AVX512FP16: Add testcase for vcvtph2pd/vcvtph2psx/vcvtpd2ph/vcvtps2phx liuhongt
2021-07-01  6:16 ` liuhongt [this message]
2021-07-01  6:16 ` [PATCH 38/62] AVX512FP16: Add testcase for vcvtsh2sd/vcvtsh2ss/vcvtsd2sh/vcvtss2sh liuhongt
2021-07-01  6:16 ` [PATCH 39/62] AVX512FP16: Add intrinsics for casting between vector float16 and vector float32/float64/integer liuhongt
2021-07-01  6:16 ` [PATCH 40/62] AVX512FP16: Add vfmaddsub[132, 213, 231]ph/vfmsubadd[132, 213, 231]ph liuhongt
2021-09-18  7:04   ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 41/62] AVX512FP16: Add testcase for " liuhongt
2021-07-01  6:16 ` [PATCH 42/62] AVX512FP16: Add FP16 fma instructions liuhongt
2021-07-01  6:16 ` [PATCH 43/62] AVX512FP16: Add testcase for " liuhongt
2021-07-01  6:16 ` [PATCH 44/62] AVX512FP16: Add scalar/vector bitwise operations, including liuhongt
2021-07-23  5:13   ` Hongtao Liu
2021-07-26  2:25     ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 45/62] AVX512FP16: Add testcase for fp16 bitwise operations liuhongt
2021-07-01  6:16 ` [PATCH 46/62] AVX512FP16: Enable FP16 mask load/store liuhongt
2021-07-01  6:16 ` [PATCH 47/62] AVX512FP16: Add scalar fma instructions liuhongt
2021-07-01  6:16 ` [PATCH 48/62] AVX512FP16: Add testcase for scalar FMA instructions liuhongt
2021-07-01  6:16 ` [PATCH 49/62] AVX512FP16: Add vfcmaddcph/vfmaddcph/vfcmulcph/vfmulcph liuhongt
2021-09-22  4:38   ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 50/62] AVX512FP16: Add testcases for vfcmaddcph/vfmaddcph/vfcmulcph/vfmulcph liuhongt
2021-07-01  6:16 ` [PATCH 51/62] AVX512FP16: Add vfcmaddcsh/vfmaddcsh/vfcmulcsh/vfmulcsh liuhongt
2021-07-01  6:16 ` [PATCH 52/62] AVX512FP16: Add testcases for vfcmaddcsh/vfmaddcsh/vfcmulcsh/vfmulcsh liuhongt
2021-07-01  6:16 ` [PATCH 53/62] AVX512FP16: Add expander for sqrthf2 liuhongt
2021-07-23  5:12   ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 54/62] AVX512FP16: Add expander for ceil/floor/trunc/roundeven liuhongt
2021-07-01  6:16 ` [PATCH 55/62] AVX512FP16: Add expander for cstorehf4 liuhongt
2021-07-01  6:16 ` [PATCH 56/62] AVX512FP16: Optimize (_Float16) sqrtf ((float) f16) to sqrtf16 (f16) liuhongt
2021-07-01  9:50   ` Richard Biener
2021-07-01 10:23     ` Hongtao Liu
2021-07-01 12:43       ` Richard Biener
2021-07-01 21:48         ` Joseph Myers
2021-07-02  7:38           ` Richard Biener
2021-07-01 21:17   ` Joseph Myers
2021-07-01  6:16 ` [PATCH 57/62] AVX512FP16: Add expander for fmahf4 liuhongt
2021-07-01  6:16 ` [PATCH 58/62] AVX512FP16: Optimize for code like (_Float16) __builtin_ceif ((float) f16) liuhongt
2021-07-01  9:52   ` Richard Biener
2021-07-01 21:26   ` Joseph Myers
2021-07-02  7:36     ` Richard Biener
2021-07-02 11:46       ` Bernhard Reutner-Fischer
2021-07-04  5:17         ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 59/62] AVX512FP16: Support load/store/abs intrinsics liuhongt
2021-09-22 10:30   ` Hongtao Liu
2021-07-01  6:16 ` [PATCH 60/62] AVX512FP16: Add reduce operators(add/mul/min/max) liuhongt
2021-07-01  6:16 ` [PATCH 61/62] AVX512FP16: Add complex conjugation intrinsic instructions liuhongt
2021-07-01  6:16 ` [PATCH 62/62] AVX512FP16: Add permutation and mask blend intrinsics liuhongt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210701061648.9447-38-hongtao.liu@intel.com \
    --to=hongtao.liu@intel.com \
    --cc=crazylht@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hjl.tools@gmail.com \
    --cc=jakub@redhat.com \
    --cc=ubizjak@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).