public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3608] AVX512FP16: Add vcvtsh2ss/vcvtsh2sd/vcvtss2sh/vcvtsd2sh.
@ 2021-09-17 8:05 hongtao Liu
0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-09-17 8:05 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:90429b962e25574217f1a706ee16c238b30e22be
commit r12-3608-g90429b962e25574217f1a706ee16c238b30e22be
Author: liuhongt <hongtao.liu@intel.com>
Date: Tue Mar 19 16:43:29 2019 +0800
AVX512FP16: Add vcvtsh2ss/vcvtsh2sd/vcvtss2sh/vcvtsd2sh.
gcc/ChangeLog:
* config/i386/avx512fp16intrin.h (_mm_cvtsh_ss):
New intrinsic.
(_mm_mask_cvtsh_ss): Likewise.
(_mm_maskz_cvtsh_ss): Likewise.
(_mm_cvtsh_sd): Likewise.
(_mm_mask_cvtsh_sd): Likewise.
(_mm_maskz_cvtsh_sd): Likewise.
(_mm_cvt_roundsh_ss): Likewise.
(_mm_mask_cvt_roundsh_ss): Likewise.
(_mm_maskz_cvt_roundsh_ss): Likewise.
(_mm_cvt_roundsh_sd): Likewise.
(_mm_mask_cvt_roundsh_sd): Likewise.
(_mm_maskz_cvt_roundsh_sd): Likewise.
(_mm_cvtss_sh): Likewise.
(_mm_mask_cvtss_sh): Likewise.
(_mm_maskz_cvtss_sh): Likewise.
(_mm_cvtsd_sh): Likewise.
(_mm_mask_cvtsd_sh): Likewise.
(_mm_maskz_cvtsd_sh): Likewise.
(_mm_cvt_roundss_sh): Likewise.
(_mm_mask_cvt_roundss_sh): Likewise.
(_mm_maskz_cvt_roundss_sh): Likewise.
(_mm_cvt_roundsd_sh): Likewise.
(_mm_mask_cvt_roundsd_sh): Likewise.
(_mm_maskz_cvt_roundsd_sh): Likewise.
* config/i386/i386-builtin-types.def
(V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT,
V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT,
V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT,
V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT): Add new builtin types.
* config/i386/i386-builtin.def: Add corresponding new builtins.
* config/i386/i386-expand.c: Handle new builtin types.
* config/i386/sse.md (VF48_128): New mode iterator.
(avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name><round_saeonly_scalar_name>):
New.
(avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name><round_scalar_name>):
Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx-1.c: Add test for new builtins.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c: Add test for new intrinsics.
* gcc.target/i386/sse-22.c: Ditto.
Diff:
---
gcc/config/i386/avx512fp16intrin.h | 280 +++++++++++++++++++++++++++++++++
gcc/config/i386/i386-builtin-types.def | 4 +
gcc/config/i386/i386-builtin.def | 5 +-
gcc/config/i386/i386-expand.c | 4 +
gcc/config/i386/sse.md | 64 ++++++++
gcc/testsuite/gcc.target/i386/avx-1.c | 4 +
gcc/testsuite/gcc.target/i386/sse-13.c | 4 +
gcc/testsuite/gcc.target/i386/sse-14.c | 12 ++
gcc/testsuite/gcc.target/i386/sse-22.c | 12 ++
gcc/testsuite/gcc.target/i386/sse-23.c | 4 +
10 files changed, 392 insertions(+), 1 deletion(-)
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 57c264ed9f3..3b236ecbfc4 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -4610,6 +4610,286 @@ _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
#endif /* __OPTIMIZE__ */
+/* Intrinsics vcvtsh2ss, vcvtsh2sd. */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_ss (__m128 __A, __m128h __B)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+ __m128h __D)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
+ __m128h __C)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+ _mm_setzero_ps (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_sd (__m128d __A, __m128h __B)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+ __m128h __D)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+ _mm_setzero_pd (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+ __m128h __D, const int __R)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
+ __m128h __C, const int __R)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+ _mm_setzero_ps (),
+ __A, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+ __m128h __D, const int __R)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+ _mm_setzero_pd (),
+ __A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_ss(A, B, R) \
+ (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
+ _mm_setzero_ps (), \
+ (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
+ (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
+ (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
+ _mm_setzero_ps (), \
+ (A), (R)))
+
+#define _mm_cvt_roundsh_sd(A, B, R) \
+ (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
+ _mm_setzero_pd (), \
+ (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
+ (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
+ (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
+ _mm_setzero_pd (), \
+ (A), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vcvtss2sh, vcvtsd2sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_sh (__m128h __A, __m128 __B)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_sh (__m128h __A, __m128d __B)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
+ const int __R)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
+ const int __R)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
+ const int __R)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
+ const int __R)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, __R);
+}
+
+#else
+#define _mm_cvt_roundss_sh(A, B, R) \
+ (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
+ _mm_setzero_ph (), \
+ (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
+ (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
+ (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
+ _mm_setzero_ph (), \
+ (A), (R)))
+
+#define _mm_cvt_roundsd_sh(A, B, R) \
+ (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
+ _mm_setzero_ph (), \
+ (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
+ (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
+ (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
+ _mm_setzero_ph (), \
+ (A), (R)))
+
+#endif /* __OPTIMIZE__ */
+
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 3a4974f6dcd..7fd4286ef26 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1348,6 +1348,10 @@ DEF_FUNCTION_TYPE (V8DF, V8HF, V8DF, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8DI, V8HF, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8DF, V8HF, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT)
+DEF_FUNCTION_TYPE (V8HF, V2DF, V8HF, V8HF, UQI, INT)
+DEF_FUNCTION_TYPE (V8HF, V4SF, V8HF, V8HF, UQI, INT)
+DEF_FUNCTION_TYPE (V2DF, V8HF, V2DF, V2DF, UQI, INT)
+DEF_FUNCTION_TYPE (V4SF, V8HF, V4SF, V4SF, UQI, INT)
DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF)
DEF_FUNCTION_TYPE (V16HI, V16HF, V16HI, UHI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index f2af774eb8c..dc56dc2c0f5 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3136,7 +3136,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
-
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 5581975547d..bfafd1517c8 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -10743,8 +10743,10 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
+ case V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT:
case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
case V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT:
+ case V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT:
@@ -10752,6 +10754,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
case V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT:
case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT:
+ case V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT:
+ case V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT:
nargs = 5;
break;
case V32HF_FTYPE_V32HF_INT_V32HF_USI_INT:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ef9e12b07e..d7a132877e9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -809,6 +809,10 @@
[V16SI V16SF V8DI V8DF V32HI V64QI])
(define_mode_iterator VF48H_AVX512VL
[V8DF V16SF (V8SF "TARGET_AVX512VL")])
+
+(define_mode_iterator VF48_128
+ [V2DF V4SF])
+
(define_mode_iterator VI48F
[V16SI V16SF V8DI V8DF
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
@@ -6222,6 +6226,66 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
+(define_insn "avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name><round_saeonly_scalar_name>"
+ [(set (match_operand:VF48_128 0 "register_operand" "=v")
+ (vec_merge:VF48_128
+ (vec_duplicate:VF48_128
+ (float_extend:<ssescalarmode>
+ (vec_select:HF
+ (match_operand:V8HF 1 "register_operand" "v")
+ (parallel [(const_int 0)]))))
+ (match_operand:VF48_128 2 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512FP16"
+ "vcvtsh2<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_saeonly_scalar_mask_op3>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name>_mem"
+ [(set (match_operand:VF48_128 0 "register_operand" "=v")
+ (vec_merge:VF48_128
+ (vec_duplicate:VF48_128
+ (float_extend:<ssescalarmode>
+ (match_operand:HF 1 "memory_operand" "m")))
+ (match_operand:VF48_128 2 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512FP16"
+ "vcvtsh2<ssescalarmodesuffix>\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name><round_scalar_name>"
+ [(set (match_operand:V8HF 0 "register_operand" "=v")
+ (vec_merge:V8HF
+ (vec_duplicate:V8HF
+ (float_truncate:HF
+ (vec_select:<ssescalarmode>
+ (match_operand:VF48_128 1 "register_operand" "v")
+ (parallel [(const_int 0)]))))
+ (match_operand:V8HF 2 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512FP16"
+ "vcvt<ssescalarmodesuffix>2sh\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name>_mem"
+ [(set (match_operand:V8HF 0 "register_operand" "=v")
+ (vec_merge:V8HF
+ (vec_duplicate:V8HF
+ (float_truncate:HF
+ (match_operand:MODEF 1 "memory_operand" "m")))
+ (match_operand:V8HF 2 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512FP16"
+ "vcvt<ssescalarmodesuffix>2sh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index cce3409db4a..add3e0ced24 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -753,6 +753,10 @@
#define __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, D) __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 7e5bf406006..dd3399397f2 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -770,6 +770,10 @@
#define __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, D) __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 5c3e370d4a7..e64321d8afa 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -766,6 +766,10 @@ test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8)
test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8)
+test_2 (_mm_cvt_roundsh_ss, __m128, __m128, __m128h, 8)
+test_2 (_mm_cvt_roundsh_sd, __m128d, __m128d, __m128h, 8)
+test_2 (_mm_cvt_roundss_sh, __m128h, __m128h, __m128, 8)
+test_2 (_mm_cvt_roundsd_sh, __m128h, __m128h, __m128d, 8)
test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
@@ -828,6 +832,10 @@ test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
test_3 (_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8)
test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8)
+test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8)
+test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
@@ -856,6 +864,10 @@ test_4 (_mm_mask_scalef_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h,
test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_getexp_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8)
+test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8)
+test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8)
test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 5bf94d56ce3..d92898fdd11 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -872,6 +872,10 @@ test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8)
test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8)
test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
+test_2 (_mm_cvt_roundsh_ss, __m128, __m128, __m128h, 8)
+test_2 (_mm_cvt_roundsh_sd, __m128d, __m128d, __m128h, 8)
+test_2 (_mm_cvt_roundss_sh, __m128h, __m128h, __m128, 8)
+test_2 (_mm_cvt_roundsd_sh, __m128h, __m128h, __m128d, 8)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -931,6 +935,10 @@ test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
test_3 (_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8)
test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8)
+test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8)
+test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8)
+test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
@@ -958,6 +966,10 @@ test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m51
test_4 (_mm_mask_reduce_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_roundscale_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123)
test_4 (_mm_mask_getexp_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8)
+test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8)
+test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8)
+test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8)
test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8)
test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 947cf74c136..9c32b7b9816 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -771,6 +771,10 @@
#define __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, D) __builtin_ia32_vcvtph2psx512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph512_mask_round(A, B, C, 8)
#define __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, D) __builtin_ia32_vcvtps2phx512_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2ss_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-09-17 8:05 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-17 8:05 [gcc r12-3608] AVX512FP16: Add vcvtsh2ss/vcvtsh2sd/vcvtss2sh/vcvtsd2sh hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).