public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3505] AVX512FP16: Add vrcpph/vrcpsh/vscalefph/vscalefsh.
@ 2021-09-14  4:35 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-09-14  4:35 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:bf4c12404fbbc6b21ab4ca2e5e8c6b6b1afe4190

commit r12-3505-gbf4c12404fbbc6b21ab4ca2e5e8c6b6b1afe4190
Author: liuhongt <hongtao.liu@intel.com>
Date:   Mon Feb 25 11:25:03 2019 -0800

    AVX512FP16: Add vrcpph/vrcpsh/vscalefph/vscalefsh.
    
    gcc/ChangeLog:
    
            * config/i386/avx512fp16intrin.h (_mm512_rcp_ph):
            New intrinsic.
            (_mm512_mask_rcp_ph): Likewise.
            (_mm512_maskz_rcp_ph): Likewise.
            (_mm_rcp_sh): Likewise.
            (_mm_mask_rcp_sh): Likewise.
            (_mm_maskz_rcp_sh): Likewise.
            (_mm512_scalef_ph): Likewise.
            (_mm512_mask_scalef_ph): Likewise.
            (_mm512_maskz_scalef_ph): Likewise.
            (_mm512_scalef_round_ph): Likewise.
            (_mm512_mask_scalef_round_ph): Likewise.
            (_mm512_maskz_scalef_round_ph): Likewise.
            (_mm_scalef_sh): Likewise.
            (_mm_mask_scalef_sh): Likewise.
            (_mm_maskz_scalef_sh): Likewise.
            (_mm_scalef_round_sh): Likewise.
            (_mm_mask_scalef_round_sh): Likewise.
            (_mm_maskz_scalef_round_sh): Likewise.
            * config/i386/avx512fp16vlintrin.h (_mm_rcp_ph):
            New intrinsic.
            (_mm256_rcp_ph): Likewise.
            (_mm_mask_rcp_ph): Likewise.
            (_mm256_mask_rcp_ph): Likewise.
            (_mm_maskz_rcp_ph): Likewise.
            (_mm256_maskz_rcp_ph): Likewise.
            (_mm_scalef_ph): Likewise.
            (_mm256_scalef_ph): Likewise.
            (_mm_mask_scalef_ph): Likewise.
            (_mm256_mask_scalef_ph): Likewise.
            (_mm_maskz_scalef_ph): Likewise.
            (_mm256_maskz_scalef_ph): Likewise.
            * config/i386/i386-builtin.def: Add new builtins.
            * config/i386/sse.md (VFH_AVX512VL): New.
            (avx512fp16_rcp<mode>2<mask_name>): Ditto.
            (avx512fp16_vmrcpv8hf2<mask_scalar_name>): Ditto.
            (avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>):
            Adjust to support HF vector modes.
            (<avx512>_scalef<mode><mask_name><round_name>): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/avx-1.c: Add test for new builtins.
            * gcc.target/i386/sse-13.c: Ditto.
            * gcc.target/i386/sse-23.c: Ditto.
            * gcc.target/i386/sse-14.c: Add test for new intrinsics.
            * gcc.target/i386/sse-22.c: Ditto.

Diff:
---
 gcc/config/i386/avx512fp16intrin.h     | 195 +++++++++++++++++++++++++++++++++
 gcc/config/i386/avx512fp16vlintrin.h   |  97 ++++++++++++++++
 gcc/config/i386/i386-builtin.def       |   8 ++
 gcc/config/i386/sse.md                 |  49 +++++++--
 gcc/testsuite/gcc.target/i386/avx-1.c  |   2 +
 gcc/testsuite/gcc.target/i386/sse-13.c |   2 +
 gcc/testsuite/gcc.target/i386/sse-14.c |   6 +
 gcc/testsuite/gcc.target/i386/sse-22.c |   3 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   2 +
 9 files changed, 355 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 50db5d12140..9a52d2ac36e 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -1428,6 +1428,201 @@ _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vrcpph.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp_ph (__m512h __A)
+{
+  return __builtin_ia32_vrcpph_v32hf_mask (__A, _mm512_setzero_ph (),
+					   (__mmask32) -1);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+  return __builtin_ia32_vrcpph_v32hf_mask (__C, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
+{
+  return __builtin_ia32_vrcpph_v32hf_mask (__B, _mm512_setzero_ph (),
+					   __A);
+}
+
+/* Intrinsics vrcpsh.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_vrcpsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
+					  (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{ /* __B is __mmask8: the vrcpsh builtin's mask operand is 8 bits (UQI).  */
+  return __builtin_ia32_vrcpsh_v8hf_mask (__D, __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{ /* __A is __mmask8: the vrcpsh builtin's mask operand is 8 bits (UQI).  */
+  return __builtin_ia32_vrcpsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
+					  __A);
+}
+
+/* Intrinsics vscalefph.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_ph (__m512h __A, __m512h __B)
+{
+  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
+						    _mm512_setzero_ph (),
+						    (__mmask32) -1,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
+						    _mm512_setzero_ph (),
+						    __A,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
+						    _mm512_setzero_ph (),
+						    (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+			     __m512h __D, const int __E)
+{
+  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
+						    __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+			      const int __D)
+{
+  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
+						    _mm512_setzero_ph (),
+						    __A, __D);
+}
+
+#else
+#define _mm512_scalef_round_ph(A, B, C)					\
+  (__builtin_ia32_vscalefph_v32hf_mask_round ((A), (B),			\
+					      _mm512_setzero_ph (),	\
+					      (__mmask32)-1, (C)))
+
+#define _mm512_mask_scalef_round_ph(A, B, C, D, E)			\
+  (__builtin_ia32_vscalefph_v32hf_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_scalef_round_ph(A, B, C, D)			\
+  (__builtin_ia32_vscalefph_v32hf_mask_round ((B), (C),			\
+					      _mm512_setzero_ph (),	\
+					      (A), (D)))
+
+#endif  /* __OPTIMIZE__ */
+
+/* Intrinsics vscalefsh.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_sh (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
+						   _mm_setzero_ph (),
+						   (__mmask8) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
+						   _mm_setzero_ph (),
+						   __A,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
+						   _mm_setzero_ph (),
+						   (__mmask8) -1, __C);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+			  __m128h __D, const int __E)
+{
+  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
+						   __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+			   const int __D)
+{
+  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
+						   _mm_setzero_ph (),
+						   __A, __D);
+}
+
+#else
+#define _mm_scalef_round_sh(A, B, C)					  \
+  (__builtin_ia32_vscalefsh_v8hf_mask_round ((A), (B),			  \
+					     _mm_setzero_ph (),		  \
+					     (__mmask8)-1, (C)))
+
+#define _mm_mask_scalef_round_sh(A, B, C, D, E)				  \
+  (__builtin_ia32_vscalefsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm_maskz_scalef_round_sh(A, B, C, D)				  \
+  (__builtin_ia32_vscalefsh_v8hf_mask_round ((B), (C), _mm_setzero_ph (), \
+					     (A), (D)))
+
+#endif /* __OPTIMIZE__ */
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index aaed85203c9..ebda59b9f9a 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -451,6 +451,103 @@ _mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
 					     __A);
 }
 
+/* Intrinsics vrcpph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ph (__m128h __A)
+{
+  return __builtin_ia32_vrcpph_v8hf_mask (__A, _mm_setzero_ph (),
+					  (__mmask8) -1);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp_ph (__m256h __A)
+{
+  return __builtin_ia32_vrcpph_v16hf_mask (__A, _mm256_setzero_ph (),
+					   (__mmask16) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
+{
+  return __builtin_ia32_vrcpph_v8hf_mask (__C, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
+{
+  return __builtin_ia32_vrcpph_v16hf_mask (__C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
+{
+  return __builtin_ia32_vrcpph_v8hf_mask (__B, _mm_setzero_ph (), __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
+{
+  return __builtin_ia32_vrcpph_v16hf_mask (__B, _mm256_setzero_ph (),
+					   __A);
+}
+
+/* Intrinsics vscalefph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ph (__m128h __A, __m128h __B)
+{
+  return __builtin_ia32_vscalefph_v8hf_mask (__A, __B,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_ph (__m256h __A, __m256h __B)
+{
+  return __builtin_ia32_vscalefph_v16hf_mask (__A, __B,
+					      _mm256_setzero_ph (),
+					      (__mmask16) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+  return __builtin_ia32_vscalefph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
+		       __m256h __D)
+{
+  return __builtin_ia32_vscalefph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+  return __builtin_ia32_vscalefph_v8hf_mask (__B, __C,
+					     _mm_setzero_ph (), __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+  return __builtin_ia32_vscalefph_v16hf_mask (__B, __C,
+					      _mm256_setzero_ph (),
+					      __A);
+}
+
 #ifdef __DISABLE_AVX512FP16VL__
 #undef __DISABLE_AVX512FP16VL__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 13ae093df0b..e509c4922bf 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2808,6 +2808,12 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_vrsqrtph_v16hf_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_vrsqrtph_v32hf_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_vrsqrtsh_v8hf_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_vrcpph_v8hf_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_vrcpph_v16hf_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_vrcpph_v32hf_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_vrcpsh_v8hf_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_vscalefph_v8hf_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_vscalefph_v16hf_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
 
 /* Builtins with rounding support.  */
 BDESC_END (ARGS, ROUND_ARGS)
@@ -3025,6 +3031,8 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_vcmpsh_v8hf_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_vsqrtph_v32hf_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_vsqrtsh_v8hf_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_vscalefph_v32hf_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_vscalefsh_v8hf_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
 
 BDESC_END (ROUND_ARGS, MULTI_ARG)
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index aa266c38227..8cf34d74143 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -396,6 +396,13 @@
 (define_mode_iterator VF1_AVX512ER_128_256
   [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
 
+(define_mode_iterator VFH_AVX512VL
+  [(V32HF "TARGET_AVX512FP16")
+   (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
 (define_mode_iterator VF2_AVX512VL
   [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
@@ -2341,6 +2348,30 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "SF")])
 
+(define_insn "avx512fp16_rcp<mode>2<mask_name>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v")
+	(unspec:VF_AVX512FP16VL
+	  [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_RCP))]
+  "TARGET_AVX512FP16"
+  "vrcpph\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512fp16_vmrcpv8hf2<mask_scalar_name>"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_merge:V8HF
+	  (unspec:V8HF [(match_operand:V8HF 1 "nonimmediate_operand" "vm")]
+		       UNSPEC_RCP)
+	  (match_operand:V8HF 2 "register_operand" "v")
+	  (const_int 1)))]
+  "TARGET_AVX512FP16"
+  "vrcpsh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %w1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "HF")])
+
 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
   [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
 	(unspec:VF_AVX512VL
@@ -10281,11 +10312,11 @@
 })
 
 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
-  [(set (match_operand:VF_128 0 "register_operand" "=v")
-	(vec_merge:VF_128
-	  (unspec:VF_128
-	    [(match_operand:VF_128 1 "register_operand" "v")
-	     (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
+  [(set (match_operand:VFH_128 0 "register_operand" "=v")
+	(vec_merge:VFH_128
+	  (unspec:VFH_128
+	    [(match_operand:VFH_128 1 "register_operand" "v")
+	     (match_operand:VFH_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
 	    UNSPEC_SCALEF)
 	  (match_dup 1)
 	  (const_int 1)))]
@@ -10295,10 +10326,10 @@
    (set_attr "mode"  "<ssescalarmode>")])
 
 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
-  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
-	(unspec:VF_AVX512VL
-	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
-	   (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
+  [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
+	(unspec:VFH_AVX512VL
+	  [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
+	   (match_operand:VFH_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
 	  UNSPEC_SCALEF))]
   "TARGET_AVX512F"
   "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 651cb1c80fb..17c396567f2 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -703,6 +703,8 @@
 #define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
 #define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
 #define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 94553dec9e7..c1d95fc2ead 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -720,6 +720,8 @@
 #define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
 #define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
 #define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 7281bffdf2b..5b6d0b082d1 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -687,6 +687,8 @@ test_2 (_mm512_cmp_ph_mask, __mmask32, __m512h, __m512h, 1)
 test_2 (_mm_comi_sh, int, __m128h, __m128h, 1)
 test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
 test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
+test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm_scalef_round_sh, __m128h, __m128h, __m128h, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
 test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
 test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -705,6 +707,8 @@ test_3 (_mm_maskz_min_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
 test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1)
 test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
 test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
+test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm_maskz_scalef_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -720,6 +724,8 @@ test_4 (_mm512_mask_min_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h,
 test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
 test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
 test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm_mask_scalef_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
 
 /* avx512fp16vlintrin.h */
 test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 04326e0e37d..b2de5679bb6 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -792,6 +792,7 @@ test_2 (_mm512_cmp_ph_mask, __mmask32, __m512h, __m512h, 1)
 test_2 (_mm_comi_sh, int, __m128h, __m128h, 1)
 test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
 test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
+test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
 test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
 test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -810,6 +811,7 @@ test_3 (_mm_maskz_min_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
 test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1)
 test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
 test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
+test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -825,6 +827,7 @@ test_4 (_mm512_mask_min_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h,
 test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
 test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
 test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
 
 /* avx512fp16vlintrin.h */
 test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 7559d335dbc..5948622cc4f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -721,6 +721,8 @@
 #define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
 #define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
 #define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-14  4:35 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-14  4:35 [gcc r12-3505] AVX512FP16: Add vrcpph/vrcpsh/vscalefph/vscalefsh hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).