public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-8238] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339]
@ 2022-04-25  1:18 Hongyu Wang
  0 siblings, 0 replies; only message in thread
From: Hongyu Wang @ 2022-04-25  1:18 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3c940d42701707559fabe49be99296f60fbc43e7

commit r12-8238-g3c940d42701707559fabe49be99296f60fbc43e7
Author: Hongyu Wang <hongyu.wang@intel.com>
Date:   Fri Apr 22 14:42:30 2022 +0800

    AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339]
    
    Add missing macro under O0 and adjust macro format for scalf
    intrinsics.
    
    gcc/ChangeLog:
    
            PR target/105339
            * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
            Add parentheses for parameters and djust format.
            (_mm512_mask_scalef_round_pd): Ditto.
            (_mm512_maskz_scalef_round_pd): Ditto.
            (_mm512_scalef_round_ps): Ditto.
            (_mm512_mask_scalef_round_ps): Ditto.
            (_mm512_maskz_scalef_round_ps): Ditto.
            (_mm_scalef_round_sd): Use _mm_undefined_pd.
            (_mm_scalef_round_ss): Use _mm_undefined_ps.
            (_mm_mask_scalef_round_sd): New macro.
            (_mm_mask_scalef_round_ss): Ditto.
            (_mm_maskz_scalef_round_sd): Ditto.
            (_mm_maskz_scalef_round_ss): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            PR target/105339
            * gcc.target/i386/sse-14.c: Add tests for new macro.

Diff:
---
 gcc/config/i386/avx512fintrin.h        | 76 +++++++++++++++++++++++++---------
 gcc/testsuite/gcc.target/i386/sse-14.c |  4 ++
 2 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 29511fd2831..77d6249c2bc 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
 						      (__mmask8) __U, __R);
 }
 #else
-#define _mm512_scalef_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
+#define _mm512_scalef_round_pd(A, B, C)					\
+  ((__m512d)								\
+   __builtin_ia32_scalefpd512_mask((A), (B),				\
+				   (__v8df) _mm512_undefined_pd(),	\
+				   -1, (C)))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C)			\
+  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C)			\
+  ((__m512d)								\
+   __builtin_ia32_scalefpd512_mask((A), (B),				\
+				   (__v8df) _mm512_setzero_pd(),	\
+				   (U), (C)))
+
+#define _mm512_scalef_round_ps(A, B, C)					\
+  ((__m512)								\
+   __builtin_ia32_scalefps512_mask((A), (B),				\
+				   (__v16sf) _mm512_undefined_ps(),	\
+				   -1, (C)))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C)			\
+  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C)			\
+  ((__m512)								\
+   __builtin_ia32_scalefps512_mask((A), (B),				\
+				   (__v16sf) _mm512_setzero_ps(),	\
+				   (U), (C)))
+
+#define _mm_scalef_round_sd(A, B, C)					\
+  ((__m128d)								\
+   __builtin_ia32_scalefsd_mask_round ((A), (B),			\
+				       (__v2df) _mm_undefined_pd (),	\
+				       -1, (C)))
 
-#define _mm512_scalef_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
+#define _mm_scalef_round_ss(A, B, C)					\
+  ((__m128)								\
+   __builtin_ia32_scalefss_mask_round ((A), (B),			\
+				       (__v4sf) _mm_undefined_ps (),	\
+				       -1, (C)))
 
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
+#define _mm_mask_scalef_round_sd(W, U, A, B, C)				\
+  ((__m128d)								\
+   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+#define _mm_mask_scalef_round_ss(W, U, A, B, C)				\
+  ((__m128)								\
+   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm_scalef_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
-	(__v2df)_mm_setzero_pd (), -1, C)
+#define _mm_maskz_scalef_round_sd(U, A, B, C)				\
+  ((__m128d)								\
+   __builtin_ia32_scalefsd_mask_round ((A), (B),			\
+				       (__v2df) _mm_setzero_pd (),	\
+				       (U), (C)))
 
-#define _mm_scalef_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
-	(__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm_maskz_scalef_round_ss(U, A, B, C)				\
+  ((__m128)								\
+   __builtin_ia32_scalefss_mask_round ((A), (B),			\
+				       (__v4sf) _mm_setzero_ps (),	\
+				       (U), (C)))
 #endif
 
 #define _mm_mask_scalef_sd(W, U, A, B) \
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 956a9d16f84..f41493b93f3 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -430,7 +430,9 @@ test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 9)
 test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_scalef_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_scalef_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1)
 test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1)
 test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1)
@@ -545,7 +547,9 @@ test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
 test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_scalef_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_scalef_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1)
 test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
 test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-04-25  1:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-25  1:18 [gcc r12-8238] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339] Hongyu Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).