public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Hongyu Wang <hongyuw@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r9-10031] AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339] Date: Wed, 27 Apr 2022 07:20:21 +0000 (GMT) [thread overview] Message-ID: <20220427072021.3C6993858D1E@sourceware.org> (raw) https://gcc.gnu.org/g:18bee83150e235ebd596443a0909a72d0c4d077f commit r9-10031-g18bee83150e235ebd596443a0909a72d0c4d077f Author: Hongyu Wang <hongyu.wang@intel.com> Date: Fri Apr 22 14:42:30 2022 +0800 AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339] Add missing macro under O0 and adjust macro format for scalef intrinsics. gcc/ChangeLog: PR target/105339 * config/i386/avx512fintrin.h (_mm512_scalef_round_pd): Add parentheses for parameters and adjust format. (_mm512_mask_scalef_round_pd): Ditto. (_mm512_maskz_scalef_round_pd): Ditto. (_mm512_scalef_round_ps): Ditto. (_mm512_mask_scalef_round_ps): Ditto. (_mm512_maskz_scalef_round_ps): Ditto. (_mm_scalef_round_sd): Use _mm_undefined_pd. (_mm_scalef_round_ss): Use _mm_undefined_ps. (_mm_mask_scalef_round_sd): New macro. (_mm_mask_scalef_round_ss): Ditto. (_mm_maskz_scalef_round_sd): Ditto. (_mm_maskz_scalef_round_ss): Ditto. gcc/testsuite/ChangeLog: PR target/105339 * gcc.target/i386/sse-14.c: Add tests for new macro.
(cherry picked from commit 3c940d42701707559fabe49be99296f60fbc43e7) Diff: --- gcc/config/i386/avx512fintrin.h | 86 ++++++++++++++++++++++++---------- gcc/testsuite/gcc.target/i386/sse-14.c | 4 ++ 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 6ca90d344e9..5e3b888282c 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -3237,31 +3237,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) (__mmask8) __U, __R); } #else -#define _mm512_scalef_round_pd(A, B, C) \ - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) - -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) - -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) - -#define _mm512_scalef_round_ps(A, B, C) \ - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) - -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ - (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) - -#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ - (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) - -#define _mm_scalef_round_sd(A, B, C) \ - (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \ - (__v2df)_mm_setzero_pd (), -1, C) - -#define _mm_scalef_round_ss(A, B, C) \ - (__m128)__builtin_ia32_scalefss_mask_round (A, B, \ - (__v4sf)_mm_setzero_ps (), -1, C) +#define _mm512_scalef_round_pd(A, B, C) \ + ((__m512d) \ + __builtin_ia32_scalefpd512_mask((A), (B), \ + (__v8df) _mm512_undefined_pd(), \ + -1, (C))) + +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ + ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))) + +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ + ((__m512d) \ + __builtin_ia32_scalefpd512_mask((A), (B), \ 
+ (__v8df) _mm512_setzero_pd(), \ + (U), (C))) + +#define _mm512_scalef_round_ps(A, B, C) \ + ((__m512) \ + __builtin_ia32_scalefps512_mask((A), (B), \ + (__v16sf) _mm512_undefined_ps(), \ + -1, (C))) + +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))) + +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ + ((__m512) \ + __builtin_ia32_scalefps512_mask((A), (B), \ + (__v16sf) _mm512_setzero_ps(), \ + (U), (C))) + +#define _mm_scalef_round_sd(A, B, C) \ + ((__m128d) \ + __builtin_ia32_scalefsd_mask_round ((A), (B), \ + (__v2df) _mm_undefined_pd (), \ + -1, (C))) + +#define _mm_scalef_round_ss(A, B, C) \ + ((__m128) \ + __builtin_ia32_scalefss_mask_round ((A), (B), \ + (__v4sf) _mm_undefined_ps (), \ + -1, (C))) + +#define _mm_mask_scalef_round_sd(W, U, A, B, C) \ + ((__m128d) \ + __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C))) + +#define _mm_mask_scalef_round_ss(W, U, A, B, C) \ + ((__m128) \ + __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C))) + +#define _mm_maskz_scalef_round_sd(U, A, B, C) \ + ((__m128d) \ + __builtin_ia32_scalefsd_mask_round ((A), (B), \ + (__v2df) _mm_setzero_pd (), \ + (U), (C))) + +#define _mm_maskz_scalef_round_ss(U, A, B, C) \ + ((__m128) \ + __builtin_ia32_scalefss_mask_round ((A), (B), \ + (__v4sf) _mm_setzero_ps (), \ + (U), (C))) #endif #ifdef __OPTIMIZE__ diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 99719325773..44347e9b802 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -429,7 +429,9 @@ test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 9) test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 9) test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 9) test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 9) +test_3 (_mm_maskz_scalef_round_sd, __m128d, __mmask8, 
__m128d, __m128d, 9) test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 9) +test_3 (_mm_maskz_scalef_round_ss, __m128, __mmask8, __m128, __m128, 9) test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1) test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1) test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1) @@ -543,7 +545,9 @@ test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9) test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9) +test_4 (_mm_mask_scalef_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9) test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9) +test_4 (_mm_mask_scalef_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1) test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1) test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
reply other threads:[~2022-04-27 7:20 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220427072021.3C6993858D1E@sourceware.org \ --to=hongyuw@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).