public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: hongtao Liu <liuhongt@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-3788] AVX512FP16: Add permutation and mask blend intrinsics. Date: Wed, 22 Sep 2021 10:28:23 +0000 (GMT) [thread overview] Message-ID: <20210922102823.7B0C53858413@sourceware.org> (raw) https://gcc.gnu.org/g:6b0e0b29c639451dd8e2234907b68806e51019c7 commit r12-3788-g6b0e0b29c639451dd8e2234907b68806e51019c7 Author: dianhong xu <dianhong.xu@intel.com> Date: Tue Jun 22 20:33:24 2021 +0800 AVX512FP16: Add permutation and mask blend intrinsics. gcc/ChangeLog: * config/i386/avx512fp16intrin.h: (_mm512_mask_blend_ph): New intrinsic. (_mm512_permutex2var_ph): Ditto. (_mm512_permutexvar_ph): Ditto. * config/i386/avx512fp16vlintrin.h: (_mm256_mask_blend_ph): New intrinsic. (_mm256_permutex2var_ph): Ditto. (_mm256_permutexvar_ph): Ditto. (_mm_mask_blend_ph): Ditto. (_mm_permutex2var_ph): Ditto. (_mm_permutexvar_ph): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-14.c: New test. Diff: --- gcc/config/i386/avx512fp16intrin.h | 31 ++++++++++ gcc/config/i386/avx512fp16vlintrin.h | 62 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/avx512fp16-14.c | 85 +++++++++++++++++++++++++++ 3 files changed, 178 insertions(+) diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index c0fb9ff2538..29cf6792335 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -7211,6 +7211,37 @@ _mm512_reduce_max_ph (__m512h __A) #undef _MM512_REDUCE_OP +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_blend_ph (__mmask32 __U, __m512h __A, __m512h __W) +{ + return (__m512h) __builtin_ia32_movdquhi512_mask ((__v32hi) __W, + (__v32hi) __A, + (__mmask32) __U); + +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutex2var_ph (__m512h __A, __m512i __I, __m512h __B) +{ + return (__m512h) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A, + (__v32hi) __I, + (__v32hi) __B, + (__mmask32)-1); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_permutexvar_ph (__m512i __A, __m512h __B) +{ + return (__m512h) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, + (__v32hi) __A, + (__v32hi) + (_mm512_setzero_ph ()), + (__mmask32)-1); +} + #ifdef __DISABLE_AVX512FP16__ #undef __DISABLE_AVX512FP16__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 36b3fe21d5b..3d3de964224 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -3253,6 +3253,68 @@ _mm_reduce_max_ph (__m128h __A) #undef _MM256_REDUCE_OP #undef _MM_REDUCE_OP +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W) +{ + return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W, + (__v16hi) __A, + (__mmask16) __U); + +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B) +{ + return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A, + (__v16hi) __I, + (__v16hi) __B, + (__mmask16)-1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_permutexvar_ph (__m256i __A, __m256h __B) +{ + return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) + (_mm256_setzero_ph ()), + (__mmask16)-1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W) +{ + return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W, + (__v8hi) __A, + (__mmask8) __U); + +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B) +{ + return (__m128h) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A, + (__v8hi) __I, + (__v8hi) __B, + (__mmask8)-1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_permutexvar_ph (__m128i __A, __m128h __B) +{ + return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) + (_mm_setzero_ph ()), + (__mmask8)-1); +} + #ifdef __DISABLE_AVX512FP16VL__ #undef __DISABLE_AVX512FP16VL__ #pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-14.c b/gcc/testsuite/gcc.target/i386/avx512fp16-14.c new file mode 100644 index 00000000000..5c670aea640 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-14.c @@ -0,0 +1,85 @@ +/* { dg-do compile} */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512bw" } */ + +#include <immintrin.h> + +__m512h +__attribute__ ((noinline, noclone)) +test_mm512_mask_blend_ph (__mmask32 U, __m512h A, __m512h B ) +{ + return _mm512_mask_blend_ph (U, A, B); +} + +/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +__m512h +__attribute__ ((noinline, noclone)) +test_mm512_permutex2var_ph (__m512h A, __m512i I, __m512h B) +{ + return _mm512_permutex2var_ph (A, I, B); +} + +/* { dg-final { scan-assembler-times "vperm\[ti\]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+" 1 } } */ + +__m512h +__attribute__ ((noinline, noclone)) +test_mm512_permutexvar_ph (__m512i A, __m512h B) +{ + return _mm512_permutexvar_ph (A, B); +} + +/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+" 1 } } */ + +__m256h +__attribute__ ((noinline, noclone)) +test_mm256_mask_blend_ph (__mmask16 U, __m256h A, __m256h B ) +{ + return _mm256_mask_blend_ph (U, A, B); +} + +/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +__m256h +__attribute__ ((noinline, noclone)) +test_mm256_permutex2var_ph (__m256h A, __m256i I, __m256h B) +{ + return _mm256_permutex2var_ph (A, I, B); +} + +/* { dg-final { scan-assembler-times "vperm\[ti\]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ + +__m256h +__attribute__ ((noinline, noclone)) +test_mm256_permutexvar_ph (__m256i A, __m256h B) +{ + return _mm256_permutexvar_ph (A, B); +} + +/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+" 1 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +test_mm_mask_blend_ph (__mmask8 U, __m128h A, __m128h B ) +{ + return _mm_mask_blend_ph (U, A, B); +} + +/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +test_mm_permutex2var_ph (__m128h A, __m128i I, __m128h B) +{ + return _mm_permutex2var_ph (A, I, B); +} + +/* { dg-final { scan-assembler-times "vperm\[it\]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+" 1 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +test_mm_permutexvar_ph (__m128i A, __m128h B) +{ + return _mm_permutexvar_ph (A, B); +} + +/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+" 1 } } */
reply other threads:[~2021-09-22 10:28 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210922102823.7B0C53858413@sourceware.org \ --to=liuhongt@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).