public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: hongtao Liu <liuhongt@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/ix86/heads/evex512)] Push evex512 target for 512 bit intrins Date: Fri, 22 Sep 2023 03:19:22 +0000 (GMT) [thread overview] Message-ID: <20230922031922.780063857C5A@sourceware.org> (raw) https://gcc.gnu.org/g:567f243d6db4623dc5e7eb1a61b6faa0f5fa9f5f commit 567f243d6db4623dc5e7eb1a61b6faa0f5fa9f5f Author: Haochen Jiang <haochen.jiang@intel.com> Date: Mon Aug 28 15:31:52 2023 +0800 Push evex512 target for 512 bit intrins gcc/ChangeLog: * config/i386/avx512bwintrin.h: Add evex512 target for 512 bit intrins. Diff: --- gcc/config/i386/avx512bwintrin.h | 291 ++++++++++++++++++++------------------- 1 file changed, 153 insertions(+), 138 deletions(-) diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index d1cd549ce18..925bae1457c 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -34,16 +34,6 @@ #define __DISABLE_AVX512BW__ #endif /* __AVX512BW__ */ -/* Internal data types for implementing the intrinsics. 
*/ -typedef short __v32hi __attribute__ ((__vector_size__ (64))); -typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ - __may_alias__, __aligned__ (1))); -typedef char __v64qi __attribute__ ((__vector_size__ (64))); -typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \ - __may_alias__, __aligned__ (1))); - -typedef unsigned long long __mmask64; - extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) @@ -54,229 +44,292 @@ _ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) +_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B) { - *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B); - return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); + return (unsigned char) __builtin_ia32_ktestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B) +_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestzsi (__A, __B); + return (unsigned char) __builtin_ia32_ktestcsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) { - return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); + *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B) +_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) 
__builtin_ia32_ktestcsi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestcdi (__A, __B); + return (unsigned char) __builtin_ia32_kortestcsi (__A, __B); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) +_kadd_mask32 (__mmask32 __A, __mmask32 __B) { - *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B); - return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); + return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned char +extern __inline unsigned int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) +_cvtmask32_u32 (__mmask32 __A) { - *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B); - return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); + return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B) +_cvtu32_mask32 (unsigned int __A) { - return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); + return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +_load_mask32 (__mmask32 *__A) { - return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); + return (__mmask32) __builtin_ia32_kmovd (*__A); } -extern __inline 
unsigned char +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B) +_store_mask32 (__mmask32 *__A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_kortestcsi (__A, __B); + *(__mmask32 *) __A = __builtin_ia32_kmovd (__B); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +_knot_mask32 (__mmask32 __A) { - return (unsigned char) __builtin_ia32_kortestcdi (__A, __B); + return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kadd_mask32 (__mmask32 __A, __mmask32 __B) +_kor_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline __mmask64 +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kadd_mask64 (__mmask64 __A, __mmask64 __B) +_kxnor_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B); + return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned int +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtmask32_u32 (__mmask32 __A) +_kxor_mask32 (__mmask32 __A, __mmask32 __B) { - return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A); + return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned long long +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtmask64_u64 (__mmask64 __A) +_kand_mask32 (__mmask32 __A, __mmask32 __B) { - return (unsigned long long) __builtin_ia32_kmovq 
((__mmask64) __A); + return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtu32_mask32 (unsigned int __A) +_kandn_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A); + return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline __mmask64 +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtu64_mask64 (unsigned long long __A) +_mm512_kunpackw (__mmask32 __A, __mmask32 __B) { - return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A); + return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, + (__mmask32) __B); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_load_mask32 (__mmask32 *__A) +_kunpackw_mask32 (__mmask16 __A, __mmask16 __B) { - return (__mmask32) __builtin_ia32_kmovd (*__A); + return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, + (__mmask32) __B); } -extern __inline __mmask64 +#if __OPTIMIZE__ +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_load_mask64 (__mmask64 *__A) +_kshiftli_mask32 (__mmask32 __A, unsigned int __B) { - return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A); + return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A, + (__mmask8) __B); } -extern __inline void +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_store_mask32 (__mmask32 *__A, __mmask32 __B) +_kshiftri_mask32 (__mmask32 __A, unsigned int __B) { - *(__mmask32 *) __A = __builtin_ia32_kmovd (__B); + return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A, + (__mmask8) __B); } -extern __inline void +#else +#define _kshiftli_mask32(X, Y) \ + ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y))) + +#define _kshiftri_mask32(X, Y) \ + 
((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y))) + +#endif + +#ifdef __DISABLE_AVX512BW__ +#undef __DISABLE_AVX512BW__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BW__ */ + +#if !defined (__AVX512BW__) || !defined (__EVEX512__) +#pragma GCC push_options +#pragma GCC target("avx512bw,evex512") +#define __DISABLE_AVX512BW_512__ +#endif /* __AVX512BW_512__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef short __v32hi __attribute__ ((__vector_size__ (64))); +typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); +typedef char __v64qi __attribute__ ((__vector_size__ (64))); +typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); + +typedef unsigned long long __mmask64; + +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_store_mask64 (__mmask64 *__A, __mmask64 __B) +_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) { - *(__mmask64 *) __A = __builtin_ia32_kmovq (__B); + *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B); + return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); } -extern __inline __mmask32 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_knot_mask32 (__mmask32 __A) +_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A); + return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); } -extern __inline __mmask64 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_knot_mask64 (__mmask64 __A) +_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A); + return (unsigned char) __builtin_ia32_ktestcdi (__A, __B); } -extern __inline __mmask32 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_kor_mask32 (__mmask32 __A, __mmask32 __B) +_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) { - return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B); + *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +{ + return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +{ + return (unsigned char) __builtin_ia32_kortestcdi (__A, __B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kor_mask64 (__mmask64 __A, __mmask64 __B) +_kadd_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B); } -extern __inline __mmask32 +extern __inline unsigned long long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxnor_mask32 (__mmask32 __A, __mmask32 __B) +_cvtmask64_u64 (__mmask64 __A) { - return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B); + return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxnor_mask64 (__mmask64 __A, __mmask64 __B) +_cvtu64_mask64 (unsigned long long __A) { - return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxor_mask32 (__mmask32 __A, __mmask32 __B) 
+_load_mask64 (__mmask64 *__A) { - return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_store_mask64 (__mmask64 *__A, __mmask64 __B) +{ + *(__mmask64 *) __A = __builtin_ia32_kmovq (__B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxor_mask64 (__mmask64 __A, __mmask64 __B) +_knot_mask64 (__mmask64 __A) { - return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kand_mask32 (__mmask32 __A, __mmask32 __B) +_kor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kand_mask64 (__mmask64 __A, __mmask64 __B) +_kxnor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kandn_mask32 (__mmask32 __A, __mmask32 __B) +_kxor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B); +} + +extern __inline __mmask64 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kand_mask64 (__mmask64 __A, __mmask64 __B) +{ + return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, 
(__mmask64) __B); } extern __inline __mmask64 @@ -366,22 +419,6 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) (__mmask64) __U); } -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_kunpackw (__mmask32 __A, __mmask32 __B) -{ - return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, - (__mmask32) __B); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kunpackw_mask32 (__mmask16 __A, __mmask16 __B) -{ - return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, - (__mmask32) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_kunpackd (__mmask64 __A, __mmask64 __B) @@ -2776,14 +2813,6 @@ _mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A, } #ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftli_mask32 (__mmask32 __A, unsigned int __B) -{ - return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A, - (__mmask8) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _kshiftli_mask64 (__mmask64 __A, unsigned int __B) @@ -2792,14 +2821,6 @@ _kshiftli_mask64 (__mmask64 __A, unsigned int __B) (__mmask8) __B); } -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftri_mask32 (__mmask32 __A, unsigned int __B) -{ - return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A, - (__mmask8) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _kshiftri_mask64 (__mmask64 __A, unsigned int __B) @@ -3145,15 +3166,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) } #else -#define _kshiftli_mask32(X, Y) \ - ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y))) - #define _kshiftli_mask64(X, Y) \ ((__mmask64) __builtin_ia32_kshiftlidi 
((__mmask64)(X), (__mmask8)(Y))) -#define _kshiftri_mask32(X, Y) \ - ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y))) - #define _kshiftri_mask64(X, Y) \ ((__mmask64) __builtin_ia32_kshiftridi ((__mmask64)(X), (__mmask8)(Y))) @@ -3328,9 +3343,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) #endif -#ifdef __DISABLE_AVX512BW__ -#undef __DISABLE_AVX512BW__ +#ifdef __DISABLE_AVX512BW_512__ +#undef __DISABLE_AVX512BW_512__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512BW__ */ +#endif /* __DISABLE_AVX512BW_512__ */ #endif /* _AVX512BWINTRIN_H_INCLUDED */
next reply other threads:[~2023-09-22 3:19 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-09-22 3:19 hongtao Liu [this message] -- strict thread matches above, loose matches on Subject: below -- 2023-09-22 3:19 hongtao Liu 2023-09-22 3:19 hongtao Liu 2023-09-22 3:19 hongtao Liu 2023-09-22 3:19 hongtao Liu
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230922031922.780063857C5A@sourceware.org \ --to=liuhongt@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).