From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 780063857C5A; Fri, 22 Sep 2023 03:19:22 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 780063857C5A DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1695352762; bh=rMQlpM/DRdjKIEw8/oRueSAGoiEB/OO/b3eCQRMRtUo=; h=From:To:Subject:Date:From; b=HeVq/Wdcuvtzvx8j1tSr0F9C18rRFwy7dRkhrzOZOSKaBGFw/AcI9snCmO8tVBkUd bOG67MHmsVcGs/ZHB1jNhOgfh4YagCMA0qarrKi26XHapA2vzUGvbNZ2s1F4zFEBNy T4ikZfI8pM4DMDKZMQSQvU5/qTgR1gAT5H5eg+U0= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/ix86/heads/evex512)] Push evex512 target for 512 bit intrins X-Act-Checkin: gcc X-Git-Author: Haochen Jiang X-Git-Refname: refs/vendors/ix86/heads/evex512 X-Git-Oldrev: 23bccd14d1b18a00a35e3f408377d42982a5c467 X-Git-Newrev: 567f243d6db4623dc5e7eb1a61b6faa0f5fa9f5f Message-Id: <20230922031922.780063857C5A@sourceware.org> Date: Fri, 22 Sep 2023 03:19:22 +0000 (GMT) List-Id: https://gcc.gnu.org/g:567f243d6db4623dc5e7eb1a61b6faa0f5fa9f5f commit 567f243d6db4623dc5e7eb1a61b6faa0f5fa9f5f Author: Haochen Jiang Date: Mon Aug 28 15:31:52 2023 +0800 Push evex512 target for 512 bit intrins gcc/ChangeLog: * config/i386/avx512bwintrin.h: Add evex512 target for 512 bit intrins. Diff: --- gcc/config/i386/avx512bwintrin.h | 291 ++++++++++++++++++++------------------- 1 file changed, 153 insertions(+), 138 deletions(-) diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index d1cd549ce18..925bae1457c 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -34,16 +34,6 @@ #define __DISABLE_AVX512BW__ #endif /* __AVX512BW__ */ -/* Internal data types for implementing the intrinsics. */ -typedef short __v32hi __attribute__ ((__vector_size__ (64))); -typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ - __may_alias__, __aligned__ (1))); -typedef char __v64qi __attribute__ ((__vector_size__ (64))); -typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \ - __may_alias__, __aligned__ (1))); - -typedef unsigned long long __mmask64; - extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) @@ -54,229 +44,292 @@ _ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) +_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B) { - *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B); - return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); + return (unsigned char) __builtin_ia32_ktestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B) +_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestzsi (__A, __B); + return (unsigned char) __builtin_ia32_ktestcsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) { - return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); + *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B) +_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestcsi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); } extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_ktestcdi (__A, __B); + return (unsigned char) __builtin_ia32_kortestcsi (__A, __B); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF) +_kadd_mask32 (__mmask32 __A, __mmask32 __B) { - *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B); - return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); + return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned char +extern __inline unsigned int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) +_cvtmask32_u32 (__mmask32 __A) { - *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B); - return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); + return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B) +_cvtu32_mask32 (unsigned int __A) { - return (unsigned char) __builtin_ia32_kortestzsi (__A, __B); + return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +_load_mask32 (__mmask32 *__A) { - return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); + return (__mmask32) __builtin_ia32_kmovd (*__A); } -extern __inline unsigned char +extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B) +_store_mask32 (__mmask32 *__A, __mmask32 __B) { - return (unsigned char) __builtin_ia32_kortestcsi (__A, __B); + *(__mmask32 *) __A = __builtin_ia32_kmovd (__B); } -extern __inline unsigned char +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +_knot_mask32 (__mmask32 __A) { - return (unsigned char) __builtin_ia32_kortestcdi (__A, __B); + return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kadd_mask32 (__mmask32 __A, __mmask32 __B) +_kor_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline __mmask64 +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kadd_mask64 (__mmask64 __A, __mmask64 __B) +_kxnor_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B); + return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned int +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtmask32_u32 (__mmask32 __A) +_kxor_mask32 (__mmask32 __A, __mmask32 __B) { - return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A); + return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline unsigned long long +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtmask64_u64 (__mmask64 __A) +_kand_mask32 (__mmask32 __A, __mmask32 __B) { - return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A); + return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtu32_mask32 (unsigned int __A) +_kandn_mask32 (__mmask32 __A, __mmask32 __B) { - return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A); + return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B); } -extern __inline __mmask64 +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_cvtu64_mask64 (unsigned long long __A) +_mm512_kunpackw (__mmask32 __A, __mmask32 __B) { - return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A); + return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, + (__mmask32) __B); } extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_load_mask32 (__mmask32 *__A) +_kunpackw_mask32 (__mmask16 __A, __mmask16 __B) { - return (__mmask32) __builtin_ia32_kmovd (*__A); + return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, + (__mmask32) __B); } -extern __inline __mmask64 +#if __OPTIMIZE__ +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_load_mask64 (__mmask64 *__A) +_kshiftli_mask32 (__mmask32 __A, unsigned int __B) { - return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A); + return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A, + (__mmask8) __B); } -extern __inline void +extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_store_mask32 (__mmask32 *__A, __mmask32 __B) +_kshiftri_mask32 (__mmask32 __A, unsigned int __B) { - *(__mmask32 *) __A = __builtin_ia32_kmovd (__B); + return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A, + (__mmask8) __B); } -extern __inline void +#else +#define _kshiftli_mask32(X, Y) \ + ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y))) + +#define _kshiftri_mask32(X, Y) \ + ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y))) + +#endif + +#ifdef __DISABLE_AVX512BW__ +#undef __DISABLE_AVX512BW__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BW__ */ + +#if !defined (__AVX512BW__) || !defined (__EVEX512__) +#pragma GCC push_options +#pragma GCC target("avx512bw,evex512") +#define __DISABLE_AVX512BW_512__ +#endif /* __AVX512BW_512__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef short __v32hi __attribute__ ((__vector_size__ (64))); +typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); +typedef char __v64qi __attribute__ ((__vector_size__ (64))); +typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); + +typedef unsigned long long __mmask64; + +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_store_mask64 (__mmask64 *__A, __mmask64 __B) +_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) { - *(__mmask64 *) __A = __builtin_ia32_kmovq (__B); + *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B); + return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); } -extern __inline __mmask32 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_knot_mask32 (__mmask32 __A) +_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A); + return (unsigned char) __builtin_ia32_ktestzdi (__A, __B); } -extern __inline __mmask64 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_knot_mask64 (__mmask64 __A) +_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A); + return (unsigned char) __builtin_ia32_ktestcdi (__A, __B); } -extern __inline __mmask32 +extern __inline unsigned char __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kor_mask32 (__mmask32 __A, __mmask32 __B) +_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF) { - return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B); + *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B); + return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B) +{ + return (unsigned char) __builtin_ia32_kortestzdi (__A, __B); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B) +{ + return (unsigned char) __builtin_ia32_kortestcdi (__A, __B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kor_mask64 (__mmask64 __A, __mmask64 __B) +_kadd_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B); } -extern __inline __mmask32 +extern __inline unsigned long long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxnor_mask32 (__mmask32 __A, __mmask32 __B) +_cvtmask64_u64 (__mmask64 __A) { - return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B); + return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxnor_mask64 (__mmask64 __A, __mmask64 __B) +_cvtu64_mask64 (unsigned long long __A) { - return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxor_mask32 (__mmask32 __A, __mmask32 __B) +_load_mask64 (__mmask64 *__A) { - return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_store_mask64 (__mmask64 *__A, __mmask64 __B) +{ + *(__mmask64 *) __A = __builtin_ia32_kmovq (__B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kxor_mask64 (__mmask64 __A, __mmask64 __B) +_knot_mask64 (__mmask64 __A) { - return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kand_mask32 (__mmask32 __A, __mmask32 __B) +_kor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B); } extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kand_mask64 (__mmask64 __A, __mmask64 __B) +_kxnor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B); + return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B); } -extern __inline __mmask32 +extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kandn_mask32 (__mmask32 __A, __mmask32 __B) +_kxor_mask64 (__mmask64 __A, __mmask64 __B) { - return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B); + return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B); +} + +extern __inline __mmask64 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_kand_mask64 (__mmask64 __A, __mmask64 __B) +{ + return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B); } extern __inline __mmask64 @@ -366,22 +419,6 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) (__mmask64) __U); } -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_kunpackw (__mmask32 __A, __mmask32 __B) -{ - return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, - (__mmask32) __B); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kunpackw_mask32 (__mmask16 __A, __mmask16 __B) -{ - return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, - (__mmask32) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_kunpackd (__mmask64 __A, __mmask64 __B) @@ -2776,14 +2813,6 @@ _mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A, } #ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftli_mask32 (__mmask32 __A, unsigned int __B) -{ - return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A, - (__mmask8) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _kshiftli_mask64 (__mmask64 __A, unsigned int __B) @@ -2792,14 +2821,6 @@ _kshiftli_mask64 (__mmask64 __A, unsigned int __B) (__mmask8) __B); } -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_kshiftri_mask32 (__mmask32 __A, unsigned int __B) -{ - return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A, - (__mmask8) __B); -} - extern __inline __mmask64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _kshiftri_mask64 (__mmask64 __A, unsigned int __B) @@ -3145,15 +3166,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) } #else -#define _kshiftli_mask32(X, Y) \ - ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y))) - #define _kshiftli_mask64(X, Y) \ ((__mmask64) __builtin_ia32_kshiftlidi ((__mmask64)(X), (__mmask8)(Y))) -#define _kshiftri_mask32(X, Y) \ - ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y))) - #define _kshiftri_mask64(X, Y) \ ((__mmask64) __builtin_ia32_kshiftridi ((__mmask64)(X), (__mmask8)(Y))) @@ -3328,9 +3343,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) #endif -#ifdef __DISABLE_AVX512BW__ -#undef __DISABLE_AVX512BW__ +#ifdef __DISABLE_AVX512BW_512__ +#undef __DISABLE_AVX512BW_512__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512BW__ */ +#endif /* __DISABLE_AVX512BW_512__ */ #endif /* _AVX512BWINTRIN_H_INCLUDED */