From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 8CFAB3858416; Fri, 22 Sep 2023 03:19:27 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8CFAB3858416 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1695352767; bh=90P5z/zHSye1MKi0mm5DJcCA8QAlkBXkncQpoCZftNY=; h=From:To:Subject:Date:From; b=Se47NOEuqwheP0SFVm+kuuQSBe2Jx6MAcKbRinyNChi36qwAJ2/eJYN43st9wl6qn dqi3wnGq+KaDlU7RcsA4t3yfRQ28uT+neZK+KeYVDLQ9LbCwH5tLwsf/33VfgbUakv raP4F8q7i5PRGG8xUUHm/uRkOEZK02yUXaiZdhgg= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/ix86/heads/evex512)] Push evex512 target for 512 bit intrins X-Act-Checkin: gcc X-Git-Author: Haochen Jiang X-Git-Refname: refs/vendors/ix86/heads/evex512 X-Git-Oldrev: 567f243d6db4623dc5e7eb1a61b6faa0f5fa9f5f X-Git-Newrev: 2b77bcbb2a1a05fb57f6cb295f71659202c65374 Message-Id: <20230922031927.8CFAB3858416@sourceware.org> Date: Fri, 22 Sep 2023 03:19:27 +0000 (GMT) List-Id: https://gcc.gnu.org/g:2b77bcbb2a1a05fb57f6cb295f71659202c65374 commit 2b77bcbb2a1a05fb57f6cb295f71659202c65374 Author: Haochen Jiang Date: Tue Aug 29 15:37:13 2023 +0800 Push evex512 target for 512 bit intrins gcc/ChangeLog: * config.gcc: Add avx512bitalgvlintrin.h. * config/i386/avx5124fmapsintrin.h: Add evex512 target for 512 bit intrins. * config/i386/avx5124vnniwintrin.h: Ditto. * config/i386/avx512bf16intrin.h: Ditto. * config/i386/avx512bitalgintrin.h: Add evex512 target for 512 bit intrins. Split 128/256 bit intrins to avx512bitalgvlintrin.h. * config/i386/avx512erintrin.h: Add evex512 target for 512 bit intrins * config/i386/avx512ifmaintrin.h: Ditto * config/i386/avx512pfintrin.h: Ditto * config/i386/avx512vbmi2intrin.h: Ditto. * config/i386/avx512vbmiintrin.h: Ditto. * config/i386/avx512vnniintrin.h: Ditto. * config/i386/avx512vp2intersectintrin.h: Ditto. 
* config/i386/avx512vpopcntdqintrin.h: Ditto. * config/i386/gfniintrin.h: Ditto. * config/i386/immintrin.h: Add avx512bitalgvlintrin.h. * config/i386/vaesintrin.h: Add evex512 target for 512 bit intrins. * config/i386/vpclmulqdqintrin.h: Ditto. * config/i386/avx512bitalgvlintrin.h: New. Diff: --- gcc/config.gcc | 19 +-- gcc/config/i386/avx5124fmapsintrin.h | 2 +- gcc/config/i386/avx5124vnniwintrin.h | 2 +- gcc/config/i386/avx512bf16intrin.h | 31 +++-- gcc/config/i386/avx512bitalgintrin.h | 155 +------------------------ gcc/config/i386/avx512bitalgvlintrin.h | 180 +++++++++++++++++++++++++++++ gcc/config/i386/avx512erintrin.h | 2 +- gcc/config/i386/avx512ifmaintrin.h | 4 +- gcc/config/i386/avx512pfintrin.h | 2 +- gcc/config/i386/avx512vbmi2intrin.h | 4 +- gcc/config/i386/avx512vbmiintrin.h | 4 +- gcc/config/i386/avx512vnniintrin.h | 4 +- gcc/config/i386/avx512vp2intersectintrin.h | 4 +- gcc/config/i386/avx512vpopcntdqintrin.h | 4 +- gcc/config/i386/gfniintrin.h | 76 +++++++----- gcc/config/i386/immintrin.h | 2 + gcc/config/i386/vaesintrin.h | 4 +- gcc/config/i386/vpclmulqdqintrin.h | 4 +- 18 files changed, 282 insertions(+), 221 deletions(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index ee46d96bf62..cc37a9c768d 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -436,15 +436,16 @@ i[34567]86-*-* | x86_64-*-*) avx512vbmi2vlintrin.h avx512vnniintrin.h avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h avx512vpopcntdqvlintrin.h avx512bitalgintrin.h - pconfigintrin.h wbnoinvdintrin.h movdirintrin.h - waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h - avx512bf16intrin.h enqcmdintrin.h serializeintrin.h - avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h - tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h - amxbf16intrin.h x86gprintrin.h uintrintrin.h - hresetintrin.h keylockerintrin.h avxvnniintrin.h - mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h - avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h + avx512bitalgvlintrin.h pconfigintrin.h 
wbnoinvdintrin.h + movdirintrin.h waitpkgintrin.h cldemoteintrin.h + avx512bf16vlintrin.h avx512bf16intrin.h enqcmdintrin.h + serializeintrin.h avx512vp2intersectintrin.h + avx512vp2intersectvlintrin.h tsxldtrkintrin.h + amxtileintrin.h amxint8intrin.h amxbf16intrin.h + x86gprintrin.h uintrintrin.h hresetintrin.h + keylockerintrin.h avxvnniintrin.h mwaitintrin.h + avx512fp16intrin.h avx512fp16vlintrin.h avxifmaintrin.h + avxvnniint8intrin.h avxneconvertintrin.h cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h sm3intrin.h sha512intrin.h sm4intrin.h" diff --git a/gcc/config/i386/avx5124fmapsintrin.h b/gcc/config/i386/avx5124fmapsintrin.h index 97dd77c9235..4c884a5c203 100644 --- a/gcc/config/i386/avx5124fmapsintrin.h +++ b/gcc/config/i386/avx5124fmapsintrin.h @@ -30,7 +30,7 @@ #ifndef __AVX5124FMAPS__ #pragma GCC push_options -#pragma GCC target("avx5124fmaps") +#pragma GCC target("avx5124fmaps,evex512") #define __DISABLE_AVX5124FMAPS__ #endif /* __AVX5124FMAPS__ */ diff --git a/gcc/config/i386/avx5124vnniwintrin.h b/gcc/config/i386/avx5124vnniwintrin.h index fd129589798..795e4814f28 100644 --- a/gcc/config/i386/avx5124vnniwintrin.h +++ b/gcc/config/i386/avx5124vnniwintrin.h @@ -30,7 +30,7 @@ #ifndef __AVX5124VNNIW__ #pragma GCC push_options -#pragma GCC target("avx5124vnniw") +#pragma GCC target("avx5124vnniw,evex512") #define __DISABLE_AVX5124VNNIW__ #endif /* __AVX5124VNNIW__ */ diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h index 107f4a448f6..94ccbf6389f 100644 --- a/gcc/config/i386/avx512bf16intrin.h +++ b/gcc/config/i386/avx512bf16intrin.h @@ -34,13 +34,6 @@ #define __DISABLE_AVX512BF16__ #endif /* __AVX512BF16__ */ -/* Internal data types for implementing the intrinsics. */ -typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64))); - -/* The Intel API is flexible enough that we must allow aliasing with other - vector types, and their scalar components. 
*/ -typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__)); - /* Convert One BF16 Data to One Single Float Data. */ extern __inline float __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -49,6 +42,24 @@ _mm_cvtsbh_ss (__bf16 __A) return __builtin_ia32_cvtbf2sf (__A); } +#ifdef __DISABLE_AVX512BF16__ +#undef __DISABLE_AVX512BF16__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BF16__ */ + +#if !defined (__AVX512BF16__) || !defined (__EVEX512__) +#pragma GCC push_options +#pragma GCC target("avx512bf16,evex512") +#define __DISABLE_AVX512BF16_512__ +#endif /* __AVX512BF16_512__ */ + +/* Internal data types for implementing the intrinsics. */ +typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64))); + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__)); + /* vcvtne2ps2bf16 */ extern __inline __m512bh @@ -144,9 +155,9 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A) (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16))); } -#ifdef __DISABLE_AVX512BF16__ -#undef __DISABLE_AVX512BF16__ +#ifdef __DISABLE_AVX512BF16_512__ +#undef __DISABLE_AVX512BF16_512__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512BF16__ */ +#endif /* __DISABLE_AVX512BF16_512__ */ #endif /* _AVX512BF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h index a1c7be109a9..af8514f5838 100644 --- a/gcc/config/i386/avx512bitalgintrin.h +++ b/gcc/config/i386/avx512bitalgintrin.h @@ -22,15 +22,15 @@ . */ #if !defined _IMMINTRIN_H_INCLUDED -# error "Never use directly; include instead." +# error "Never use directly; include instead." 
#endif #ifndef _AVX512BITALGINTRIN_H_INCLUDED #define _AVX512BITALGINTRIN_H_INCLUDED -#ifndef __AVX512BITALG__ +#if !defined (__AVX512BITALG__) || !defined (__EVEX512__) #pragma GCC push_options -#pragma GCC target("avx512bitalg") +#pragma GCC target("avx512bitalg,evex512") #define __DISABLE_AVX512BITALG__ #endif /* __AVX512BITALG__ */ @@ -108,153 +108,4 @@ _mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B) #pragma GCC pop_options #endif /* __DISABLE_AVX512BITALG__ */ -#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) -#pragma GCC push_options -#pragma GCC target("avx512bitalg,avx512vl") -#define __DISABLE_AVX512BITALGVL__ -#endif /* __AVX512BITALGVL__ */ - -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A) -{ - return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, - (__v32qi) __W, - (__mmask32) __U); -} - -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A) -{ - return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline __mmask32 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B) -{ - return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A, - (__v32qi) __B, - (__mmask32) -1); -} - -extern __inline __mmask32 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B) -{ - return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A, - (__v32qi) __B, - (__mmask32) __M); -} - -extern __inline __mmask16 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B) -{ - return (__mmask16) 
__builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A, - (__v16qi) __B, - (__mmask16) -1); -} - -extern __inline __mmask16 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B) -{ - return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A, - (__v16qi) __B, - (__mmask16) __M); -} - -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_popcnt_epi8 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A); -} - -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_popcnt_epi16 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A); -} - -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_popcnt_epi8 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A); -} - -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_popcnt_epi16 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A); -} - -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A) -{ - return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, - (__v16hi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A) -{ - return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, - (__v16hi) - _mm256_setzero_si256 (), - (__mmask16) __U); -} - -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A) -{ - return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, - (__v16qi) 
__W, - (__mmask16) __U); -} - -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A) -{ - return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) __U); -} -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A) -{ - return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); -} -#ifdef __DISABLE_AVX512BITALGVL__ -#undef __DISABLE_AVX512BITALGVL__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512BITALGVL__ */ - #endif /* _AVX512BITALGINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h new file mode 100644 index 00000000000..36d697dea8a --- /dev/null +++ b/gcc/config/i386/avx512bitalgvlintrin.h @@ -0,0 +1,180 @@ +/* Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +# error "Never use <avx512bitalgvlintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX512BITALGVLINTRIN_H_INCLUDED +#define _AVX512BITALGVLINTRIN_H_INCLUDED + +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("avx512bitalg,avx512vl") +#define __DISABLE_AVX512BITALGVL__ +#endif /* __AVX512BITALGVL__ */ + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, + (__v32qi) __W, + (__mmask32) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + +extern __inline __mmask32 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__mmask32) -1); +} + +extern __inline __mmask32 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A, + (__v32qi) __B, + (__mmask32) __M); +} + +extern __inline __mmask16 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__mmask16) -1); +} + 
+extern __inline __mmask16 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A, + (__v16qi) __B, + (__mmask16) __M); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi8 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi16 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, + (__v16qi) __W, + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); +} +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} +#ifdef __DISABLE_AVX512BITALGVL__ +#undef __DISABLE_AVX512BITALGVL__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALGVL__ */ + +#endif /* _AVX512BITALGVLINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h index bd83b7fbbc6..5c7be9c47ac 100644 --- a/gcc/config/i386/avx512erintrin.h +++ b/gcc/config/i386/avx512erintrin.h @@ -30,7 +30,7 @@ #ifndef __AVX512ER__ #pragma GCC push_options -#pragma GCC target("avx512er") +#pragma GCC target("avx512er,evex512") #define __DISABLE_AVX512ER__ #endif /* __AVX512ER__ */ diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h index fc97f1defe8..e08078b2725 100644 --- a/gcc/config/i386/avx512ifmaintrin.h +++ b/gcc/config/i386/avx512ifmaintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512IFMAINTRIN_H_INCLUDED #define _AVX512IFMAINTRIN_H_INCLUDED -#ifndef __AVX512IFMA__ +#if !defined (__AVX512IFMA__) || !defined (__EVEX512__) #pragma GCC push_options -#pragma GCC target("avx512ifma") +#pragma GCC target("avx512ifma,evex512") #define __DISABLE_AVX512IFMA__ #endif /* __AVX512IFMA__ */ diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h index a547610660a..58af26ff02e 100644 
--- a/gcc/config/i386/avx512pfintrin.h +++ b/gcc/config/i386/avx512pfintrin.h @@ -30,7 +30,7 @@ #ifndef __AVX512PF__ #pragma GCC push_options -#pragma GCC target("avx512pf") +#pragma GCC target("avx512pf,evex512") #define __DISABLE_AVX512PF__ #endif /* __AVX512PF__ */ diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h index ca00f8a5f14..b7ff07b2d11 100644 --- a/gcc/config/i386/avx512vbmi2intrin.h +++ b/gcc/config/i386/avx512vbmi2intrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512VBMI2INTRIN_H_INCLUDED #define __AVX512VBMI2INTRIN_H_INCLUDED -#if !defined(__AVX512VBMI2__) +#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__) #pragma GCC push_options -#pragma GCC target("avx512vbmi2") +#pragma GCC target("avx512vbmi2,evex512") #define __DISABLE_AVX512VBMI2__ #endif /* __AVX512VBMI2__ */ diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h index 502586090ae..1a7ab4edca3 100644 --- a/gcc/config/i386/avx512vbmiintrin.h +++ b/gcc/config/i386/avx512vbmiintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VBMIINTRIN_H_INCLUDED #define _AVX512VBMIINTRIN_H_INCLUDED -#ifndef __AVX512VBMI__ +#if !defined (__AVX512VBMI__) || !defined (__EVEX512__) #pragma GCC push_options -#pragma GCC target("avx512vbmi") +#pragma GCC target("avx512vbmi,evex512") #define __DISABLE_AVX512VBMI__ #endif /* __AVX512VBMI__ */ diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h index e36e2e57f21..1090703ec48 100644 --- a/gcc/config/i386/avx512vnniintrin.h +++ b/gcc/config/i386/avx512vnniintrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512VNNIINTRIN_H_INCLUDED #define __AVX512VNNIINTRIN_H_INCLUDED -#if !defined(__AVX512VNNI__) +#if !defined(__AVX512VNNI__) || !defined (__EVEX512__) #pragma GCC push_options -#pragma GCC target("avx512vnni") +#pragma GCC target("avx512vnni,evex512") #define __DISABLE_AVX512VNNI__ #endif /* __AVX512VNNI__ */ diff --git a/gcc/config/i386/avx512vp2intersectintrin.h 
b/gcc/config/i386/avx512vp2intersectintrin.h index 65e2fb1abf5..bf68245155d 100644 --- a/gcc/config/i386/avx512vp2intersectintrin.h +++ b/gcc/config/i386/avx512vp2intersectintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED #define _AVX512VP2INTERSECTINTRIN_H_INCLUDED -#if !defined(__AVX512VP2INTERSECT__) +#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__) #pragma GCC push_options -#pragma GCC target("avx512vp2intersect") +#pragma GCC target("avx512vp2intersect,evex512") #define __DISABLE_AVX512VP2INTERSECT__ #endif /* __AVX512VP2INTERSECT__ */ diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h index 47897fbd8d7..9470a403f8e 100644 --- a/gcc/config/i386/avx512vpopcntdqintrin.h +++ b/gcc/config/i386/avx512vpopcntdqintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED #define _AVX512VPOPCNTDQINTRIN_H_INCLUDED -#ifndef __AVX512VPOPCNTDQ__ +#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__) #pragma GCC push_options -#pragma GCC target("avx512vpopcntdq") +#pragma GCC target("avx512vpopcntdq,evex512") #define __DISABLE_AVX512VPOPCNTDQ__ #endif /* __AVX512VPOPCNTDQ__ */ diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h index ef3dc225b40..907e7a0cf7a 100644 --- a/gcc/config/i386/gfniintrin.h +++ b/gcc/config/i386/gfniintrin.h @@ -297,9 +297,53 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B, #pragma GCC pop_options #endif /* __GFNIAVX512VLBW__ */ -#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__) +#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__) #pragma GCC push_options -#pragma GCC target("gfni,avx512f,avx512bw") +#pragma GCC target("gfni,avx512f,evex512") +#define __DISABLE_GFNIAVX512F__ +#endif /* __GFNIAVX512F__ */ + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) 
__builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A, + (__v64qi) __B); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C) +{ + return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A, + (__v64qi) __B, __C); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C) +{ + return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A, + (__v64qi) __B, __C); +} +#else +#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \ + ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( \ + (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C))) +#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \ + ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_GFNIAVX512F__ +#undef __DISABLE_GFNIAVX512F__ +#pragma GCC pop_options +#endif /* __GFNIAVX512F__ */ + +#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__) +#pragma GCC push_options +#pragma GCC target("gfni,avx512bw,evex512") #define __DISABLE_GFNIAVX512FBW__ #endif /* __GFNIAVX512FBW__ */ @@ -319,13 +363,6 @@ _mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C) return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B, (__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A); } -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B) -{ - return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A, - (__v64qi) __B); -} #ifdef __OPTIMIZE__ extern __inline __m512i @@ -350,14 +387,6 @@ _mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B, (__v64qi) _mm512_setzero_si512 (), __A); } -extern __inline __m512i -__attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C) -{ - return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A, - (__v64qi) __B, __C); -} - extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C, @@ -375,13 +404,6 @@ _mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C, return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B, (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A); } -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C) -{ - return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A, - (__v64qi) __B, __C); -} #else #define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \ ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \ @@ -391,9 +413,6 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \ (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \ (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A))) -#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \ - ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( \ - (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C))) #define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \ ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\ (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B))) @@ -401,9 +420,6 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\ (__v64qi)(__m512i)(C), (int)(D), \ (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A))) -#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \ - ((__m512i) 
__builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), \ - (__v64qi)(__m512i)(B), (int)(C))) #endif #ifdef __DISABLE_GFNIAVX512FBW__ diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 29b4dbbda24..4e17901db15 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -96,6 +96,8 @@ #include <avx512bitalgintrin.h> +#include <avx512bitalgvlintrin.h> + #include <avx512vp2intersectintrin.h> #include <avx512vp2intersectvlintrin.h> diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h index 58fc19c9eb3..b2bcdbe5bd1 100644 --- a/gcc/config/i386/vaesintrin.h +++ b/gcc/config/i386/vaesintrin.h @@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B) #endif /* __DISABLE_VAES__ */ -#if !defined(__VAES__) || !defined(__AVX512F__) +#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__) #pragma GCC push_options -#pragma GCC target("vaes,avx512f") +#pragma GCC target("vaes,avx512f,evex512") #define __DISABLE_VAESF__ #endif /* __VAES__ */ diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h index 2c83b6037a0..c8c2c19d33f 100644 --- a/gcc/config/i386/vpclmulqdqintrin.h +++ b/gcc/config/i386/vpclmulqdqintrin.h @@ -28,9 +28,9 @@ #ifndef _VPCLMULQDQINTRIN_H_INCLUDED #define _VPCLMULQDQINTRIN_H_INCLUDED -#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__) #pragma GCC push_options -#pragma GCC target("vpclmulqdq,avx512f") +#pragma GCC target("vpclmulqdq,avx512f,evex512") #define __DISABLE_VPCLMULQDQF__ #endif /* __VPCLMULQDQF__ */