public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-4491] [PATCH 4/5] Push evex512 target for 512 bit intrins
@ 2023-10-09 9:04 Haochen Jiang
0 siblings, 0 replies; only message in thread
From: Haochen Jiang @ 2023-10-09 9:04 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:8108b22f8fcfeabad0a5e3573633bfc8d43eaae6
commit r14-4491-g8108b22f8fcfeabad0a5e3573633bfc8d43eaae6
Author: Haochen Jiang <haochen.jiang@intel.com>
Date: Mon Oct 9 16:09:35 2023 +0800
[PATCH 4/5] Push evex512 target for 512 bit intrins
gcc/ChangeLog:
* config.gcc: Add avx512bitalgvlintrin.h.
* config/i386/avx5124fmapsintrin.h: Add evex512 target for 512 bit
intrins.
* config/i386/avx5124vnniwintrin.h: Ditto.
* config/i386/avx512bf16intrin.h: Ditto.
* config/i386/avx512bitalgintrin.h: Add evex512 target for 512 bit
intrins. Split 128/256 bit intrins to avx512bitalgvlintrin.h.
* config/i386/avx512erintrin.h: Add evex512 target for 512 bit
intrins.
* config/i386/avx512ifmaintrin.h: Ditto.
* config/i386/avx512pfintrin.h: Ditto.
* config/i386/avx512vbmi2intrin.h: Ditto.
* config/i386/avx512vbmiintrin.h: Ditto.
* config/i386/avx512vnniintrin.h: Ditto.
* config/i386/avx512vp2intersectintrin.h: Ditto.
* config/i386/avx512vpopcntdqintrin.h: Ditto.
* config/i386/gfniintrin.h: Ditto.
* config/i386/immintrin.h: Add avx512bitalgvlintrin.h.
* config/i386/vaesintrin.h: Add evex512 target for 512 bit intrins.
* config/i386/vpclmulqdqintrin.h: Ditto.
* config/i386/avx512bitalgvlintrin.h: New.
Diff:
---
gcc/config.gcc | 19 +--
gcc/config/i386/avx5124fmapsintrin.h | 2 +-
gcc/config/i386/avx5124vnniwintrin.h | 2 +-
gcc/config/i386/avx512bf16intrin.h | 31 +++--
gcc/config/i386/avx512bitalgintrin.h | 155 +------------------------
gcc/config/i386/avx512bitalgvlintrin.h | 180 +++++++++++++++++++++++++++++
gcc/config/i386/avx512erintrin.h | 2 +-
gcc/config/i386/avx512ifmaintrin.h | 4 +-
gcc/config/i386/avx512pfintrin.h | 2 +-
gcc/config/i386/avx512vbmi2intrin.h | 4 +-
gcc/config/i386/avx512vbmiintrin.h | 4 +-
gcc/config/i386/avx512vnniintrin.h | 4 +-
gcc/config/i386/avx512vp2intersectintrin.h | 4 +-
gcc/config/i386/avx512vpopcntdqintrin.h | 4 +-
gcc/config/i386/gfniintrin.h | 76 +++++++-----
gcc/config/i386/immintrin.h | 2 +
gcc/config/i386/vaesintrin.h | 4 +-
gcc/config/i386/vpclmulqdqintrin.h | 4 +-
18 files changed, 282 insertions(+), 221 deletions(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc
index ee46d96bf62..cc37a9c768d 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -436,15 +436,16 @@ i[34567]86-*-* | x86_64-*-*)
avx512vbmi2vlintrin.h avx512vnniintrin.h
avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
- pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
- waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h
- avx512bf16intrin.h enqcmdintrin.h serializeintrin.h
- avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
- tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
- amxbf16intrin.h x86gprintrin.h uintrintrin.h
- hresetintrin.h keylockerintrin.h avxvnniintrin.h
- mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
- avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h
+ avx512bitalgvlintrin.h pconfigintrin.h wbnoinvdintrin.h
+ movdirintrin.h waitpkgintrin.h cldemoteintrin.h
+ avx512bf16vlintrin.h avx512bf16intrin.h enqcmdintrin.h
+ serializeintrin.h avx512vp2intersectintrin.h
+ avx512vp2intersectvlintrin.h tsxldtrkintrin.h
+ amxtileintrin.h amxint8intrin.h amxbf16intrin.h
+ x86gprintrin.h uintrintrin.h hresetintrin.h
+ keylockerintrin.h avxvnniintrin.h mwaitintrin.h
+ avx512fp16intrin.h avx512fp16vlintrin.h avxifmaintrin.h
+ avxvnniint8intrin.h avxneconvertintrin.h
cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h
raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h
sm3intrin.h sha512intrin.h sm4intrin.h"
diff --git a/gcc/config/i386/avx5124fmapsintrin.h b/gcc/config/i386/avx5124fmapsintrin.h
index 97dd77c9235..4c884a5c203 100644
--- a/gcc/config/i386/avx5124fmapsintrin.h
+++ b/gcc/config/i386/avx5124fmapsintrin.h
@@ -30,7 +30,7 @@
#ifndef __AVX5124FMAPS__
#pragma GCC push_options
-#pragma GCC target("avx5124fmaps")
+#pragma GCC target("avx5124fmaps,evex512")
#define __DISABLE_AVX5124FMAPS__
#endif /* __AVX5124FMAPS__ */
diff --git a/gcc/config/i386/avx5124vnniwintrin.h b/gcc/config/i386/avx5124vnniwintrin.h
index fd129589798..795e4814f28 100644
--- a/gcc/config/i386/avx5124vnniwintrin.h
+++ b/gcc/config/i386/avx5124vnniwintrin.h
@@ -30,7 +30,7 @@
#ifndef __AVX5124VNNIW__
#pragma GCC push_options
-#pragma GCC target("avx5124vnniw")
+#pragma GCC target("avx5124vnniw,evex512")
#define __DISABLE_AVX5124VNNIW__
#endif /* __AVX5124VNNIW__ */
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
index 107f4a448f6..94ccbf6389f 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -34,13 +34,6 @@
#define __DISABLE_AVX512BF16__
#endif /* __AVX512BF16__ */
-/* Internal data types for implementing the intrinsics. */
-typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
- vector types, and their scalar components. */
-typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
-
/* Convert One BF16 Data to One Single Float Data. */
extern __inline float
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -49,6 +42,24 @@ _mm_cvtsbh_ss (__bf16 __A)
return __builtin_ia32_cvtbf2sf (__A);
}
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16__ */
+
+#if !defined (__AVX512BF16__) || !defined (__EVEX512__)
+#pragma GCC push_options
+#pragma GCC target("avx512bf16,evex512")
+#define __DISABLE_AVX512BF16_512__
+#endif /* __AVX512BF16_512__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+
/* vcvtne2ps2bf16 */
extern __inline __m512bh
@@ -144,9 +155,9 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
(__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
}
-#ifdef __DISABLE_AVX512BF16__
-#undef __DISABLE_AVX512BF16__
+#ifdef __DISABLE_AVX512BF16_512__
+#undef __DISABLE_AVX512BF16_512__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BF16__ */
+#endif /* __DISABLE_AVX512BF16_512__ */
#endif /* _AVX512BF16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h
index a1c7be109a9..af8514f5838 100644
--- a/gcc/config/i386/avx512bitalgintrin.h
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -22,15 +22,15 @@
<http://www.gnu.org/licenses/>. */
#if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
+# error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512BITALGINTRIN_H_INCLUDED
#define _AVX512BITALGINTRIN_H_INCLUDED
-#ifndef __AVX512BITALG__
+#if !defined (__AVX512BITALG__) || !defined (__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("avx512bitalg")
+#pragma GCC target("avx512bitalg,evex512")
#define __DISABLE_AVX512BITALG__
#endif /* __AVX512BITALG__ */
@@ -108,153 +108,4 @@ _mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BITALG__ */
-#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
-#pragma GCC push_options
-#pragma GCC target("avx512bitalg,avx512vl")
-#define __DISABLE_AVX512BITALGVL__
-#endif /* __AVX512BITALGVL__ */
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
-{
- return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
- (__v32qi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
-{
- return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
- (__v32qi)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline __mmask32
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
-{
- return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
- (__v32qi) __B,
- (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
-{
- return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
- (__v32qi) __B,
- (__mmask32) __M);
-}
-
-extern __inline __mmask16
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
-{
- return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
- (__v16qi) __B,
- (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
-{
- return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
- (__v16qi) __B,
- (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_popcnt_epi8 (__m256i __A)
-{
- return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_popcnt_epi16 (__m256i __A)
-{
- return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_epi8 (__m128i __A)
-{
- return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_epi16 (__m128i __A)
-{
- return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
-{
- return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
- (__v16hi) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
-{
- return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
- (__v16hi)
- _mm256_setzero_si256 (),
- (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
-{
- return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
- (__v16qi) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
-{
- return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
- (__v16qi)
- _mm_setzero_si128 (),
- (__mmask16) __U);
-}
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
-{
- return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
- (__v8hi) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
-{
- return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
- (__v8hi)
- _mm_setzero_si128 (),
- (__mmask8) __U);
-}
-#ifdef __DISABLE_AVX512BITALGVL__
-#undef __DISABLE_AVX512BITALGVL__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BITALGVL__ */
-
#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h
new file mode 100644
index 00000000000..36d697dea8a
--- /dev/null
+++ b/gcc/config/i386/avx512bitalgvlintrin.h
@@ -0,0 +1,180 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512bitalgvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BITALGVLINTRIN_H_INCLUDED
+#define _AVX512BITALGVLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl")
+#define __DISABLE_AVX512BITALGVL__
+#endif /* __AVX512BITALGVL__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) __M);
+}
+
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi8 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi16 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+#ifdef __DISABLE_AVX512BITALGVL__
+#undef __DISABLE_AVX512BITALGVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGVL__ */
+
+#endif /* _AVX512BITALGVLINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
index bd83b7fbbc6..5c7be9c47ac 100644
--- a/gcc/config/i386/avx512erintrin.h
+++ b/gcc/config/i386/avx512erintrin.h
@@ -30,7 +30,7 @@
#ifndef __AVX512ER__
#pragma GCC push_options
-#pragma GCC target("avx512er")
+#pragma GCC target("avx512er,evex512")
#define __DISABLE_AVX512ER__
#endif /* __AVX512ER__ */
diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h
index fc97f1defe8..e08078b2725 100644
--- a/gcc/config/i386/avx512ifmaintrin.h
+++ b/gcc/config/i386/avx512ifmaintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
#define _AVX512IFMAINTRIN_H_INCLUDED
-#ifndef __AVX512IFMA__
+#if !defined (__AVX512IFMA__) || !defined (__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("avx512ifma")
+#pragma GCC target("avx512ifma,evex512")
#define __DISABLE_AVX512IFMA__
#endif /* __AVX512IFMA__ */
diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h
index a547610660a..58af26ff02e 100644
--- a/gcc/config/i386/avx512pfintrin.h
+++ b/gcc/config/i386/avx512pfintrin.h
@@ -30,7 +30,7 @@
#ifndef __AVX512PF__
#pragma GCC push_options
-#pragma GCC target("avx512pf")
+#pragma GCC target("avx512pf,evex512")
#define __DISABLE_AVX512PF__
#endif /* __AVX512PF__ */
diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h
index ca00f8a5f14..b7ff07b2d11 100644
--- a/gcc/config/i386/avx512vbmi2intrin.h
+++ b/gcc/config/i386/avx512vbmi2intrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
#define __AVX512VBMI2INTRIN_H_INCLUDED
-#if !defined(__AVX512VBMI2__)
+#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi2")
+#pragma GCC target("avx512vbmi2,evex512")
#define __DISABLE_AVX512VBMI2__
#endif /* __AVX512VBMI2__ */
diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h
index 502586090ae..1a7ab4edca3 100644
--- a/gcc/config/i386/avx512vbmiintrin.h
+++ b/gcc/config/i386/avx512vbmiintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
#define _AVX512VBMIINTRIN_H_INCLUDED
-#ifndef __AVX512VBMI__
+#if !defined (__AVX512VBMI__) || !defined (__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi")
+#pragma GCC target("avx512vbmi,evex512")
#define __DISABLE_AVX512VBMI__
#endif /* __AVX512VBMI__ */
diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h
index e36e2e57f21..1090703ec48 100644
--- a/gcc/config/i386/avx512vnniintrin.h
+++ b/gcc/config/i386/avx512vnniintrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
#define __AVX512VNNIINTRIN_H_INCLUDED
-#if !defined(__AVX512VNNI__)
+#if !defined(__AVX512VNNI__) || !defined (__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("avx512vnni")
+#pragma GCC target("avx512vnni,evex512")
#define __DISABLE_AVX512VNNI__
#endif /* __AVX512VNNI__ */
diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h
index 65e2fb1abf5..bf68245155d 100644
--- a/gcc/config/i386/avx512vp2intersectintrin.h
+++ b/gcc/config/i386/avx512vp2intersectintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
-#if !defined(__AVX512VP2INTERSECT__)
+#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("avx512vp2intersect")
+#pragma GCC target("avx512vp2intersect,evex512")
#define __DISABLE_AVX512VP2INTERSECT__
#endif /* __AVX512VP2INTERSECT__ */
diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h
index 47897fbd8d7..9470a403f8e 100644
--- a/gcc/config/i386/avx512vpopcntdqintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
-#ifndef __AVX512VPOPCNTDQ__
+#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq")
+#pragma GCC target("avx512vpopcntdq,evex512")
#define __DISABLE_AVX512VPOPCNTDQ__
#endif /* __AVX512VPOPCNTDQ__ */
diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h
index ef3dc225b40..907e7a0cf7a 100644
--- a/gcc/config/i386/gfniintrin.h
+++ b/gcc/config/i386/gfniintrin.h
@@ -297,9 +297,53 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
#pragma GCC pop_options
#endif /* __GFNIAVX512VLBW__ */
-#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
+#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("gfni,avx512f,avx512bw")
+#pragma GCC target("gfni,avx512f,evex512")
+#define __DISABLE_GFNIAVX512F__
+#endif /* __GFNIAVX512F__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
+ (__v64qi) __B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
+{
+ return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
+ (__v64qi) __B, __C);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
+{
+ return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
+ (__v64qi) __B, __C);
+}
+#else
+#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
+ ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( \
+ (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
+#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
+ ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(C)))
+#endif
+
+#ifdef __DISABLE_GFNIAVX512F__
+#undef __DISABLE_GFNIAVX512F__
+#pragma GCC pop_options
+#endif /* __GFNIAVX512F__ */
+
+#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("gfni,avx512bw,evex512")
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */
@@ -319,13 +363,6 @@ _mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
}
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
- (__v64qi) __B);
-}
#ifdef __OPTIMIZE__
extern __inline __m512i
@@ -350,14 +387,6 @@ _mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
(__v64qi) _mm512_setzero_si512 (), __A);
}
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
-{
- return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
- (__v64qi) __B, __C);
-}
-
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
@@ -375,13 +404,6 @@ _mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
(__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
}
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
-{
- return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
- (__v64qi) __B, __C);
-}
#else
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
@@ -391,9 +413,6 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
-#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
- ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( \
- (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\
(__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
@@ -401,9 +420,6 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\
(__v64qi)(__m512i)(C), (int)(D), \
(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
-#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
- ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), \
- (__v64qi)(__m512i)(B), (int)(C)))
#endif
#ifdef __DISABLE_GFNIAVX512FBW__
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 29b4dbbda24..4e17901db15 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -96,6 +96,8 @@
#include <avx512bitalgintrin.h>
+#include <avx512bitalgvlintrin.h>
+
#include <avx512vp2intersectintrin.h>
#include <avx512vp2intersectvlintrin.h>
diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h
index 58fc19c9eb3..b2bcdbe5bd1 100644
--- a/gcc/config/i386/vaesintrin.h
+++ b/gcc/config/i386/vaesintrin.h
@@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B)
#endif /* __DISABLE_VAES__ */
-#if !defined(__VAES__) || !defined(__AVX512F__)
+#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("vaes,avx512f")
+#pragma GCC target("vaes,avx512f,evex512")
#define __DISABLE_VAESF__
#endif /* __VAES__ */
diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
index 2c83b6037a0..c8c2c19d33f 100644
--- a/gcc/config/i386/vpclmulqdqintrin.h
+++ b/gcc/config/i386/vpclmulqdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _VPCLMULQDQINTRIN_H_INCLUDED
#define _VPCLMULQDQINTRIN_H_INCLUDED
-#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__)
+#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__)
#pragma GCC push_options
-#pragma GCC target("vpclmulqdq,avx512f")
+#pragma GCC target("vpclmulqdq,avx512f,evex512")
#define __DISABLE_VPCLMULQDQF__
#endif /* __VPCLMULQDQF__ */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-10-09 9:04 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-09 9:04 [gcc r14-4491] [PATCH 4/5] Push evex512 target for 512 bit intrins Haochen Jiang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).