public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-4491] [PATCH 4/5] Push evex512 target for 512 bit intrins
@ 2023-10-09  9:04 Haochen Jiang
  0 siblings, 0 replies; only message in thread
From: Haochen Jiang @ 2023-10-09  9:04 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8108b22f8fcfeabad0a5e3573633bfc8d43eaae6

commit r14-4491-g8108b22f8fcfeabad0a5e3573633bfc8d43eaae6
Author: Haochen Jiang <haochen.jiang@intel.com>
Date:   Mon Oct 9 16:09:35 2023 +0800

    [PATCH 4/5] Push evex512 target for 512 bit intrins
    
    gcc/ChangeLog:
    
            * config.gcc: Add avx512bitalgvlintrin.h.
            * config/i386/avx5124fmapsintrin.h: Add evex512 target for 512 bit
            intrins.
            * config/i386/avx5124vnniwintrin.h: Ditto.
            * config/i386/avx512bf16intrin.h: Ditto.
            * config/i386/avx512bitalgintrin.h: Add evex512 target for 512 bit
            intrins. Split 128/256 bit intrins to avx512bitalgvlintrin.h.
            * config/i386/avx512erintrin.h: Add evex512 target for 512 bit
            intrins.
            * config/i386/avx512ifmaintrin.h: Ditto.
            * config/i386/avx512pfintrin.h: Ditto.
            * config/i386/avx512vbmi2intrin.h: Ditto.
            * config/i386/avx512vbmiintrin.h: Ditto.
            * config/i386/avx512vnniintrin.h: Ditto.
            * config/i386/avx512vp2intersectintrin.h: Ditto.
            * config/i386/avx512vpopcntdqintrin.h: Ditto.
            * config/i386/gfniintrin.h: Ditto.
            * config/i386/immintrin.h: Add avx512bitalgvlintrin.h.
            * config/i386/vaesintrin.h: Add evex512 target for 512 bit intrins.
            * config/i386/vpclmulqdqintrin.h: Ditto.
            * config/i386/avx512bitalgvlintrin.h: New.

Diff:
---
 gcc/config.gcc                             |  19 +--
 gcc/config/i386/avx5124fmapsintrin.h       |   2 +-
 gcc/config/i386/avx5124vnniwintrin.h       |   2 +-
 gcc/config/i386/avx512bf16intrin.h         |  31 +++--
 gcc/config/i386/avx512bitalgintrin.h       | 155 +------------------------
 gcc/config/i386/avx512bitalgvlintrin.h     | 180 +++++++++++++++++++++++++++++
 gcc/config/i386/avx512erintrin.h           |   2 +-
 gcc/config/i386/avx512ifmaintrin.h         |   4 +-
 gcc/config/i386/avx512pfintrin.h           |   2 +-
 gcc/config/i386/avx512vbmi2intrin.h        |   4 +-
 gcc/config/i386/avx512vbmiintrin.h         |   4 +-
 gcc/config/i386/avx512vnniintrin.h         |   4 +-
 gcc/config/i386/avx512vp2intersectintrin.h |   4 +-
 gcc/config/i386/avx512vpopcntdqintrin.h    |   4 +-
 gcc/config/i386/gfniintrin.h               |  76 +++++++-----
 gcc/config/i386/immintrin.h                |   2 +
 gcc/config/i386/vaesintrin.h               |   4 +-
 gcc/config/i386/vpclmulqdqintrin.h         |   4 +-
 18 files changed, 282 insertions(+), 221 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index ee46d96bf62..cc37a9c768d 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -436,15 +436,16 @@ i[34567]86-*-* | x86_64-*-*)
 		       avx512vbmi2vlintrin.h avx512vnniintrin.h
 		       avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
 		       avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
-		       pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
-		       waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h
-		       avx512bf16intrin.h enqcmdintrin.h serializeintrin.h
-		       avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
-		       tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
-		       amxbf16intrin.h x86gprintrin.h uintrintrin.h
-		       hresetintrin.h keylockerintrin.h avxvnniintrin.h
-		       mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
-		       avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h
+		       avx512bitalgvlintrin.h pconfigintrin.h wbnoinvdintrin.h
+		       movdirintrin.h waitpkgintrin.h cldemoteintrin.h
+		       avx512bf16vlintrin.h avx512bf16intrin.h enqcmdintrin.h
+		       serializeintrin.h avx512vp2intersectintrin.h
+		       avx512vp2intersectvlintrin.h tsxldtrkintrin.h
+		       amxtileintrin.h amxint8intrin.h amxbf16intrin.h
+		       x86gprintrin.h uintrintrin.h hresetintrin.h
+		       keylockerintrin.h avxvnniintrin.h mwaitintrin.h
+		       avx512fp16intrin.h avx512fp16vlintrin.h avxifmaintrin.h
+		       avxvnniint8intrin.h avxneconvertintrin.h
 		       cmpccxaddintrin.h amxfp16intrin.h prfchiintrin.h
 		       raointintrin.h amxcomplexintrin.h avxvnniint16intrin.h
 		       sm3intrin.h sha512intrin.h sm4intrin.h"
diff --git a/gcc/config/i386/avx5124fmapsintrin.h b/gcc/config/i386/avx5124fmapsintrin.h
index 97dd77c9235..4c884a5c203 100644
--- a/gcc/config/i386/avx5124fmapsintrin.h
+++ b/gcc/config/i386/avx5124fmapsintrin.h
@@ -30,7 +30,7 @@
 
 #ifndef __AVX5124FMAPS__
 #pragma GCC push_options
-#pragma GCC target("avx5124fmaps")
+#pragma GCC target("avx5124fmaps,evex512")
 #define __DISABLE_AVX5124FMAPS__
 #endif /* __AVX5124FMAPS__ */
 
diff --git a/gcc/config/i386/avx5124vnniwintrin.h b/gcc/config/i386/avx5124vnniwintrin.h
index fd129589798..795e4814f28 100644
--- a/gcc/config/i386/avx5124vnniwintrin.h
+++ b/gcc/config/i386/avx5124vnniwintrin.h
@@ -30,7 +30,7 @@
 
 #ifndef __AVX5124VNNIW__
 #pragma GCC push_options
-#pragma GCC target("avx5124vnniw")
+#pragma GCC target("avx5124vnniw,evex512")
 #define __DISABLE_AVX5124VNNIW__
 #endif /* __AVX5124VNNIW__ */
 
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
index 107f4a448f6..94ccbf6389f 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -34,13 +34,6 @@
 #define __DISABLE_AVX512BF16__
 #endif /* __AVX512BF16__ */
 
-/* Internal data types for implementing the intrinsics.  */
-typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
-
 /* Convert One BF16 Data to One Single Float Data.  */
 extern __inline float
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -49,6 +42,24 @@ _mm_cvtsbh_ss (__bf16 __A)
   return __builtin_ia32_cvtbf2sf (__A);
 }
 
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16__ */
+
+#if !defined (__AVX512BF16__) || !defined (__EVEX512__)
+#pragma GCC push_options
+#pragma GCC target("avx512bf16,evex512")
+#define __DISABLE_AVX512BF16_512__
+#endif /* __AVX512BF16_512__ */
+
+/* Internal data types for implementing the intrinsics.  */
+typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+   vector types, and their scalar components.  */
+typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+
 /* vcvtne2ps2bf16 */
 
 extern __inline __m512bh
@@ -144,9 +155,9 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
 	 (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
 }
 
-#ifdef __DISABLE_AVX512BF16__
-#undef __DISABLE_AVX512BF16__
+#ifdef __DISABLE_AVX512BF16_512__
+#undef __DISABLE_AVX512BF16_512__
 #pragma GCC pop_options
-#endif /* __DISABLE_AVX512BF16__ */
+#endif /* __DISABLE_AVX512BF16_512__ */
 
 #endif /* _AVX512BF16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h
index a1c7be109a9..af8514f5838 100644
--- a/gcc/config/i386/avx512bitalgintrin.h
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -22,15 +22,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 #if !defined _IMMINTRIN_H_INCLUDED
-# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
+# error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
 #endif
 
 #ifndef _AVX512BITALGINTRIN_H_INCLUDED
 #define _AVX512BITALGINTRIN_H_INCLUDED
 
-#ifndef __AVX512BITALG__
+#if !defined (__AVX512BITALG__) || !defined (__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("avx512bitalg")
+#pragma GCC target("avx512bitalg,evex512")
 #define __DISABLE_AVX512BITALG__
 #endif /* __AVX512BITALG__ */
 
@@ -108,153 +108,4 @@ _mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
 #pragma GCC pop_options
 #endif /* __DISABLE_AVX512BITALG__ */
 
-#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
-#pragma GCC push_options
-#pragma GCC target("avx512bitalg,avx512vl")
-#define __DISABLE_AVX512BITALGVL__
-#endif /* __AVX512BITALGVL__ */
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
-							 (__v32qi) __W,
-							 (__mmask32) __U);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
-						(__v32qi)
-						 _mm256_setzero_si256 (),
-						(__mmask32) __U);
-}
-
-extern __inline __mmask32
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
-{
-  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
-						 (__v32qi) __B,
-						 (__mmask32) __M);
-}
-
-extern __inline __mmask16
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__mmask16) -1);
-}
-
-extern __inline __mmask16
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
-{
-  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
-						 (__v16qi) __B,
-						 (__mmask16) __M);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_popcnt_epi8 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_popcnt_epi16 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_epi8 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_epi16 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
-							(__v16hi) __W,
-							(__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
-						(__v16hi)
-						_mm256_setzero_si256 (),
-						(__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
-							 (__v16qi) __W,
-							 (__mmask16) __U);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
-							 (__v16qi)
-							 _mm_setzero_si128 (),
-							 (__mmask16) __U);
-}
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
-							(__v8hi) __W,
-							(__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
-							(__v8hi)
-							_mm_setzero_si128 (),
-							(__mmask8) __U);
-}
-#ifdef __DISABLE_AVX512BITALGVL__
-#undef __DISABLE_AVX512BITALGVL__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BITALGVL__ */
-
 #endif /* _AVX512BITALGINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h
new file mode 100644
index 00000000000..36d697dea8a
--- /dev/null
+++ b/gcc/config/i386/avx512bitalgvlintrin.h
@@ -0,0 +1,180 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512bitalgvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BITALGVLINTRIN_H_INCLUDED
+#define _AVX512BITALGVLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl")
+#define __DISABLE_AVX512BITALGVL__
+#endif /* __AVX512BITALGVL__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+							 (__v32qi) __W,
+							 (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+						(__v32qi)
+						 _mm256_setzero_si256 (),
+						(__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
+						 (__v32qi) __B,
+						 (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
+{
+  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
+						 (__v32qi) __B,
+						 (__mmask32) __M);
+}
+
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
+						 (__v16qi) __B,
+						 (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
+{
+  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
+						 (__v16qi) __B,
+						 (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi8 (__m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi16 (__m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi16 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+							(__v16hi) __W,
+							(__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+						(__v16hi)
+						_mm256_setzero_si256 (),
+						(__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+							 (__v16qi) __W,
+							 (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+							 (__v16qi)
+							 _mm_setzero_si128 (),
+							 (__mmask16) __U);
+}
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+							(__v8hi) __W,
+							(__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+							(__v8hi)
+							_mm_setzero_si128 (),
+							(__mmask8) __U);
+}
+#ifdef __DISABLE_AVX512BITALGVL__
+#undef __DISABLE_AVX512BITALGVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGVL__ */
+
+#endif /* _AVX512BITALGVLINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
index bd83b7fbbc6..5c7be9c47ac 100644
--- a/gcc/config/i386/avx512erintrin.h
+++ b/gcc/config/i386/avx512erintrin.h
@@ -30,7 +30,7 @@
 
 #ifndef __AVX512ER__
 #pragma GCC push_options
-#pragma GCC target("avx512er")
+#pragma GCC target("avx512er,evex512")
 #define __DISABLE_AVX512ER__
 #endif /* __AVX512ER__ */
 
diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h
index fc97f1defe8..e08078b2725 100644
--- a/gcc/config/i386/avx512ifmaintrin.h
+++ b/gcc/config/i386/avx512ifmaintrin.h
@@ -28,9 +28,9 @@
 #ifndef _AVX512IFMAINTRIN_H_INCLUDED
 #define _AVX512IFMAINTRIN_H_INCLUDED
 
-#ifndef __AVX512IFMA__
+#if !defined (__AVX512IFMA__) || !defined (__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("avx512ifma")
+#pragma GCC target("avx512ifma,evex512")
 #define __DISABLE_AVX512IFMA__
 #endif /* __AVX512IFMA__ */
 
diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h
index a547610660a..58af26ff02e 100644
--- a/gcc/config/i386/avx512pfintrin.h
+++ b/gcc/config/i386/avx512pfintrin.h
@@ -30,7 +30,7 @@
 
 #ifndef __AVX512PF__
 #pragma GCC push_options
-#pragma GCC target("avx512pf")
+#pragma GCC target("avx512pf,evex512")
 #define __DISABLE_AVX512PF__
 #endif /* __AVX512PF__ */
 
diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h
index ca00f8a5f14..b7ff07b2d11 100644
--- a/gcc/config/i386/avx512vbmi2intrin.h
+++ b/gcc/config/i386/avx512vbmi2intrin.h
@@ -28,9 +28,9 @@
 #ifndef __AVX512VBMI2INTRIN_H_INCLUDED
 #define __AVX512VBMI2INTRIN_H_INCLUDED
 
-#if !defined(__AVX512VBMI2__)
+#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("avx512vbmi2")
+#pragma GCC target("avx512vbmi2,evex512")
 #define __DISABLE_AVX512VBMI2__
 #endif /* __AVX512VBMI2__ */
 
diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h
index 502586090ae..1a7ab4edca3 100644
--- a/gcc/config/i386/avx512vbmiintrin.h
+++ b/gcc/config/i386/avx512vbmiintrin.h
@@ -28,9 +28,9 @@
 #ifndef _AVX512VBMIINTRIN_H_INCLUDED
 #define _AVX512VBMIINTRIN_H_INCLUDED
 
-#ifndef __AVX512VBMI__
+#if !defined (__AVX512VBMI__) || !defined (__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("avx512vbmi")
+#pragma GCC target("avx512vbmi,evex512")
 #define __DISABLE_AVX512VBMI__
 #endif /* __AVX512VBMI__ */
 
diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h
index e36e2e57f21..1090703ec48 100644
--- a/gcc/config/i386/avx512vnniintrin.h
+++ b/gcc/config/i386/avx512vnniintrin.h
@@ -28,9 +28,9 @@
 #ifndef __AVX512VNNIINTRIN_H_INCLUDED
 #define __AVX512VNNIINTRIN_H_INCLUDED
 
-#if !defined(__AVX512VNNI__)
+#if !defined(__AVX512VNNI__) || !defined (__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("avx512vnni")
+#pragma GCC target("avx512vnni,evex512")
 #define __DISABLE_AVX512VNNI__
 #endif /* __AVX512VNNI__ */
 
diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h
index 65e2fb1abf5..bf68245155d 100644
--- a/gcc/config/i386/avx512vp2intersectintrin.h
+++ b/gcc/config/i386/avx512vp2intersectintrin.h
@@ -28,9 +28,9 @@
 #ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
 #define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
 
-#if !defined(__AVX512VP2INTERSECT__)
+#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("avx512vp2intersect")
+#pragma GCC target("avx512vp2intersect,evex512")
 #define __DISABLE_AVX512VP2INTERSECT__
 #endif /* __AVX512VP2INTERSECT__ */
 
diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h
index 47897fbd8d7..9470a403f8e 100644
--- a/gcc/config/i386/avx512vpopcntdqintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqintrin.h
@@ -28,9 +28,9 @@
 #ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
 #define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
 
-#ifndef __AVX512VPOPCNTDQ__
+#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq")
+#pragma GCC target("avx512vpopcntdq,evex512")
 #define __DISABLE_AVX512VPOPCNTDQ__
 #endif /* __AVX512VPOPCNTDQ__ */
 
diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h
index ef3dc225b40..907e7a0cf7a 100644
--- a/gcc/config/i386/gfniintrin.h
+++ b/gcc/config/i386/gfniintrin.h
@@ -297,9 +297,53 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
 #pragma GCC pop_options
 #endif /* __GFNIAVX512VLBW__ */
 
-#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
+#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__)
 #pragma GCC push_options
-#pragma GCC target("gfni,avx512f,avx512bw")
+#pragma GCC target("gfni,avx512f,evex512")
+#define __DISABLE_GFNIAVX512F__
+#endif /* __GFNIAVX512F__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
+						    (__v64qi) __B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
+{
+  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
+							   (__v64qi) __B, __C);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
+{
+  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
+							(__v64qi) __B, __C);
+}
+#else
+#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C)			\
+  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi (			\
+	(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
+#define _mm512_gf2p8affine_epi64_epi8(A, B, C)				    \
+  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A),    \
+	 (__v64qi)(__m512i)(B), (int)(C)))
+#endif
+
+#ifdef __DISABLE_GFNIAVX512F__
+#undef __DISABLE_GFNIAVX512F__
+#pragma GCC pop_options
+#endif /* __GFNIAVX512F__ */
+
+#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("gfni,avx512bw,evex512")
 #define __DISABLE_GFNIAVX512FBW__
 #endif /* __GFNIAVX512FBW__ */
 
@@ -319,13 +363,6 @@ _mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
   return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
 			(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
 }
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
-						    (__v64qi) __B);
-}
 
 #ifdef __OPTIMIZE__
 extern __inline __m512i
@@ -350,14 +387,6 @@ _mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
 				(__v64qi) _mm512_setzero_si512 (), __A);
 }
 
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
-{
-  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
-							   (__v64qi) __B, __C);
-}
-
 extern __inline __m512i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
@@ -375,13 +404,6 @@ _mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
   return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
 		  (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
 }
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
-{
-  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
-							(__v64qi) __B, __C);
-}
 #else
 #define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) 		\
   ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
@@ -391,9 +413,6 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
   ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
 	(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D),		\
 	(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
-#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C)			\
-  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi (			\
-	(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
 #define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		    \
   ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\
      (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
@@ -401,9 +420,6 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
   ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\
 	 (__v64qi)(__m512i)(C), (int)(D),				    \
 	 (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
-#define _mm512_gf2p8affine_epi64_epi8(A, B, C)				    \
-  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A),    \
-	 (__v64qi)(__m512i)(B), (int)(C)))
 #endif
 
 #ifdef __DISABLE_GFNIAVX512FBW__
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 29b4dbbda24..4e17901db15 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -96,6 +96,8 @@
 
 #include <avx512bitalgintrin.h>
 
+#include <avx512bitalgvlintrin.h>
+
 #include <avx512vp2intersectintrin.h>
 
 #include <avx512vp2intersectvlintrin.h>
diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h
index 58fc19c9eb3..b2bcdbe5bd1 100644
--- a/gcc/config/i386/vaesintrin.h
+++ b/gcc/config/i386/vaesintrin.h
@@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B)
 #endif /* __DISABLE_VAES__ */
 
 
-#if !defined(__VAES__) || !defined(__AVX512F__)
+#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("vaes,avx512f")
+#pragma GCC target("vaes,avx512f,evex512")
 #define __DISABLE_VAESF__
 #endif /* __VAES__ */
 
diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
index 2c83b6037a0..c8c2c19d33f 100644
--- a/gcc/config/i386/vpclmulqdqintrin.h
+++ b/gcc/config/i386/vpclmulqdqintrin.h
@@ -28,9 +28,9 @@
 #ifndef _VPCLMULQDQINTRIN_H_INCLUDED
 #define _VPCLMULQDQINTRIN_H_INCLUDED
 
-#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__)
+#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__)
 #pragma GCC push_options
-#pragma GCC target("vpclmulqdq,avx512f")
+#pragma GCC target("vpclmulqdq,avx512f,evex512")
 #define __DISABLE_VPCLMULQDQF__
 #endif /* __VPCLMULQDQF__ */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-09  9:04 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-09  9:04 [gcc r14-4491] [PATCH 4/5] Push evex512 target for 512 bit intrins Haochen Jiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).