public inbox for gcc-patches@gcc.gnu.org
* RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
@ 2017-10-24 10:16 Koval, Julia
  2017-11-28 18:09 ` Kirill Yukhin
  0 siblings, 1 reply; 10+ messages in thread
From: Koval, Julia @ 2017-10-24 10:16 UTC (permalink / raw)
  To: GCC Patches; +Cc: Kirill Yukhin

[-- Attachment #1: Type: text/plain, Size: 2382 bytes --]

The patch is attached.

> -----Original Message-----
> From: Koval, Julia
> Sent: Tuesday, October 24, 2017 12:01 PM
> To: GCC Patches <gcc-patches@gcc.gnu.org>
> Cc: Kirill Yukhin <kirill.yukhin@gmail.com>
> Subject: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
> 
> Hi,
> This patch enables the VPSHRD instruction.  Documentation for the ISA set
> and the instruction:
> https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
> 
> Ok for trunk?
> 
> gcc/
> 	* config/i386/avx512vbmi2intrin.h (_mm512_shrdi_epi16,
> 	_mm512_mask_shrdi_epi16, _mm512_maskz_shrdi_epi16, _mm512_shrdi_epi32,
> 	_mm512_mask_shrdi_epi32, _mm512_maskz_shrdi_epi32, _mm512_shrdi_epi64,
> 	_mm512_mask_shrdi_epi64, _mm512_maskz_shrdi_epi64): New intrinsics.
> 	* config/i386/avx512vbmi2vlintrin.h (_mm256_shrdi_epi16,
> 	_mm256_mask_shrdi_epi16, _mm256_maskz_shrdi_epi16,
> 	_mm256_mask_shrdi_epi32, _mm256_maskz_shrdi_epi32, _mm256_shrdi_epi32,
> 	_mm256_mask_shrdi_epi64, _mm256_maskz_shrdi_epi64, _mm256_shrdi_epi64,
> 	_mm_mask_shrdi_epi16, _mm_maskz_shrdi_epi16, _mm_shrdi_epi16,
> 	_mm_mask_shrdi_epi32, _mm_maskz_shrdi_epi32, _mm_shrdi_epi32,
> 	_mm_mask_shrdi_epi64, _mm_maskz_shrdi_epi64, _mm_shrdi_epi64): Ditto.
> 	* config/i386/i386-builtin.def (__builtin_ia32_vpshrd_v32hi,
> 	__builtin_ia32_vpshrd_v32hi_mask, __builtin_ia32_vpshrd_v16hi,
> 	__builtin_ia32_vpshrd_v16hi_mask, __builtin_ia32_vpshrd_v8hi,
> 	__builtin_ia32_vpshrd_v8hi_mask, __builtin_ia32_vpshrd_v16si,
> 	__builtin_ia32_vpshrd_v16si_mask, __builtin_ia32_vpshrd_v8si,
> 	__builtin_ia32_vpshrd_v8si_mask, __builtin_ia32_vpshrd_v4si,
> 	__builtin_ia32_vpshrd_v4si_mask, __builtin_ia32_vpshrd_v8di,
> 	__builtin_ia32_vpshrd_v8di_mask, __builtin_ia32_vpshrd_v4di,
> 	__builtin_ia32_vpshrd_v4di_mask, __builtin_ia32_vpshrd_v2di,
> 	__builtin_ia32_vpshrd_v2di_mask): New builtins.
> 	* config/i386/sse.md (vpshrd_<mode><mask_name>): New pattern.
> 
> gcc/testsuite/
> 	* gcc.target/i386/avx-1.c: Handle new intrinsics.
> 	* gcc.target/i386/sse-13.c: Ditto.
> 	* gcc.target/i386/sse-23.c: Ditto.
> 	* gcc.target/i386/avx512f-vpshrdd-2.c: New.
> 	* gcc.target/i386/avx512f-vpshrdq-2.c: Ditto.
> 	* gcc.target/i386/avx512vl-vpshrd-1.c: Ditto.
> 	* gcc.target/i386/avx512vl-vpshrdd-2.c: Ditto.
> 	* gcc.target/i386/avx512vl-vpshrdq-2.c: Ditto.

[-- Attachment #2: 0007-VPSHRD-instruction.patch --]
[-- Type: application/octet-stream, Size: 37492 bytes --]

From 5cf3ab90b48287adf608296f118a9934c7bf91f4 Mon Sep 17 00:00:00 2001
From: "julia.koval" <jkoval@gkliclel201.igk.intel.com>
Date: Thu, 19 Oct 2017 12:02:26 +0200
Subject: [PATCH 07/14] VPSHRD instruction

---
 gcc/config/i386/avx512vbmi2intrin.h                | 104 +++++++++++
 gcc/config/i386/avx512vbmi2vlintrin.h              | 205 +++++++++++++++++++++
 gcc/config/i386/i386-builtin.def                   |  18 ++
 gcc/config/i386/sse.md                             |  12 ++
 gcc/testsuite/gcc.target/i386/avx-1.c              |  18 ++
 gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c  |  62 +++++++
 gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c  |  62 +++++++
 gcc/testsuite/gcc.target/i386/avx512vl-vpshrd-1.c  |  54 ++++++
 gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c |  16 ++
 gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c |  16 ++
 gcc/testsuite/gcc.target/i386/sse-13.c             |  18 ++
 gcc/testsuite/gcc.target/i386/sse-23.c             |  18 ++
 12 files changed, 603 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vpshrd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c

diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h
index 7f6e878..0a7c2b9 100644
--- a/gcc/config/i386/avx512vbmi2intrin.h
+++ b/gcc/config/i386/avx512vbmi2intrin.h
@@ -153,6 +153,80 @@ _mm512_maskz_expandloadu_epi16 (__mmask32 __A, const void * __B)
 #ifdef __OPTIMIZE__
 extern __inline __m512i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shrdi_epi16 (__m512i __A, __m512i __B, int __C)
+{
+  return (__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)__A, (__v32hi) __B,
+									__C);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shrdi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
+								int __E)
+{
+  return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__C,
+			(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shrdi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
+{
+  return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__B,
+	(__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shrdi_epi32 (__m512i __A, __m512i __B, int __C)
+{
+  return (__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)__A, (__v16si) __B,
+									__C);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shrdi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
+								int __E)
+{
+  return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__C,
+			(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shrdi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
+{
+  return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__B,
+	(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shrdi_epi64 (__m512i __A, __m512i __B, int __C)
+{
+  return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shrdi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
+								int __E)
+{
+  return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__C, (__v8di) __D,
+					__E, (__v8di) __A, (__mmask8)__B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shrdi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
+{
+  return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__B, (__v8di) __C,
+			__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_shldi_epi16 (__m512i __A, __m512i __B, int __C)
 {
   return (__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)__A, (__v32hi) __B,
@@ -225,6 +299,36 @@ _mm512_maskz_shldi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
 			__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
 }
 #else
+#define _mm512_shrdi_epi16(A, B, C) \
+  ((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), \
+						(__v32hi)(__m512i)(B),(int)(C)))
+#define _mm512_mask_shrdi_epi16(A, B, C, D, E) \
+  ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), \
+	(__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A),(__mmask32)(B)))
+#define _mm512_maskz_shrdi_epi16(A, B, C, D) \
+  ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), \
+	(__v32hi)(__m512i)(C),(int)(D), \
+	(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
+#define _mm512_shrdi_epi32(A, B, C) \
+  ((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), \
+	(__v16si)(__m512i)(B),(int)(C)))
+#define _mm512_mask_shrdi_epi32(A, B, C, D, E) \
+  ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), \
+	(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B)))
+#define _mm512_maskz_shrdi_epi32(A, B, C, D) \
+  ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), \
+	(__v16si)(__m512i)(C),(int)(D), \
+	(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
+#define _mm512_shrdi_epi64(A, B, C) \
+  ((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), \
+	(__v8di)(__m512i)(B),(int)(C)))
+#define _mm512_mask_shrdi_epi64(A, B, C, D, E) \
+  ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), \
+	(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B)))
+#define _mm512_maskz_shrdi_epi64(A, B, C, D) \
+  ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), \
+	(__v8di)(__m512i)(C),(int)(D), \
+	(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
 #define _mm512_shldi_epi16(A, B, C) \
   ((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), \
 						(__v32hi)(__m512i)(B),(int)(C))
diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h
index f1d2aee..5d8d88c 100644
--- a/gcc/config/i386/avx512vbmi2vlintrin.h
+++ b/gcc/config/i386/avx512vbmi2vlintrin.h
@@ -208,6 +208,151 @@ _mm256_maskz_expandloadu_epi16 (__mmask16 __A, const void * __B)
 #ifdef __OPTIMIZE__
 extern __inline __m256i
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shrdi_epi16 (__m256i __A, __m256i __B, int __C)
+{
+  return (__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)__A, (__v16hi) __B,
+									__C);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shrdi_epi16 (__m256i __A, __mmask16 __B, __m256i __C, __m256i __D,
+								int __E)
+{
+  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__C,
+			(__v16hi) __D, __E, (__v16hi) __A, (__mmask16)__B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shrdi_epi16 (__mmask16 __A, __m256i __B, __m256i __C, int __D)
+{
+  return (__m256i)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi)__B,
+	(__v16hi) __C, __D, (__v16hi) _mm256_setzero_si256 (), (__mmask16)__A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shrdi_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
+								int __E)
+{
+  return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__C, (__v8si) __D,
+					__E, (__v8si) __A, (__mmask8)__B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shrdi_epi32 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
+{
+  return (__m256i)__builtin_ia32_vpshrd_v8si_mask ((__v8si)__B, (__v8si) __C,
+			__D, (__v8si) _mm256_setzero_si256 (), (__mmask8)__A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shrdi_epi32 (__m256i __A, __m256i __B, int __C)
+{
+  return (__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)__A, (__v8si) __B, __C);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shrdi_epi64 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D,
+								int __E)
+{
+  return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__C, (__v4di) __D,
+					__E, (__v4di) __A, (__mmask8)__B);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shrdi_epi64 (__mmask8 __A, __m256i __B, __m256i __C, int __D)
+{
+  return (__m256i)__builtin_ia32_vpshrd_v4di_mask ((__v4di)__B, (__v4di) __C,
+			__D, (__v4di) _mm256_setzero_si256 (), (__mmask8)__A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shrdi_epi64 (__m256i __A, __m256i __B, int __C)
+{
+  return (__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)__A, (__v4di) __B, __C);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shrdi_epi16 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
+								int __E)
+{
+  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__C, (__v8hi) __D,
+					__E, (__v8hi) __A, (__mmask8)__B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shrdi_epi16 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
+{
+  return (__m128i)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi)__B, (__v8hi) __C,
+			__D, (__v8hi) _mm_setzero_si128 (), (__mmask8)__A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shrdi_epi16 (__m128i __A, __m128i __B, int __C)
+{
+  return (__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)__A, (__v8hi) __B, __C);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shrdi_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
+								int __E)
+{
+  return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__C, (__v4si) __D,
+					__E, (__v4si) __A, (__mmask8)__B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shrdi_epi32 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
+{
+  return (__m128i)__builtin_ia32_vpshrd_v4si_mask ((__v4si)__B, (__v4si) __C,
+			__D, (__v4si) _mm_setzero_si128 (), (__mmask8)__A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shrdi_epi32 (__m128i __A, __m128i __B, int __C)
+{
+  return (__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)__A, (__v4si) __B, __C);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shrdi_epi64 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D,
+								int __E)
+{
+  return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__C, (__v2di) __D,
+					__E, (__v2di) __A, (__mmask8)__B);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shrdi_epi64 (__mmask8 __A, __m128i __B, __m128i __C, int __D)
+{
+  return (__m128i)__builtin_ia32_vpshrd_v2di_mask ((__v2di)__B, (__v2di) __C,
+			__D, (__v2di) _mm_setzero_si128 (), (__mmask8)__A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shrdi_epi64 (__m128i __A, __m128i __B, int __C)
+{
+  return (__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)__A, (__v2di) __B, __C);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_shldi_epi16 (__m256i __A, __m256i __B, int __C)
 {
   return (__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)__A, (__v16hi) __B,
@@ -351,6 +496,66 @@ _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
   return (__m128i) __builtin_ia32_vpshld_v2di ((__v2di)__A, (__v2di) __B, __C);
 }
 #else
+#define _mm256_shrdi_epi16(A, B, C) \
+  ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A), \
+	(__v16hi)(__m256i)(B),(int)(C)))
+#define _mm256_mask_shrdi_epi16(A, B, C, D, E) \
+  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C), \
+	(__v16hi)(__m256i)(D), (int)(E), (__v16hi)(__m256i)(A),(__mmask16)(B)))
+#define _mm256_maskz_shrdi_epi16(A, B, C, D) \
+  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B), \
+	(__v16hi)(__m256i)(C),(int)(D), \
+	(__v16hi)(__m256i)_mm256_setzero_si256 (), (__mmask16)(A)))
+#define _mm256_shrdi_epi32(A, B, C) \
+  ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \
+	(__v8si)(__m256i)(B),(int)(C)))
+#define _mm256_mask_shrdi_epi32(A, B, C, D, E) \
+  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C), \
+	(__v8si)(__m256i)(D), (int)(E), (__v8si)(__m256i)(A),(__mmask8)(B)))
+#define _mm256_maskz_shrdi_epi32(A, B, C, D) \
+  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B), \
+	(__v8si)(__m256i)(C),(int)(D), \
+	(__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(A)))
+#define _mm256_shrdi_epi64(A, B, C) \
+  ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \
+	(__v4di)(__m256i)(B),(int)(C)))
+#define _mm256_mask_shrdi_epi64(A, B, C, D, E) \
+  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C), \
+	(__v4di)(__m256i)(D), (int)(E), (__v4di)(__m256i)(A),(__mmask8)(B)))
+#define _mm256_maskz_shrdi_epi64(A, B, C, D) \
+  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B), \
+	(__v4di)(__m256i)(C),(int)(D), \
+	(__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(A)))
+#define _mm_shrdi_epi16(A, B, C) \
+  ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \
+	(__v8hi)(__m128i)(B),(int)(C)))
+#define _mm_mask_shrdi_epi16(A, B, C, D, E) \
+  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C), \
+	(__v8hi)(__m128i)(D), (int)(E), (__v8hi)(__m128i)(A),(__mmask8)(B)))
+#define _mm_maskz_shrdi_epi16(A, B, C, D) \
+  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B), \
+	(__v8hi)(__m128i)(C),(int)(D), \
+	(__v8hi)(__m128i)_mm_setzero_si128 (), (__mmask8)(A)))
+#define _mm_shrdi_epi32(A, B, C) \
+  ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \
+	(__v4si)(__m128i)(B),(int)(C)))
+#define _mm_mask_shrdi_epi32(A, B, C, D, E) \
+  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
+	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B)))
+#define _mm_maskz_shrdi_epi32(A, B, C, D) \
+  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
+	(__v4si)(__m128i)(C),(int)(D), \
+	(__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(A)))
+#define _mm_shrdi_epi64(A, B, C) \
+  ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \
+	(__v2di)(__m128i)(B),(int)(C)))
+#define _mm_mask_shrdi_epi64(A, B, C, D, E) \
+  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C), \
+	(__v2di)(__m128i)(D), (int)(E), (__v2di)(__m128i)(A),(__mmask8)(B)))
+#define _mm_maskz_shrdi_epi64(A, B, C, D) \
+  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B), \
+	(__v2di)(__m128i)(C),(int)(D), \
+	(__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(A)))
 #define _mm256_shldi_epi16(A, B, C) \
   ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
 						(__v16hi)(__m256i)(B),(int)(C))
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 48dda54..75ccba8 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2608,6 +2608,24 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv1
 BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv16hi_maskz, "__builtin_ia32_expandhi256_maskz", IX86_BUILTIN_PEXPANDW256Z, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandhi128_mask", IX86_BUILTIN_PEXPANDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandhi128_maskz", IX86_BUILTIN_PEXPANDW128Z, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v16hi, "__builtin_ia32_vpshrd_v16hi", IX86_BUILTIN_VPSHRDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v16hi_mask, "__builtin_ia32_vpshrd_v16hi_mask", IX86_BUILTIN_VPSHRDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v8hi, "__builtin_ia32_vpshrd_v8hi", IX86_BUILTIN_VPSHRDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v8hi_mask, "__builtin_ia32_vpshrd_v8hi_mask", IX86_BUILTIN_VPSHRDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v8si, "__builtin_ia32_vpshrd_v8si", IX86_BUILTIN_VPSHRDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v8si_mask, "__builtin_ia32_vpshrd_v8si_mask", IX86_BUILTIN_VPSHRDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v4si, "__builtin_ia32_vpshrd_v4si", IX86_BUILTIN_VPSHRDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v4si_mask, "__builtin_ia32_vpshrd_v4si_mask", IX86_BUILTIN_VPSHRDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v4di, "__builtin_ia32_vpshrd_v4di", IX86_BUILTIN_VPSHRDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v4di_mask, "__builtin_ia32_vpshrd_v4di_mask", IX86_BUILTIN_VPSHRDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v2di, "__builtin_ia32_vpshrd_v2di", IX86_BUILTIN_VPSHRDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshrd_v2di_mask, "__builtin_ia32_vpshrd_v2di_mask", IX86_BUILTIN_VPSHRDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
 BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512VBMI2, CODE_FOR_vpshld_v16hi, "__builtin_ia32_vpshld_v16hi", IX86_BUILTIN_VPSHLDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5eedfaf..a30fd4e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -160,6 +160,7 @@
 
   ;; For AVX512VBMI2 support
   UNSPEC_VPSHLD
+  UNSPEC_VPSHRD
 ])
 
 (define_c_enum "unspecv" [
@@ -20228,6 +20229,17 @@
   "TARGET_SSE && TARGET_64BIT"
   "jmp\t%P1")
 
+(define_insn "vpshrd_<mode><mask_name>"
+  [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
+	(unspec:VI248_VLBW
+	  [(match_operand:VI248_VLBW 1 "register_operand" "v")
+	   (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
+	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
+	  UNSPEC_VPSHRD))]
+  "TARGET_AVX512VBMI2"
+  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
+  [(set_attr "prefix" "evex")])
+
 (define_insn "vpshld_<mode><mask_name>"
   [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
 	(unspec:VI248_VLBW
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 62674d1..dd0d7c6 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -604,6 +604,24 @@
 #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
 
 /* avx512vbmi2intrin.h */
+#define __builtin_ia32_vpshrd_v32hi(A, B, C) __builtin_ia32_vpshrd_v32hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v32hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v16si(A, B, C) __builtin_ia32_vpshrd_v16si(A, B, 1)
+#define __builtin_ia32_vpshrd_v16si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8di(A, B, C) __builtin_ia32_vpshrd_v8di(A, B, 1)
+#define __builtin_ia32_vpshrd_v8di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8di_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v16hi(A, B, C) __builtin_ia32_vpshrd_v16hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v16hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8si(A, B, C) __builtin_ia32_vpshrd_v8si(A, B, 1)
+#define __builtin_ia32_vpshrd_v8si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v4di(A, B, C) __builtin_ia32_vpshrd_v4di(A, B, 1)
+#define __builtin_ia32_vpshrd_v4di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4di_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8hi(A, B, C) __builtin_ia32_vpshrd_v8hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v8hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v4si(A, B, C) __builtin_ia32_vpshrd_v4si(A, B, 1)
+#define __builtin_ia32_vpshrd_v4si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v2di(A, B, C) __builtin_ia32_vpshrd_v2di(A, B, 1)
+#define __builtin_ia32_vpshrd_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v2di_mask(A, B, 1, D, E)
 #define __builtin_ia32_vpshld_v32hi(A, B, C) __builtin_ia32_vpshld_v32hi(A, B, 1)
 #define __builtin_ia32_vpshld_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v32hi_mask(A, B, 1, D, E)
 #define __builtin_ia32_vpshld_v16si(A, B, C) __builtin_ia32_vpshld_v16si(A, B, 1)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c
new file mode 100644
index 0000000..54dd369
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512F
+
+#define AVX512VBMI2
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *r, int *dst, int *s1, int *s2, int imm)
+{
+  int i;
+  for (i = 0; i < SIZE; i++)
+    {
+      r[i] = (s1[i] >> (imm & 31)) | (s2[i] << (32 - (imm & 31)));
+    }
+}
+
+void
+TEST (void)
+{
+  int i;
+  UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  int res_ref[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1 + i;
+      src2.a[i] = 2 + 2*i;
+    }
+
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+    }
+
+  CALC (res_ref, res1.a, src1.a, src2.a, DEFAULT_VALUE);
+
+  res1.x = INTRINSIC (_shrdi_epi32) (src1.x, src2.x, DEFAULT_VALUE);
+  res2.x = INTRINSIC (_mask_shrdi_epi32) (res2.x, mask, src1.x, src2.x, DEFAULT_VALUE);
+  res3.x = INTRINSIC (_maskz_shrdi_epi32) (mask, src1.x, src2.x, DEFAULT_VALUE);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_d) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_d) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c
new file mode 100644
index 0000000..4997c70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512F
+
+#define AVX512VBMI2
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *r, long long *dst, long long *s1, long long *s2, int imm)
+{
+  int i;
+  for (i = 0; i < SIZE; i++)
+    {
+      r[i] = (s1[i] >> (imm & 63)) | (s2[i] << (64 - (imm & 63)));
+    }
+}
+
+void
+TEST (void)
+{
+  int i;
+  UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  long long res_ref[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 1 + i;
+      src2.a[i] = 2 + 2*i;
+    }
+
+  for (i = 0; i < SIZE; i++)
+    {
+      res1.a[i] = DEFAULT_VALUE;
+      res2.a[i] = DEFAULT_VALUE;
+      res3.a[i] = DEFAULT_VALUE;
+    }
+
+  CALC (res_ref, res1.a, src1.a, src2.a, DEFAULT_VALUE);
+
+  res1.x = INTRINSIC (_shrdi_epi64) (src1.x, src2.x, DEFAULT_VALUE);
+  res2.x = INTRINSIC (_mask_shrdi_epi64) (res2.x, mask, src1.x, src2.x, DEFAULT_VALUE);
+  res3.x = INTRINSIC (_maskz_shrdi_epi64) (mask, src1.x, src2.x, DEFAULT_VALUE);
+
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_q) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_q) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrd-1.c
new file mode 100644
index 0000000..923b954
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrd-1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512vbmi2 -O2" } */
+/* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdd\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdd\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdd\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdd\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdd\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdd\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshrdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x,y;
+volatile __m128i z1,z2;
+volatile __mmask32 m;
+
+void extern
+avx512f_test (void)
+{
+  x = _mm256_shrdi_epi16 (x, y, 3);
+  x = _mm256_maskz_shrdi_epi16 (m, x, y, 3);
+  x = _mm256_mask_shrdi_epi16 (x, m, y, x, 3);
+
+  x = _mm256_shrdi_epi32 (x, y, 3);
+  x = _mm256_maskz_shrdi_epi32 (m, x, y, 3);
+  x = _mm256_mask_shrdi_epi32 (x, m, y, x, 3);
+
+  x = _mm256_shrdi_epi64 (x, y, 3);
+  x = _mm256_maskz_shrdi_epi64 (m, x, y, 3);
+  x = _mm256_mask_shrdi_epi64 (x, m, y, x, 3);
+
+  z1 = _mm_shrdi_epi16 (z1, z2, 3);
+  z1 = _mm_maskz_shrdi_epi16 (m, z1, z2, 3);
+  z1 = _mm_mask_shrdi_epi16 (z1, m, z2, z1, 3);
+
+  z1 = _mm_shrdi_epi32 (z1, z2, 3);
+  z1 = _mm_maskz_shrdi_epi32 (m, z1, z2, 3);
+  z1 = _mm_mask_shrdi_epi32 (z1, m, z2, z1, 3);
+
+  z1 = _mm_shrdi_epi64 (z1, z2, 3);
+  z1 = _mm_maskz_shrdi_epi64 (m, z1, z2, 3);
+  z1 = _mm_mask_shrdi_epi64 (z1, m, z2, z1, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c
new file mode 100644
index 0000000..bf22915
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpshrdd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpshrdd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c
new file mode 100644
index 0000000..61e0708
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpshrdq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpshrdq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 650579f..2757dcf 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -621,6 +621,24 @@
 #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
 
 /* avx512vbmi2intrin.h */
+#define __builtin_ia32_vpshrd_v32hi(A, B, C) __builtin_ia32_vpshrd_v32hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v32hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v16si(A, B, C) __builtin_ia32_vpshrd_v16si(A, B, 1)
+#define __builtin_ia32_vpshrd_v16si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8di(A, B, C) __builtin_ia32_vpshrd_v8di(A, B, 1)
+#define __builtin_ia32_vpshrd_v8di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8di_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v16hi(A, B, C) __builtin_ia32_vpshrd_v16hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v16hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8si(A, B, C) __builtin_ia32_vpshrd_v8si(A, B, 1)
+#define __builtin_ia32_vpshrd_v8si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v4di(A, B, C) __builtin_ia32_vpshrd_v4di(A, B, 1)
+#define __builtin_ia32_vpshrd_v4di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4di_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8hi(A, B, C) __builtin_ia32_vpshrd_v8hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v8hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v4si(A, B, C) __builtin_ia32_vpshrd_v4si(A, B, 1)
+#define __builtin_ia32_vpshrd_v4si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v2di(A, B, C) __builtin_ia32_vpshrd_v2di(A, B, 1)
+#define __builtin_ia32_vpshrd_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v2di_mask(A, B, 1, D, E)
 #define __builtin_ia32_vpshld_v32hi(A, B, C) __builtin_ia32_vpshld_v32hi(A, B, 1)
 #define __builtin_ia32_vpshld_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v32hi_mask(A, B, 1, D, E)
 #define __builtin_ia32_vpshld_v16si(A, B, C) __builtin_ia32_vpshld_v16si(A, B, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index ac55847..ea3f9c5 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -620,6 +620,24 @@
 #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
 
 /* avx512vbmi2intrin.h */
+#define __builtin_ia32_vpshrd_v32hi(A, B, C) __builtin_ia32_vpshrd_v32hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v32hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v16si(A, B, C) __builtin_ia32_vpshrd_v16si(A, B, 1)
+#define __builtin_ia32_vpshrd_v16si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8di(A, B, C) __builtin_ia32_vpshrd_v8di(A, B, 1)
+#define __builtin_ia32_vpshrd_v8di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8di_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v16hi(A, B, C) __builtin_ia32_vpshrd_v16hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v16hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v16hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8si(A, B, C) __builtin_ia32_vpshrd_v8si(A, B, 1)
+#define __builtin_ia32_vpshrd_v8si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v4di(A, B, C) __builtin_ia32_vpshrd_v4di(A, B, 1)
+#define __builtin_ia32_vpshrd_v4di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4di_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v8hi(A, B, C) __builtin_ia32_vpshrd_v8hi(A, B, 1)
+#define __builtin_ia32_vpshrd_v8hi_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v8hi_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v4si(A, B, C) __builtin_ia32_vpshrd_v4si(A, B, 1)
+#define __builtin_ia32_vpshrd_v4si_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v4si_mask(A, B, 1, D, E)
+#define __builtin_ia32_vpshrd_v2di(A, B, C) __builtin_ia32_vpshrd_v2di(A, B, 1)
+#define __builtin_ia32_vpshrd_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshrd_v2di_mask(A, B, 1, D, E)
 #define __builtin_ia32_vpshld_v32hi(A, B, C) __builtin_ia32_vpshld_v32hi(A, B, 1)
 #define __builtin_ia32_vpshld_v32hi_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v32hi_mask(A, B, 1, D, E)
 #define __builtin_ia32_vpshld_v16si(A, B, C) __builtin_ia32_vpshld_v16si(A, B, 1)
-- 
2.5.5
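
A minimal usage sketch of the new intrinsics, not part of the patch; the
file name and input values are invented for illustration.  Per 32-bit lane
the result follows the reference loop in the new avx512f-vpshrdd-2.c test,
r = (a >> imm) | (b << (32 - imm)).  Build with -mavx512vbmi2; running it
needs VBMI2 hardware or a simulator.

/* vpshrd-sketch.c */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512i a = _mm512_set1_epi32 (0x00000008);
  __m512i b = _mm512_set1_epi32 (0x00000001);

  /* Each lane: (0x8 >> 3) | (0x1 << 29) = 0x20000001.  The shift count
     must be a compile-time constant, which is why the header also
     provides the #else macro forms for builds without __OPTIMIZE__.  */
  __m512i r = _mm512_shrdi_epi32 (a, b, 3);

  unsigned int out[16];
  _mm512_storeu_si512 ((void *) out, r);
  printf ("lane 0 = 0x%08x\n", out[0]);
  return 0;
}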



* Re: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-10-24 10:16 [PATCH][i386,AVX] Enable VBMI2 support [5/7] Koval, Julia
@ 2017-11-28 18:09 ` Kirill Yukhin
  0 siblings, 0 replies; 10+ messages in thread
From: Kirill Yukhin @ 2017-11-28 18:09 UTC (permalink / raw)
  To: Koval, Julia; +Cc: GCC Patches

Hello Julia,
On 24 Oct 10:05, Koval, Julia wrote:
> The patch is attached.
> 
> > -----Original Message-----
> > From: Koval, Julia
> > Sent: Tuesday, October 24, 2017 12:01 PM
> > To: GCC Patches <gcc-patches@gcc.gnu.org>
> > Cc: Kirill Yukhin <kirill.yukhin@gmail.com>
> > Subject: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
> > 
> > Hi,
> > This patch enables the VPSHRD instruction.  Documentation for the ISA set
> > and the instruction:
> > https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
> > 
> > Ok for trunk?
Your patch is OK for trunk. I've checked it in.

--
Thanks, K


* RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-12-12 17:06           ` Koval, Julia
@ 2017-12-12 22:18             ` Gerald Pfeifer
  0 siblings, 0 replies; 10+ messages in thread
From: Gerald Pfeifer @ 2017-12-12 22:18 UTC (permalink / raw)
  To: Koval, Julia; +Cc: Kirill Yukhin, gcc-patches

On Tue, 12 Dec 2017, Koval, Julia wrote:
> Here is the patch to update these files with my contributions. Ok for 
> trunk?

Nice, thank you.

The only change I'd suggest is <code>-march=icelake</code> to 
mark up the two options.

(No need to ask for any further approvals.)

Gerald


* RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-12-12 10:34         ` Gerald Pfeifer
@ 2017-12-12 17:06           ` Koval, Julia
  2017-12-12 22:18             ` Gerald Pfeifer
  0 siblings, 1 reply; 10+ messages in thread
From: Koval, Julia @ 2017-12-12 17:06 UTC (permalink / raw)
  To: Gerald Pfeifer; +Cc: Kirill Yukhin, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 836 bytes --]

Here is the patch to update these files with my contributions. Ok for trunk?

Thanks,
Julia

> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Gerald Pfeifer
> Sent: Tuesday, December 12, 2017 11:34 AM
> To: Koval, Julia <julia.koval@intel.com>
> Cc: Kirill Yukhin <kirill.yukhin@gmail.com>; gcc-patches@gcc.gnu.org
> Subject: RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
> 
> On Tue, 12 Dec 2017, Koval, Julia wrote:
> > Looks good. How do I put it there (sorry, noob question)?
> 
> Does https://gcc.gnu.org/about.html help?  If not, let me know
> and I'll work with you (and update those docs on the way).
> 
> Of course, even if things work for you, any suggestions on how
> to improve this little page are very welcome. :)
> 
> Gerald

[-- Attachment #2: patch --]
[-- Type: application/octet-stream, Size: 715 bytes --]

Index: htdocs/gcc-8/changes.html
===================================================================
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/changes.html,v
retrieving revision 1.22
diff -r1.22 changes.html
32a33,34
>   <li>The Cilk+ extensions to the C and C++ languages were removed from
>       GCC.</li>
186a189,194
>   <li>GCC now supports the Intel CPU named Cannonlake through
>     -march=cannonlake. The switch enables the AVX512VBMI, AVX512IFMA and SHA
>     ISA extensions.</li>
>   <li>GCC now supports the Intel CPU named Icelake through -march=icelake.
>     The switch enables the AVX512VNNI, GFNI, VAES, AVX512VBMI2, VPCLMULQDQ,
>     AVX512BITALG, RDPID and AVX512VPOPCNTDQ ISA extensions.</li>
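
A side note, not from the thread: user code normally keys on the compiler's
feature-test macros rather than on a particular -march value.  The macro
names below (__AVX512VBMI2__, __AVX512VL__) follow the usual GCC convention
for ISA options; the helper and its SSE2 fallback are only a sketch.

#include <immintrin.h>

/* (lo >> 3) | (hi << 29) per 32-bit lane; the 128-bit intrinsic needs
   both VBMI2 and VL, so fall back to plain SSE2 shifts otherwise.  */
static inline __m128i
shrd3_epi32 (__m128i lo, __m128i hi)
{
#if defined (__AVX512VBMI2__) && defined (__AVX512VL__)
  return _mm_shrdi_epi32 (lo, hi, 3);
#else
  return _mm_or_si128 (_mm_srli_epi32 (lo, 3), _mm_slli_epi32 (hi, 29));
#endif
}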


* RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-12-12  9:05       ` Koval, Julia
@ 2017-12-12 10:34         ` Gerald Pfeifer
  2017-12-12 17:06           ` Koval, Julia
  0 siblings, 1 reply; 10+ messages in thread
From: Gerald Pfeifer @ 2017-12-12 10:34 UTC (permalink / raw)
  To: Koval, Julia; +Cc: Kirill Yukhin, gcc-patches

On Tue, 12 Dec 2017, Koval, Julia wrote:
> Looks good. How do I put it there (sorry, noob question)?

Does https://gcc.gnu.org/about.html help?  If not, let me know 
and I'll work with you (and update those docs on the way).

Of course, even if things work for you, any suggestions on how
to improve this little page are very welcome. :)

Gerald


* RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-12-09 13:49     ` Gerald Pfeifer
@ 2017-12-12  9:05       ` Koval, Julia
  2017-12-12 10:34         ` Gerald Pfeifer
  0 siblings, 1 reply; 10+ messages in thread
From: Koval, Julia @ 2017-12-12  9:05 UTC (permalink / raw)
  To: Gerald Pfeifer; +Cc: Kirill Yukhin, GCC Patches

Looks good. How do I put it there (sorry, noob question)?

Thanks,
Julia

> -----Original Message-----
> From: Gerald Pfeifer [mailto:gerald@pfeifer.com]
> Sent: Saturday, December 09, 2017 2:49 PM
> To: Koval, Julia <julia.koval@intel.com>
> Cc: Kirill Yukhin <kirill.yukhin@gmail.com>; GCC Patches <gcc-
> patches@gcc.gnu.org>
> Subject: RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
> 
> Hi Julia,
> 
> On Mon, 4 Dec 2017, Koval, Julia wrote:
> > Do you think it is ok to copy-paste it from GCC-6?
> 
> you mean copy, paste, and adjust?  Yes, that should work.
> 
> > GCC now supports the Intel CPU, named Cannonlake through
> > -march=cannonlake. The switch enables the following ISA extensions:
> > AVX512VBMI, AVX512IFMA, SHA.
> > GCC now supports the Intel CPU, named and Icelake through
> > -march=icelake. The switch enables the following ISA extensions:
> > AVX512VNNI, GFNI, VAES, AVX512VBMI2, VPCLMULQDQ, AVX512BITALG,
> RDPID,
> > AVX512VPOPCNTDQ.
> 
> No comma before "named".
> 
> <code>-march=...</code>.
> 
> And perhaps "enables the AVX..., AVX..., and ... ISA extensions"?
> 
> Gerald


* RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-12-04  7:19   ` Koval, Julia
@ 2017-12-09 13:49     ` Gerald Pfeifer
  2017-12-12  9:05       ` Koval, Julia
  0 siblings, 1 reply; 10+ messages in thread
From: Gerald Pfeifer @ 2017-12-09 13:49 UTC (permalink / raw)
  To: Koval, Julia; +Cc: Kirill Yukhin, GCC Patches

Hi Julia,

On Mon, 4 Dec 2017, Koval, Julia wrote:
> Do you think it is ok to copy-paste it from GCC-6?

you mean copy, paste, and adjust?  Yes, that should work.

> GCC now supports the Intel CPU, named Cannonlake through 
> -march=cannonlake. The switch enables the following ISA extensions: 
> AVX512VBMI, AVX512IFMA, SHA.
> GCC now supports the Intel CPU, named and Icelake through 
> -march=icelake. The switch enables the following ISA extensions: 
> AVX512VNNI, GFNI, VAES, AVX512VBMI2, VPCLMULQDQ, AVX512BITALG, RDPID, 
> AVX512VPOPCNTDQ.

No comma before "named".

<code>-march=...</code>.

And perhaps "enables the AVX..., AVX..., and ... ISA extensions"?

Gerald


* RE: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-12-03 17:51 ` Gerald Pfeifer
@ 2017-12-04  7:19   ` Koval, Julia
  2017-12-09 13:49     ` Gerald Pfeifer
  0 siblings, 1 reply; 10+ messages in thread
From: Koval, Julia @ 2017-12-04  7:19 UTC (permalink / raw)
  To: Gerald Pfeifer, Kirill Yukhin; +Cc: GCC Patches

Hi Gerald,
Do you think it is ok to copy-paste it from GCC-6?

GCC now supports the Intel CPU, named Cannonlake through -march=cannonlake. The switch enables the following ISA extensions: AVX512VBMI, AVX512IFMA, SHA.
GCC now supports the Intel CPU, named and Icelake through -march=icelake. The switch enables the following ISA extensions: AVX512VNNI, GFNI, VAES, AVX512VBMI2, VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ.

Thanks,
Julia

> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Gerald Pfeifer
> Sent: Sunday, December 03, 2017 6:51 PM
> To: Koval, Julia <julia.koval@intel.com>; Kirill Yukhin <kirill.yukhin@gmail.com>
> Cc: GCC Patches <gcc-patches@gcc.gnu.org>
> Subject: Re: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
> 
> Hi Julia, hi Kirill,
> 
> On Tue, 24 Oct 2017, Koval, Julia wrote:
> > This patch enables VPSHRD instruction.
> 
> GCC 8 is going to pack in a number of your contributions.  Can you please also think
> how to best document this in http://gcc.gnu.org/gcc-8/changes.html ?
> 
> Let me know if you need any help with the web side of things (beyond
> the brief notes in https://gcc.gnu.org/about.html )!
> 
> Gerald


* Re: [PATCH][i386,AVX] Enable VBMI2 support [5/7]
  2017-10-24 10:05 Koval, Julia
@ 2017-12-03 17:51 ` Gerald Pfeifer
  2017-12-04  7:19   ` Koval, Julia
  0 siblings, 1 reply; 10+ messages in thread
From: Gerald Pfeifer @ 2017-12-03 17:51 UTC (permalink / raw)
  To: Koval, Julia, Kirill Yukhin; +Cc: GCC Patches

Hi Julia, hi Kirill,

On Tue, 24 Oct 2017, Koval, Julia wrote:
> This patch enables VPSHRD instruction. 

GCC 8 is going to pack in a number of your contributions.  Can you please also think
how to best document this in http://gcc.gnu.org/gcc-8/changes.html ?

Let me know if you need any help with the web side of things (beyond
the brief notes in https://gcc.gnu.org/about.html )!

Gerald


* [PATCH][i386,AVX] Enable VBMI2 support [5/7]
@ 2017-10-24 10:05 Koval, Julia
  2017-12-03 17:51 ` Gerald Pfeifer
  0 siblings, 1 reply; 10+ messages in thread
From: Koval, Julia @ 2017-10-24 10:05 UTC (permalink / raw)
  To: GCC Patches; +Cc: Kirill Yukhin

Hi,
This patch enables the VPSHRD instruction.  Documentation for the ISA set and the instruction: https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf

Ok for trunk?

gcc/
	* config/i386/avx512vbmi2intrin.h (_mm512_shrdi_epi16,
	_mm512_mask_shrdi_epi16, _mm512_maskz_shrdi_epi16, _mm512_shrdi_epi32,
	_mm512_mask_shrdi_epi32, _mm512_maskz_shrdi_epi32, _mm512_shrdi_epi64,
	_mm512_mask_shrdi_epi64, _mm512_maskz_shrdi_epi64): New intrinsics.
	* config/i386/avx512vbmi2vlintrin.h (_mm256_shrdi_epi16,
	_mm256_mask_shrdi_epi16, _mm256_maskz_shrdi_epi16,
	_mm256_mask_shrdi_epi32, _mm256_maskz_shrdi_epi32, _mm256_shrdi_epi32,
	_mm256_mask_shrdi_epi64, _mm256_maskz_shrdi_epi64, _mm256_shrdi_epi64,
	_mm_mask_shrdi_epi16, _mm_maskz_shrdi_epi16, _mm_shrdi_epi16,
	_mm_mask_shrdi_epi32, _mm_maskz_shrdi_epi32, _mm_shrdi_epi32,
	_mm_mask_shrdi_epi64, _mm_maskz_shrdi_epi64, _mm_shrdi_epi64): Ditto.
	* config/i386/i386-builtin.def (__builtin_ia32_vpshrd_v32hi,
	__builtin_ia32_vpshrd_v32hi_mask, __builtin_ia32_vpshrd_v16hi,
	__builtin_ia32_vpshrd_v16hi_mask, __builtin_ia32_vpshrd_v8hi,
	__builtin_ia32_vpshrd_v8hi_mask, __builtin_ia32_vpshrd_v16si,
	__builtin_ia32_vpshrd_v16si_mask, __builtin_ia32_vpshrd_v8si,
	__builtin_ia32_vpshrd_v8si_mask, __builtin_ia32_vpshrd_v4si,
	__builtin_ia32_vpshrd_v4si_mask, __builtin_ia32_vpshrd_v8di,
	__builtin_ia32_vpshrd_v8di_mask, __builtin_ia32_vpshrd_v4di,
	__builtin_ia32_vpshrd_v4di_mask, __builtin_ia32_vpshrd_v2di,
	__builtin_ia32_vpshrd_v2di_mask): New builtins.
	* config/i386/sse.md (vpshrd_<mode><mask_name>): New pattern.

gcc/testsuite/
	* gcc.target/i386/avx-1.c: Handle new intrinsics.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/avx512f-vpshrdd-2.c: New.
	* gcc.target/i386/avx512f-vpshrdq-2.c: Ditto.
	* gcc.target/i386/avx512vl-vpshrd-1.c: Ditto.
	* gcc.target/i386/avx512vl-vpshrdd-2.c: Ditto.
	* gcc.target/i386/avx512vl-vpshrdq-2.c: Ditto.
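
A quick illustrative sketch, not part of the patch, of how the _mask_ and
_maskz_ forms above behave; it mirrors the MASK_MERGE/MASK_ZERO checks in
the new runtime tests.  The function name and mask value are invented.

#include <immintrin.h>

__m512i
shrd_mask_demo (__m512i dst, __m512i a, __m512i b)
{
  __mmask16 m = 0x00ff;		/* compute only the low 8 of 16 lanes */

  /* Inactive lanes keep the corresponding lane of dst ...  */
  __m512i merged = _mm512_mask_shrdi_epi32 (dst, m, a, b, 5);
  /* ... or are zeroed.  */
  __m512i zeroed = _mm512_maskz_shrdi_epi32 (m, a, b, 5);

  return _mm512_add_epi32 (merged, zeroed);
}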


