public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics
@ 2017-06-28 10:01 Peryt, Sebastian
  2017-06-28 21:00 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Peryt, Sebastian @ 2017-06-28 10:01 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

[-- Attachment #1: Type: text/plain, Size: 745 bytes --]

Hi,

This patch adds missing intrinsics:
	- _mm256_permutexvar_epi32
	- _mm256_permutex_epi64
	- _mm256_permutexvar_epi64

gcc/
	* config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64, _mm256_permutexvar_epi32,
	_mm256_permutex_epi64): New intrinsics.
	
gcc/testsuite/
	* gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32): Test new intrinsic.
	* gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64): Ditto.
	* gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64): Ditto.
	* gcc.target/i386/avx512f-vpermd-2.c: Removed define length constraint.
	* gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
	* gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.

Is it ok for trunk?

Thanks,
Sebastian

[-- Attachment #2: permutex.patch --]
[-- Type: application/octet-stream, Size: 7775 bytes --]

diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index f62f641..0555051 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -9099,6 +9099,17 @@ _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
+{
+  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+						     (__v4di) __X,
+						     (__v4di)
+						     _mm256_setzero_si256 (),
+						     (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
 			       __m256i __Y)
 {
@@ -9163,6 +9174,17 @@ _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
+{
+  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+						     (__v8si) __X,
+						     (__v8si)
+						     _mm256_setzero_si256 (),
+						     (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
 			       __m256i __Y)
 {
@@ -9751,6 +9773,17 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #ifdef __OPTIMIZE__
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex_epi64 (__m256i __X, const int __I)
+{
+  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
+					      __I,
+					      (__v4di)
+					      _mm256_setzero_si256(),
+					      (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
 			    __m256i __X, const int __I)
 {
@@ -12367,6 +12400,13 @@ _mm256_permutex_pd (__m256d __X, const int __M)
 					    _mm256_undefined_pd (),		\
 					    (__mmask8)-1))
 
+#define _mm256_permutex_epi64(X, I)               \
+  ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
+					    (int)(I),		\
+					    (__v4di)(__m256i)	\
+					    (_mm256_setzero_si256 ()),\
+					    (__mmask8) -1))
+
 #define _mm256_maskz_permutex_epi64(M, X, I)                    \
   ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X),    \
 					    (int)(I),                \
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
index dbd4544..b36a9c2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
@@ -41,18 +41,14 @@ TEST (void)
       res3.a[i] = DEFAULT_VALUE;
     }
 
-#if AVX512F_LEN == 512
   res1.x = INTRINSIC (_permutexvar_epi32) (src1.x, src2.x);
-#endif
   res2.x = INTRINSIC (_maskz_permutexvar_epi32) (mask, src1.x, src2.x);
   res3.x = INTRINSIC (_mask_permutexvar_epi32) (res3.x, mask, src1.x, src2.x);
 
   CALC (src1.a, src2.a, res_ref);
 
-#if AVX512F_LEN == 512
   if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
     abort ();
-#endif
 
   MASK_ZERO (i_d) (res_ref, mask, SIZE);
   if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
index 770d562..dd88cd4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
@@ -40,18 +40,14 @@ TEST (void)
       res3.a[i] = DEFAULT_VALUE;
     }
 
-#if AVX512F_LEN == 512
   res1.x = INTRINSIC (_permutex_epi64) (src1.x, IMM_MASK);
-#endif
   res2.x = INTRINSIC (_maskz_permutex_epi64) (mask, src1.x, IMM_MASK);
   res3.x = INTRINSIC (_mask_permutex_epi64) (res3.x, mask, src1.x, IMM_MASK);
 
   CALC (src1.a, IMM_MASK, res_ref);
 
-#if AVX512F_LEN == 512
   if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
     abort ();
-#endif
 
   MASK_ZERO (i_q) (res_ref, mask, SIZE);
   if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
index c596b1d..6c22288 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
@@ -41,18 +41,14 @@ TEST (void)
       res3.a[i] = DEFAULT_VALUE;
     }
 
-#if AVX512F_LEN == 512
   res1.x = INTRINSIC (_permutexvar_epi64) (src1.x, src2.x);
-#endif
   res2.x = INTRINSIC (_maskz_permutexvar_epi64) (mask, src1.x, src2.x);
   res3.x = INTRINSIC (_mask_permutexvar_epi64) (res3.x, mask, src1.x, src2.x);
 
   CALC (src1.a, src2.a, res_ref);
 
-#if AVX512F_LEN == 512
   if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
     abort ();
-#endif
 
   MASK_ZERO (i_q) (res_ref, mask, SIZE);
   if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
index fa1aaa3..069bb5d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 
@@ -11,6 +12,7 @@ volatile __mmask8 m;
 void extern
 avx512vl_test (void)
 {
+  x = _mm256_permutexvar_epi32 (x, x);
   x = _mm256_maskz_permutexvar_epi32 (m, x, x);
   x = _mm256_mask_permutexvar_epi32 (x, m, x, x);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
index c74c8ce..2340a6d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 
@@ -11,6 +12,7 @@ volatile __mmask8 m;
 void extern
 avx512vl_test (void)
 {
+  x = _mm256_permutex_epi64 (x, 13);
   x = _mm256_mask_permutex_epi64 (x, m, x, 13);
   x = _mm256_maskz_permutex_epi64 (m, x, 13);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
index 43ccad3..69185e5 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 
@@ -11,6 +12,7 @@ volatile __mmask8 m;
 void extern
 avx512vl_test (void)
 {
+  x = _mm256_permutexvar_epi64 (x, x);
   x = _mm256_maskz_permutexvar_epi64 (m, x, x);
   x = _mm256_mask_permutexvar_epi64 (x, m, x, x);
 }

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics
  2017-06-28 10:01 [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics Peryt, Sebastian
@ 2017-06-28 21:00 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2017-06-28 21:00 UTC (permalink / raw)
  To: Peryt, Sebastian; +Cc: gcc-patches

On Wed, Jun 28, 2017 at 12:01 PM, Peryt, Sebastian
<sebastian.peryt@intel.com> wrote:
> Hi,
>
> This patch adds missing intrinsics:
>         - _mm256_permutexvar_epi32
>         - _mm256_permutex_epi64
>         - _mm256_permutexvar_epi64
>
> gcc/
>         * config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64, _mm256_permutexvar_epi32,
>         _mm256_permutex_epi64): New intrinsics.
>
> gcc/testsuite/
>         * gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32): Test new intrinsic.
>         * gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64): Ditto.
>         * gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64): Ditto.
>         * gcc.target/i386/avx512f-vpermd-2.c: Removed define length constraint.
>         * gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
>         * gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
>
> Is it ok for trunk?

Approved and committed to mainline SVN.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-06-28 21:00 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-28 10:01 [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics Peryt, Sebastian
2017-06-28 21:00 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).