* [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics
@ 2017-06-28 10:01 Peryt, Sebastian
2017-06-28 21:00 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: Peryt, Sebastian @ 2017-06-28 10:01 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak
[-- Attachment #1: Type: text/plain, Size: 745 bytes --]
Hi,
This patch adds missing intrinsics:
- _mm256_permutexvar_epi32
- _mm256_permutex_epi64
- _mm256_permutexvar_epi64
gcc/
* config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64, _mm256_permutexvar_epi32,
_mm256_permutex_epi64): New intrinsics.
gcc/tesuite/
* gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32): Test new intrinsic.
* gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64): Ditto.
* gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64): Ditto.
*gcc.target/i386/avx512f-vpermd-2.c: Removed define length constraint.
* gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
* gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
Is it ok for trunk?
Thanks,
Sebastian
[-- Attachment #2: permutex.patch --]
[-- Type: application/octet-stream, Size: 7775 bytes --]
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index f62f641..0555051 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -9099,6 +9099,17 @@ _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y)
{
@@ -9163,6 +9174,17 @@ _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y)
{
@@ -9751,6 +9773,17 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex_epi64 (__m256i __X, const int __I)
+{
+ return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
+ __I,
+ (__v4di)
+ _mm256_setzero_si256(),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
__m256i __X, const int __I)
{
@@ -12367,6 +12400,13 @@ _mm256_permutex_pd (__m256d __X, const int __M)
_mm256_undefined_pd (), \
(__mmask8)-1))
+#define _mm256_permutex_epi64(X, I) \
+ ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
+ (int)(I), \
+ (__v4di)(__m256i) \
+ (_mm256_setzero_si256 ()),\
+ (__mmask8) -1))
+
#define _mm256_maskz_permutex_epi64(M, X, I) \
((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
(int)(I), \
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
index dbd4544..b36a9c2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
@@ -41,18 +41,14 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
-#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutexvar_epi32) (src1.x, src2.x);
-#endif
res2.x = INTRINSIC (_maskz_permutexvar_epi32) (mask, src1.x, src2.x);
res3.x = INTRINSIC (_mask_permutexvar_epi32) (res3.x, mask, src1.x, src2.x);
CALC (src1.a, src2.a, res_ref);
-#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
-#endif
MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
index 770d562..dd88cd4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
@@ -40,18 +40,14 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
-#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutex_epi64) (src1.x, IMM_MASK);
-#endif
res2.x = INTRINSIC (_maskz_permutex_epi64) (mask, src1.x, IMM_MASK);
res3.x = INTRINSIC (_mask_permutex_epi64) (res3.x, mask, src1.x, IMM_MASK);
CALC (src1.a, IMM_MASK, res_ref);
-#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
-#endif
MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
index c596b1d..6c22288 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
@@ -41,18 +41,14 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
-#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutexvar_epi64) (src1.x, src2.x);
-#endif
res2.x = INTRINSIC (_maskz_permutexvar_epi64) (mask, src1.x, src2.x);
res3.x = INTRINSIC (_mask_permutexvar_epi64) (res3.x, mask, src1.x, src2.x);
CALC (src1.a, src2.a, res_ref);
-#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
-#endif
MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
index fa1aaa3..069bb5d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
@@ -11,6 +12,7 @@ volatile __mmask8 m;
void extern
avx512vl_test (void)
{
+ x = _mm256_permutexvar_epi32 (x, x);
x = _mm256_maskz_permutexvar_epi32 (m, x, x);
x = _mm256_mask_permutexvar_epi32 (x, m, x, x);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
index c74c8ce..2340a6d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
@@ -11,6 +12,7 @@ volatile __mmask8 m;
void extern
avx512vl_test (void)
{
+ x = _mm256_permutex_epi64 (x, 13);
x = _mm256_mask_permutex_epi64 (x, m, x, 13);
x = _mm256_maskz_permutex_epi64 (m, x, 13);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
index 43ccad3..69185e5 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
@@ -11,6 +12,7 @@ volatile __mmask8 m;
void extern
avx512vl_test (void)
{
+ x = _mm256_permutexvar_epi64 (x, x);
x = _mm256_maskz_permutexvar_epi64 (m, x, x);
x = _mm256_mask_permutexvar_epi64 (x, m, x, x);
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics
2017-06-28 10:01 [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics Peryt, Sebastian
@ 2017-06-28 21:00 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2017-06-28 21:00 UTC (permalink / raw)
To: Peryt, Sebastian; +Cc: gcc-patches
On Wed, Jun 28, 2017 at 12:01 PM, Peryt, Sebastian
<sebastian.peryt@intel.com> wrote:
> Hi,
>
> This patch adds missing intrinsics:
> - _mm256_permutexvar_epi32
> - _mm256_permutex_epi64
> - _mm256_permutexvar_epi64
>
> gcc/
> * config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64, _mm256_permutexvar_epi32,
> _mm256_permutex_epi64): New intrinsics.
>
> gcc/tesuite/
> * gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32): Test new intrinsic.
> * gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64): Ditto.
> * gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64): Ditto.
> *gcc.target/i386/avx512f-vpermd-2.c: Removed define length constraint.
> * gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
> * gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
>
> Is it ok for trunk?
Approved and committed to mainline SVN.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-06-28 21:00 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-28 10:01 [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics Peryt, Sebastian
2017-06-28 21:00 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).