* [patch][i386, AVX] Adding missing CMP* intrinsics
@ 2017-10-20 8:46 Peryt, Sebastian
2017-10-26 18:23 ` Kirill Yukhin
0 siblings, 1 reply; 2+ messages in thread
From: Peryt, Sebastian @ 2017-10-20 8:46 UTC (permalink / raw)
To: gcc-patches; +Cc: Makhotina, Olga, Kirill Yukhin, Peryt, Sebastian
[-- Attachment #1: Type: text/plain, Size: 3383 bytes --]
Hi,
This patch, written by Olga Makhotina, adds the missing intrinsics listed below:
_mm512_[mask_]cmpeq_[pd|ps]_mask
_mm512_[mask_]cmple_[pd|ps]_mask
_mm512_[mask_]cmplt_[pd|ps]_mask
_mm512_[mask_]cmpneq_[pd|ps]_mask
_mm512_[mask_]cmpnle_[pd|ps]_mask
_mm512_[mask_]cmpnlt_[pd|ps]_mask
_mm512_[mask_]cmpord_[pd|ps]_mask
_mm512_[mask_]cmpunord_[pd|ps]_mask
20.10.2017 Olga Makhotina <olga.makhotina@intel.com>
gcc/
* config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask,
_mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
_mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
_mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
_mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
_mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
_mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
_mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
_mm512_mask_cmpunord_pd_mask, _mm512_cmpeq_ps_mask,
_mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
_mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
_mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
_mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
_mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
_mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
_mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
_mm512_mask_cmpunord_ps_mask): New intrinsics.
20.10.2017 Olga Makhotina <olga.makhotina@intel.com>
gcc/testsuite/
* gcc.target/i386/avx512f-vcmpps-1.c (_mm512_cmpeq_ps_mask,
_mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
_mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
_mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
_mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
_mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
_mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
_mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
_mm512_mask_cmpunord_ps_mask): Test new intrinsics.
* gcc.target/i386/avx512f-vcmpps-2.c (_mm512_cmpeq_ps_mask,
_mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
_mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
_mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
_mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
_mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
_mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
_mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
_mm512_mask_cmpunord_ps_mask): Test new intrinsics.
* gcc.target/i386/avx512f-vcmppd-1.c (_mm512_cmpeq_pd_mask,
_mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
_mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
_mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
_mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
_mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
_mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
_mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
_mm512_mask_cmpunord_pd_mask): Test new intrinsics.
* gcc.target/i386/avx512f-vcmppd-2.c (_mm512_cmpeq_pd_mask,
_mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
_mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
_mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
_mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
_mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
_mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
_mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
_mm512_mask_cmpunord_pd_mask): Test new intrinsics.
Is it ok for trunk?
Thanks,
Sebastian
[-- Attachment #2: 0001-vcmpp-d-s.patch --]
[-- Type: application/octet-stream, Size: 22802 bytes --]
From 0658a4fb9022236501598c5919b1df2f4d84cf5f Mon Sep 17 00:00:00 2001
From: Olga Makhotina <olga.makhotina@intel.com>
Date: Wed, 18 Oct 2017 12:43:51 +0200
Subject: [PATCH] vcmpp[d/s]
modified: gcc/config/i386/avx512fintrin.h
modified: gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
modified: gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
modified: gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
modified: gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
---
gcc/config/i386/avx512fintrin.h | 320 +++++++++++++++++++++++
gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c | 29 +-
gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c | 77 +++---
gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c | 28 +-
gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c | 78 +++---
5 files changed, 461 insertions(+), 71 deletions(-)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 72f57f7..5dc5fae 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -14005,6 +14005,326 @@ _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_EQ_OQ,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_EQ_OQ,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LT_OS,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LT_OS,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LE_OS,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_LE_OS,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_UNORD_Q,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_UNORD_Q,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NEQ_UQ,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NEQ_UQ,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLT_US,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLT_US,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLE_US,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_NLE_US,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_ORD_Q,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+ return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+ (__v8df) __Y, _CMP_ORD_Q,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_EQ_OQ,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_EQ_OQ,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LT_OS,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LT_OS,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LE_OS,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_LE_OS,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_UNORD_Q,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_UNORD_Q,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NEQ_UQ,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NEQ_UQ,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLT_US,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLT_US,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLE_US,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_NLE_US,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_ORD_Q,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+ (__v16sf) __Y, _CMP_ORD_Q,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
{
return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
index 4b53e37..d3c30fc 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f" } */
-/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */
+/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */
/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
@@ -17,4 +17,29 @@ avx512f_test (void)
m = _mm512_mask_cmp_pd_mask (m, x, x, _CMP_FALSE_OQ);
m = _mm512_cmp_round_pd_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
m = _mm512_mask_cmp_round_pd_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+
+ m = _mm512_cmpeq_pd_mask (x, x);
+ m = _mm512_mask_cmpeq_pd_mask (m, x, x);
+
+ m = _mm512_cmplt_pd_mask (x, x);
+ m = _mm512_mask_cmplt_pd_mask (m, x, x);
+
+ m = _mm512_cmple_pd_mask (x, x);
+ m = _mm512_mask_cmple_pd_mask (m, x, x);
+
+ m = _mm512_cmpunord_pd_mask (x, x);
+ m = _mm512_mask_cmpunord_pd_mask (m, x, x);
+
+ m = _mm512_cmpneq_pd_mask (x, x);
+ m = _mm512_mask_cmpneq_pd_mask (m, x, x);
+
+ m = _mm512_cmpnlt_pd_mask (x, x);
+ m = _mm512_mask_cmpnlt_pd_mask (m, x, x);
+
+ m = _mm512_cmpnle_pd_mask (x, x);
+ m = _mm512_mask_cmpnle_pd_mask (m, x, x);
+
+ m = _mm512_cmpord_pd_mask (x, x);
+ m = _mm512_mask_cmpord_pd_mask (m, x, x);
}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
index 52e226d..cee1197 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
@@ -11,58 +11,69 @@
#define SIZE (AVX512F_LEN / 64)
#include "avx512f-mask-type.h"
+#undef SUF
+#undef SSIZE
+#undef GEN_CMP
+#undef CHECK_CMP
+
#if AVX512F_LEN == 512
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 8; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm512_loadu_pd(s1); \
- source2.x = _mm512_loadu_pd(s2); \
- dst1 = _mm512_cmp_pd_mask(source1.x, source2.x, imm);\
- dst2 = _mm512_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm512##fun
+#define SSIZE 8
+
+#define GEN_CMP(type) \
+ { \
+ dst3 = _mm512_cmp##type##_pd_mask(source1.x, source2.x);\
+ dst4 = _mm512_mask_cmp##type##_pd_mask(mask, source1.x, source2.x);\
+ if (dst3 != dst1) abort(); \
+ if (dst4 != dst2) abort(); \
+ }
+
+#define CHECK_CMP(imm) \
+ if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \
+ if (imm == _CMP_LT_OS) GEN_CMP(lt) \
+ if (imm == _CMP_LE_OS) GEN_CMP(le) \
+ if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \
+ if (imm == _CMP_NEQ_UQ) GEN_CMP(neq) \
+ if (imm == _CMP_NLT_US) GEN_CMP(nlt) \
+ if (imm == _CMP_NLE_US) GEN_CMP(nle) \
+ if (imm == _CMP_ORD_Q) GEN_CMP(ord)
+
#endif
#if AVX512F_LEN == 256
-#undef CMP
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 4; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm256_loadu_pd(s1); \
- source2.x = _mm256_loadu_pd(s2); \
- dst1 = _mm256_cmp_pd_mask(source1.x, source2.x, imm);\
- dst2 = _mm256_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm256##fun
+#define SSIZE 4
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
#endif
#if AVX512F_LEN == 128
+#define SUF(fun) _mm##fun
+#define SSIZE 2
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
+#endif
+
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 2; i++) \
+ for (i = 0; i < SSIZE; i++) \
{ \
dst_ref = (((int) rel) << i) | dst_ref; \
} \
- source1.x = _mm_loadu_pd(s1); \
- source2.x = _mm_loadu_pd(s2); \
- dst1 = _mm_cmp_pd_mask(source1.x, source2.x, imm);\
- dst2 = _mm_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
+ source1.x = SUF(_loadu_pd)(s1); \
+ source2.x = SUF(_loadu_pd)(s2); \
+ dst1 = SUF(_cmp_pd_mask)(source1.x, source2.x, imm);\
+ dst2 = SUF(_mask_cmp_pd_mask)(mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
-#endif
+ if ((dst_ref & mask) != dst2) abort(); \
+ CHECK_CMP(imm)
void
TEST ()
{
UNION_TYPE (AVX512F_LEN, d) source1, source2;
- MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE dst1, dst2, dst3, dst4, dst_ref;
MASK_TYPE mask = MASK_VALUE;
int i;
double s1[8]={2134.3343, 6678.346, 453.345635, 54646.464,
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
index 9812915..27be360 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f" } */
-/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */
+/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */
/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
@@ -17,4 +17,28 @@ avx512f_test (void)
m = _mm512_mask_cmp_ps_mask (m, x, x, _CMP_FALSE_OQ);
m = _mm512_cmp_round_ps_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
m = _mm512_mask_cmp_round_ps_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+
+ m = _mm512_cmpeq_ps_mask (x, x);
+ m = _mm512_mask_cmpeq_ps_mask (m, x, x);
+
+ m = _mm512_cmplt_ps_mask (x, x);
+ m = _mm512_mask_cmplt_ps_mask (m, x, x);
+
+ m = _mm512_cmple_ps_mask (x, x);
+ m = _mm512_mask_cmple_ps_mask (m, x, x);
+
+ m = _mm512_cmpunord_ps_mask (x, x);
+ m = _mm512_mask_cmpunord_ps_mask (m, x, x);
+
+ m = _mm512_cmpneq_ps_mask (x, x);
+ m = _mm512_mask_cmpneq_ps_mask (m, x, x);
+
+ m = _mm512_cmpnlt_ps_mask (x, x);
+ m = _mm512_mask_cmpnlt_ps_mask (m, x, x);
+
+ m = _mm512_cmpnle_ps_mask (x, x);
+ m = _mm512_mask_cmpnle_ps_mask (m, x, x);
+
+ m = _mm512_cmpord_ps_mask (x, x);
+ m = _mm512_mask_cmpord_ps_mask (m, x, x);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
index 2ffa2ed..22e368f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
@@ -11,59 +11,69 @@
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
+#undef SUF
+#undef SSIZE
+#undef GEN_CMP
+#undef CHECK_CMP
+
#if AVX512F_LEN == 512
-#undef CMP
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 16; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm512_loadu_ps(s1); \
- source2.x = _mm512_loadu_ps(s2); \
- dst1 = _mm512_cmp_ps_mask(source1.x, source2.x, imm);\
- dst2 = _mm512_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm512##fun
+#define SSIZE 16
+
+#define GEN_CMP(type) \
+ { \
+ dst3 = _mm512_cmp##type##_ps_mask(source1.x, source2.x);\
+ dst4 = _mm512_mask_cmp##type##_ps_mask(mask, source1.x, source2.x);\
+ if (dst3 != dst1) abort(); \
+ if (dst4 != dst2) abort(); \
+ }
+
+#define CHECK_CMP(imm) \
+ if (imm == _CMP_EQ_OQ) GEN_CMP(eq) \
+ if (imm == _CMP_LT_OS) GEN_CMP(lt) \
+ if (imm == _CMP_LE_OS) GEN_CMP(le) \
+ if (imm == _CMP_UNORD_Q) GEN_CMP(unord) \
+ if (imm == _CMP_NEQ_UQ) GEN_CMP(neq) \
+ if (imm == _CMP_NLT_US) GEN_CMP(nlt) \
+ if (imm == _CMP_NLE_US) GEN_CMP(nle) \
+ if (imm == _CMP_ORD_Q) GEN_CMP(ord)
+
#endif
#if AVX512F_LEN == 256
-#undef CMP
-#define CMP(imm, rel) \
- dst_ref = 0; \
- for (i = 0; i < 8; i++) \
- { \
- dst_ref = (((int) rel) << i) | dst_ref; \
- } \
- source1.x = _mm256_loadu_ps(s1); \
- source2.x = _mm256_loadu_ps(s2); \
- dst1 = _mm256_cmp_ps_mask(source1.x, source2.x, imm);\
- dst2 = _mm256_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
- if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm256##fun
+#define SSIZE 8
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
#endif
#if AVX512F_LEN == 128
+#define SUF(fun) _mm##fun
+#define SSIZE 4
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
+#endif
+
#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
- for (i = 0; i < 4; i++) \
+ for (i = 0; i < SSIZE; i++) \
{ \
dst_ref = (((int) rel) << i) | dst_ref; \
} \
- source1.x = _mm_loadu_ps(s1); \
- source2.x = _mm_loadu_ps(s2); \
- dst1 = _mm_cmp_ps_mask(source1.x, source2.x, imm);\
- dst2 = _mm_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
+ source1.x = SUF(_loadu_ps)(s1); \
+ source2.x = SUF(_loadu_ps)(s2); \
+ dst1 = SUF(_cmp_ps_mask)(source1.x, source2.x, imm);\
+ dst2 = SUF(_mask_cmp_ps_mask)(mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
- if ((dst_ref & mask) != dst2) abort();
-#endif
+ if ((dst_ref & mask) != dst2) abort(); \
+ CHECK_CMP(imm)
void
TEST ()
{
UNION_TYPE (AVX512F_LEN,) source1, source2;
- MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE dst1, dst2, dst3, dst4, dst_ref;
MASK_TYPE mask = MASK_VALUE;
int i;
float s1[16] = {2134.3343, 6678.346, 453.345635, 54646.464,
--
1.8.3.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [patch][i386, AVX] Adding missing CMP* intrinsics
2017-10-20 8:46 [patch][i386, AVX] Adding missing CMP* intrinsics Peryt, Sebastian
@ 2017-10-26 18:23 ` Kirill Yukhin
0 siblings, 0 replies; 2+ messages in thread
From: Kirill Yukhin @ 2017-10-26 18:23 UTC (permalink / raw)
To: Peryt, Sebastian; +Cc: gcc-patches, Makhotina, Olga
Hello Olga, Sebastian,
On 20 Oct 08:36, Peryt, Sebastian wrote:
> Hi,
>
> This patch written by Olga Makhotina adds listed below missing intrinsics:
> _mm512_[mask_]cmpeq_[pd|ps]_mask
> _mm512_[mask_]cmple_[pd|ps]_mask
> _mm512_[mask_]cmplt_[pd|ps]_mask
> _mm512_[mask_]cmpneq_[pd|ps]_mask
> _mm512_[mask_]cmpnle_[pd|ps]_mask
> _mm512_[mask_]cmpnlt_[pd|ps]_mask
> _mm512_[mask_]cmpord_[pd|ps]_mask
> _mm512_[mask_]cmpunord_[pd|ps]_mask
>
> Is it ok for trunk?
Your patch is OK for trunk. I've checked it in.
--
Thanks, K
> Thanks,
> Sebastian
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-10-26 18:20 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-20 8:46 [patch][i386, AVX] Adding missing CMP* intrinsics Peryt, Sebastian
2017-10-26 18:23 ` Kirill Yukhin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).