public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch][i386, AVX] Adding missing CMP* intrinsics
@ 2017-10-20  8:46 Peryt, Sebastian
  2017-10-26 18:23 ` Kirill Yukhin
  0 siblings, 1 reply; 2+ messages in thread
From: Peryt, Sebastian @ 2017-10-20  8:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: Makhotina, Olga, Kirill Yukhin, Peryt, Sebastian

[-- Attachment #1: Type: text/plain, Size: 3383 bytes --]

Hi,

This patch, written by Olga Makhotina, adds the missing intrinsics listed below:
_mm512_[mask_]cmpeq_[pd|ps]_mask
_mm512_[mask_]cmple_[pd|ps]_mask
_mm512_[mask_]cmplt_[pd|ps]_mask
_mm512_[mask_]cmpneq_[pd|ps]_mask
_mm512_[mask_]cmpnle_[pd|ps]_mask
_mm512_[mask_]cmpnlt_[pd|ps]_mask
_mm512_[mask_]cmpord_[pd|ps]_mask
_mm512_[mask_]cmpunord_[pd|ps]_mask

2017-10-20  Olga Makhotina  <olga.makhotina@intel.com>

gcc/
	* config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask,
	_mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
	_mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
	_mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
	_mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
	_mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
	_mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
	_mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
	_mm512_mask_cmpunord_pd_mask, _mm512_cmpeq_ps_mask,
	_mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
	_mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
	_mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
	_mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
	_mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
	_mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
	_mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
	_mm512_mask_cmpunord_ps_mask): New intrinsics.

2017-10-20  Olga Makhotina  <olga.makhotina@intel.com>

gcc/testsuite/
	* gcc.target/i386/avx512f-vcmpps-1.c (_mm512_cmpeq_ps_mask,
	_mm512_cmple_ps_mask, _mm512_cmplt_ps_mask,
	_mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
	_mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
	_mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
	_mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
	_mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
	_mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
	_mm512_mask_cmpunord_ps_mask): Test new intrinsics.
	* gcc.target/i386/avx512f-vcmpps-2.c (_mm512_cmpeq_ps_mask,
	_mm512_cmple_ps_mask, _mm512_cmplt_ps_mask, 
	_mm512_cmpneq_ps_mask, _mm512_cmpnle_ps_mask,
	_mm512_cmpnlt_ps_mask, _mm512_cmpord_ps_mask,
	_mm512_cmpunord_ps_mask, _mm512_mask_cmpeq_ps_mask,
	_mm512_mask_cmple_ps_mask, _mm512_mask_cmplt_ps_mask,
	_mm512_mask_cmpneq_ps_mask, _mm512_mask_cmpnle_ps_mask,
	_mm512_mask_cmpnlt_ps_mask, _mm512_mask_cmpord_ps_mask,
	_mm512_mask_cmpunord_ps_mask): Test new intrinsics.
	* gcc.target/i386/avx512f-vcmppd-1.c (_mm512_cmpeq_pd_mask,
	_mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
	_mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
	_mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
	_mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
	_mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
	_mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
	_mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
	_mm512_mask_cmpunord_pd_mask): Test new intrinsics.
	* gcc.target/i386/avx512f-vcmppd-2.c (_mm512_cmpeq_pd_mask,
	_mm512_cmple_pd_mask, _mm512_cmplt_pd_mask,
	_mm512_cmpneq_pd_mask, _mm512_cmpnle_pd_mask,
	_mm512_cmpnlt_pd_mask, _mm512_cmpord_pd_mask,
	_mm512_cmpunord_pd_mask, _mm512_mask_cmpeq_pd_mask,
	_mm512_mask_cmple_pd_mask, _mm512_mask_cmplt_pd_mask,
	_mm512_mask_cmpneq_pd_mask, _mm512_mask_cmpnle_pd_mask,
	_mm512_mask_cmpnlt_pd_mask, _mm512_mask_cmpord_pd_mask,
	_mm512_mask_cmpunord_pd_mask): Test new intrinsics.

Is it ok for trunk?
 
Thanks,
Sebastian


[-- Attachment #2: 0001-vcmpp-d-s.patch --]
[-- Type: application/octet-stream, Size: 22802 bytes --]

From 0658a4fb9022236501598c5919b1df2f4d84cf5f Mon Sep 17 00:00:00 2001
From: Olga Makhotina <olga.makhotina@intel.com>
Date: Wed, 18 Oct 2017 12:43:51 +0200
Subject: [PATCH] vcmpp[d/s]

	modified:   gcc/config/i386/avx512fintrin.h
	modified:   gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
	modified:   gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
	modified:   gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
	modified:   gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
---
 gcc/config/i386/avx512fintrin.h                  | 320 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c |  29 +-
 gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c |  77 +++---
 gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c |  28 +-
 gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c |  78 +++---
 5 files changed, 461 insertions(+), 71 deletions(-)

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 72f57f7..5dc5fae 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -14005,6 +14005,326 @@ _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
 
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_EQ_OQ,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_EQ_OQ,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_LT_OS,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_LT_OS,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_LE_OS,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_LE_OS,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_UNORD_Q,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_UNORD_Q,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_NEQ_UQ,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_NEQ_UQ,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_NLT_US,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_NLT_US,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_NLE_US,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_NLE_US,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_ORD_Q,
+						  (__mmask8) -1,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+						  (__v8df) __Y, _CMP_ORD_Q,
+						  (__mmask8) __U,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_EQ_OQ,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_EQ_OQ,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_LT_OS,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_LT_OS,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_LE_OS,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_LE_OS,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_UNORD_Q,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_UNORD_Q,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_NEQ_UQ,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_NEQ_UQ,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_NLT_US,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_NLT_US,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_NLE_US,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_NLE_US,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_ORD_Q,
+						   (__mmask16) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
+{
+   return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
+						   (__v16sf) __Y, _CMP_ORD_Q,
+						   (__mmask16) __U,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
 {
   return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
index 4b53e37..d3c30fc 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512f" } */
-/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */
+/* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */
 /* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vcmppd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 
@@ -17,4 +17,29 @@ avx512f_test (void)
   m = _mm512_mask_cmp_pd_mask (m, x, x, _CMP_FALSE_OQ);
   m = _mm512_cmp_round_pd_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
   m = _mm512_mask_cmp_round_pd_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+
+  m = _mm512_cmpeq_pd_mask (x, x);
+  m = _mm512_mask_cmpeq_pd_mask (m, x, x);
+
+  m = _mm512_cmplt_pd_mask (x, x);
+  m = _mm512_mask_cmplt_pd_mask (m, x, x);
+
+  m = _mm512_cmple_pd_mask (x, x);
+  m = _mm512_mask_cmple_pd_mask (m, x, x);
+
+  m = _mm512_cmpunord_pd_mask (x, x);
+  m = _mm512_mask_cmpunord_pd_mask (m, x, x);
+
+  m = _mm512_cmpneq_pd_mask (x, x);
+  m = _mm512_mask_cmpneq_pd_mask (m, x, x);
+
+  m = _mm512_cmpnlt_pd_mask (x, x);
+  m = _mm512_mask_cmpnlt_pd_mask (m, x, x);
+
+  m = _mm512_cmpnle_pd_mask (x, x);
+  m = _mm512_mask_cmpnle_pd_mask (m, x, x);
+
+  m = _mm512_cmpord_pd_mask (x, x);
+  m = _mm512_mask_cmpord_pd_mask (m, x, x);
 }
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
index 52e226d..cee1197 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
@@ -11,58 +11,69 @@
 #define SIZE (AVX512F_LEN / 64)
 #include "avx512f-mask-type.h"
 
+#undef SUF
+#undef SSIZE
+#undef GEN_CMP
+#undef CHECK_CMP
+
 #if AVX512F_LEN == 512
-#define CMP(imm, rel)					\
-    dst_ref = 0;					\
-    for (i = 0; i < 8; i++)				\
-    {							\
-      dst_ref = (((int) rel) << i) | dst_ref;		\
-    }							\
-    source1.x = _mm512_loadu_pd(s1);			\
-    source2.x = _mm512_loadu_pd(s2);			\
-    dst1 = _mm512_cmp_pd_mask(source1.x, source2.x, imm);\
-    dst2 = _mm512_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
-    if (dst_ref != dst1) abort();			\
-    if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm512##fun
+#define SSIZE 8
+
+#define GEN_CMP(type)				\
+    {						\
+    dst3 = _mm512_cmp##type##_pd_mask(source1.x, source2.x);\
+    dst4 = _mm512_mask_cmp##type##_pd_mask(mask, source1.x, source2.x);\
+    if (dst3 != dst1) abort();			\
+    if (dst4 != dst2) abort();			\
+    }
+
+#define CHECK_CMP(imm)				\
+    if (imm == _CMP_EQ_OQ) GEN_CMP(eq)		\
+    if (imm == _CMP_LT_OS) GEN_CMP(lt)		\
+    if (imm == _CMP_LE_OS) GEN_CMP(le)		\
+    if (imm == _CMP_UNORD_Q) GEN_CMP(unord)	\
+    if (imm == _CMP_NEQ_UQ) GEN_CMP(neq)	\
+    if (imm == _CMP_NLT_US) GEN_CMP(nlt)	\
+    if (imm == _CMP_NLE_US) GEN_CMP(nle)	\
+    if (imm == _CMP_ORD_Q) GEN_CMP(ord)	
+
 #endif
 
 #if AVX512F_LEN == 256
-#undef CMP
-#define CMP(imm, rel)					\
-    dst_ref = 0;					\
-    for (i = 0; i < 4; i++)				\
-    {							\
-      dst_ref = (((int) rel) << i) | dst_ref;		\
-    }							\
-    source1.x = _mm256_loadu_pd(s1);			\
-    source2.x = _mm256_loadu_pd(s2);			\
-    dst1 = _mm256_cmp_pd_mask(source1.x, source2.x, imm);\
-    dst2 = _mm256_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
-    if (dst_ref != dst1) abort();			\
-    if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm256##fun
+#define SSIZE 4
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
 #endif
 
 #if AVX512F_LEN == 128
+#define SUF(fun) _mm##fun
+#define SSIZE 2
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
+#endif
+
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 2; i++)				\
+    for (i = 0; i < SSIZE; i++)				\
     {							\
       dst_ref = (((int) rel) << i) | dst_ref;		\
     }							\
-    source1.x = _mm_loadu_pd(s1);			\
-    source2.x = _mm_loadu_pd(s2);			\
-    dst1 = _mm_cmp_pd_mask(source1.x, source2.x, imm);\
-    dst2 = _mm_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
+    source1.x = SUF(_loadu_pd)(s1);			\
+    source2.x = SUF(_loadu_pd)(s2);			\
+    dst1 = SUF(_cmp_pd_mask)(source1.x, source2.x, imm);\
+    dst2 = SUF(_mask_cmp_pd_mask)(mask, source1.x, source2.x, imm);\
     if (dst_ref != dst1) abort();			\
-    if ((dst_ref & mask) != dst2) abort();
-#endif
+    if ((dst_ref & mask) != dst2) abort();		\
+    CHECK_CMP(imm)
 
 void
 TEST ()
 {
     UNION_TYPE (AVX512F_LEN, d) source1, source2;
-    MASK_TYPE dst1, dst2, dst_ref;
+    MASK_TYPE dst1, dst2, dst3, dst4, dst_ref;
     MASK_TYPE mask = MASK_VALUE;
     int i;
     double s1[8]={2134.3343, 6678.346, 453.345635, 54646.464,
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
index 9812915..27be360 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512f" } */
-/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 9 } } */
+/* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\[^\}\]%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 9 } } */
 /* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vcmpps\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[1-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 
@@ -17,4 +17,28 @@ avx512f_test (void)
   m = _mm512_mask_cmp_ps_mask (m, x, x, _CMP_FALSE_OQ);
   m = _mm512_cmp_round_ps_mask (x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
   m = _mm512_mask_cmp_round_ps_mask (m, x, x, _CMP_FALSE_OQ, _MM_FROUND_NO_EXC);
+
+  m = _mm512_cmpeq_ps_mask (x, x);
+  m = _mm512_mask_cmpeq_ps_mask (m, x, x);
+
+  m = _mm512_cmplt_ps_mask (x, x);
+  m = _mm512_mask_cmplt_ps_mask (m, x, x);
+
+  m = _mm512_cmple_ps_mask (x, x);
+  m = _mm512_mask_cmple_ps_mask (m, x, x);
+
+  m = _mm512_cmpunord_ps_mask (x, x);
+  m = _mm512_mask_cmpunord_ps_mask (m, x, x);
+
+  m = _mm512_cmpneq_ps_mask (x, x);
+  m = _mm512_mask_cmpneq_ps_mask (m, x, x);
+
+  m = _mm512_cmpnlt_ps_mask (x, x);
+  m = _mm512_mask_cmpnlt_ps_mask (m, x, x);
+
+  m = _mm512_cmpnle_ps_mask (x, x);
+  m = _mm512_mask_cmpnle_ps_mask (m, x, x);
+
+  m = _mm512_cmpord_ps_mask (x, x);
+  m = _mm512_mask_cmpord_ps_mask (m, x, x);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
index 2ffa2ed..22e368f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
@@ -11,59 +11,69 @@
 #define SIZE (AVX512F_LEN / 32)
 #include "avx512f-mask-type.h"
 
+#undef SUF
+#undef SSIZE
+#undef GEN_CMP
+#undef CHECK_CMP
+
 #if AVX512F_LEN == 512
-#undef CMP
-#define CMP(imm, rel)					\
-    dst_ref = 0;					\
-    for (i = 0; i < 16; i++)				\
-    {							\
-      dst_ref = (((int) rel) << i) | dst_ref;		\
-    }							\
-    source1.x = _mm512_loadu_ps(s1);			\
-    source2.x = _mm512_loadu_ps(s2);			\
-    dst1 = _mm512_cmp_ps_mask(source1.x, source2.x, imm);\
-    dst2 = _mm512_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
-    if (dst_ref != dst1) abort();			\
-    if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm512##fun
+#define SSIZE 16
+
+#define GEN_CMP(type)				\
+    {						\
+    dst3 = _mm512_cmp##type##_ps_mask(source1.x, source2.x);\
+    dst4 = _mm512_mask_cmp##type##_ps_mask(mask, source1.x, source2.x);\
+    if (dst3 != dst1) abort();			\
+    if (dst4 != dst2) abort();			\
+    }
+
+#define CHECK_CMP(imm)				\
+    if (imm == _CMP_EQ_OQ) GEN_CMP(eq)		\
+    if (imm == _CMP_LT_OS) GEN_CMP(lt)		\
+    if (imm == _CMP_LE_OS) GEN_CMP(le)		\
+    if (imm == _CMP_UNORD_Q) GEN_CMP(unord)	\
+    if (imm == _CMP_NEQ_UQ) GEN_CMP(neq)	\
+    if (imm == _CMP_NLT_US) GEN_CMP(nlt)	\
+    if (imm == _CMP_NLE_US) GEN_CMP(nle)	\
+    if (imm == _CMP_ORD_Q) GEN_CMP(ord)	
+
 #endif
 
 #if AVX512F_LEN == 256
-#undef CMP
-#define CMP(imm, rel)					\
-    dst_ref = 0;					\
-    for (i = 0; i < 8; i++)				\
-    {							\
-      dst_ref = (((int) rel) << i) | dst_ref;		\
-    }							\
-    source1.x = _mm256_loadu_ps(s1);			\
-    source2.x = _mm256_loadu_ps(s2);			\
-    dst1 = _mm256_cmp_ps_mask(source1.x, source2.x, imm);\
-    dst2 = _mm256_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
-    if (dst_ref != dst1) abort();			\
-    if ((dst_ref & mask) != dst2) abort();
+#define SUF(fun) _mm256##fun
+#define SSIZE 8
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
 #endif
 
 #if AVX512F_LEN == 128
+#define SUF(fun) _mm##fun
+#define SSIZE 4
+#define GEN_CMP(type)
+#define CHECK_CMP(imm)
+#endif
+
 #undef CMP
 #define CMP(imm, rel)					\
     dst_ref = 0;					\
-    for (i = 0; i < 4; i++)				\
+    for (i = 0; i < SSIZE; i++)				\
     {							\
       dst_ref = (((int) rel) << i) | dst_ref;		\
     }							\
-    source1.x = _mm_loadu_ps(s1);			\
-    source2.x = _mm_loadu_ps(s2);			\
-    dst1 = _mm_cmp_ps_mask(source1.x, source2.x, imm);\
-    dst2 = _mm_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
+    source1.x = SUF(_loadu_ps)(s1);			\
+    source2.x = SUF(_loadu_ps)(s2);			\
+    dst1 = SUF(_cmp_ps_mask)(source1.x, source2.x, imm);\
+    dst2 = SUF(_mask_cmp_ps_mask)(mask, source1.x, source2.x, imm);\
     if (dst_ref != dst1) abort();			\
-    if ((dst_ref & mask) != dst2) abort();
-#endif
+    if ((dst_ref & mask) != dst2) abort();		\
+    CHECK_CMP(imm)
 
 void
 TEST ()
 {
     UNION_TYPE (AVX512F_LEN,) source1, source2;
-    MASK_TYPE dst1, dst2, dst_ref;
+    MASK_TYPE dst1, dst2, dst3, dst4, dst_ref;
     MASK_TYPE mask = MASK_VALUE;
     int i;
     float s1[16] = {2134.3343, 6678.346, 453.345635, 54646.464,
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch][i386, AVX] Adding missing CMP* intrinsics
  2017-10-20  8:46 [patch][i386, AVX] Adding missing CMP* intrinsics Peryt, Sebastian
@ 2017-10-26 18:23 ` Kirill Yukhin
  0 siblings, 0 replies; 2+ messages in thread
From: Kirill Yukhin @ 2017-10-26 18:23 UTC (permalink / raw)
  To: Peryt, Sebastian; +Cc: gcc-patches, Makhotina, Olga

Hello Olga, Sebastian,
On 20 Oct 08:36, Peryt, Sebastian wrote:
> Hi,
> 
> This patch, written by Olga Makhotina, adds the missing intrinsics listed below:
> _mm512_[mask_]cmpeq_[pd|ps]_mask
> _mm512_[mask_]cmple_[pd|ps]_mask
> _mm512_[mask_]cmplt_[pd|ps]_mask
> _mm512_[mask_]cmpneq_[pd|ps]_mask
> _mm512_[mask_]cmpnle_[pd|ps]_mask
> _mm512_[mask_]cmpnlt_[pd|ps]_mask
> _mm512_[mask_]cmpord_[pd|ps]_mask
> _mm512_[mask_]cmpunord_[pd|ps]_mask
> 
> Is it ok for trunk?
Your patch is OK for trunk. I've checked it in.

--
Thanks, K

> Thanks,
> Sebastian
> 


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-10-26 18:20 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-20  8:46 [patch][i386, AVX] Adding missing CMP* intrinsics Peryt, Sebastian
2017-10-26 18:23 ` Kirill Yukhin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).