From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <hongtao.liu@intel.com>
Received: from mga12.intel.com (mga12.intel.com [192.55.52.136])
 by sourceware.org (Postfix) with ESMTPS id 686F9384A024
 for <gcc-patches@gcc.gnu.org>; Thu,  1 Jul 2021 06:17:58 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 686F9384A024
X-IronPort-AV: E=McAfee;i="6200,9189,10031"; a="188163520"
X-IronPort-AV: E=Sophos;i="5.83,313,1616482800"; d="scan'208";a="188163520"
Received: from orsmga005.jf.intel.com ([10.7.209.41])
 by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 30 Jun 2021 23:17:54 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.83,313,1616482800"; d="scan'208";a="626257555"
Received: from scymds01.sc.intel.com ([10.148.94.138])
 by orsmga005.jf.intel.com with ESMTP; 30 Jun 2021 23:17:54 -0700
Received: from shliclel320.sh.intel.com (shliclel320.sh.intel.com
 [10.239.236.50]) by scymds01.sc.intel.com
 with ESMTP id 1616GmfI031625; Wed, 30 Jun 2021 23:17:52 -0700
From: liuhongt <hongtao.liu@intel.com>
To: gcc-patches@gcc.gnu.org
Cc: crazylht@gmail.com, hjl.tools@gmail.com, ubizjak@gmail.com,
 jakub@redhat.com
Subject: [PATCH 39/62] AVX512FP16: Add intrinsics for casting between vector
 float16 and vector float32/float64/integer.
Date: Thu,  1 Jul 2021 14:16:25 +0800
Message-Id: <20210701061648.9447-40-hongtao.liu@intel.com>
X-Mailer: git-send-email 2.18.1
In-Reply-To: <20210701061648.9447-1-hongtao.liu@intel.com>
References: <20210701061648.9447-1-hongtao.liu@intel.com>
X-Spam-Status: No, score=-12.2 required=5.0 tests=BAYES_00, GIT_PATCH_0,
 KAM_DMARC_NONE, KAM_DMARC_STATUS, KAM_LAZY_DOMAIN_SECURITY, KAM_SHORT,
 SPF_HELO_PASS, SPF_NONE, TXREP autolearn=ham autolearn_force=no version=3.4.4
X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on
 server2.sourceware.org
X-BeenThere: gcc-patches@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-patches mailing list <gcc-patches.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Thu, 01 Jul 2021 06:18:00 -0000

gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h (_mm_undefined_ph):
	New intrinsic.
	(_mm256_undefined_ph): Likewise.
	(_mm512_undefined_ph): Likewise.
	(_mm_cvtsh_h): Likewise.
	(_mm256_cvtsh_h): Likewise.
	(_mm512_cvtsh_h): Likewise.
	(_mm512_castph_ps): Likewise.
	(_mm512_castph_pd): Likewise.
	(_mm512_castph_si512): Likewise.
	(_mm512_castph512_ph128): Likewise.
	(_mm512_castph512_ph256): Likewise.
	(_mm512_castph128_ph512): Likewise.
	(_mm512_castph256_ph512): Likewise.
	(_mm512_zextph128_ph512): Likewise.
	(_mm512_zextph256_ph512): Likewise.
	(_mm512_castps_ph): Likewise.
	(_mm512_castpd_ph): Likewise.
	(_mm512_castsi512_ph): Likewise.
	* config/i386/avx512fp16vlintrin.h (_mm_castph_ps):
	New intrinsic.
	(_mm256_castph_ps): Likewise.
	(_mm_castph_pd): Likewise.
	(_mm256_castph_pd): Likewise.
	(_mm_castph_si128): Likewise.
	(_mm256_castph_si256): Likewise.
	(_mm_castps_ph): Likewise.
	(_mm256_castps_ph): Likewise.
	(_mm_castpd_ph): Likewise.
	(_mm256_castpd_ph): Likewise.
	(_mm_castsi128_ph): Likewise.
	(_mm256_castsi256_ph): Likewise.
	(_mm256_castph256_ph128): Likewise.
	(_mm256_castph128_ph256): Likewise.
	(_mm256_zextph128_ph256): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-typecast-1.c: New test.
	* gcc.target/i386/avx512fp16-typecast-2.c: Ditto.
	* gcc.target/i386/avx512fp16vl-typecast-1.c: Ditto.
	* gcc.target/i386/avx512fp16vl-typecast-2.c: Ditto.
---
 gcc/config/i386/avx512fp16intrin.h            | 153 ++++++++++++++++++
 gcc/config/i386/avx512fp16vlintrin.h          | 117 ++++++++++++++
 .../gcc.target/i386/avx512fp16-typecast-1.c   |  44 +++++
 .../gcc.target/i386/avx512fp16-typecast-2.c   |  43 +++++
 .../gcc.target/i386/avx512fp16vl-typecast-1.c |  55 +++++++
 .../gcc.target/i386/avx512fp16vl-typecast-2.c |  37 +++++
 6 files changed, 449 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 05efbc5777b..ddb227529fa 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -192,6 +192,159 @@ _mm512_setzero_ph (void)
   return _mm512_set1_ph (0.0f);
 }
 
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ph (void) /* 128-bit FP16 vector, contents unspecified.  */
+{
+  __m128h __Y = __Y; /* Self-initialization: no defined value is stored.  */
+  return __Y;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_ph (void) /* 256-bit FP16 vector, contents unspecified.  */
+{
+  __m256h __Y = __Y; /* Self-initialization: no defined value is stored.  */
+  return __Y;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ph (void) /* 512-bit FP16 vector, contents unspecified.  */
+{
+  __m512h __Y = __Y; /* Self-initialization: no defined value is stored.  */
+  return __Y;
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_h (__m128h __A) /* Return element 0 of __A as a scalar.  */
+{
+  return __A[0];
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsh_h (__m256h __A) /* Return element 0 of __A as a scalar.  */
+{
+  return __A[0];
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsh_h (__m512h __A) /* Return element 0 of __A as a scalar.  */
+{
+  return __A[0];
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_ps (__m512h __a) /* Reinterpret the bits of __a as __m512.  */
+{
+  return (__m512) __a;
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_pd (__m512h __a) /* Reinterpret the bits of __a as __m512d.  */
+{
+  return (__m512d) __a;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_si512 (__m512h __a) /* Reinterpret the bits of __a as __m512i.  */
+{
+  return (__m512i) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph128 (__m512h __A) /* Return the first 128 bits of __A.  */
+{
+  union
+  {
+    __m128h a[4]; /* Four 128-bit views sharing storage with v.  */
+    __m512h v;
+  } u = { .v = __A };
+  return u.a[0]; /* a[0] shares its storage with the start of v.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph256 (__m512h __A) /* Return the first 256 bits of __A.  */
+{
+  union
+  {
+    __m256h a[2]; /* Two 256-bit views sharing storage with v.  */
+    __m512h v;
+  } u = { .v = __A };
+  return u.a[0]; /* a[0] shares its storage with the start of v.  */
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph128_ph512 (__m128h __A) /* Place __A in the first 128 bits.  */
+{
+  union
+  {
+    __m128h a[4];
+    __m512h v;
+  } u; /* Not initialized: a[1..3] are never written.  */
+  u.a[0] = __A;
+  return u.v; /* Everything beyond a[0] is unspecified.  */
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph256_ph512 (__m256h __A) /* Place __A in the first 256 bits.  */
+{
+  union
+  {
+    __m256h a[2];
+    __m512h v;
+  } u; /* Not initialized: a[1] is never written.  */
+  u.a[0] = __A;
+  return u.v; /* Everything beyond a[0] is unspecified.  */
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph128_ph512 (__m128h __A) /* __A at index 0 of a zeroed vector.  */
+{
+  return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
+				       (__m128) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph256_ph512 (__m256h __A) /* __A at index 0 of a zeroed vector.  */
+{
+  return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
+				       (__m256d) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castps_ph (__m512 __a) /* Reinterpret the bits of __a as __m512h.  */
+{
+  return (__m512h) __a;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castpd_ph (__m512d __a) /* Reinterpret the bits of __a as __m512h.  */
+{
+  return (__m512h) __a;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castsi512_ph (__m512i __a) /* Reinterpret the bits of __a as __m512h.  */
+{
+  return (__m512h) __a;
+}
+
 /* Create a vector with element 0 as F and the rest zero.  */
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 0124b830dd5..bcbe4523357 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -34,6 +34,123 @@
 #define __DISABLE_AVX512FP16VL__
 #endif /* __AVX512FP16VL__ */
 
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_ps (__m128h __a) /* Reinterpret the bits of __a as __m128.  */
+{
+  return (__m128) __a;
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_ps (__m256h __a) /* Reinterpret the bits of __a as __m256.  */
+{
+  return (__m256) __a;
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_pd (__m128h __a) /* Reinterpret the bits of __a as __m128d.  */
+{
+  return (__m128d) __a;
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_pd (__m256h __a) /* Reinterpret the bits of __a as __m256d.  */
+{
+  return (__m256d) __a;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_si128 (__m128h __a) /* Reinterpret the bits of __a as __m128i.  */
+{
+  return (__m128i) __a;
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_si256 (__m256h __a) /* Reinterpret the bits of __a as __m256i.  */
+{
+  return (__m256i) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_ph (__m128 __a) /* Reinterpret the bits of __a as __m128h.  */
+{
+  return (__m128h) __a;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_ph (__m256 __a) /* Reinterpret the bits of __a as __m256h.  */
+{
+  return (__m256h) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ph (__m128d __a) /* Reinterpret the bits of __a as __m128h.  */
+{
+  return (__m128h) __a;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ph (__m256d __a) /* Reinterpret the bits of __a as __m256h.  */
+{
+  return (__m256h) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ph (__m128i __a) /* Reinterpret the bits of __a as __m128h.  */
+{
+  return (__m128h) __a;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ph (__m256i __a) /* Reinterpret the bits of __a as __m256h.  */
+{
+  return (__m256h) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph256_ph128 (__m256h __A) /* Return the first 128 bits of __A.  */
+{
+  union
+  {
+    __m128h a[2]; /* Two 128-bit views sharing storage with v.  */
+    __m256h v;
+  } u = { .v = __A };
+  return u.a[0]; /* a[0] shares its storage with the start of v.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph128_ph256 (__m128h __A) /* Place __A in the first 128 bits.  */
+{
+  union
+  {
+    __m128h a[2];
+    __m256h v;
+  } u; /* Not initialized: a[1] is never written.  */
+  u.a[0] = __A;
+  return u.v; /* Everything beyond a[0] is unspecified.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zextph128_ph256 (__m128h __A) /* __A at index 0 of a zeroed vector.  */
+{
+  return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
+					 (__m128) __A, 0);
+}
+
 /* Intrinsics v[add,sub,mul,div]ph.  */
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c
new file mode 100644
index 00000000000..cf0cc7443c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c
@@ -0,0 +1,44 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void
+test_512 (void) /* Runs each 512-bit cast intrinsic on src1 and checks the result.  */
+{
+  V512 res;
+
+  res.ymmh[0] = _mm512_castph512_ph256 (src1.zmmh);
+  check_results (&res, &src1, 16, "_mm512_castph512_ph256"); /* NOTE(review): 3rd argument looks like an FP16 element count -- confirm in avx512fp16-helper.h.  */
+
+  res.xmmh[0] = _mm512_castph512_ph128 (src1.zmmh);
+  check_results (&res, &src1, 8, "_mm512_castph512_ph128");
+
+  res.zmmh = _mm512_castph256_ph512 (src1.ymmh[0]); /* Upper half of the result is unspecified.  */
+  check_results (&res, &src1, 16, "_mm512_castph256_ph512");
+
+  res.zmmh = _mm512_castph128_ph512 (src1.xmmh[0]); /* Upper 384 bits of the result are unspecified.  */
+  check_results (&res, &src1, 8, "_mm512_castph128_ph512");
+
+  res.zmm = _mm512_castph_ps (src1.zmmh);
+  check_results (&res, &src1, 32, "_mm512_castph_ps");
+
+  res.zmmd = _mm512_castph_pd (src1.zmmh);
+  check_results (&res, &src1, 32, "_mm512_castph_pd");
+
+  res.zmmi = _mm512_castph_si512 (src1.zmmh);
+  check_results (&res, &src1, 32, "_mm512_castph_si512");
+
+  res.zmmh = _mm512_castps_ph (src1.zmm);
+  check_results (&res, &src1, 32, "_mm512_castps_ph");
+
+  res.zmmh = _mm512_castpd_ph (src1.zmmd);
+  check_results (&res, &src1, 32, "_mm512_castpd_ph");
+
+  res.zmmh = _mm512_castsi512_ph (src1.zmmi);
+  check_results (&res, &src1, 32, "_mm512_castsi512_ph");
+
+  if (n_errs != 0) /* n_errs is defined outside this file; presumably updated by check_results.  */
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c
new file mode 100644
index 00000000000..a29f1dbd76a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512f-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+do_test (void)
+{
+  union512i_d zero;
+  union512h ad;
+  union256h b;
+  union128h c;
+  int i;
+
+  /* Nonzero 256-bit source plus an all-zero reference block.  */
+  for (i = 0; i < 16; i++)
+    {
+      b.a[i] = 65.43f + i;
+      zero.a[i] = 0;
+    }
+
+  /* Nonzero 128-bit source.  */
+  for (i = 0; i < 8; i++)
+    c.a[i] = 32.01f + i;
+
+  /* First 32 bytes must match b; 32 bytes from element 16 must be zero.  */
+  ad.x = _mm512_zextph256_ph512 (b.x);
+  if (memcmp (ad.a, b.a, 32)
+      || memcmp (&ad.a[16], &zero.a, 32))
+    abort ();
+
+  /* First 16 bytes must match c; 48 bytes from element 8 must be zero.  */
+  ad.x = _mm512_zextph128_ph512 (c.x);
+  if (memcmp (ad.a, c.a, 16)
+      || memcmp (&ad.a[8], &zero.a, 48))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c
new file mode 100644
index 00000000000..3621bb52f08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c
@@ -0,0 +1,55 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void
+test_512 (void) /* NOTE(review): covers 128/256-bit casts; the name is presumably what avx512fp16-helper.h calls -- confirm.  */
+{
+  V512 res;
+  res.xmm[0] = _mm_castph_ps (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm_castph_ps"); /* NOTE(review): 3rd argument looks like an FP16 element count -- confirm.  */
+
+  res.xmmd[0] = _mm_castph_pd (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm_castph_pd");
+
+  res.xmmi[0] = _mm_castph_si128 (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm_castph_si128");
+
+  res.xmmh[0] = _mm_castps_ph (src1.xmm[0]);
+  check_results (&res, &src1, 8, "_mm_castps_ph");
+
+  res.xmmh[0] = _mm_castpd_ph (src1.xmmd[0]);
+  check_results (&res, &src1, 8, "_mm_castpd_ph");
+
+  res.xmmh[0] = _mm_castsi128_ph (src1.xmmi[0]);
+  check_results (&res, &src1, 8, "_mm_castsi128_ph");
+
+  res.ymm[0] = _mm256_castph_ps (src1.ymmh[0]);
+  check_results (&res, &src1, 16, "_mm256_castph_ps");
+
+  res.ymmd[0] = _mm256_castph_pd (src1.ymmh[0]);
+  check_results (&res, &src1, 16, "_mm256_castph_pd");
+
+  res.ymmi[0] = _mm256_castph_si256 (src1.ymmh[0]);
+  check_results (&res, &src1, 16, "_mm256_castph_si256");
+
+  res.ymmh[0] = _mm256_castps_ph (src1.ymm[0]);
+  check_results (&res, &src1, 16, "_mm256_castps_ph");
+
+  res.ymmh[0] = _mm256_castpd_ph (src1.ymmd[0]);
+  check_results (&res, &src1, 16, "_mm256_castpd_ph");
+
+  res.ymmh[0] = _mm256_castsi256_ph (src1.ymmi[0]);
+  check_results (&res, &src1, 16, "_mm256_castsi256_ph");
+
+  res.xmmh[0] = _mm256_castph256_ph128 (src1.ymmh[0]);
+  check_results (&res, &src1, 8, "_mm256_castph256_ph128");
+
+  res.ymmh[0] = _mm256_castph128_ph256 (src1.xmmh[0]); /* Upper half of the result is unspecified.  */
+  check_results (&res, &src1, 8, "_mm256_castph128_ph256");
+
+  if (n_errs != 0) /* n_errs is defined outside this file; presumably updated by check_results.  */
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c
new file mode 100644
index 00000000000..dce387f1fab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c
@@ -0,0 +1,37 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512f-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+do_test (void)
+{
+  union512i_d zero;
+  union256h bd;
+  union128h c;
+
+  int i;
+
+  /* All-zero reference block for checking the zero-extended part.
+     Only its first 16 bytes are compared below.  */
+  for (i = 0; i < 16; i++)
+    zero.a[i] = 0;
+
+  /* Nonzero 128-bit source, so a dropped or misplaced element is
+     detectable.  */
+  for (i = 0; i < 8; i++)
+    c.a[i] = 32.01f + i;
+
+  /* The first 16 bytes of the result must match c, and the 16 bytes
+     starting at element 8 must be zero.  */
+  bd.x = _mm256_zextph128_ph256 (c.x);
+  if (memcmp (bd.a, c.a, 16)
+      || memcmp (&bd.a[8], &zero.a, 16))
+    abort ();
+}
-- 
2.18.1