public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3610] AVX512FP16: Add intrinsics for casting between vector float16 and vector float32/float64/integer.
@ 2021-09-17  8:05 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-09-17  8:05 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a5873aadb6dd749558924eba3e1b4c21755721ba

commit r12-3610-ga5873aadb6dd749558924eba3e1b4c21755721ba
Author: liuhongt <hongtao.liu@intel.com>
Date:   Tue Feb 25 10:42:13 2020 +0800

    AVX512FP16: Add intrinsics for casting between vector float16 and vector float32/float64/integer.
    
    gcc/ChangeLog:
    
            * config/i386/avx512fp16intrin.h (_mm_undefined_ph):
            New intrinsic.
            (_mm256_undefined_ph): Likewise.
            (_mm512_undefined_ph): Likewise.
            (_mm_cvtsh_h): Likewise.
            (_mm256_cvtsh_h): Likewise.
            (_mm512_cvtsh_h): Likewise.
            (_mm512_castph_ps): Likewise.
            (_mm512_castph_pd): Likewise.
            (_mm512_castph_si512): Likewise.
            (_mm512_castph512_ph128): Likewise.
            (_mm512_castph512_ph256): Likewise.
            (_mm512_castph128_ph512): Likewise.
            (_mm512_castph256_ph512): Likewise.
            (_mm512_zextph128_ph512): Likewise.
            (_mm512_zextph256_ph512): Likewise.
            (_mm512_castps_ph): Likewise.
            (_mm512_castpd_ph): Likewise.
            (_mm512_castsi512_ph): Likewise.
            * config/i386/avx512fp16vlintrin.h (_mm_castph_ps):
            New intrinsic.
            (_mm256_castph_ps): Likewise.
            (_mm_castph_pd): Likewise.
            (_mm256_castph_pd): Likewise.
            (_mm_castph_si128): Likewise.
            (_mm256_castph_si256): Likewise.
            (_mm_castps_ph): Likewise.
            (_mm256_castps_ph): Likewise.
            (_mm_castpd_ph): Likewise.
            (_mm256_castpd_ph): Likewise.
            (_mm_castsi128_ph): Likewise.
            (_mm256_castsi256_ph): Likewise.
            (_mm256_castph256_ph128): Likewise.
            (_mm256_castph128_ph256): Likewise.
            (_mm256_zextph128_ph256): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/avx512fp16-typecast-1.c: New test.
            * gcc.target/i386/avx512fp16-typecast-2.c: Ditto.
            * gcc.target/i386/avx512fp16vl-typecast-1.c: Ditto.
            * gcc.target/i386/avx512fp16vl-typecast-2.c: Ditto.

Diff:
---
 gcc/config/i386/avx512fp16intrin.h                 | 153 +++++++++++++++++++++
 gcc/config/i386/avx512fp16vlintrin.h               | 117 ++++++++++++++++
 .../gcc.target/i386/avx512fp16-typecast-1.c        |  44 ++++++
 .../gcc.target/i386/avx512fp16-typecast-2.c        |  43 ++++++
 .../gcc.target/i386/avx512fp16vl-typecast-1.c      |  55 ++++++++
 .../gcc.target/i386/avx512fp16vl-typecast-2.c      |  37 +++++
 6 files changed, 449 insertions(+)

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 3b236ecbfc4..a5041ed3697 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -192,6 +192,159 @@ _mm512_setzero_ph (void)
   return _mm512_set1_ph (0.0f);
 }
 
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ph (void) /* Return a __m128h with unspecified contents.  */
+{
+  __m128h __Y = __Y; /* Self-initialized: never given a defined value.  */
+  return __Y;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_ph (void) /* Return a __m256h with unspecified contents.  */
+{
+  __m256h __Y = __Y; /* Self-initialized: never given a defined value.  */
+  return __Y;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ph (void) /* Return a __m512h with unspecified contents.  */
+{
+  __m512h __Y = __Y; /* Self-initialized: never given a defined value.  */
+  return __Y;
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_h (__m128h __A) /* Return element 0 of __A as a scalar.  */
+{
+  return __A[0];
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsh_h (__m256h __A) /* Return element 0 of __A as a scalar.  */
+{
+  return __A[0];
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsh_h (__m512h __A) /* Return element 0 of __A as a scalar.  */
+{
+  return __A[0];
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_ps (__m512h __a) /* Reinterpret __a as __m512.  */
+{
+  return (__m512) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_pd (__m512h __a) /* Reinterpret __a as __m512d.  */
+{
+  return (__m512d) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_si512 (__m512h __a) /* Reinterpret __a as __m512i.  */
+{
+  return (__m512i) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph128 (__m512h __A) /* Return the first 128 bits of __A.  */
+{
+  union /* Reserved (__) names so user macros cannot clash with them.  */
+  {
+    __m128h __a[4];
+    __m512h __v;
+  } __u = { .__v = __A };
+  return __u.__a[0]; /* Slot 0 overlays the start of __v.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph256 (__m512h __A) /* Return the first 256 bits of __A.  */
+{
+  union /* Reserved (__) names so user macros cannot clash with them.  */
+  {
+    __m256h __a[2];
+    __m512h __v;
+  } __u = { .__v = __A };
+  return __u.__a[0]; /* Slot 0 overlays the start of __v.  */
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph128_ph512 (__m128h __A) /* Widen __A; the rest is unspecified.  */
+{
+  union /* Reserved (__) names so user macros cannot clash with them.  */
+  {
+    __m128h __a[4];
+    __m512h __v;
+  } __u;
+  __u.__a[0] = __A; /* Only slot 0 is written; slots 1-3 are never set.  */
+  return __u.__v;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph256_ph512 (__m256h __A) /* Widen __A; the rest is unspecified.  */
+{
+  union /* Reserved (__) names so user macros cannot clash with them.  */
+  {
+    __m256h __a[2];
+    __m512h __v;
+  } __u;
+  __u.__a[0] = __A; /* Only slot 0 is written; slot 1 is never set.  */
+  return __u.__v;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph128_ph512 (__m128h __A) /* Widen __A; added bits are zero.  */
+{ /* Insert __A at 128-bit position 0 of an all-zero vector.  */
+  return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
+				       (__m128) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph256_ph512 (__m256h __A) /* Widen __A; added bits are zero.  */
+{ /* Insert __A at 256-bit position 0 of an all-zero vector.  */
+  return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
+				       (__m256d) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castps_ph (__m512 __a) /* Reinterpret __a as __m512h.  */
+{
+  return (__m512h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castpd_ph (__m512d __a) /* Reinterpret __a as __m512h.  */
+{
+  return (__m512h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castsi512_ph (__m512i __a) /* Reinterpret __a as __m512h.  */
+{
+  return (__m512h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
 /* Create a vector with element 0 as F and the rest zero.  */
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 59128fdffb8..59906d2175d 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -34,6 +34,123 @@
 #define __DISABLE_AVX512FP16VL__
 #endif /* __AVX512FP16VL__ */
 
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_ps (__m128h __a) /* Reinterpret __a as __m128.  */
+{
+  return (__m128) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_ps (__m256h __a) /* Reinterpret __a as __m256.  */
+{
+  return (__m256) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_pd (__m128h __a) /* Reinterpret __a as __m128d.  */
+{
+  return (__m128d) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_pd (__m256h __a) /* Reinterpret __a as __m256d.  */
+{
+  return (__m256d) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_si128 (__m128h __a) /* Reinterpret __a as __m128i.  */
+{
+  return (__m128i) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_si256 (__m256h __a) /* Reinterpret __a as __m256i.  */
+{
+  return (__m256i) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_ph (__m128 __a) /* Reinterpret __a as __m128h.  */
+{
+  return (__m128h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_ph (__m256 __a) /* Reinterpret __a as __m256h.  */
+{
+  return (__m256h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ph (__m128d __a) /* Reinterpret __a as __m128h.  */
+{
+  return (__m128h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ph (__m256d __a) /* Reinterpret __a as __m256h.  */
+{
+  return (__m256h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ph (__m128i __a) /* Reinterpret __a as __m128h.  */
+{
+  return (__m128h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ph (__m256i __a) /* Reinterpret __a as __m256h.  */
+{
+  return (__m256h) __a; /* Vector cast: same bits, no value conversion.  */
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph256_ph128 (__m256h __A) /* Return the first 128 bits of __A.  */
+{
+  union /* Reserved (__) names so user macros cannot clash with them.  */
+  {
+    __m128h __a[2];
+    __m256h __v;
+  } __u = { .__v = __A };
+  return __u.__a[0]; /* Slot 0 overlays the start of __v.  */
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph128_ph256 (__m128h __A) /* Widen __A; the rest is unspecified.  */
+{
+  union /* Reserved (__) names so user macros cannot clash with them.  */
+  {
+    __m128h __a[2];
+    __m256h __v;
+  } __u;
+  __u.__a[0] = __A; /* Only slot 0 is written; slot 1 is never set.  */
+  return __u.__v;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zextph128_ph256 (__m128h __A) /* Widen __A; added bits are zero.  */
+{ /* Insert __A at 128-bit position 0 of an all-zero vector.  */
+  return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
+					 (__m128) __A, 0);
+}
+
 /* Intrinsics v[add,sub,mul,div]ph.  */
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c
new file mode 100644
index 00000000000..cf0cc7443c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c
@@ -0,0 +1,44 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void
+test_512 (void) /* Run each 512-bit cast intrinsic on src1 and check the result against src1.  */
+{ /* NOTE(review): check_results' 3rd argument is presumably a _Float16 element count; confirm in avx512fp16-helper.h.  */
+  V512 res;
+
+  res.ymmh[0] = _mm512_castph512_ph256 (src1.zmmh);
+  check_results (&res, &src1, 16, "_mm512_castph512_ph256");
+
+  res.xmmh[0] = _mm512_castph512_ph128 (src1.zmmh);
+  check_results (&res, &src1, 8, "_mm512_castph512_ph128");
+
+  res.zmmh = _mm512_castph256_ph512 (src1.ymmh[0]);
+  check_results (&res, &src1, 16, "_mm512_castph256_ph512"); /* 16, not 32: the widening cast defines only the copied part.  */
+
+  res.zmmh = _mm512_castph128_ph512 (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm512_castph128_ph512"); /* 8, not 32: same reason.  */
+
+  res.zmm = _mm512_castph_ps (src1.zmmh);
+  check_results (&res, &src1, 32, "_mm512_castph_ps");
+
+  res.zmmd = _mm512_castph_pd (src1.zmmh);
+  check_results (&res, &src1, 32, "_mm512_castph_pd");
+
+  res.zmmi = _mm512_castph_si512 (src1.zmmh);
+  check_results (&res, &src1, 32, "_mm512_castph_si512");
+
+  res.zmmh = _mm512_castps_ph (src1.zmm);
+  check_results (&res, &src1, 32, "_mm512_castps_ph");
+
+  res.zmmh = _mm512_castpd_ph (src1.zmmd);
+  check_results (&res, &src1, 32, "_mm512_castpd_ph");
+
+  res.zmmh = _mm512_castsi512_ph (src1.zmmi);
+  check_results (&res, &src1, 32, "_mm512_castsi512_ph");
+
+  if (n_errs != 0) /* n_errs is defined outside this file; presumably bumped by check_results.  */
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c
new file mode 100644
index 00000000000..a29f1dbd76a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512f-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+do_test (void) /* Check that the 512-bit zext intrinsics keep the source and zero the rest.  */
+{
+  union512i_d zero; /* Zero reference for the memcmp checks; filled in the loop below.  */
+  union512h ad;
+  union256h b;
+  union128h c;
+
+  int i;
+
+  for (i = 0; i < 16; i++)
+    {
+      b.a[i] = 65.43f + i;
+      zero.a[i] = 0;
+    }
+
+  for (i = 0; i < 8; i++)
+    {
+      c.a[i] = 32.01f + i;
+    }
+
+  ad.x = _mm512_zextph256_ph512 (b.x);
+  if (memcmp (ad.a, b.a, 32) /* First 32 bytes must equal the source.  */
+      || memcmp (&ad.a[16], &zero.a, 32)) /* 32 bytes from element 16 must be zero.  */
+    abort ();
+
+  ad.x = _mm512_zextph128_ph512 (c.x);
+  if (memcmp (ad.a, c.a, 16) /* First 16 bytes must equal the source.  */
+      || memcmp (&ad.a[8], &zero.a, 48)) /* 48 bytes from element 8 must be zero.  */
+    abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c
new file mode 100644
index 00000000000..3621bb52f08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c
@@ -0,0 +1,55 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void
+test_512 (void) /* Run each 128/256-bit cast intrinsic on src1 and check the result against src1.  */
+{ /* NOTE(review): check_results' 3rd argument is presumably a _Float16 element count; confirm in avx512fp16-helper.h.  */
+  V512 res;
+  res.xmm[0] = _mm_castph_ps (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm_castph_ps");
+
+  res.xmmd[0] = _mm_castph_pd (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm_castph_pd");
+
+  res.xmmi[0] = _mm_castph_si128 (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm_castph_si128");
+
+  res.xmmh[0] = _mm_castps_ph (src1.xmm[0]);
+  check_results (&res, &src1, 8, "_mm_castps_ph");
+
+  res.xmmh[0] = _mm_castpd_ph (src1.xmmd[0]);
+  check_results (&res, &src1, 8, "_mm_castpd_ph");
+
+  res.xmmh[0] = _mm_castsi128_ph (src1.xmmi[0]);
+  check_results (&res, &src1, 8, "_mm_castsi128_ph");
+
+  res.ymm[0] = _mm256_castph_ps (src1.ymmh[0]);
+  check_results (&res, &src1, 16, "_mm256_castph_ps");
+
+  res.ymmd[0] = _mm256_castph_pd (src1.ymmh[0]);
+  check_results (&res, &src1, 16, "_mm256_castph_pd");
+
+  res.ymmi[0] = _mm256_castph_si256 (src1.ymmh[0]);
+  check_results (&res, &src1, 16, "_mm256_castph_si256");
+
+  res.ymmh[0] = _mm256_castps_ph (src1.ymm[0]);
+  check_results (&res, &src1, 16, "_mm256_castps_ph");
+
+  res.ymmh[0] = _mm256_castpd_ph (src1.ymmd[0]);
+  check_results (&res, &src1, 16, "_mm256_castpd_ph");
+
+  res.ymmh[0] = _mm256_castsi256_ph (src1.ymmi[0]);
+  check_results (&res, &src1, 16, "_mm256_castsi256_ph");
+
+  res.xmmh[0] = _mm256_castph256_ph128 (src1.ymmh[0]);
+  check_results (&res, &src1, 8, "_mm256_castph256_ph128");
+
+  res.ymmh[0] = _mm256_castph128_ph256 (src1.xmmh[0]);
+  check_results (&res, &src1, 8, "_mm256_castph128_ph256"); /* 8, not 16: the widening cast defines only the copied part.  */
+  
+  if (n_errs != 0) /* n_errs is defined outside this file; presumably bumped by check_results.  */
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c
new file mode 100644
index 00000000000..dce387f1fab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c
@@ -0,0 +1,37 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512f-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+do_test (void) /* Check that _mm256_zextph128_ph256 keeps the source and zeroes the rest.  */
+{
+  union512i_d zero; /* Zero reference for the memcmp check; filled in the loop below.  */
+  union512h ad; /* NOTE(review): declared but never used in this function.  */
+  union256h b,bd; /* NOTE(review): b is only written below, never read.  */
+  union128h c;
+
+  int i;
+
+  for (i = 0; i < 16; i++)
+    {
+      b.a[i] = 65.43f + i;
+      zero.a[i] = 0;
+    }
+
+  for (i = 0; i < 8; i++)
+    {
+      c.a[i] = 32.01f + i;
+    }
+   
+  bd.x = _mm256_zextph128_ph256 (c.x);
+  if (memcmp (bd.a, c.a, 16) /* First 16 bytes must equal the source.  */
+      || memcmp (&bd.a[8], &zero.a, 16)) /* 16 bytes from element 8 must be zero.  */
+    abort ();
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-17  8:05 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-17  8:05 [gcc r12-3610] AVX512FP16: Add intrinsics for casting between vector float16 and vector float32/float64/integer hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).