From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga12.intel.com (mga12.intel.com [192.55.52.136]) by sourceware.org (Postfix) with ESMTPS id 686F9384A024 for ; Thu, 1 Jul 2021 06:17:58 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 686F9384A024 X-IronPort-AV: E=McAfee;i="6200,9189,10031"; a="188163520" X-IronPort-AV: E=Sophos;i="5.83,313,1616482800"; d="scan'208";a="188163520" Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga106.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 30 Jun 2021 23:17:54 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.83,313,1616482800"; d="scan'208";a="626257555" Received: from scymds01.sc.intel.com ([10.148.94.138]) by orsmga005.jf.intel.com with ESMTP; 30 Jun 2021 23:17:54 -0700 Received: from shliclel320.sh.intel.com (shliclel320.sh.intel.com [10.239.236.50]) by scymds01.sc.intel.com with ESMTP id 1616GmfI031625; Wed, 30 Jun 2021 23:17:52 -0700 From: liuhongt To: gcc-patches@gcc.gnu.org Cc: crazylht@gmail.com, hjl.tools@gmail.com, ubizjak@gmail.com, jakub@redhat.com Subject: [PATCH 39/62] AVX512FP16: Add intrinsics for casting between vector float16 and vector float32/float64/integer. Date: Thu, 1 Jul 2021 14:16:25 +0800 Message-Id: <20210701061648.9447-40-hongtao.liu@intel.com> X-Mailer: git-send-email 2.18.1 In-Reply-To: <20210701061648.9447-1-hongtao.liu@intel.com> References: <20210701061648.9447-1-hongtao.liu@intel.com> X-Spam-Status: No, score=-12.2 required=5.0 tests=BAYES_00, GIT_PATCH_0, KAM_DMARC_NONE, KAM_DMARC_STATUS, KAM_LAZY_DOMAIN_SECURITY, KAM_SHORT, SPF_HELO_PASS, SPF_NONE, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 01 Jul 2021 06:18:00 -0000 gcc/ChangeLog: * config/i386/avx512fp16intrin.h (_mm_undefined_ph): New intrinsic. (_mm256_undefined_ph): Likewise. (_mm512_undefined_ph): Likewise. (_mm_cvtsh_h): Likewise. (_mm256_cvtsh_h): Likewise. (_mm512_cvtsh_h): Likewise. (_mm512_castph_ps): Likewise. (_mm512_castph_pd): Likewise. (_mm512_castph_si512): Likewise. (_mm512_castph512_ph128): Likewise. (_mm512_castph512_ph256): Likewise. (_mm512_castph128_ph512): Likewise. (_mm512_castph256_ph512): Likewise. (_mm512_zextph128_ph512): Likewise. (_mm512_zextph256_ph512): Likewise. (_mm512_castps_ph): Likewise. (_mm512_castpd_ph): Likewise. (_mm512_castsi512_ph): Likewise. * config/i386/avx512fp16vlintrin.h (_mm_castph_ps): New intrinsic. (_mm256_castph_ps): Likewise. (_mm_castph_pd): Likewise. (_mm256_castph_pd): Likewise. (_mm_castph_si128): Likewise. (_mm256_castph_si256): Likewise. (_mm_castps_ph): Likewise. (_mm256_castps_ph): Likewise. (_mm_castpd_ph): Likewise. (_mm256_castpd_ph): Likewise. (_mm_castsi128_ph): Likewise. (_mm256_castsi256_ph): Likewise. (_mm256_castph256_ph128): Likewise. (_mm256_castph128_ph256): Likewise. (_mm256_zextph128_ph256): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-typecast-1.c: New test. * gcc.target/i386/avx512fp16-typecast-2.c: Ditto. * gcc.target/i386/avx512fp16vl-typecast-1.c: Ditto. * gcc.target/i386/avx512fp16vl-typecast-2.c: Ditto. --- gcc/config/i386/avx512fp16intrin.h | 153 ++++++++++++++++++ gcc/config/i386/avx512fp16vlintrin.h | 117 ++++++++++++++ .../gcc.target/i386/avx512fp16-typecast-1.c | 44 +++++ .../gcc.target/i386/avx512fp16-typecast-2.c | 43 +++++ .../gcc.target/i386/avx512fp16vl-typecast-1.c | 55 +++++++ .../gcc.target/i386/avx512fp16vl-typecast-2.c | 37 +++++ 6 files changed, 449 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index 05efbc5777b..ddb227529fa 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -192,6 +192,159 @@ _mm512_setzero_ph (void) return _mm512_set1_ph (0.0f); } +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_undefined_ph (void) +{ + __m128h __Y = __Y; + return __Y; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_undefined_ph (void) +{ + __m256h __Y = __Y; + return __Y; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_undefined_ph (void) +{ + __m512h __Y = __Y; + return __Y; +} + +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_h (__m128h __A) +{ + return __A[0]; +} + +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtsh_h (__m256h __A) +{ + return __A[0]; +} + +extern __inline _Float16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsh_h (__m512h __A) +{ + return __A[0]; +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_ps (__m512h __a) +{ + return (__m512) __a; +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_pd (__m512h __a) +{ + return (__m512d) __a; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph_si512 (__m512h __a) +{ + return (__m512i) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph512_ph128 (__m512h __A) +{ + union + { + __m128h a[4]; + __m512h v; + } u = { .v = __A }; + return u.a[0]; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph512_ph256 (__m512h __A) +{ + union + { + __m256h a[2]; + __m512h v; + } u = { .v = __A }; + return u.a[0]; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph128_ph512 (__m128h __A) +{ + union + { + __m128h a[4]; + __m512h v; + } u; + u.a[0] = __A; + return u.v; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castph256_ph512 (__m256h __A) +{ + union + { + __m256h a[2]; + __m512h v; + } u; + u.a[0] = __A; + return u.v; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_zextph128_ph512 (__m128h __A) +{ + return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (), + (__m128) __A, 0); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_zextph256_ph512 (__m256h __A) +{ + return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (), + (__m256d) __A, 0); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castps_ph (__m512 __a) +{ + return (__m512h) __a; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castpd_ph (__m512d __a) +{ + return (__m512h) __a; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_castsi512_ph (__m512i __a) +{ + return (__m512h) __a; +} + /* Create a vector with element 0 as F and the rest zero. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 0124b830dd5..bcbe4523357 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -34,6 +34,123 @@ #define __DISABLE_AVX512FP16VL__ #endif /* __AVX512FP16VL__ */ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_ps (__m128h __a) +{ + return (__m128) __a; +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_ps (__m256h __a) +{ + return (__m256) __a; +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_pd (__m128h __a) +{ + return (__m128d) __a; +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_pd (__m256h __a) +{ + return (__m256d) __a; +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castph_si128 (__m128h __a) +{ + return (__m128i) __a; +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph_si256 (__m256h __a) +{ + return (__m256i) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castps_ph (__m128 __a) +{ + return (__m128h) __a; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castps_ph (__m256 __a) +{ + return (__m256h) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castpd_ph (__m128d __a) +{ + return (__m128h) __a; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castpd_ph (__m256d __a) +{ + return (__m256h) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_castsi128_ph (__m128i __a) +{ + return (__m128h) __a; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castsi256_ph (__m256i __a) +{ + return (__m256h) __a; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph256_ph128 (__m256h __A) +{ + union + { + __m128h a[2]; + __m256h v; + } u = { .v = __A }; + return u.a[0]; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_castph128_ph256 (__m128h __A) +{ + union + { + __m128h a[2]; + __m256h v; + } u; + u.a[0] = __A; + return u.v; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_zextph128_ph256 (__m128h __A) +{ + return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (), + (__m128) __A, 0); +} + /* Intrinsics v[add,sub,mul,div]ph. */ extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c new file mode 100644 index 00000000000..cf0cc7443c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c @@ -0,0 +1,44 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +void +test_512 (void) +{ + V512 res; + + res.ymmh[0] = _mm512_castph512_ph256 (src1.zmmh); + check_results (&res, &src1, 16, "_mm512_castph512_ph256"); + + res.xmmh[0] = _mm512_castph512_ph128 (src1.zmmh); + check_results (&res, &src1, 8, "_mm512_castph512_ph128"); + + res.zmmh = _mm512_castph256_ph512 (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm512_castph256_ph512"); + + res.zmmh = _mm512_castph128_ph512 (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm512_castph128_ph512"); + + res.zmm = _mm512_castph_ps (src1.zmmh); + check_results (&res, &src1, 32, "_mm512_castph_ps"); + + res.zmmd = _mm512_castph_pd (src1.zmmh); + check_results (&res, &src1, 32, "_mm512_castph_pd"); + + res.zmmi = _mm512_castph_si512 (src1.zmmh); + check_results (&res, &src1, 32, "_mm512_castph_si512"); + + res.zmmh = _mm512_castps_ph (src1.zmm); + check_results (&res, &src1, 32, "_mm512_castps_ph"); + + res.zmmh = _mm512_castpd_ph (src1.zmmd); + check_results (&res, &src1, 32, "_mm512_castpd_ph"); + + res.zmmh = _mm512_castsi512_ph (src1.zmmi); + check_results (&res, &src1, 32, "_mm512_castsi512_ph"); + + if (n_errs != 0) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c new file mode 100644 index 00000000000..a29f1dbd76a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c @@ -0,0 +1,43 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */ + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512f-check.h" + +extern int memcmp (const void *, const void *, __SIZE_TYPE__); + +void +do_test (void) +{ + union512i_d zero; + union512h ad; + union256h b,bd; + union128h c; + + int i; + + for (i = 0; i < 16; i++) + { + b.a[i] = 65.43f + i; + zero.a[i] = 0; + } + + for (i = 0; i < 8; i++) + { + c.a[i] = 32.01f + i; + } + + ad.x = _mm512_zextph256_ph512 (b.x); + if (memcmp (ad.a, b.a, 32) + || memcmp (&ad.a[16], &zero.a, 32)) + abort (); + + ad.x = _mm512_zextph128_ph512 (c.x); + if (memcmp (ad.a, c.a, 16) + || memcmp (&ad.a[8], &zero.a, 48)) + abort (); + +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c new file mode 100644 index 00000000000..3621bb52f08 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c @@ -0,0 +1,55 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +#define AVX512FP16 +#include "avx512fp16-helper.h" + +void +test_512 (void) +{ + V512 res; + res.xmm[0] = _mm_castph_ps (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm_castph_ps"); + + res.xmmd[0] = _mm_castph_pd (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm_castph_pd"); + + res.xmmi[0] = _mm_castph_si128 (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm_castph_si128"); + + res.xmmh[0] = _mm_castps_ph (src1.xmm[0]); + check_results (&res, &src1, 8, "_mm_castps_ph"); + + res.xmmh[0] = _mm_castpd_ph (src1.xmmd[0]); + check_results (&res, &src1, 8, "_mm_castpd_ph"); + + res.xmmh[0] = _mm_castsi128_ph (src1.xmmi[0]); + check_results (&res, &src1, 8, "_mm_castsi128_ph"); + + res.ymm[0] = _mm256_castph_ps (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm256_castph_ps"); + + res.ymmd[0] = _mm256_castph_pd (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm256_castph_pd"); + + res.ymmi[0] = _mm256_castph_si256 (src1.ymmh[0]); + check_results (&res, &src1, 16, "_mm256_castph_si256"); + + res.ymmh[0] = _mm256_castps_ph (src1.ymm[0]); + check_results (&res, &src1, 16, "_mm256_castps_ph"); + + res.ymmh[0] = _mm256_castpd_ph (src1.ymmd[0]); + check_results (&res, &src1, 16, "_mm256_castpd_ph"); + + res.ymmh[0] = _mm256_castsi256_ph (src1.ymmi[0]); + check_results (&res, &src1, 16, "_mm256_castsi256_ph"); + + res.xmmh[0] = _mm256_castph256_ph128 (src1.ymmh[0]); + check_results (&res, &src1, 8, "_mm256_castph256_ph128"); + + res.ymmh[0] = _mm256_castph128_ph256 (src1.xmmh[0]); + check_results (&res, &src1, 8, "_mm256_castph128_ph256"); + + if (n_errs != 0) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c new file mode 100644 index 00000000000..dce387f1fab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c @@ -0,0 +1,37 @@ +/* { dg-do run { target avx512fp16 } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ + +static void do_test (void); + +#define DO_TEST do_test +#define AVX512FP16 +#include "avx512f-check.h" + +extern int memcmp (const void *, const void *, __SIZE_TYPE__); + +void +do_test (void) +{ + union512i_d zero; + union512h ad; + union256h b,bd; + union128h c; + + int i; + + for (i = 0; i < 16; i++) + { + b.a[i] = 65.43f + i; + zero.a[i] = 0; + } + + for (i = 0; i < 8; i++) + { + c.a[i] = 32.01f + i; + } + + bd.x = _mm256_zextph128_ph256 (c.x); + if (memcmp (bd.a, c.a, 16) + || memcmp (&bd.a[8], &zero.a, 16)) + abort (); +} -- 2.18.1