From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id B42AC3858D39; Thu, 23 Sep 2021 09:08:53 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B42AC3858D39 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-3833] AVX512FP16: Add expander for fmahf4 X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/master X-Git-Oldrev: cb8d916eef53327d922f5301bc2ce29735fb7e24 X-Git-Newrev: 1041111f19849da29117a0e962ce7ad5262a4403 Message-Id: <20210923090853.B42AC3858D39@sourceware.org> Date: Thu, 23 Sep 2021 09:08:53 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 23 Sep 2021 09:08:53 -0000 https://gcc.gnu.org/g:1041111f19849da29117a0e962ce7ad5262a4403 commit r12-3833-g1041111f19849da29117a0e962ce7ad5262a4403 Author: liuhongt Date: Tue Jul 14 14:45:32 2020 +0800 AVX512FP16: Add expander for fmahf4 gcc/ChangeLog: * config/i386/sse.md (FMAMODEM): extend to handle FP16. (VFH_SF_AVX512VL): Extend to handle HFmode. (VF_SF_AVX512VL): Deleted. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-fma-1.c: New test. * gcc.target/i386/avx512fp16vl-fma-1.c: New test. * gcc.target/i386/avx512fp16vl-fma-vectorize-1.c: New test. 
Diff: --- gcc/config/i386/sse.md | 11 ++-- gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c | 69 +++++++++++++++++++++ gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c | 70 ++++++++++++++++++++++ .../gcc.target/i386/avx512fp16vl-fma-vectorize-1.c | 45 ++++++++++++++ 4 files changed, 190 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9079613e829..1ca95984afc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4650,7 +4650,11 @@ (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V16SF "TARGET_AVX512F") - (V8DF "TARGET_AVX512F")]) + (V8DF "TARGET_AVX512F") + (HF "TARGET_AVX512FP16") + (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V32HF "TARGET_AVX512FP16")]) (define_expand "fma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -4758,14 +4762,11 @@ (set_attr "mode" "<MODE>")]) ;; Suppose AVX-512F as baseline -(define_mode_iterator VF_SF_AVX512VL - [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) - (define_mode_iterator VFH_SF_AVX512VL [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (HF "TARGET_AVX512FP16") SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c new file mode 100644 index 00000000000..d78d7629838 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16" } */ + +typedef _Float16 v32hf __attribute__ ((__vector_size__ (64))); + +_Float16 +foo1 (_Float16 a, _Float16 b, _Float16 c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times 
"vfmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +_Float16 +foo2 (_Float16 a, _Float16 b, _Float16 c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +_Float16 +foo3 (_Float16 a, _Float16 b, _Float16 c) +{ + return a * b - c; +} + +/* { dg-final { scan-assembler-times "vfmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +_Float16 +foo4 (_Float16 a, _Float16 b, _Float16 c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v32hf +foo5 (v32hf a, v32hf b, v32hf c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + +v32hf +foo6 (v32hf a, v32hf b, v32hf c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + +v32hf +foo7 (v32hf a, v32hf b, v32hf c) +{ + return a * b - c; +} + +/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + +v32hf +foo8 (v32hf a, v32hf b, v32hf c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c new file mode 100644 index 00000000000..1a832f37d6c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */ + +typedef _Float16 v8hf __attribute__ ((__vector_size__ (16))); +typedef _Float16 v16hf __attribute__ ((__vector_size__ (32))); + +v8hf +foo1 (v8hf a, v8hf b, v8hf c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v8hf +foo2 (v8hf a, v8hf b, v8hf c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v8hf +foo3 (v8hf a, v8hf b, v8hf c) +{ + return a * b - c; +} + +/* { dg-final 
{ scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v8hf +foo4 (v8hf a, v8hf b, v8hf c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v16hf +foo5 (v16hf a, v16hf b, v16hf c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +v16hf +foo6 (v16hf a, v16hf b, v16hf c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +v16hf +foo7 (v16hf a, v16hf b, v16hf c) +{ + return a * b - c; +} + +/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +v16hf +foo8 (v16hf a, v16hf b, v16hf c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c new file mode 100644 index 00000000000..d0b8bec34f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */ + +typedef _Float16 v8hf __attribute__ ((__vector_size__ (16))); +typedef _Float16 v16hf __attribute__ ((__vector_size__ (32))); + +void +foo1 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int i = 0; i != 8; i++) + pd[i] = pa[i] * pb[i] + pc[i]; +} + +/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +void +foo2 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int i = 0; i != 8; i++) + pd[i] = -pa[i] * pb[i] + pc[i]; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +void +foo3 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int 
i = 0; i != 8; i++) + pd[i] = pa[i] * pb[i] - pc[i]; +} + +/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +void +foo4 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int i = 0; i != 8; i++) + pd[i] = -pa[i] * pb[i] - pc[i]; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */