public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "Kong, Lingling" <lingling.kong@intel.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: "Liu, Hongtao" <hongtao.liu@intel.com>,
	"Kong, Lingling" <lingling.kong@intel.com>
Subject: [PATCH] i386: Combine the FADD(A, FMA(B, C, 0)) to FMA(B, C, A) and combine FADD(A, FMUL(B, C)) to FMA(B, C, A).
Date: Fri, 22 Oct 2021 05:56:36 +0000	[thread overview]
Message-ID: <DM4PR11MB548717C429EA6CA35195B4D0EC809@DM4PR11MB5487.namprd11.prod.outlook.com> (raw)

Hi,

This patch is to support transform in fast-math something like _mm512_add_ph(x1, _mm512_fmadd_pch(a, b, _mm512_setzero_ph())) to  _mm512_fmadd_pch(a, b, x1).

And support transform _mm512_add_ph(x1, _mm512_fmul_pch(a, b)) to _mm512_fmadd_pch(a, b, x1).
Ok for master?

gcc/ChangeLog:

	* config/i386/sse.md (fma_<mode>_fadd_fmul): Add new
	define_insn_and_split.
	(fma_<mode>_fadd_fcmul):Likewise
	(fma_<complexopname>_<mode>_fma_zero):Likewise

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-complex-fma.c: New test.
---
 gcc/config/i386/sse.md                        | 52 +++++++++++++++++++
 .../gcc.target/i386/avx512fp16-complex-fma.c  | 18 +++++++
 2 files changed, 70 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index fbf056bf9e6..36407ca4a59 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5958,6 +5958,58 @@
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "fma_<mode>_fadd_fmul"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+	(plus:VF_AVX512FP16VL
+	  (unspec:VF_AVX512FP16VL
+		[(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+		 (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
+		 UNSPEC_COMPLEX_FMUL)
+	  (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VF_AVX512FP16VL
+	  [(match_dup 1) (match_dup 2) (match_dup 3)]
+	   UNSPEC_COMPLEX_FMA))])
+
+(define_insn_and_split "fma_<mode>_fadd_fcmul"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+	(plus:VF_AVX512FP16VL
+	  (unspec:VF_AVX512FP16VL
+		[(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+		 (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
+		 UNSPEC_COMPLEX_FCMUL)
+	  (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VF_AVX512FP16VL
+	  [(match_dup 1) (match_dup 2) (match_dup 3)]
+	   UNSPEC_COMPLEX_FCMA))])
+
+(define_insn_and_split "fma_<complexopname>_<mode>_fma_zero"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+	(plus:VF_AVX512FP16VL
+	  (unspec:VF_AVX512FP16VL
+		[(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+		 (match_operand:VF_AVX512FP16VL 2 "vector_operand")
+		 (match_operand:VF_AVX512FP16VL 3 "const0_operand")]
+		 UNSPEC_COMPLEX_F_C_MA)
+	  (match_operand:VF_AVX512FP16VL 4 "vector_operand")))]
+  "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
+  && ix86_pre_reload_split()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VF_AVX512FP16VL
+	  [(match_dup 1) (match_dup 2) (match_dup 4)]
+	   UNSPEC_COMPLEX_F_C_MA))])
+
 (define_insn "<avx512>_<complexopname>_<mode>_mask<round_name>"
   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
 	(vec_merge:VF_AVX512FP16VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c
new file mode 100644
index 00000000000..2dfd369e785
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vfmaddcph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-not "vaddph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)"} } */
+/* { dg-final { scan-assembler-not "vfmulcph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)"} } */
+/* { dg-final { scan-assembler-times "vfcmaddcph\[ 
+\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(
+?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+volatile __m512h x1, x2, res, a, b;
+void extern
+avx512f_test (void)
+{
+  res = _mm512_add_ph (x1, _mm512_fmadd_pch (a, b, 
+_mm512_setzero_ph()));
+  res = _mm512_add_ph (x1, _mm512_fcmadd_pch (a, b, 
+_mm512_setzero_ph()));
+
+  res = _mm512_add_ph (x1, _mm512_fmul_pch (a, b));
+  res = _mm512_add_ph (x1, _mm512_fcmul_pch (a, b)); }
--
2.18.1


             reply	other threads:[~2021-10-22  5:56 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-22  5:56 Kong, Lingling [this message]
2021-10-25  2:13 ` Hongtao Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DM4PR11MB548717C429EA6CA35195B4D0EC809@DM4PR11MB5487.namprd11.prod.outlook.com \
    --to=lingling.kong@intel.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hongtao.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).