[gcc r14-5072] Support cmul{_conj}v4hf3/cmla{_conj}v4hf4 with AVX512FP16 instruction.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r14-5072] Support cmul{_conj}v4hf3/cmla{_conj}v4hf4 with AVX512FP16 instruction.
@ 2023-11-02  7:17 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2023-11-02  7:17 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1bb7d6f081023069944f1f46701adf3ed4c4029d

commit r14-5072-g1bb7d6f081023069944f1f46701adf3ed4c4029d
Author: liuhongt <hongtao.liu@intel.com>
Date:   Wed Nov 1 15:01:48 2023 +0800

    Support cmul{_conj}v4hf3/cmla{_conj}v4hf4 with AVX512FP16 instruction.
    
    gcc/ChangeLog:
    
            * config/i386/mmx.md (cmlav4hf4): New expander.
            (cmla_conjv4hf4): Ditto.
            (cmulv4hf3): Ditto.
            (cmul_conjv4hf3): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/part-vect-complexhf.c: New test.

Diff:
---
 gcc/config/i386/mmx.md                             | 86 ++++++++++++++++++++++
 .../gcc.target/i386/part-vect-complexhf.c          | 40 ++++++++++
 2 files changed, 126 insertions(+)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2b97bb8fa98d..ba81ff72551e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2622,6 +2622,92 @@
   DONE;
 })
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel half-precision floating point complex type operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "cmlav4hf4"
+  [(match_operand:V4HF 0 "register_operand")
+   (match_operand:V4HF 1 "vector_operand")
+   (match_operand:V4HF 2 "vector_operand")
+   (match_operand:V4HF 3 "vector_operand")]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  rtx op3 = gen_reg_rtx (V8HFmode);
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movq_v4hf_to_sse (op3, operands[3]));
+  emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
+  emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
+
+  emit_insn (gen_cmlav8hf4 (op0, op1, op2, op3));
+
+  emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
+  DONE;
+})
+
+(define_expand "cmla_conjv4hf4"
+  [(match_operand:V4HF 0 "register_operand")
+   (match_operand:V4HF 1 "vector_operand")
+   (match_operand:V4HF 2 "vector_operand")
+   (match_operand:V4HF 3 "vector_operand")]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  rtx op3 = gen_reg_rtx (V8HFmode);
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movq_v4hf_to_sse (op3, operands[3]));
+  emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
+  emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
+
+  emit_insn (gen_cmla_conjv8hf4 (op0, op1, op2, op3));
+
+  emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
+  DONE;
+})
+
+(define_expand "cmulv4hf3"
+  [(match_operand:V4HF 0 "register_operand")
+   (match_operand:V4HF 1 "vector_operand")
+   (match_operand:V4HF 2 "vector_operand")]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
+  emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
+
+  emit_insn (gen_cmulv8hf3 (op0, op1, op2));
+  emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
+  DONE;
+})
+
+(define_expand "cmul_conjv4hf3"
+  [(match_operand:V4HF 0 "register_operand")
+   (match_operand:V4HF 1 "vector_operand")
+   (match_operand:V4HF 2 "vector_operand")]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
+  emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
+
+  emit_insn (gen_cmul_conjv8hf3 (op0, op1, op2));
+  emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel half-precision floating point conversion operations
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-complexhf.c b/gcc/testsuite/gcc.target/i386/part-vect-complexhf.c
new file mode 100644
index 000000000000..b9f4ba2f4cf2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-complexhf.c
@@ -0,0 +1,40 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O1 -ftree-vectorize -ffast-math -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vfmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*ph\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*sh\[ \\t\]"} } */
+/* { dg-final { scan-assembler-times "vfcmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmulcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfcmulcph\[ \\t\]" 1 } } */
+
+#include<complex.h>
+#define TYPE _Float16
+#define N 2
+
+void fma0 (_Complex TYPE *a, _Complex TYPE *b,
+           _Complex TYPE * __restrict c)
+{
+  for (int i = 0; i < N; i++)
+    c[i] += a[i] * b[i];
+}
+
+void fmaconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+	      _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+    c[i] += a[i] * ~b[i];
+}
+
+void fmul (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+	   _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+    c[i] = a[i] * b[i];
+}
+
+void fmulconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+	       _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+    c[i] = a[i] * ~b[i];
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-11-02  7:17 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-02  7:17 [gcc r14-5072] Support cmul{_conj}v4hf3/cmla{_conj}v4hf4 with AVX512FP16 instruction hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).