[PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes
@ 2021-09-23  5:46 liuhongt
  2021-09-23  5:46 ` [PATCH 1/7] AVX512FP16: Add expander for rint/nearbyinthf2 liuhongt
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches

  xfail are added for testcases related to truncmn2/extendmn2 expanders since V2HF/V4HFmode
are not supported yet, they should be removed later.

  Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
  Newly added runtime testcases passed on sde{-m32,}.

Hongyu Wang (5):
  AVX512FP16: Add expander for smin/maxhf3.
  AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes
  AVX512FP16: Add float(uns)?mn2 expander
  AVX512FP16: add truncmn2/extendmn2 expanders
  AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.

liuhongt (2):
  AVX512FP16: Add expander for rint/nearbyinthf2.
  AVX512FP16: Add expander for fmahf4

 gcc/config/i386/i386-expand.c                 |   2 +
 gcc/config/i386/i386.md                       |  62 +++++
 gcc/config/i386/sse.md                        | 259 +++++++++++++++---
 .../i386/avx512fp16-vcondmn-minmax.C          |  25 ++
 .../g++.target/i386/avx512fp16-vcondmn-vec.C  |  70 +++++
 .../i386/avx512fp16-builtin-minmax-1.c        |  35 +++
 .../i386/avx512fp16-builtin-round-1.c         |  14 +
 .../gcc.target/i386/avx512fp16-floatvnhf.c    |  61 +++++
 .../gcc.target/i386/avx512fp16-fma-1.c        |  69 +++++
 .../i386/avx512fp16-trunc-extendvnhf.c        |  55 ++++
 .../gcc.target/i386/avx512fp16-trunchf.c      |  59 ++++
 .../gcc.target/i386/avx512fp16-truncvnhf.c    |  61 +++++
 .../i386/avx512fp16-vcondmn-loop-1.c          |  70 +++++
 .../i386/avx512fp16-vcondmn-loop-2.c          | 143 ++++++++++
 .../gcc.target/i386/avx512fp16-vec_cmpmn.c    |  32 +++
 .../gcc.target/i386/avx512fp16vl-fma-1.c      |  70 +++++
 .../i386/avx512fp16vl-fma-vectorize-1.c       |  45 +++
 17 files changed, 1100 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin-minmax-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c

-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/7] AVX512FP16: Add expander for rint/nearbyinthf2.
  2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
@ 2021-09-23  5:46 ` liuhongt
  2021-09-23  5:46 ` [PATCH 2/7] AVX512FP16: Add expander for fmahf4 liuhongt
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches

gcc/ChangeLog:

	* config/i386/i386.md (rinthf2): New expander.
	(nearbyinthf2): New expander.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-builtin-round-1.c: Add new testcase.
---
 gcc/config/i386/i386.md                       | 22 +++++++++++++++++++
 .../i386/avx512fp16-builtin-round-1.c         | 14 ++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 60d877668d5..4b13a59be82 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -18287,6 +18287,17 @@ (define_insn "rintxf2"
    (set_attr "znver1_decode" "vector")
    (set_attr "mode" "XF")])
 
+(define_expand "rinthf2"
+  [(match_operand:HF 0 "register_operand")
+   (match_operand:HF 1 "nonimmediate_operand")]
+  "TARGET_AVX512FP16"
+{
+  emit_insn (gen_sse4_1_roundhf2 (operands[0],
+				  operands[1],
+				  GEN_INT (ROUND_MXCSR)));
+  DONE;
+})
+
 (define_expand "rint<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
    (use (match_operand:MODEF 1 "nonimmediate_operand"))]
@@ -18320,6 +18331,17 @@ (define_expand "nearbyintxf2"
   "TARGET_USE_FANCY_MATH_387
    && !flag_trapping_math")
 
+(define_expand "nearbyinthf2"
+  [(match_operand:HF 0 "register_operand")
+   (match_operand:HF 1 "nonimmediate_operand")]
+  "TARGET_AVX512FP16"
+{
+  emit_insn (gen_sse4_1_roundhf2 (operands[0],
+				  operands[1],
+				  GEN_INT (ROUND_MXCSR | ROUND_NO_EXC)));
+  DONE;
+})
+
 (define_expand "nearbyint<mode>2"
   [(use (match_operand:MODEF 0 "register_operand"))
    (use (match_operand:MODEF 1 "nonimmediate_operand"))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-round-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-round-1.c
index 3cab1526967..a1c6636e354 100644
--- a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-round-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-round-1.c
@@ -25,7 +25,21 @@ f4 (_Float16 x)
   return __builtin_roundevenf16 (x);
 }
 
+_Float16
+f5 (_Float16 x)
+{
+  return __builtin_rintf16 (x);
+}
+
+_Float16
+f6 (_Float16 x)
+{
+  return __builtin_nearbyintf16 (x);
+}
+
 /* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$11\[^\n\r\]*xmm\[0-9\]" 1 } } */
 /* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$10\[^\n\r\]*xmm\[0-9\]" 1 } } */
 /* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$9\[^\n\r\]*xmm\[0-9\]" 1 } } */
 /* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$8\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$4\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$12\[^\n\r\]*xmm\[0-9\]" 1 } } */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/7] AVX512FP16: Add expander for fmahf4
  2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
  2021-09-23  5:46 ` [PATCH 1/7] AVX512FP16: Add expander for rint/nearbyinthf2 liuhongt
@ 2021-09-23  5:46 ` liuhongt
  2021-09-23  5:46 ` [PATCH 3/7] AVX512FP16: Add expander for smin/maxhf3 liuhongt
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches

gcc/ChangeLog:

	* config/i386/sse.md (FMAMODEM): extend to handle FP16.
	(VFH_SF_AVX512VL): Extend to handle HFmode.
	(VF_SF_AVX512VL): Deleted.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-fma-1.c: New test.
	* gcc.target/i386/avx512fp16vl-fma-1.c: New test.
	* gcc.target/i386/avx512fp16vl-fma-vectorize-1.c: New test.
---
 gcc/config/i386/sse.md                        | 11 +--
 .../gcc.target/i386/avx512fp16-fma-1.c        | 69 ++++++++++++++++++
 .../gcc.target/i386/avx512fp16vl-fma-1.c      | 70 +++++++++++++++++++
 .../i386/avx512fp16vl-fma-vectorize-1.c       | 45 ++++++++++++
 4 files changed, 190 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9079613e829..1ca95984afc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4650,7 +4650,11 @@ (define_mode_iterator FMAMODEM
    (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
    (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
    (V16SF "TARGET_AVX512F")
-   (V8DF "TARGET_AVX512F")])
+   (V8DF "TARGET_AVX512F")
+   (HF "TARGET_AVX512FP16")
+   (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V32HF "TARGET_AVX512FP16")])
 
 (define_expand "fma<mode>4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
@@ -4758,14 +4762,11 @@ (define_insn "*fma_fmadd_<mode>"
    (set_attr "mode" "<MODE>")])
 
 ;; Suppose AVX-512F as baseline
-(define_mode_iterator VF_SF_AVX512VL
-  [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
-   DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
-
 (define_mode_iterator VFH_SF_AVX512VL
   [(V32HF "TARGET_AVX512FP16")
    (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
    (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (HF "TARGET_AVX512FP16")
    SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
    DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c
new file mode 100644
index 00000000000..d78d7629838
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16" } */
+
+typedef _Float16 v32hf __attribute__ ((__vector_size__ (64)));
+
+_Float16
+foo1 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+_Float16
+foo2 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+_Float16
+foo3 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+_Float16
+foo4 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v32hf
+foo5 (v32hf a, v32hf b, v32hf c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
+v32hf
+foo6 (v32hf a, v32hf b, v32hf c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
+v32hf
+foo7 (v32hf a, v32hf b, v32hf c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
+v32hf
+foo8 (v32hf a, v32hf b, v32hf c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c
new file mode 100644
index 00000000000..1a832f37d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */
+
+typedef _Float16 v8hf __attribute__ ((__vector_size__ (16)));
+typedef _Float16 v16hf __attribute__ ((__vector_size__ (32)));
+
+v8hf
+foo1 (v8hf a, v8hf b, v8hf c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v8hf
+foo2 (v8hf a, v8hf b, v8hf c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v8hf
+foo3 (v8hf a, v8hf b, v8hf c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v8hf
+foo4 (v8hf a, v8hf b, v8hf c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v16hf
+foo5 (v16hf a, v16hf b, v16hf c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+v16hf
+foo6 (v16hf a, v16hf b, v16hf c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+v16hf
+foo7 (v16hf a, v16hf b, v16hf c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+v16hf
+foo8 (v16hf a, v16hf b, v16hf c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c
new file mode 100644
index 00000000000..d0b8bec34f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */
+
+typedef _Float16 v8hf __attribute__ ((__vector_size__ (16)));
+typedef _Float16 v16hf __attribute__ ((__vector_size__ (32)));
+
+void
+foo1 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+  for (int i = 0; i != 8; i++)
+    pd[i] = pa[i] * pb[i] + pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+void
+foo2 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+    for (int i = 0; i != 8; i++)
+    pd[i] = -pa[i] * pb[i] + pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+void
+foo3 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+  for (int i = 0; i != 8; i++)
+    pd[i] = pa[i] * pb[i] - pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+void
+foo4 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+  for (int i = 0; i != 8; i++)
+    pd[i] = -pa[i] * pb[i] - pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3/7] AVX512FP16: Add expander for smin/maxhf3.
  2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
  2021-09-23  5:46 ` [PATCH 1/7] AVX512FP16: Add expander for rint/nearbyinthf2 liuhongt
  2021-09-23  5:46 ` [PATCH 2/7] AVX512FP16: Add expander for fmahf4 liuhongt
@ 2021-09-23  5:46 ` liuhongt
  2021-09-23  5:46 ` [PATCH 4/7] AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes liuhongt
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, Hongyu Wang

From: Hongyu Wang <hongyu.wang@intel.com>

gcc/ChangeLog:

	* config/i386/i386.md (<code>hf3): New expander.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-builtin-minmax-1.c: New test.
---
 gcc/config/i386/i386.md                       | 11 ++++++
 .../i386/avx512fp16-builtin-minmax-1.c        | 35 +++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-builtin-minmax-1.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4b13a59be82..a087e557d7f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -19946,6 +19946,17 @@ (define_insn "<code><mode>3"
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "<code>hf3"
+  [(set (match_operand:HF 0 "register_operand" "=v")
+	(smaxmin:HF
+	  (match_operand:HF 1 "nonimmediate_operand" "%v")
+	  (match_operand:HF 2 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512FP16"
+  "v<maxmin_float>sh\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "type" "sseadd")
+   (set_attr "mode" "HF")])
+
 ;; These versions of the min/max patterns implement exactly the operations
 ;;   min = (op1 < op2 ? op1 : op2)
 ;;   max = (!(op1 < op2) ? op1 : op2)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-minmax-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-minmax-1.c
new file mode 100644
index 00000000000..90080e44216
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-minmax-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16 -mprefer-vector-width=512" } */
+
+_Float16
+minf1 (_Float16 a, _Float16 b)
+{
+  return __builtin_fminf16 (a, b);
+}
+
+void
+minf2 (_Float16* __restrict psrc1, _Float16* __restrict psrc2,
+       _Float16* __restrict pdst)
+{
+  for (int i = 0; i != 32; i++)
+    pdst[i] = __builtin_fminf16 (psrc1[i], psrc2[i]);
+}
+
+_Float16
+maxf1 (_Float16 a, _Float16 b)
+{
+  return __builtin_fmaxf16 (a, b);
+}
+
+void
+maxf2 (_Float16* __restrict psrc1, _Float16* __restrict psrc2,
+       _Float16* __restrict pdst)
+{
+  for (int i = 0; i != 32; i++)
+    pdst[i] = __builtin_fmaxf16 (psrc1[i], psrc2[i]);
+}
+
+/* { dg-final { scan-assembler-times "vmaxsh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminsh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminph\[^\n\r\]*zmm\[0-9\]" 1 } } */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 4/7] AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes
  2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
                   ` (2 preceding siblings ...)
  2021-09-23  5:46 ` [PATCH 3/7] AVX512FP16: Add expander for smin/maxhf3 liuhongt
@ 2021-09-23  5:46 ` liuhongt
  2021-09-23  5:46 ` [PATCH 5/7] AVX512FP16: Add float(uns)?mn2 expander liuhongt
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, Hongyu Wang

From: Hongyu Wang <hongyu.wang@intel.com>

NB: 64bit/32bit vectorize for HFmode is not supported for now, will
adjust this patch when V2HF/V4HF operations supported.

gcc/ChangeLog:

	* config/i386/i386.md (fix<fixunssuffix>_trunchf<mode>2): New expander.
	(fixuns_trunchfhi2): Likewise.
	(*fixuns_trunchfsi2zext): New define_insn.
	* config/i386/sse.md (ssePHmodelower): New mode_attr.
	(fix<fixunssuffix>_trunc<ssePHmodelower><mode>2):
	New expander for same element vector fix_truncate.
	(fix<fixunssuffix>_trunc<ssePHmodelower><mode>2):
	Likewise for V4HF to V4SI/V4DI fix_truncate.
	(fix<fixunssuffix>_truncv2hfv2di2):
	Likeise for V2HF to V2DI fix_truncate.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-trunchf.c: New test.
	* gcc.target/i386/avx512fp16-truncvnhf.c: Ditto.
---
 gcc/config/i386/i386.md                       | 29 +++++++++
 gcc/config/i386/sse.md                        | 43 +++++++++++++
 .../gcc.target/i386/avx512fp16-trunchf.c      | 59 ++++++++++++++++++
 .../gcc.target/i386/avx512fp16-truncvnhf.c    | 61 +++++++++++++++++++
 4 files changed, 192 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a087e557d7f..c6279e620c9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4810,6 +4810,16 @@ (define_expand "fix_trunc<mode>di2"
    }
 })
 
+(define_insn "fix<fixunssuffix>_trunchf<mode>2"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(any_fix:SWI48
+	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512FP16"
+  "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 ;; Signed conversion to SImode.
 
 (define_expand "fix_truncxfsi2"
@@ -4917,6 +4927,17 @@ (define_insn "fixuns_trunc<mode>si2_avx512f"
    (set_attr "prefix" "evex")
    (set_attr "mode" "SI")])
 
+(define_insn "*fixuns_trunchfsi2zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unsigned_fix:SI
+	    (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
+  "TARGET_64BIT && TARGET_AVX512FP16"
+  "vcvttsh2usi\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "SI")])
+
 (define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
@@ -4949,6 +4970,14 @@ (define_insn_and_split "*fixuns_trunc<mode>_1"
 ;; Without these patterns, we'll try the unsigned SI conversion which
 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
 
+(define_expand "fixuns_trunchfhi2"
+  [(set (match_dup 2)
+	(fix:SI (match_operand:HF 1 "nonimmediate_operand")))
+   (set (match_operand:HI 0 "nonimmediate_operand")
+	(subreg:HI (match_dup 2) 0))]
+  "TARGET_AVX512FP16"
+  "operands[2] = gen_reg_rtx (SImode);")
+
 (define_expand "fixuns_trunc<mode>hi2"
   [(set (match_dup 2)
 	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ca95984afc..f8a5f197f3c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1034,6 +1034,13 @@ (define_mode_attr ssePHmode
    (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF")
    (V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")])
 
+;; Mapping of vector modes to vector hf modes of same element.
+(define_mode_attr ssePHmodelower
+  [(V32HI "v32hf") (V16HI "v16hf") (V8HI "v8hf")
+   (V16SI "v16hf") (V8SI "v8hf") (V4SI "v4hf")
+   (V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf")
+   (V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")])
+
 ;; Mapping of vector modes to packed single mode of the same size
 (define_mode_attr ssePSmode
   [(V16SI "V16SF") (V8DF "V16SF")
@@ -6175,6 +6182,12 @@ (define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "HF")])
 
+(define_expand "fix<fixunssuffix>_trunc<ssePHmodelower><mode>2"
+  [(set (match_operand:VI2H_AVX512VL 0 "register_operand")
+	(any_fix:VI2H_AVX512VL
+	  (match_operand:<ssePHmode> 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly_name>"
   [(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v")
 	(any_fix:VI2H_AVX512VL
@@ -6185,6 +6198,21 @@ (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "fix<fixunssuffix>_truncv4hf<mode>2"
+  [(set (match_operand:VI4_128_8_256 0 "register_operand")
+	(any_fix:VI4_128_8_256
+	  (match_operand:V4HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  if (!MEM_P (operands[1]))
+    {
+      operands[1] = lowpart_subreg (V8HFmode, operands[1], V4HFmode);
+      emit_insn (gen_avx512fp16_fix<fixunssuffix>_trunc<mode>2 (operands[0],
+								operands[1]));
+      DONE;
+    }
+})
+
 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name>"
   [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v")
 	(any_fix:VI4_128_8_256
@@ -6207,6 +6235,21 @@ (define_insn "*avx512fp16_fix<fixunssuffix>_trunc<mode>2_load<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "fix<fixunssuffix>_truncv2hfv2di2"
+  [(set (match_operand:V2DI 0 "register_operand")
+	(any_fix:V2DI
+	  (match_operand:V2HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  if (!MEM_P (operands[1]))
+    {
+      operands[1] = lowpart_subreg (V8HFmode, operands[1], V2HFmode);
+      emit_insn (gen_avx512fp16_fix<fixunssuffix>_truncv2di2 (operands[0],
+							      operands[1]));
+      DONE;
+  }
+})
+
 (define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>"
   [(set (match_operand:V2DI 0 "register_operand" "=v")
 	(any_fix:V2DI
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
new file mode 100644
index 00000000000..2c025b7803c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "xorl\[ \\t\]+%edx, %edx" { target ia32 } } } */
+
+#include <immintrin.h>
+
+short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si16 (_Float16 f)
+{
+  return f;
+}
+
+unsigned short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su16 (_Float16 f)
+{
+  return f;
+}
+
+int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si32 (_Float16 f)
+{
+  return f;
+}
+
+unsigned int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su32 (_Float16 f)
+{
+  return f;
+}
+
+long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si64 (_Float16 f)
+{
+  return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64 (_Float16 f)
+{
+  return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64_zext (_Float16 f)
+{
+  return (unsigned int) f;
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
new file mode 100644
index 00000000000..ee55cd12300
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */
+
+extern long long di[8];
+extern unsigned long long udi[8];
+extern int si[16];
+extern unsigned int usi[16];
+extern short hi[32];
+extern unsigned short uhi[32];
+extern _Float16 hf[32];
+
+#define DO_PRAGMA(X) _Pragma(#X)
+
+#define FIX_TRUNCHFVV(size, mode)	    \
+  void __attribute__ ((noinline, noclone))  \
+fix_trunc##size##hf##v##size##mode ()   \
+{\
+  int i;  \
+  DO_PRAGMA (GCC unroll size)	\
+  for (i = 0; i < size; i++)  \
+    mode[i] = hf[i];  \
+}
+
+FIX_TRUNCHFVV(32, hi)
+FIX_TRUNCHFVV(16, hi)
+FIX_TRUNCHFVV(8, hi)
+FIX_TRUNCHFVV(16, si)
+FIX_TRUNCHFVV(8, si)
+FIX_TRUNCHFVV(4, si)
+FIX_TRUNCHFVV(8, di)
+FIX_TRUNCHFVV(4, di)
+FIX_TRUNCHFVV(2, di)
+
+FIX_TRUNCHFVV(32, uhi)
+FIX_TRUNCHFVV(16, uhi)
+FIX_TRUNCHFVV(8, uhi)
+FIX_TRUNCHFVV(16, usi)
+FIX_TRUNCHFVV(8, usi)
+FIX_TRUNCHFVV(4, usi)
+FIX_TRUNCHFVV(8, udi)
+FIX_TRUNCHFVV(4, udi)
+FIX_TRUNCHFVV(2, udi)
+
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 5/7] AVX512FP16: Add float(uns)?mn2 expander
  2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
                   ` (3 preceding siblings ...)
  2021-09-23  5:46 ` [PATCH 4/7] AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes liuhongt
@ 2021-09-23  5:46 ` liuhongt
  2021-09-23  5:46 ` [PATCH 6/7] AVX512FP16: add truncmn2/extendmn2 expanders liuhongt
  2021-09-23  5:46 ` [PATCH 7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes liuhongt
  6 siblings, 0 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, Hongyu Wang

From: Hongyu Wang <hongyu.wang@intel.com>

gcc/ChangeLog:

	* config/i386/sse.md (float<floatunssuffix><mode><ssePHmodelower>2):
	New expander.
	(avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>):
	Rename to ...
	(float<floatunssuffix><mode>v4hf2): ... this, and drop constraints.
	(avx512fp16_vcvt<floatsuffix>qq2ph_v2di): Rename to ...
	(float<floatunssuffix>v2div2hf2): ... this, and likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-floatvnhf.c: New test.
---
 gcc/config/i386/sse.md                        | 46 +++++++++++---
 .../gcc.target/i386/avx512fp16-floatvnhf.c    | 61 +++++++++++++++++++
 2 files changed, 99 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f8a5f197f3c..66062dc3bcf 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6006,6 +6006,12 @@ (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "float<floatunssuffix><mode><ssePHmodelower>2"
+  [(set (match_operand:<ssePHmode> 0 "register_operand")
+	(any_float:<ssePHmode>
+	  (match_operand:VI2H_AVX512VL 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
 (define_insn "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode><mask_name><round_name>"
   [(set (match_operand:<ssePHmode> 0 "register_operand" "=v")
 	(any_float:<ssePHmode>
@@ -6016,11 +6022,23 @@ (define_insn "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode><mask_name><r
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>"
-  [(set (match_operand:V8HF 0 "register_operand" "=v")
+(define_expand "float<floatunssuffix><mode>v4hf2"
+  [(set (match_operand:V4HF 0 "register_operand")
+	(any_float:V4HF
+	  (match_operand:VI4_128_8_256 1 "vector_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  operands[0] = lowpart_subreg (V8HFmode, operands[0], V4HFmode);
+  emit_insn (gen_avx512fp16_float<floatunssuffix><mode>v4hf2 (operands[0],
+							      operands[1]));
+  DONE;
+})
+
+(define_expand "avx512fp16_float<floatunssuffix><mode>v4hf2"
+  [(set (match_operand:V8HF 0 "register_operand")
 	(vec_concat:V8HF
-	    (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm"))
-	    (match_dup 2)))]
+	  (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand"))
+	  (match_dup 2)))]
   "TARGET_AVX512FP16 && TARGET_AVX512VL"
   "operands[2] = CONST0_RTX (V4HFmode);")
 
@@ -6079,11 +6097,23 @@ (define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask_1"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "avx512fp16_vcvt<floatsuffix>qq2ph_v2di"
-  [(set (match_operand:V8HF 0 "register_operand" "=v")
+(define_expand "float<floatunssuffix>v2div2hf2"
+  [(set (match_operand:V2HF 0 "register_operand")
+	(any_float:V2HF
+	  (match_operand:V2DI 1 "vector_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  operands[0] = lowpart_subreg (V8HFmode, operands[0], V2HFmode);
+  emit_insn (gen_avx512fp16_float<floatunssuffix>v2div2hf2 (operands[0],
+							    operands[1]));
+  DONE;
+})
+
+(define_expand "avx512fp16_float<floatunssuffix>v2div2hf2"
+  [(set (match_operand:V8HF 0 "register_operand")
 	(vec_concat:V8HF
-	    (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm"))
-	    (match_dup 2)))]
+	  (any_float:V2HF (match_operand:V2DI 1 "vector_operand"))
+	  (match_dup 2)))]
   "TARGET_AVX512FP16 && TARGET_AVX512VL"
   "operands[2] = CONST0_RTX (V6HFmode);")
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c
new file mode 100644
index 00000000000..112ac3e74d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-floatvnhf.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */
+
+extern long long di[8];
+extern unsigned long long udi[8];
+extern int si[16];
+extern unsigned int usi[16];
+extern short hi[32];
+extern unsigned short uhi[32];
+extern _Float16 hf[32];
+
+#define DO_PRAGMA(X) _Pragma(#X)
+
+#define FLOATHFVV(size, mode)	    \
+  void __attribute__ ((noinline, noclone))  \
+float##v##size##mode##v##size##hf ()   \
+{\
+  int i;  \
+  DO_PRAGMA (GCC unroll size)	\
+  for (i = 0; i < size; i++)  \
+    hf[i] = (_Float16) mode[i];  \
+}
+
+FLOATHFVV(32, hi)
+FLOATHFVV(16, hi)
+FLOATHFVV(8, hi)
+FLOATHFVV(16, si)
+FLOATHFVV(8, si)
+FLOATHFVV(4, si)
+FLOATHFVV(8, di)
+FLOATHFVV(4, di)
+FLOATHFVV(2, di)
+
+FLOATHFVV(32, uhi)
+FLOATHFVV(16, uhi)
+FLOATHFVV(8, uhi)
+FLOATHFVV(16, usi)
+FLOATHFVV(8, usi)
+FLOATHFVV(4, usi)
+FLOATHFVV(8, udi)
+FLOATHFVV(4, udi)
+FLOATHFVV(2, udi)
+
+/* { dg-final { scan-assembler-times "vcvtqq2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtudq2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuw2ph\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 6/7] AVX512FP16: add truncmn2/extendmn2 expanders
  2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
                   ` (4 preceding siblings ...)
  2021-09-23  5:46 ` [PATCH 5/7] AVX512FP16: Add float(uns)?mn2 expander liuhongt
@ 2021-09-23  5:46 ` liuhongt
  2021-09-23  5:46 ` [PATCH 7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes liuhongt
  6 siblings, 0 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, Hongyu Wang

From: Hongyu Wang <hongyu.wang@intel.com>

gcc/ChangeLog:

	* config/i386/sse.md (extend<ssePHmodelower><mode>2):
	New expander.
	(extendv4hf<mode>2): Likewise.
	(extendv2hfv2df2): Likewise.
	(trunc<mode><ssePHmodelower>2): Likewise.
	(avx512fp16_vcvt<castmode>2ph_<mode>): Rename to ...
	(trunc<mode>v4hf2): ... this, and drop constraints.
	(avx512fp16_vcvtpd2ph_v2df): Rename to ...
	(truncv2dfv2hf2): ... this, and likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-trunc-extendvnhf.c: New test.
---
 gcc/config/i386/sse.md                        | 75 +++++++++++++++++--
 .../i386/avx512fp16-trunc-extendvnhf.c        | 55 ++++++++++++++
 2 files changed, 123 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 66062dc3bcf..a48c8e8bede 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6328,6 +6328,12 @@ (define_mode_attr ph2pssuffix
   [(V16SF "x") (V8SF "x") (V4SF "x")
    (V8DF "") (V4DF "") (V2DF "")])
 
+(define_expand "extend<ssePHmodelower><mode>2"
+  [(set (match_operand:VF48H_AVX512VL 0 "register_operand")
+	(float_extend:VF48H_AVX512VL
+	  (match_operand:<ssePHmode> 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
 (define_insn "avx512fp16_float_extend_ph<mode>2<mask_name><round_saeonly_name>"
   [(set (match_operand:VF48H_AVX512VL 0 "register_operand" "=v")
 	(float_extend:VF48H_AVX512VL
@@ -6338,6 +6344,21 @@ (define_insn "avx512fp16_float_extend_ph<mode>2<mask_name><round_saeonly_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "extendv4hf<mode>2"
+  [(set (match_operand:VF4_128_8_256 0 "register_operand")
+	(float_extend:VF4_128_8_256
+	  (match_operand:V4HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  if (!MEM_P (operands[1]))
+    {
+      operands[1] = lowpart_subreg (V8HFmode, operands[1], V4HFmode);
+      emit_insn (gen_avx512fp16_float_extend_ph<mode>2
+		 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
 (define_insn "avx512fp16_float_extend_ph<mode>2<mask_name>"
   [(set (match_operand:VF4_128_8_256 0 "register_operand" "=v")
 	(float_extend:VF4_128_8_256
@@ -6360,6 +6381,21 @@ (define_insn "*avx512fp16_float_extend_ph<mode>2_load<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "extendv2hfv2df2"
+  [(set (match_operand:V2DF 0 "register_operand")
+	(float_extend:V2DF
+	  (match_operand:V2HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  if (!MEM_P (operands[1]))
+    {
+      operands[1] = lowpart_subreg (V8HFmode, operands[1], V2HFmode);
+      emit_insn (gen_avx512fp16_float_extend_phv2df2
+		 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
 (define_insn "avx512fp16_float_extend_phv2df2<mask_name>"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
 	(float_extend:V2DF
@@ -6382,6 +6418,12 @@ (define_insn "*avx512fp16_float_extend_phv2df2_load<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
+(define_expand "trunc<mode><ssePHmodelower>2"
+  [(set (match_operand:<ssePHmode> 0 "register_operand")
+	(float_truncate:<ssePHmode>
+	  (match_operand:VF48H_AVX512VL 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
 (define_insn "avx512fp16_vcvt<castmode>2ph_<mode><mask_name><round_name>"
   [(set (match_operand:<ssePHmode> 0 "register_operand" "=v")
 	(float_truncate:<ssePHmode>
@@ -6392,11 +6434,21 @@ (define_insn "avx512fp16_vcvt<castmode>2ph_<mode><mask_name><round_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "avx512fp16_vcvt<castmode>2ph_<mode>"
-  [(set (match_operand:V8HF 0 "register_operand" "=v")
+(define_expand "trunc<mode>v4hf2"
+  [(set (match_operand:V4HF 0 "register_operand")
+	(float_truncate:V4HF (match_operand:VF4_128_8_256 1 "vector_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  operands[0] = lowpart_subreg (V8HFmode, operands[0], V4HFmode);
+  emit_insn (gen_avx512fp16_trunc<mode>v4hf2 (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "avx512fp16_trunc<mode>v4hf2"
+  [(set (match_operand:V8HF 0 "register_operand")
 	(vec_concat:V8HF
 	    (float_truncate:V4HF
-	      (match_operand:VF4_128_8_256 1 "vector_operand" "vm"))
+	      (match_operand:VF4_128_8_256 1 "vector_operand"))
 	    (match_dup 2)))]
   "TARGET_AVX512FP16 && TARGET_AVX512VL"
   "operands[2] = CONST0_RTX (V4HFmode);")
@@ -6461,11 +6513,20 @@ (define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>_mask_1"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "avx512fp16_vcvtpd2ph_v2df"
-  [(set (match_operand:V8HF 0 "register_operand" "=v")
+(define_expand "truncv2dfv2hf2"
+  [(set (match_operand:V2HF 0 "register_operand")
+	(float_truncate:V2HF (match_operand:V2DF 1 "vector_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  operands[0] = lowpart_subreg (V8HFmode, operands[0], V2HFmode);
+  emit_insn (gen_avx512fp16_truncv2dfv2hf2 (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "avx512fp16_truncv2dfv2hf2"
+  [(set (match_operand:V8HF 0 "register_operand")
 	(vec_concat:V8HF
-	  (float_truncate:V2HF
-	    (match_operand:V2DF 1 "vector_operand" "vm"))
+	  (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand"))
 	  (match_dup 2)))]
   "TARGET_AVX512FP16 && TARGET_AVX512VL"
   "operands[2] = CONST0_RTX (V6HFmode);")
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c
new file mode 100644
index 00000000000..286ea9f2624
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-trunc-extendvnhf.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */
+
+extern double df[8];
+extern float sf[16];
+extern _Float16 hf[32];
+
+#define DO_PRAGMA(X) _Pragma(#X)
+
+#define TRUNCHFVV(size, mode)	    \
+  void __attribute__ ((noinline, noclone))  \
+truncv##size##mode##v##size##hf ()   \
+{\
+  int i;  \
+  DO_PRAGMA (GCC unroll size)	\
+  for (i = 0; i < size; i++)  \
+    hf[i] = mode[i];  \
+}
+
+#define EXTENDHFVV(size, mode)	    \
+  void __attribute__ ((noinline, noclone))  \
+extendv##size##hf##v##size##mode ()   \
+{\
+  int i;  \
+  DO_PRAGMA (GCC unroll size)	\
+  for (i = 0; i < size; i++)  \
+    mode[i] = hf[i];  \
+}
+
+TRUNCHFVV(8, df)
+TRUNCHFVV(4, df)
+TRUNCHFVV(2, df)
+TRUNCHFVV(16, sf)
+TRUNCHFVV(8, sf)
+TRUNCHFVV(4, sf)
+EXTENDHFVV(8, df)
+EXTENDHFVV(4, df)
+EXTENDHFVV(2, df)
+EXTENDHFVV(16, sf)
+EXTENDHFVV(8, sf)
+EXTENDHFVV(4, sf)
+
+/* { dg-final { scan-assembler-times "vcvtpd2phz\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2phy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtpd2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtps2phx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2phxy\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2phxx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtph2pd\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2psx\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.
  2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
                   ` (5 preceding siblings ...)
  2021-09-23  5:46 ` [PATCH 6/7] AVX512FP16: add truncmn2/extendmn2 expanders liuhongt
@ 2021-09-23  5:46 ` liuhongt
  6 siblings, 0 replies; 8+ messages in thread
From: liuhongt @ 2021-09-23  5:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, Hongyu Wang

From: Hongyu Wang <hongyu.wang@intel.com>

gcc/ChangeLog:

	* config/i386/i386-expand.c (ix86_use_mask_cmp_p): Enable
	HFmode mask_cmp.
	* config/i386/sse.md (sseintvecmodelower): Add HF vector modes.
	(<avx512>_store<mode>_mask): Extend to support HF vector modes.
	(vec_cmp<mode><avx512fmaskmodelower>): Likewise.
	(vcond_mask_<mode><avx512fmaskmodelower>): Likewise.
	(vcond<mode><mode>): New expander.
	(vcond<mode><sseintvecmodelower>): Likewise.
	(vcond<sseintvecmodelower><mode>): Likewise.
	(vcondu<mode><sseintvecmodelower>): Likewise.

gcc/testsuite/ChangeLog:

	* g++.target/i386/avx512fp16-vcondmn-vec.C: New test.
	* g++.target/i386/avx512fp16-vcondmn-minmax.C: Ditto.
	* gcc.target/i386/avx512fp16-vcondmn-loop-1.c: Ditto.
	* gcc.target/i386/avx512fp16-vcondmn-loop-2.c: Ditto.
	* gcc.target/i386/avx512fp16-vec_cmpmn.c: Ditto.
---
 gcc/config/i386/i386-expand.c                 |   2 +
 gcc/config/i386/sse.md                        |  84 ++++++++--
 .../i386/avx512fp16-vcondmn-minmax.C          |  25 +++
 .../g++.target/i386/avx512fp16-vcondmn-vec.C  |  70 +++++++++
 .../i386/avx512fp16-vcondmn-loop-1.c          |  70 +++++++++
 .../i386/avx512fp16-vcondmn-loop-2.c          | 143 ++++++++++++++++++
 .../gcc.target/i386/avx512fp16-vec_cmpmn.c    |  32 ++++
 7 files changed, 414 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index dbbf5e34656..94ac303585e 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3638,6 +3638,8 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
     return false;
   else if (vector_size == 64)
     return true;
+  else if (GET_MODE_INNER (cmp_mode) == HFmode)
+    return true;
 
   /* When op_true is NULL, op_false must be NULL, or vice versa.  */
   gcc_assert (!op_true == !op_false);
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a48c8e8bede..084fc7f4693 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -989,9 +989,9 @@ (define_mode_attr sseintvecmode2
    (V16HF "OI") (V8HF "TI")])
 
 (define_mode_attr sseintvecmodelower
-  [(V16SF "v16si") (V8DF "v8di")
-   (V8SF "v8si") (V4DF "v4di")
-   (V4SF "v4si") (V2DF "v2di")
+  [(V32HF "v32hi") (V16SF "v16si") (V8DF "v8di")
+   (V16HF "v16hi") (V8SF "v8si") (V4DF "v4di")
+   (V8HF "v8hi") (V4SF "v4si") (V2DF "v2di")
    (V8SI "v8si") (V4DI "v4di")
    (V4SI "v4si") (V2DI "v2di")
    (V16HI "v16hi") (V8HI "v8hi")
@@ -1568,9 +1568,9 @@ (define_insn "<avx512>_store<mode>_mask"
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<avx512>_store<mode>_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
-	(vec_merge:VI12_AVX512VL
-	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
+  [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI12HF_AVX512VL
+	  (match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
 	  (match_dup 0)
 	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
   "TARGET_AVX512BW"
@@ -3810,8 +3810,8 @@ (define_insn "<sse>_<unord>comi<round_saeonly_name>"
 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
 	(match_operator:<avx512fmaskmode> 1 ""
-	  [(match_operand:V48_AVX512VL 2 "register_operand")
-	   (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
+	  [(match_operand:V48H_AVX512VL 2 "register_operand")
+	   (match_operand:V48H_AVX512VL 3 "nonimmediate_operand")]))]
   "TARGET_AVX512F"
 {
   bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
@@ -4018,6 +4018,51 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
   DONE;
 })
 
+(define_expand "vcond<mode><mode>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+	(if_then_else:VF_AVX512FP16VL
+	  (match_operator 3 ""
+	    [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
+	     (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
+	  (match_operand:VF_AVX512FP16VL 1 "general_operand")
+	  (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond<mode><sseintvecmodelower>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+	(if_then_else:VF_AVX512FP16VL
+	  (match_operator 3 ""
+	    [(match_operand:<sseintvecmode> 4 "vector_operand")
+	     (match_operand:<sseintvecmode> 5 "vector_operand")])
+	  (match_operand:VF_AVX512FP16VL 1 "general_operand")
+	  (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond<sseintvecmodelower><mode>"
+  [(set (match_operand:<sseintvecmode> 0 "register_operand")
+	(if_then_else:<sseintvecmode>
+	  (match_operator 3 ""
+	    [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
+	     (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
+	  (match_operand:<sseintvecmode> 1 "general_operand")
+	  (match_operand:<sseintvecmode> 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
   [(set (match_operand:V48_AVX512VL 0 "register_operand")
 	(vec_merge:V48_AVX512VL
@@ -4027,10 +4072,10 @@ (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
   "TARGET_AVX512F")
 
 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
-	(vec_merge:VI12_AVX512VL
-	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
-	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
+  [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
+	(vec_merge:VI12HF_AVX512VL
+	  (match_operand:VI12HF_AVX512VL 1 "nonimmediate_operand")
+	  (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand")
 	  (match_operand:<avx512fmaskmode> 3 "register_operand")))]
   "TARGET_AVX512BW")
 
@@ -15538,6 +15583,21 @@ (define_expand "vcondu<VI8F_128:mode>v2di"
   DONE;
 })
 
+(define_expand "vcondu<mode><sseintvecmodelower>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+	(if_then_else:VF_AVX512FP16VL
+	  (match_operator 3 ""
+	    [(match_operand:<sseintvecmode> 4 "vector_operand")
+	     (match_operand:<sseintvecmode> 5 "vector_operand")])
+	  (match_operand:VF_AVX512FP16VL 1 "general_operand")
+	  (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcondeq<VI8F_128:mode>v2di"
   [(set (match_operand:VI8F_128 0 "register_operand")
 	(if_then_else:VI8F_128
diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
new file mode 100644
index 00000000000..6d50f4974c5
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vminph" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxph" 3 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCONDMINMAX(size, op, name)  \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vminmax_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b)  \
+{ \
+  return (a op b) ? a : b;  \
+}
+
+VCONDMINMAX (8, <, min)
+VCONDMINMAX (8, >, max)
+VCONDMINMAX (16, <, min)
+VCONDMINMAX (16, >, max)
+VCONDMINMAX (32, <, min)
+VCONDMINMAX (32, >, max)
+
diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
new file mode 100644
index 00000000000..de93e2c5c86
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 45 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpw" 18 } } */
+/* { dg-final { scan-assembler-times "(?:vpblendmw|vmovdqu16\[^\{\n\]+\{%k\[1-7\]\})" 75 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef unsigned short v8uhi __attribute__ ((vector_size (16)));
+typedef unsigned short v16uhi __attribute__ ((vector_size (32)));
+typedef unsigned short v32uhi __attribute__ ((vector_size (64)));
+
+#define VCONDMOV(size, op, name)  \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b,  \
+			   v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hi##v##size##hf##name (v##size##hi a, v##size##hi b,  \
+			   v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##hi \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hi##name (v##size##hi a, v##size##hi b,  \
+			   v##size##hf c, v##size##hf d)  \
+{ \
+  return (c op d) ? a : b;  \
+} \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##uhi##v##size##hf##name (v##size##uhi a, v##size##uhi b,  \
+			   v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##uhi \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##uhi##name (v##size##uhi a, v##size##uhi b,  \
+			   v##size##hf c, v##size##hf d)  \
+{ \
+  return (c op d) ? a : b;  \
+} \
+
+VCONDMOV (8, <, lt)
+VCONDMOV (8, >, gt)
+VCONDMOV (8, ==, eq)
+VCONDMOV (8, <=, le)
+VCONDMOV (8, >=, ge)
+VCONDMOV (16, <, lt)
+VCONDMOV (16, >, gt)
+VCONDMOV (16, <=, le)
+VCONDMOV (16, >=, ge)
+VCONDMOV (16, ==, eq)
+VCONDMOV (32, <, lt)
+VCONDMOV (32, >, gt)
+VCONDMOV (32, <=, le)
+VCONDMOV (32, >=, ge)
+VCONDMOV (32, ==, eq)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
new file mode 100644
index 00000000000..e8745aba64e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 27 } } */
+/* { dg-final { scan-assembler-times "(?:vpcmpw|vpcmpeqw)" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 6 } } */
+
+typedef unsigned short u16;
+typedef short s16;
+
+#define CONDMOV_LOOP(size, type, ptype, op, name) \
+void \
+__attribute__ ((noinline, noclone, optimize("tree-vectorize"))) \
+loop_cond_##size##ptype##type##name ( \
+  ptype * restrict a, ptype * restrict b,	\
+  type * restrict c, type * restrict d) \
+{ \
+  int i;  \
+  for (i = 0; i < size; i++)  \
+    { \
+      if (a[i] op b[i])	\
+	d[i] = c[i];  \
+    } \
+}
+
+CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, s16, <, lt)
+CONDMOV_LOOP (32, _Float16, s16, >, gt)
+CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+CONDMOV_LOOP (16, _Float16, s16, <, lt)
+CONDMOV_LOOP (16, _Float16, s16, >, gt)
+CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+CONDMOV_LOOP (8, _Float16, s16, <, lt)
+CONDMOV_LOOP (8, _Float16, s16, >, gt)
+CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+CONDMOV_LOOP (32, s16, _Float16, <, lt)
+CONDMOV_LOOP (32, s16, _Float16, >, gt)
+CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+CONDMOV_LOOP (16, s16, _Float16, <, lt)
+CONDMOV_LOOP (16, s16, _Float16, >, gt)
+CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+CONDMOV_LOOP (8, s16, _Float16, <, lt)
+CONDMOV_LOOP (8, s16, _Float16, >, gt)
+CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, u16, <, lt)
+CONDMOV_LOOP (32, _Float16, u16, >, gt)
+CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+CONDMOV_LOOP (16, _Float16, u16, <, lt)
+CONDMOV_LOOP (16, _Float16, u16, >, gt)
+CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+CONDMOV_LOOP (8, _Float16, u16, <, lt)
+CONDMOV_LOOP (8, _Float16, u16, >, gt)
+CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+CONDMOV_LOOP (32, u16, _Float16, <, lt)
+CONDMOV_LOOP (32, u16, _Float16, >, gt)
+CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+CONDMOV_LOOP (16, u16, _Float16, <, lt)
+CONDMOV_LOOP (16, u16, _Float16, >, gt)
+CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+CONDMOV_LOOP (8, u16, _Float16, <, lt)
+CONDMOV_LOOP (8, u16, _Float16, >, gt)
+CONDMOV_LOOP (8, u16, _Float16, ==, eq)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
new file mode 100644
index 00000000000..a0d5f988088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
@@ -0,0 +1,143 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+static void condmov_test (void);
+#define DO_TEST condmov_test
+#define AVX512FP16
+#define AVX512VL
+#include "avx512f-check.h"
+#include "avx512fp16-vcondmn-loop-1.c"
+
+_Float16 a[32], b[32], c[32], fexp[32], fref[32];
+s16 sa[32], sb[32], sc[32], sexp[32], sref[32];
+u16 ua[32], ub[32], uc[32], uexp[32], uref[32];
+
+#define EMULATE_CONDMOV_LOOP(size, type, ptype, op, name) \
+void \
+__attribute__ ((noinline, noclone)) \
+scalar_cond_##size##ptype##type##name ( \
+  ptype * restrict a, ptype * restrict b,	\
+  type * restrict c, type * restrict d)  \
+{ \
+  int i;  \
+  for (i = 0; i < size; i++)  \
+    { \
+      if (a[i] op b[i])	\
+	d[i] = c[i];  \
+    } \
+}
+
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, ==, eq)
+
+void init()
+{
+  int i;
+  for (i = 0; i < 32; i++)
+    {
+      ua[i] = sa[i] = a[i] = i; 
+      ub[i] = sb[i] = b[i] = i;
+      uc[i] = sc[i] = c[i] = (32 - i) * 2;
+      uexp[i] = sexp[i] = fexp[i] = -1;
+      uref[i] = sref[i] = fref[i] = -1;
+    }
+}
+
+int check_cond(void *a, void *b, int size)
+{
+  int i;
+  u16 *pa = (u16 *)a, *pb = (u16 *)b;
+  for (i = 0; i < size; i++)
+    if (pa[i] != pb[i])
+      return 0;
+  return 1;
+}
+
+#define TEST_CONDMOV_LOOP(size, name)	\
+{ \
+  init ();  \
+  scalar_cond_##size##_Float16_Float16##name (a, b, c, fexp);  \
+  loop_cond_##size##_Float16_Float16##name (a, b, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##_Float16s16##name (a, b, sc, sexp);  \
+  loop_cond_##size##_Float16s16##name (a, b, sc, sref);  \
+  if (!check_cond ((void *)sexp, (void *)sref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##s16_Float16##name (sa, sb, c, fexp);  \
+  loop_cond_##size##s16_Float16##name (sa, sb, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##_Float16u16##name (a, b, uc, uexp);  \
+  loop_cond_##size##_Float16u16##name (a, b, uc, uref);  \
+  if (!check_cond ((void *)uexp, (void *)uref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##u16_Float16##name (ua, ub, c, fexp);  \
+  loop_cond_##size##u16_Float16##name (ua, ub, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+}
+
+static void condmov_test()
+{
+  TEST_CONDMOV_LOOP (32, lt)
+  TEST_CONDMOV_LOOP (32, gt)
+  TEST_CONDMOV_LOOP (32, eq)
+  TEST_CONDMOV_LOOP (16, lt)
+  TEST_CONDMOV_LOOP (16, gt)
+  TEST_CONDMOV_LOOP (16, eq)
+  TEST_CONDMOV_LOOP (8, lt)
+  TEST_CONDMOV_LOOP (8, gt)
+  TEST_CONDMOV_LOOP (8, eq)
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c
new file mode 100644
index 00000000000..ef9f85373f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 15 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCMPMN(type, op, name)	\
+type  \
+__attribute__ ((noinline, noclone)) \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+  return a op b;  \
+}
+
+VCMPMN (v8hf, <, lt)
+VCMPMN (v16hf, <, lt)
+VCMPMN (v32hf, <, lt)
+VCMPMN (v8hf, <=, le)
+VCMPMN (v16hf, <=, le)
+VCMPMN (v32hf, <=, le)
+VCMPMN (v8hf, >, gt)
+VCMPMN (v16hf, >, gt)
+VCMPMN (v32hf, >, gt)
+VCMPMN (v8hf, >=, ge)
+VCMPMN (v16hf, >=, ge)
+VCMPMN (v32hf, >=, ge)
+VCMPMN (v8hf, ==, eq)
+VCMPMN (v16hf, ==, eq)
+VCMPMN (v32hf, ==, eq)
-- 
2.27.0


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-09-23  5:46 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-23  5:46 [PATCH 0/7] AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes liuhongt
2021-09-23  5:46 ` [PATCH 1/7] AVX512FP16: Add expander for rint/nearbyinthf2 liuhongt
2021-09-23  5:46 ` [PATCH 2/7] AVX512FP16: Add expander for fmahf4 liuhongt
2021-09-23  5:46 ` [PATCH 3/7] AVX512FP16: Add expander for smin/maxhf3 liuhongt
2021-09-23  5:46 ` [PATCH 4/7] AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes liuhongt
2021-09-23  5:46 ` [PATCH 5/7] AVX512FP16: Add float(uns)?mn2 expander liuhongt
2021-09-23  5:46 ` [PATCH 6/7] AVX512FP16: add truncmn2/extendmn2 expanders liuhongt
2021-09-23  5:46 ` [PATCH 7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes liuhongt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).