[PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.
@ 2023-10-23  8:48 liuhongt
  2023-10-23 12:32 ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: liuhongt @ 2023-10-23  8:48 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

	PR target/103861
	* config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
	V2HF/V2BF/V4HF/V4BFmode.
	* config/i386/mmx.md (vec_cmpv4hfqi): New expander.
	(vcond<mode>v4hf): Ditto.
	(vcond<mode>v4hi): Ditto.
	(vcondu<mode>v4hi): Ditto.
	(vcond_mask_<mode>v4hi): Ditto.
	(vcond_mask_<mode>qi): Ditto.
	(vec_cmpv2hfqi): Ditto.
	(vcond<mode>v2hf): Ditto.
	(vcond<mode>v2hi): Ditto.
	(vcondu<mode>v2hi): Ditto.
	(vcond_mask_<mode>v2hi): Ditto.
	* config/i386/sse.md (vcond<mode><mode>): Merge this with ..
	(vcond<sseintvecmodelower><mode>): .. this into ..
	(vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
	and extend to V8BF/V16BF/V32BFmode.

gcc/testsuite/ChangeLog:

	* g++.target/i386/part-vect-vcondhf.C: New test.
	* gcc.target/i386/part-vect-vec_cmphf.c: New test.
---
 gcc/config/i386/i386-expand.cc                |   4 +
 gcc/config/i386/mmx.md                        | 237 +++++++++++++++++-
 gcc/config/i386/sse.md                        |  25 +-
 .../g++.target/i386/part-vect-vcondhf.C       |  34 +++
 .../gcc.target/i386/part-vect-vec_cmphf.c     |  26 ++
 5 files changed, 304 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
 create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 1eae9d7c78c..9658f9c5a2d 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       break;
     case E_V8QImode:
     case E_V4HImode:
+    case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SImode:
       if (TARGET_SSE4_1)
 	{
@@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       break;
     case E_V4QImode:
     case E_V2HImode:
+    case E_V2HFmode:
+    case E_V2BFmode:
       if (TARGET_SSE4_1)
 	{
 	  gen = gen_mmx_pblendvb_v4qi;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 491a0a51272..b9617e9d8c6 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
 (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
 
 (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
+(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
+(define_mode_iterator V4F_64 [V4HF V4BF])
+(define_mode_iterator V2F_32 [V2HF V2BF])
 ;; 4-byte integer vector modes
 (define_mode_iterator VI_32 [V4QI V2HI])
 
@@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
   [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
 
 (define_mode_attr mmxxmmmode
-  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
+  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
+   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
 
 (define_mode_attr mmxxmmmodelower
-  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
+  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
+   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
 
 (define_expand "movd_<mode>_to_sse"
   [(set (match_operand:<mmxxmmmode> 0 "register_operand")
@@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
   [(set (match_dup 0)
 	(ior:<MODE> (match_dup 1) (match_dup 2)))])
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel half-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "vec_cmpv4hfqi"
+  [(set (match_operand:QI 0 "register_operand")
+	(match_operator:QI 1 ""
+	  [(match_operand:V4HF 2 "nonimmediate_operand")
+	   (match_operand:V4HF 3 "nonimmediate_operand")]))]
+  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
+   && ix86_partial_vec_fp_math"
+{
+  rtx ops[4];
+  ops[3] = gen_reg_rtx (V8HFmode);
+  ops[2] = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
+  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
+  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
+  DONE;
+})
+
+(define_expand "vcond<mode>v4hf"
+  [(set (match_operand:V4FI_64 0 "register_operand")
+	(if_then_else:V4FI_64
+	  (match_operator 3 ""
+	    [(match_operand:V4HF 4 "nonimmediate_operand")
+	     (match_operand:V4HF 5 "nonimmediate_operand")])
+	  (match_operand:V4FI_64 1 "general_operand")
+	  (match_operand:V4FI_64 2 "general_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL
+  && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
+{
+  rtx ops[6];
+  ops[5] = gen_reg_rtx (V8HFmode);
+  ops[4] = gen_reg_rtx (V8HFmode);
+  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
+  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[1]),
+			   <MODE>mode);
+  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[2]),
+			   <MODE>mode);
+  ops[3] = operands[3];
+  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
+  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
+  bool ok = ix86_expand_fp_vcond (ops);
+  gcc_assert (ok);
+
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
+					       <mmxxmmmode>mode));
+  DONE;
+})
+
+(define_expand "vcond<mode>v4hi"
+  [(set (match_operand:V4F_64 0 "register_operand")
+	(if_then_else:V4F_64
+	  (match_operator 3 ""
+	    [(match_operand:V4HI 4 "nonimmediate_operand")
+	     (match_operand:V4HI 5 "nonimmediate_operand")])
+	  (match_operand:V4F_64 1 "general_operand")
+	  (match_operand:V4F_64 2 "general_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcondu<mode>v4hi"
+  [(set (match_operand:V4F_64 0 "register_operand")
+	(if_then_else:V4F_64
+	  (match_operator 3 ""
+	    [(match_operand:V4HI 4 "nonimmediate_operand")
+	     (match_operand:V4HI 5 "nonimmediate_operand")])
+	  (match_operand:V4F_64 1 "general_operand")
+	  (match_operand:V4F_64 2 "general_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>v4hi"
+  [(set (match_operand:V4F_64 0 "register_operand")
+	(vec_merge:V4F_64
+	  (match_operand:V4F_64 1 "register_operand")
+	  (match_operand:V4F_64 2 "register_operand")
+	  (match_operand:V4HI 3  "register_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
+{
+  ix86_expand_sse_movcc (operands[0], operands[3],
+			 operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>qi"
+  [(set (match_operand:V4FI_64 0 "register_operand")
+	(vec_merge:V4FI_64
+	  (match_operand:V4FI_64 1 "register_operand")
+	  (match_operand:V4FI_64 2 "register_operand")
+	  (match_operand:QI 3 "register_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
+{
+  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
+  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
+  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
+  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
+						 operands[2], operands[3]));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
+  DONE;
+})
+
+(define_expand "vec_cmpv2hfqi"
+  [(set (match_operand:QI 0 "register_operand")
+	(match_operator:QI 1 ""
+	  [(match_operand:V2HF 2 "nonimmediate_operand")
+	   (match_operand:V2HF 3 "nonimmediate_operand")]))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL
+   && ix86_partial_vec_fp_math"
+{
+  rtx ops[4];
+  ops[3] = gen_reg_rtx (V8HFmode);
+  ops[2] = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
+  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
+  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
+  DONE;
+})
+
+(define_expand "vcond<mode>v2hf"
+  [(set (match_operand:V2FI_32 0 "register_operand")
+	(if_then_else:V2FI_32
+	  (match_operator 3 ""
+	    [(match_operand:V2HF 4 "nonimmediate_operand")
+	     (match_operand:V2HF 5 "nonimmediate_operand")])
+	  (match_operand:V2FI_32 1 "general_operand")
+	  (match_operand:V2FI_32 2 "general_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL
+   && ix86_partial_vec_fp_math"
+{
+  rtx ops[6];
+  ops[5] = gen_reg_rtx (V8HFmode);
+  ops[4] = gen_reg_rtx (V8HFmode);
+  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
+  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[1]),
+			   <MODE>mode);
+  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[2]),
+			   <MODE>mode);
+  ops[3] = operands[3];
+  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
+  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
+  bool ok = ix86_expand_fp_vcond (ops);
+  gcc_assert (ok);
+
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
+					       <mmxxmmmode>mode));
+  DONE;
+})
+
+(define_expand "vcond<mode>v2hi"
+  [(set (match_operand:V2F_32 0 "register_operand")
+	(if_then_else:V2F_32
+	  (match_operator 3 ""
+	    [(match_operand:V2HI 4 "nonimmediate_operand")
+	     (match_operand:V2HI 5 "nonimmediate_operand")])
+	  (match_operand:V2F_32 1 "general_operand")
+	  (match_operand:V2F_32 2 "general_operand")))]
+  "TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcondu<mode>v2hi"
+  [(set (match_operand:V2F_32 0 "register_operand")
+	(if_then_else:V2F_32
+	  (match_operator 3 ""
+	    [(match_operand:V2HI 4 "nonimmediate_operand")
+	     (match_operand:V2HI 5 "nonimmediate_operand")])
+	  (match_operand:V2F_32 1 "general_operand")
+	  (match_operand:V2F_32 2 "general_operand")))]
+  "TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>v2hi"
+  [(set (match_operand:V2F_32 0 "register_operand")
+	(vec_merge:V2F_32
+	  (match_operand:V2F_32 1 "register_operand")
+	  (match_operand:V2F_32 2 "register_operand")
+	  (match_operand:V2HI 3 "register_operand")))]
+  "TARGET_SSE4_1"
+{
+  ix86_expand_sse_movcc (operands[0], operands[3],
+			 operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>qi"
+  [(set (match_operand:V2FI_32 0 "register_operand")
+	(vec_merge:V2FI_32
+	  (match_operand:V2FI_32 1 "register_operand")
+	  (match_operand:V2FI_32 2 "register_operand")
+	  (match_operand:QI 3 "register_operand")))]
+  "TARGET_AVX512BW && TARGET_AVX512VL"
+{
+  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
+  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
+  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
+  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
+						 operands[2], operands[3]));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel half-precision floating point rounding operations.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c988935d4df..e2a7cbeb722 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
   DONE;
 })
 
-(define_expand "vcond<mode><mode>"
-  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
-	(if_then_else:VHF_AVX512VL
-	  (match_operator 3 ""
-	    [(match_operand:VHF_AVX512VL 4 "vector_operand")
-	     (match_operand:VHF_AVX512VL 5 "vector_operand")])
-	  (match_operand:VHF_AVX512VL 1 "general_operand")
-	  (match_operand:VHF_AVX512VL 2 "general_operand")))]
-  "TARGET_AVX512FP16"
-{
-  bool ok = ix86_expand_fp_vcond (operands);
-  gcc_assert (ok);
-  DONE;
-})
-
-(define_expand "vcond<sseintvecmodelower><mode>"
-  [(set (match_operand:<sseintvecmode> 0 "register_operand")
-	(if_then_else:<sseintvecmode>
+(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
+  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
+	(if_then_else:VI2HFBF_AVX512VL
 	  (match_operator 3 ""
 	    [(match_operand:VHF_AVX512VL 4 "vector_operand")
 	     (match_operand:VHF_AVX512VL 5 "vector_operand")])
-	  (match_operand:<sseintvecmode> 1 "general_operand")
-	  (match_operand:<sseintvecmode> 2 "general_operand")))]
+	  (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
+	  (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
   "TARGET_AVX512FP16"
 {
   bool ok = ix86_expand_fp_vcond (operands);
diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
new file mode 100644
index 00000000000..8bf01b7cb4a
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
@@ -0,0 +1,34 @@
+/* PR target/103861 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
+/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
+/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
+typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
+typedef short __attribute__((__vector_size__ (4))) __v2hi;
+
+typedef unsigned short  __attribute__((__vector_size__ (8))) __v4hu;
+typedef short __attribute__((__vector_size__ (8))) __v4hi;
+
+typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
+typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
+
+
+__v2hu au, bu;
+__v2hi as, bs;
+__v2hf af, bf;
+
+__v4hu cu, du;
+__v4hi cs, ds;
+__v4hf cf, df;
+
+__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
+__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
+__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
+__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
+
+__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
+__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
+__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
+__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
new file mode 100644
index 00000000000..ee8659395eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
+
+typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
+typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
+
+
+#define VCMPMN(type, op, name)	\
+type  \
+__attribute__ ((noinline, noclone)) \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+  return a op b;  \
+}
+
+VCMPMN (v4hf, <, lt)
+VCMPMN (v2hf, <, lt)
+VCMPMN (v4hf, <=, le)
+VCMPMN (v2hf, <=, le)
+VCMPMN (v4hf, >, gt)
+VCMPMN (v2hf, >, gt)
+VCMPMN (v4hf, >=, ge)
+VCMPMN (v2hf, >=, ge)
+VCMPMN (v4hf, ==, eq)
+VCMPMN (v2hf, ==, eq)
-- 
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.
  2023-10-23  8:48 [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf liuhongt
@ 2023-10-23 12:32 ` Richard Biener
  2023-10-24  2:53   ` Hongtao Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2023-10-23 12:32 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools

On Mon, Oct 23, 2023 at 10:48 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ready push to trunk.

vcond<mode> and vcondeq<mode> shouldn't be necessary if there's
vcond_mask<mode> and vcmp<mode> support which is the "modern"
way of handling vcond<mode>.  Unless the ISA really can do
compare and select with a single instruction.

Richard.

> gcc/ChangeLog:
>
>         PR target/103861
>         * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
>         V2HF/V2BF/V4HF/V4BFmode.
>         * config/i386/mmx.md (vec_cmpv4hfqi): New expander.
>         (vcond<mode>v4hf): Ditto.
>         (vcond<mode>v4hi): Ditto.
>         (vcondu<mode>v4hi): Ditto.
>         (vcond_mask_<mode>v4hi): Ditto.
>         (vcond_mask_<mode>qi): Ditto.
>         (vec_cmpv2hfqi): Ditto.
>         (vcond<mode>v2hf): Ditto.
>         (vcond<mode>v2hi): Ditto.
>         (vcondu<mode>v2hi): Ditto.
>         (vcond_mask_<mode>v2hi): Ditto.
>         * config/i386/sse.md (vcond<mode><mode>): Merge this with ..
>         (vcond<sseintvecmodelower><mode>): .. this into ..
>         (vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
>         and extend to V8BF/V16BF/V32BFmode.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/part-vect-vcondhf.C: New test.
>         * gcc.target/i386/part-vect-vec_cmphf.c: New test.
> ---
>  gcc/config/i386/i386-expand.cc                |   4 +
>  gcc/config/i386/mmx.md                        | 237 +++++++++++++++++-
>  gcc/config/i386/sse.md                        |  25 +-
>  .../g++.target/i386/part-vect-vcondhf.C       |  34 +++
>  .../gcc.target/i386/part-vect-vec_cmphf.c     |  26 ++
>  5 files changed, 304 insertions(+), 22 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 1eae9d7c78c..9658f9c5a2d 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
>        break;
>      case E_V8QImode:
>      case E_V4HImode:
> +    case E_V4HFmode:
> +    case E_V4BFmode:
>      case E_V2SImode:
>        if (TARGET_SSE4_1)
>         {
> @@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
>        break;
>      case E_V4QImode:
>      case E_V2HImode:
> +    case E_V2HFmode:
> +    case E_V2BFmode:
>        if (TARGET_SSE4_1)
>         {
>           gen = gen_mmx_pblendvb_v4qi;
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 491a0a51272..b9617e9d8c6 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
>  (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
>
>  (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
> +(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
> +(define_mode_iterator V4F_64 [V4HF V4BF])
> +(define_mode_iterator V2F_32 [V2HF V2BF])
>  ;; 4-byte integer vector modes
>  (define_mode_iterator VI_32 [V4QI V2HI])
>
> @@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
>    [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
>
>  (define_mode_attr mmxxmmmode
> -  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
> +  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
> +   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
>
>  (define_mode_attr mmxxmmmodelower
> -  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
> +  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
> +   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
>
>  (define_expand "movd_<mode>_to_sse"
>    [(set (match_operand:<mmxxmmmode> 0 "register_operand")
> @@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
>    [(set (match_dup 0)
>         (ior:<MODE> (match_dup 1) (match_dup 2)))])
>
> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> +;;
> +;; Parallel half-precision floating point comparisons
> +;;
> +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> +
> +(define_expand "vec_cmpv4hfqi"
> +  [(set (match_operand:QI 0 "register_operand")
> +       (match_operator:QI 1 ""
> +         [(match_operand:V4HF 2 "nonimmediate_operand")
> +          (match_operand:V4HF 3 "nonimmediate_operand")]))]
> +  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
> +   && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[4];
> +  ops[3] = gen_reg_rtx (V8HFmode);
> +  ops[2] = gen_reg_rtx (V8HFmode);
> +
> +  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
> +  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
> +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v4hf"
> +  [(set (match_operand:V4FI_64 0 "register_operand")
> +       (if_then_else:V4FI_64
> +         (match_operator 3 ""
> +           [(match_operand:V4HF 4 "nonimmediate_operand")
> +            (match_operand:V4HF 5 "nonimmediate_operand")])
> +         (match_operand:V4FI_64 1 "general_operand")
> +         (match_operand:V4FI_64 2 "general_operand")))]
> +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> +  && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[6];
> +  ops[5] = gen_reg_rtx (V8HFmode);
> +  ops[4] = gen_reg_rtx (V8HFmode);
> +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> +                          force_reg (<MODE>mode, operands[1]),
> +                          <MODE>mode);
> +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> +                          force_reg (<MODE>mode, operands[2]),
> +                          <MODE>mode);
> +  ops[3] = operands[3];
> +  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
> +  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
> +  bool ok = ix86_expand_fp_vcond (ops);
> +  gcc_assert (ok);
> +
> +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> +                                              <mmxxmmmode>mode));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v4hi"
> +  [(set (match_operand:V4F_64 0 "register_operand")
> +       (if_then_else:V4F_64
> +         (match_operator 3 ""
> +           [(match_operand:V4HI 4 "nonimmediate_operand")
> +            (match_operand:V4HI 5 "nonimmediate_operand")])
> +         (match_operand:V4F_64 1 "general_operand")
> +         (match_operand:V4F_64 2 "general_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcondu<mode>v4hi"
> +  [(set (match_operand:V4F_64 0 "register_operand")
> +       (if_then_else:V4F_64
> +         (match_operator 3 ""
> +           [(match_operand:V4HI 4 "nonimmediate_operand")
> +            (match_operand:V4HI 5 "nonimmediate_operand")])
> +         (match_operand:V4F_64 1 "general_operand")
> +         (match_operand:V4F_64 2 "general_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>v4hi"
> +  [(set (match_operand:V4F_64 0 "register_operand")
> +       (vec_merge:V4F_64
> +         (match_operand:V4F_64 1 "register_operand")
> +         (match_operand:V4F_64 2 "register_operand")
> +         (match_operand:V4HI 3  "register_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> +{
> +  ix86_expand_sse_movcc (operands[0], operands[3],
> +                        operands[1], operands[2]);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>qi"
> +  [(set (match_operand:V4FI_64 0 "register_operand")
> +       (vec_merge:V4FI_64
> +         (match_operand:V4FI_64 1 "register_operand")
> +         (match_operand:V4FI_64 2 "register_operand")
> +         (match_operand:QI 3 "register_operand")))]
> +  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
> +{
> +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> +                                                operands[2], operands[3]));
> +  emit_move_insn (operands[0],
> +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> +  DONE;
> +})
> +
> +(define_expand "vec_cmpv2hfqi"
> +  [(set (match_operand:QI 0 "register_operand")
> +       (match_operator:QI 1 ""
> +         [(match_operand:V2HF 2 "nonimmediate_operand")
> +          (match_operand:V2HF 3 "nonimmediate_operand")]))]
> +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> +   && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[4];
> +  ops[3] = gen_reg_rtx (V8HFmode);
> +  ops[2] = gen_reg_rtx (V8HFmode);
> +
> +  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
> +  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
> +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v2hf"
> +  [(set (match_operand:V2FI_32 0 "register_operand")
> +       (if_then_else:V2FI_32
> +         (match_operator 3 ""
> +           [(match_operand:V2HF 4 "nonimmediate_operand")
> +            (match_operand:V2HF 5 "nonimmediate_operand")])
> +         (match_operand:V2FI_32 1 "general_operand")
> +         (match_operand:V2FI_32 2 "general_operand")))]
> +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> +   && ix86_partial_vec_fp_math"
> +{
> +  rtx ops[6];
> +  ops[5] = gen_reg_rtx (V8HFmode);
> +  ops[4] = gen_reg_rtx (V8HFmode);
> +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> +                          force_reg (<MODE>mode, operands[1]),
> +                          <MODE>mode);
> +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> +                          force_reg (<MODE>mode, operands[2]),
> +                          <MODE>mode);
> +  ops[3] = operands[3];
> +  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
> +  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
> +  bool ok = ix86_expand_fp_vcond (ops);
> +  gcc_assert (ok);
> +
> +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> +                                              <mmxxmmmode>mode));
> +  DONE;
> +})
> +
> +(define_expand "vcond<mode>v2hi"
> +  [(set (match_operand:V2F_32 0 "register_operand")
> +       (if_then_else:V2F_32
> +         (match_operator 3 ""
> +           [(match_operand:V2HI 4 "nonimmediate_operand")
> +            (match_operand:V2HI 5 "nonimmediate_operand")])
> +         (match_operand:V2F_32 1 "general_operand")
> +         (match_operand:V2F_32 2 "general_operand")))]
> +  "TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcondu<mode>v2hi"
> +  [(set (match_operand:V2F_32 0 "register_operand")
> +       (if_then_else:V2F_32
> +         (match_operator 3 ""
> +           [(match_operand:V2HI 4 "nonimmediate_operand")
> +            (match_operand:V2HI 5 "nonimmediate_operand")])
> +         (match_operand:V2F_32 1 "general_operand")
> +         (match_operand:V2F_32 2 "general_operand")))]
> +  "TARGET_SSE4_1"
> +{
> +  bool ok = ix86_expand_int_vcond (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>v2hi"
> +  [(set (match_operand:V2F_32 0 "register_operand")
> +       (vec_merge:V2F_32
> +         (match_operand:V2F_32 1 "register_operand")
> +         (match_operand:V2F_32 2 "register_operand")
> +         (match_operand:V2HI 3 "register_operand")))]
> +  "TARGET_SSE4_1"
> +{
> +  ix86_expand_sse_movcc (operands[0], operands[3],
> +                        operands[1], operands[2]);
> +  DONE;
> +})
> +
> +(define_expand "vcond_mask_<mode>qi"
> +  [(set (match_operand:V2FI_32 0 "register_operand")
> +       (vec_merge:V2FI_32
> +         (match_operand:V2FI_32 1 "register_operand")
> +         (match_operand:V2FI_32 2 "register_operand")
> +         (match_operand:QI 3 "register_operand")))]
> +  "TARGET_AVX512BW && TARGET_AVX512VL"
> +{
> +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> +                                                operands[2], operands[3]));
> +  emit_move_insn (operands[0],
> +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> +  DONE;
> +})
> +
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  ;;
>  ;; Parallel half-precision floating point rounding operations.
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index c988935d4df..e2a7cbeb722 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
>    DONE;
>  })
>
> -(define_expand "vcond<mode><mode>"
> -  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
> -       (if_then_else:VHF_AVX512VL
> -         (match_operator 3 ""
> -           [(match_operand:VHF_AVX512VL 4 "vector_operand")
> -            (match_operand:VHF_AVX512VL 5 "vector_operand")])
> -         (match_operand:VHF_AVX512VL 1 "general_operand")
> -         (match_operand:VHF_AVX512VL 2 "general_operand")))]
> -  "TARGET_AVX512FP16"
> -{
> -  bool ok = ix86_expand_fp_vcond (operands);
> -  gcc_assert (ok);
> -  DONE;
> -})
> -
> -(define_expand "vcond<sseintvecmodelower><mode>"
> -  [(set (match_operand:<sseintvecmode> 0 "register_operand")
> -       (if_then_else:<sseintvecmode>
> +(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
> +  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
> +       (if_then_else:VI2HFBF_AVX512VL
>           (match_operator 3 ""
>             [(match_operand:VHF_AVX512VL 4 "vector_operand")
>              (match_operand:VHF_AVX512VL 5 "vector_operand")])
> -         (match_operand:<sseintvecmode> 1 "general_operand")
> -         (match_operand:<sseintvecmode> 2 "general_operand")))]
> +         (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
> +         (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
>    "TARGET_AVX512FP16"
>  {
>    bool ok = ix86_expand_fp_vcond (operands);
> diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> new file mode 100644
> index 00000000000..8bf01b7cb4a
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> @@ -0,0 +1,34 @@
> +/* PR target/103861 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
> +/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
> +/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
> +typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
> +typedef short __attribute__((__vector_size__ (4))) __v2hi;
> +
> +typedef unsigned short  __attribute__((__vector_size__ (8))) __v4hu;
> +typedef short __attribute__((__vector_size__ (8))) __v4hi;
> +
> +typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> +typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> +
> +
> +__v2hu au, bu;
> +__v2hi as, bs;
> +__v2hf af, bf;
> +
> +__v4hu cu, du;
> +__v4hi cs, ds;
> +__v4hf cf, df;
> +
> +__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
> +__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
> +__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
> +__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
> +
> +__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
> +__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
> +__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
> +__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
> diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> new file mode 100644
> index 00000000000..ee8659395eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
> +
> +typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
> +typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
> +
> +
> +#define VCMPMN(type, op, name) \
> +type  \
> +__attribute__ ((noinline, noclone)) \
> +vec_cmp_##type##type##name (type a, type b) \
> +{ \
> +  return a op b;  \
> +}
> +
> +VCMPMN (v4hf, <, lt)
> +VCMPMN (v2hf, <, lt)
> +VCMPMN (v4hf, <=, le)
> +VCMPMN (v2hf, <=, le)
> +VCMPMN (v4hf, >, gt)
> +VCMPMN (v2hf, >, gt)
> +VCMPMN (v4hf, >=, ge)
> +VCMPMN (v2hf, >=, ge)
> +VCMPMN (v4hf, ==, eq)
> +VCMPMN (v2hf, ==, eq)
> --
> 2.31.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.
  2023-10-23 12:32 ` Richard Biener
@ 2023-10-24  2:53   ` Hongtao Liu
  2023-10-24  5:23     ` Hongtao Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Hongtao Liu @ 2023-10-24  2:53 UTC (permalink / raw)
  To: Richard Biener; +Cc: liuhongt, gcc-patches, hjl.tools

On Mon, Oct 23, 2023 at 8:35 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Mon, Oct 23, 2023 at 10:48 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ready push to trunk.
>
> vcond<mode> and vcondeq<mode> shouldn't be necessary if there's
> vcond_mask<mode> and vcmp<mode> support which is the "modern"
> way of handling vcond<mode>.  Unless the ISA really can do
> compare and select with a single instruction.
For the testcase:

typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;


__v4hf cf, df;

__v4hf cfu (__v4hf c, __v4hf d) { return (c > d) ? cf : df; }

The data_mode passed to ix86_get_mask_mode is v4hi, not v4hf, since

  /* Always construct signed integer vector type.  */
  intt = c_common_type_for_size
    (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type0))), 0);
  if (!intt)
    {
      if (complain & tf_error)
error_at (location, "could not find an integer type "
  "of the same size as %qT", TREE_TYPE (type0));
      return error_mark_node;
    }
  result_type = build_opaque_vector_type (intt,
  TYPE_VECTOR_SUBPARTS (type0));
  return build_vec_cmp (resultcode, result_type, op0, op1);

The backend can't distinguish whether it's a vector fp16 comparison or
a vector hi comparison.
The former requires -mavx512fp16; the latter requires -mavx512bw.
>
> Richard.
>
> > gcc/ChangeLog:
> >
> >         PR target/103861
> >         * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
> >         V2HF/V2BF/V4HF/V4BFmode.
> >         * config/i386/mmx.md (vec_cmpv4hfqi): New expander.
> >         (vcond<mode>v4hf): Ditto.
> >         (vcond<mode>v4hi): Ditto.
> >         (vcondu<mode>v4hi): Ditto.
> >         (vcond_mask_<mode>v4hi): Ditto.
> >         (vcond_mask_<mode>qi): Ditto.
> >         (vec_cmpv2hfqi): Ditto.
> >         (vcond<mode>v2hf): Ditto.
> >         (vcond<mode>v2hi): Ditto.
> >         (vcondu<mode>v2hi): Ditto.
> >         (vcond_mask_<mode>v2hi): Ditto.
> >         * config/i386/sse.md (vcond<mode><mode>): Merge this with ..
> >         (vcond<sseintvecmodelower><mode>): .. this into ..
> >         (vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
> >         and extend to V8BF/V16BF/V32BFmode.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * g++.target/i386/part-vect-vcondhf.C: New test.
> >         * gcc.target/i386/part-vect-vec_cmphf.c: New test.
> > ---
> >  gcc/config/i386/i386-expand.cc                |   4 +
> >  gcc/config/i386/mmx.md                        | 237 +++++++++++++++++-
> >  gcc/config/i386/sse.md                        |  25 +-
> >  .../g++.target/i386/part-vect-vcondhf.C       |  34 +++
> >  .../gcc.target/i386/part-vect-vec_cmphf.c     |  26 ++
> >  5 files changed, 304 insertions(+), 22 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> >
> > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > index 1eae9d7c78c..9658f9c5a2d 100644
> > --- a/gcc/config/i386/i386-expand.cc
> > +++ b/gcc/config/i386/i386-expand.cc
> > @@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> >        break;
> >      case E_V8QImode:
> >      case E_V4HImode:
> > +    case E_V4HFmode:
> > +    case E_V4BFmode:
> >      case E_V2SImode:
> >        if (TARGET_SSE4_1)
> >         {
> > @@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> >        break;
> >      case E_V4QImode:
> >      case E_V2HImode:
> > +    case E_V2HFmode:
> > +    case E_V2BFmode:
> >        if (TARGET_SSE4_1)
> >         {
> >           gen = gen_mmx_pblendvb_v4qi;
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index 491a0a51272..b9617e9d8c6 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
> >  (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
> >
> >  (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
> > +(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
> > +(define_mode_iterator V4F_64 [V4HF V4BF])
> > +(define_mode_iterator V2F_32 [V2HF V2BF])
> >  ;; 4-byte integer vector modes
> >  (define_mode_iterator VI_32 [V4QI V2HI])
> >
> > @@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
> >    [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
> >
> >  (define_mode_attr mmxxmmmode
> > -  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
> > +  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
> > +   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
> >
> >  (define_mode_attr mmxxmmmodelower
> > -  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
> > +  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
> > +   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
> >
> >  (define_expand "movd_<mode>_to_sse"
> >    [(set (match_operand:<mmxxmmmode> 0 "register_operand")
> > @@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
> >    [(set (match_dup 0)
> >         (ior:<MODE> (match_dup 1) (match_dup 2)))])
> >
> > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > +;;
> > +;; Parallel half-precision floating point comparisons
> > +;;
> > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > +
> > +(define_expand "vec_cmpv4hfqi"
> > +  [(set (match_operand:QI 0 "register_operand")
> > +       (match_operator:QI 1 ""
> > +         [(match_operand:V4HF 2 "nonimmediate_operand")
> > +          (match_operand:V4HF 3 "nonimmediate_operand")]))]
> > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
> > +   && ix86_partial_vec_fp_math"
> > +{
> > +  rtx ops[4];
> > +  ops[3] = gen_reg_rtx (V8HFmode);
> > +  ops[2] = gen_reg_rtx (V8HFmode);
> > +
> > +  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
> > +  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
> > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond<mode>v4hf"
> > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > +       (if_then_else:V4FI_64
> > +         (match_operator 3 ""
> > +           [(match_operand:V4HF 4 "nonimmediate_operand")
> > +            (match_operand:V4HF 5 "nonimmediate_operand")])
> > +         (match_operand:V4FI_64 1 "general_operand")
> > +         (match_operand:V4FI_64 2 "general_operand")))]
> > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > +  && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
> > +{
> > +  rtx ops[6];
> > +  ops[5] = gen_reg_rtx (V8HFmode);
> > +  ops[4] = gen_reg_rtx (V8HFmode);
> > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > +                          force_reg (<MODE>mode, operands[1]),
> > +                          <MODE>mode);
> > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > +                          force_reg (<MODE>mode, operands[2]),
> > +                          <MODE>mode);
> > +  ops[3] = operands[3];
> > +  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
> > +  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
> > +  bool ok = ix86_expand_fp_vcond (ops);
> > +  gcc_assert (ok);
> > +
> > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > +                                              <mmxxmmmode>mode));
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond<mode>v4hi"
> > +  [(set (match_operand:V4F_64 0 "register_operand")
> > +       (if_then_else:V4F_64
> > +         (match_operator 3 ""
> > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > +         (match_operand:V4F_64 1 "general_operand")
> > +         (match_operand:V4F_64 2 "general_operand")))]
> > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > +{
> > +  bool ok = ix86_expand_int_vcond (operands);
> > +  gcc_assert (ok);
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcondu<mode>v4hi"
> > +  [(set (match_operand:V4F_64 0 "register_operand")
> > +       (if_then_else:V4F_64
> > +         (match_operator 3 ""
> > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > +         (match_operand:V4F_64 1 "general_operand")
> > +         (match_operand:V4F_64 2 "general_operand")))]
> > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > +{
> > +  bool ok = ix86_expand_int_vcond (operands);
> > +  gcc_assert (ok);
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond_mask_<mode>v4hi"
> > +  [(set (match_operand:V4F_64 0 "register_operand")
> > +       (vec_merge:V4F_64
> > +         (match_operand:V4F_64 1 "register_operand")
> > +         (match_operand:V4F_64 2 "register_operand")
> > +         (match_operand:V4HI 3  "register_operand")))]
> > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > +{
> > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > +                        operands[1], operands[2]);
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond_mask_<mode>qi"
> > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > +       (vec_merge:V4FI_64
> > +         (match_operand:V4FI_64 1 "register_operand")
> > +         (match_operand:V4FI_64 2 "register_operand")
> > +         (match_operand:QI 3 "register_operand")))]
> > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
> > +{
> > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > +                                                operands[2], operands[3]));
> > +  emit_move_insn (operands[0],
> > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > +  DONE;
> > +})
> > +
> > +(define_expand "vec_cmpv2hfqi"
> > +  [(set (match_operand:QI 0 "register_operand")
> > +       (match_operator:QI 1 ""
> > +         [(match_operand:V2HF 2 "nonimmediate_operand")
> > +          (match_operand:V2HF 3 "nonimmediate_operand")]))]
> > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > +   && ix86_partial_vec_fp_math"
> > +{
> > +  rtx ops[4];
> > +  ops[3] = gen_reg_rtx (V8HFmode);
> > +  ops[2] = gen_reg_rtx (V8HFmode);
> > +
> > +  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
> > +  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
> > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond<mode>v2hf"
> > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > +       (if_then_else:V2FI_32
> > +         (match_operator 3 ""
> > +           [(match_operand:V2HF 4 "nonimmediate_operand")
> > +            (match_operand:V2HF 5 "nonimmediate_operand")])
> > +         (match_operand:V2FI_32 1 "general_operand")
> > +         (match_operand:V2FI_32 2 "general_operand")))]
> > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > +   && ix86_partial_vec_fp_math"
> > +{
> > +  rtx ops[6];
> > +  ops[5] = gen_reg_rtx (V8HFmode);
> > +  ops[4] = gen_reg_rtx (V8HFmode);
> > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > +                          force_reg (<MODE>mode, operands[1]),
> > +                          <MODE>mode);
> > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > +                          force_reg (<MODE>mode, operands[2]),
> > +                          <MODE>mode);
> > +  ops[3] = operands[3];
> > +  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
> > +  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
> > +  bool ok = ix86_expand_fp_vcond (ops);
> > +  gcc_assert (ok);
> > +
> > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > +                                              <mmxxmmmode>mode));
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond<mode>v2hi"
> > +  [(set (match_operand:V2F_32 0 "register_operand")
> > +       (if_then_else:V2F_32
> > +         (match_operator 3 ""
> > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > +         (match_operand:V2F_32 1 "general_operand")
> > +         (match_operand:V2F_32 2 "general_operand")))]
> > +  "TARGET_SSE4_1"
> > +{
> > +  bool ok = ix86_expand_int_vcond (operands);
> > +  gcc_assert (ok);
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcondu<mode>v2hi"
> > +  [(set (match_operand:V2F_32 0 "register_operand")
> > +       (if_then_else:V2F_32
> > +         (match_operator 3 ""
> > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > +         (match_operand:V2F_32 1 "general_operand")
> > +         (match_operand:V2F_32 2 "general_operand")))]
> > +  "TARGET_SSE4_1"
> > +{
> > +  bool ok = ix86_expand_int_vcond (operands);
> > +  gcc_assert (ok);
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond_mask_<mode>v2hi"
> > +  [(set (match_operand:V2F_32 0 "register_operand")
> > +       (vec_merge:V2F_32
> > +         (match_operand:V2F_32 1 "register_operand")
> > +         (match_operand:V2F_32 2 "register_operand")
> > +         (match_operand:V2HI 3 "register_operand")))]
> > +  "TARGET_SSE4_1"
> > +{
> > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > +                        operands[1], operands[2]);
> > +  DONE;
> > +})
> > +
> > +(define_expand "vcond_mask_<mode>qi"
> > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > +       (vec_merge:V2FI_32
> > +         (match_operand:V2FI_32 1 "register_operand")
> > +         (match_operand:V2FI_32 2 "register_operand")
> > +         (match_operand:QI 3 "register_operand")))]
> > +  "TARGET_AVX512BW && TARGET_AVX512VL"
> > +{
> > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > +                                                operands[2], operands[3]));
> > +  emit_move_insn (operands[0],
> > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > +  DONE;
> > +})
> > +
> >  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> >  ;;
> >  ;; Parallel half-precision floating point rounding operations.
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index c988935d4df..e2a7cbeb722 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
> >    DONE;
> >  })
> >
> > -(define_expand "vcond<mode><mode>"
> > -  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
> > -       (if_then_else:VHF_AVX512VL
> > -         (match_operator 3 ""
> > -           [(match_operand:VHF_AVX512VL 4 "vector_operand")
> > -            (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > -         (match_operand:VHF_AVX512VL 1 "general_operand")
> > -         (match_operand:VHF_AVX512VL 2 "general_operand")))]
> > -  "TARGET_AVX512FP16"
> > -{
> > -  bool ok = ix86_expand_fp_vcond (operands);
> > -  gcc_assert (ok);
> > -  DONE;
> > -})
> > -
> > -(define_expand "vcond<sseintvecmodelower><mode>"
> > -  [(set (match_operand:<sseintvecmode> 0 "register_operand")
> > -       (if_then_else:<sseintvecmode>
> > +(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
> > +  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
> > +       (if_then_else:VI2HFBF_AVX512VL
> >           (match_operator 3 ""
> >             [(match_operand:VHF_AVX512VL 4 "vector_operand")
> >              (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > -         (match_operand:<sseintvecmode> 1 "general_operand")
> > -         (match_operand:<sseintvecmode> 2 "general_operand")))]
> > +         (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
> > +         (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
> >    "TARGET_AVX512FP16"
> >  {
> >    bool ok = ix86_expand_fp_vcond (operands);
> > diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > new file mode 100644
> > index 00000000000..8bf01b7cb4a
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > @@ -0,0 +1,34 @@
> > +/* PR target/103861 */
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > +/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
> > +/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
> > +/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
> > +/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
> > +typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
> > +typedef short __attribute__((__vector_size__ (4))) __v2hi;
> > +
> > +typedef unsigned short  __attribute__((__vector_size__ (8))) __v4hu;
> > +typedef short __attribute__((__vector_size__ (8))) __v4hi;
> > +
> > +typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> > +typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> > +
> > +
> > +__v2hu au, bu;
> > +__v2hi as, bs;
> > +__v2hf af, bf;
> > +
> > +__v4hu cu, du;
> > +__v4hi cs, ds;
> > +__v4hf cf, df;
> > +
> > +__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
> > +__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
> > +__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
> > +__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
> > +
> > +__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
> > +__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
> > +__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
> > +__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
> > diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > new file mode 100644
> > index 00000000000..ee8659395eb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > +/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
> > +
> > +typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
> > +typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
> > +
> > +
> > +#define VCMPMN(type, op, name) \
> > +type  \
> > +__attribute__ ((noinline, noclone)) \
> > +vec_cmp_##type##type##name (type a, type b) \
> > +{ \
> > +  return a op b;  \
> > +}
> > +
> > +VCMPMN (v4hf, <, lt)
> > +VCMPMN (v2hf, <, lt)
> > +VCMPMN (v4hf, <=, le)
> > +VCMPMN (v2hf, <=, le)
> > +VCMPMN (v4hf, >, gt)
> > +VCMPMN (v2hf, >, gt)
> > +VCMPMN (v4hf, >=, ge)
> > +VCMPMN (v2hf, >=, ge)
> > +VCMPMN (v4hf, ==, eq)
> > +VCMPMN (v2hf, ==, eq)
> > --
> > 2.31.1
> >



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.
  2023-10-24  2:53   ` Hongtao Liu
@ 2023-10-24  5:23     ` Hongtao Liu
  2023-10-24  5:44       ` Hongtao Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Hongtao Liu @ 2023-10-24  5:23 UTC (permalink / raw)
  To: Richard Biener; +Cc: liuhongt, gcc-patches, hjl.tools

On Tue, Oct 24, 2023 at 10:53 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Oct 23, 2023 at 8:35 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Mon, Oct 23, 2023 at 10:48 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > Ready push to trunk.
> >
> > vcond<mode> and vcondeq<mode> shouldn't be necessary if there's
> > vcond_mask<mode> and vcmp<mode> support which is the "modern"
> > way of handling vcond<mode>.  Unless the ISA really can do
> > compare and select with a single instruction.
> For testcase
>
> typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
>
>
> __v4hf cf, df;
>
> __v4hf cfu (__v4hf c, __v4hf d) { return (c > d) ? cf : df; }
>
> The data_mode passes to ix86_get_mask_mode is v4hi, not v4hf since
>
>   /* Always construct signed integer vector type.  */
>   intt = c_common_type_for_size
>     (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type0))), 0);
>   if (!intt)
>     {
>       if (complain & tf_error)
> error_at (location, "could not find an integer type "
>   "of the same size as %qT", TREE_TYPE (type0));
>       return error_mark_node;
>     }
>   result_type = build_opaque_vector_type (intt,
>   TYPE_VECTOR_SUBPARTS (type0));
>   return build_vec_cmp (resultcode, result_type, op0, op1);
>
> The backend can't distinguish whether it's a vector fp16 comparison or
> a vector hi comparison.
> the former requires -mavx512fp16, the latter requires -mavx512bw
Should we pass type0 instead of result_type here?
> >
> > Richard.
> >
> > > gcc/ChangeLog:
> > >
> > >         PR target/103861
> > >         * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
> > >         V2HF/V2BF/V4HF/V4BFmode.
> > >         * config/i386/mmx.md (vec_cmpv4hfqi): New expander.
> > >         (vcond<mode>v4hf): Ditto.
> > >         (vcond<mode>v4hi): Ditto.
> > >         (vcondu<mode>v4hi): Ditto.
> > >         (vcond_mask_<mode>v4hi): Ditto.
> > >         (vcond_mask_<mode>qi): Ditto.
> > >         (vec_cmpv2hfqi): Ditto.
> > >         (vcond<mode>v2hf): Ditto.
> > >         (vcond<mode>v2hi): Ditto.
> > >         (vcondu<mode>v2hi): Ditto.
> > >         (vcond_mask_<mode>v2hi): Ditto.
> > >         * config/i386/sse.md (vcond<mode><mode>): Merge this with ..
> > >         (vcond<sseintvecmodelower><mode>): .. this into ..
> > >         (vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
> > >         and extend to V8BF/V16BF/V32BFmode.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * g++.target/i386/part-vect-vcondhf.C: New test.
> > >         * gcc.target/i386/part-vect-vec_cmphf.c: New test.
> > > ---
> > >  gcc/config/i386/i386-expand.cc                |   4 +
> > >  gcc/config/i386/mmx.md                        | 237 +++++++++++++++++-
> > >  gcc/config/i386/sse.md                        |  25 +-
> > >  .../g++.target/i386/part-vect-vcondhf.C       |  34 +++
> > >  .../gcc.target/i386/part-vect-vec_cmphf.c     |  26 ++
> > >  5 files changed, 304 insertions(+), 22 deletions(-)
> > >  create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > >
> > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > index 1eae9d7c78c..9658f9c5a2d 100644
> > > --- a/gcc/config/i386/i386-expand.cc
> > > +++ b/gcc/config/i386/i386-expand.cc
> > > @@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> > >        break;
> > >      case E_V8QImode:
> > >      case E_V4HImode:
> > > +    case E_V4HFmode:
> > > +    case E_V4BFmode:
> > >      case E_V2SImode:
> > >        if (TARGET_SSE4_1)
> > >         {
> > > @@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> > >        break;
> > >      case E_V4QImode:
> > >      case E_V2HImode:
> > > +    case E_V2HFmode:
> > > +    case E_V2BFmode:
> > >        if (TARGET_SSE4_1)
> > >         {
> > >           gen = gen_mmx_pblendvb_v4qi;
> > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > > index 491a0a51272..b9617e9d8c6 100644
> > > --- a/gcc/config/i386/mmx.md
> > > +++ b/gcc/config/i386/mmx.md
> > > @@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
> > >  (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
> > >
> > >  (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
> > > +(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
> > > +(define_mode_iterator V4F_64 [V4HF V4BF])
> > > +(define_mode_iterator V2F_32 [V2HF V2BF])
> > >  ;; 4-byte integer vector modes
> > >  (define_mode_iterator VI_32 [V4QI V2HI])
> > >
> > > @@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
> > >    [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
> > >
> > >  (define_mode_attr mmxxmmmode
> > > -  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
> > > +  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
> > > +   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
> > >
> > >  (define_mode_attr mmxxmmmodelower
> > > -  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
> > > +  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
> > > +   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
> > >
> > >  (define_expand "movd_<mode>_to_sse"
> > >    [(set (match_operand:<mmxxmmmode> 0 "register_operand")
> > > @@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
> > >    [(set (match_dup 0)
> > >         (ior:<MODE> (match_dup 1) (match_dup 2)))])
> > >
> > > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > +;;
> > > +;; Parallel half-precision floating point comparisons
> > > +;;
> > > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > +
> > > +(define_expand "vec_cmpv4hfqi"
> > > +  [(set (match_operand:QI 0 "register_operand")
> > > +       (match_operator:QI 1 ""
> > > +         [(match_operand:V4HF 2 "nonimmediate_operand")
> > > +          (match_operand:V4HF 3 "nonimmediate_operand")]))]
> > > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
> > > +   && ix86_partial_vec_fp_math"
> > > +{
> > > +  rtx ops[4];
> > > +  ops[3] = gen_reg_rtx (V8HFmode);
> > > +  ops[2] = gen_reg_rtx (V8HFmode);
> > > +
> > > +  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
> > > +  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
> > > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond<mode>v4hf"
> > > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > > +       (if_then_else:V4FI_64
> > > +         (match_operator 3 ""
> > > +           [(match_operand:V4HF 4 "nonimmediate_operand")
> > > +            (match_operand:V4HF 5 "nonimmediate_operand")])
> > > +         (match_operand:V4FI_64 1 "general_operand")
> > > +         (match_operand:V4FI_64 2 "general_operand")))]
> > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > +  && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
> > > +{
> > > +  rtx ops[6];
> > > +  ops[5] = gen_reg_rtx (V8HFmode);
> > > +  ops[4] = gen_reg_rtx (V8HFmode);
> > > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > > +                          force_reg (<MODE>mode, operands[1]),
> > > +                          <MODE>mode);
> > > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > > +                          force_reg (<MODE>mode, operands[2]),
> > > +                          <MODE>mode);
> > > +  ops[3] = operands[3];
> > > +  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
> > > +  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
> > > +  bool ok = ix86_expand_fp_vcond (ops);
> > > +  gcc_assert (ok);
> > > +
> > > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > > +                                              <mmxxmmmode>mode));
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond<mode>v4hi"
> > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > +       (if_then_else:V4F_64
> > > +         (match_operator 3 ""
> > > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > > +         (match_operand:V4F_64 1 "general_operand")
> > > +         (match_operand:V4F_64 2 "general_operand")))]
> > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > +{
> > > +  bool ok = ix86_expand_int_vcond (operands);
> > > +  gcc_assert (ok);
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcondu<mode>v4hi"
> > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > +       (if_then_else:V4F_64
> > > +         (match_operator 3 ""
> > > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > > +         (match_operand:V4F_64 1 "general_operand")
> > > +         (match_operand:V4F_64 2 "general_operand")))]
> > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > +{
> > > +  bool ok = ix86_expand_int_vcond (operands);
> > > +  gcc_assert (ok);
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond_mask_<mode>v4hi"
> > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > +       (vec_merge:V4F_64
> > > +         (match_operand:V4F_64 1 "register_operand")
> > > +         (match_operand:V4F_64 2 "register_operand")
> > > +         (match_operand:V4HI 3  "register_operand")))]
> > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > +{
> > > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > > +                        operands[1], operands[2]);
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond_mask_<mode>qi"
> > > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > > +       (vec_merge:V4FI_64
> > > +         (match_operand:V4FI_64 1 "register_operand")
> > > +         (match_operand:V4FI_64 2 "register_operand")
> > > +         (match_operand:QI 3 "register_operand")))]
> > > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
> > > +{
> > > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > > +                                                operands[2], operands[3]));
> > > +  emit_move_insn (operands[0],
> > > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vec_cmpv2hfqi"
> > > +  [(set (match_operand:QI 0 "register_operand")
> > > +       (match_operator:QI 1 ""
> > > +         [(match_operand:V2HF 2 "nonimmediate_operand")
> > > +          (match_operand:V2HF 3 "nonimmediate_operand")]))]
> > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > +   && ix86_partial_vec_fp_math"
> > > +{
> > > +  rtx ops[4];
> > > +  ops[3] = gen_reg_rtx (V8HFmode);
> > > +  ops[2] = gen_reg_rtx (V8HFmode);
> > > +
> > > +  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
> > > +  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
> > > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond<mode>v2hf"
> > > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > > +       (if_then_else:V2FI_32
> > > +         (match_operator 3 ""
> > > +           [(match_operand:V2HF 4 "nonimmediate_operand")
> > > +            (match_operand:V2HF 5 "nonimmediate_operand")])
> > > +         (match_operand:V2FI_32 1 "general_operand")
> > > +         (match_operand:V2FI_32 2 "general_operand")))]
> > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > +   && ix86_partial_vec_fp_math"
> > > +{
> > > +  rtx ops[6];
> > > +  ops[5] = gen_reg_rtx (V8HFmode);
> > > +  ops[4] = gen_reg_rtx (V8HFmode);
> > > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > > +                          force_reg (<MODE>mode, operands[1]),
> > > +                          <MODE>mode);
> > > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > > +                          force_reg (<MODE>mode, operands[2]),
> > > +                          <MODE>mode);
> > > +  ops[3] = operands[3];
> > > +  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
> > > +  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
> > > +  bool ok = ix86_expand_fp_vcond (ops);
> > > +  gcc_assert (ok);
> > > +
> > > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > > +                                              <mmxxmmmode>mode));
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond<mode>v2hi"
> > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > +       (if_then_else:V2F_32
> > > +         (match_operator 3 ""
> > > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > > +         (match_operand:V2F_32 1 "general_operand")
> > > +         (match_operand:V2F_32 2 "general_operand")))]
> > > +  "TARGET_SSE4_1"
> > > +{
> > > +  bool ok = ix86_expand_int_vcond (operands);
> > > +  gcc_assert (ok);
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcondu<mode>v2hi"
> > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > +       (if_then_else:V2F_32
> > > +         (match_operator 3 ""
> > > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > > +         (match_operand:V2F_32 1 "general_operand")
> > > +         (match_operand:V2F_32 2 "general_operand")))]
> > > +  "TARGET_SSE4_1"
> > > +{
> > > +  bool ok = ix86_expand_int_vcond (operands);
> > > +  gcc_assert (ok);
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond_mask_<mode>v2hi"
> > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > +       (vec_merge:V2F_32
> > > +         (match_operand:V2F_32 1 "register_operand")
> > > +         (match_operand:V2F_32 2 "register_operand")
> > > +         (match_operand:V2HI 3 "register_operand")))]
> > > +  "TARGET_SSE4_1"
> > > +{
> > > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > > +                        operands[1], operands[2]);
> > > +  DONE;
> > > +})
> > > +
> > > +(define_expand "vcond_mask_<mode>qi"
> > > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > > +       (vec_merge:V2FI_32
> > > +         (match_operand:V2FI_32 1 "register_operand")
> > > +         (match_operand:V2FI_32 2 "register_operand")
> > > +         (match_operand:QI 3 "register_operand")))]
> > > +  "TARGET_AVX512BW && TARGET_AVX512VL"
> > > +{
> > > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > > +                                                operands[2], operands[3]));
> > > +  emit_move_insn (operands[0],
> > > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > > +  DONE;
> > > +})
> > > +
> > >  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > >  ;;
> > >  ;; Parallel half-precision floating point rounding operations.
> > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > > index c988935d4df..e2a7cbeb722 100644
> > > --- a/gcc/config/i386/sse.md
> > > +++ b/gcc/config/i386/sse.md
> > > @@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
> > >    DONE;
> > >  })
> > >
> > > -(define_expand "vcond<mode><mode>"
> > > -  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
> > > -       (if_then_else:VHF_AVX512VL
> > > -         (match_operator 3 ""
> > > -           [(match_operand:VHF_AVX512VL 4 "vector_operand")
> > > -            (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > > -         (match_operand:VHF_AVX512VL 1 "general_operand")
> > > -         (match_operand:VHF_AVX512VL 2 "general_operand")))]
> > > -  "TARGET_AVX512FP16"
> > > -{
> > > -  bool ok = ix86_expand_fp_vcond (operands);
> > > -  gcc_assert (ok);
> > > -  DONE;
> > > -})
> > > -
> > > -(define_expand "vcond<sseintvecmodelower><mode>"
> > > -  [(set (match_operand:<sseintvecmode> 0 "register_operand")
> > > -       (if_then_else:<sseintvecmode>
> > > +(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
> > > +  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
> > > +       (if_then_else:VI2HFBF_AVX512VL
> > >           (match_operator 3 ""
> > >             [(match_operand:VHF_AVX512VL 4 "vector_operand")
> > >              (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > > -         (match_operand:<sseintvecmode> 1 "general_operand")
> > > -         (match_operand:<sseintvecmode> 2 "general_operand")))]
> > > +         (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
> > > +         (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
> > >    "TARGET_AVX512FP16"
> > >  {
> > >    bool ok = ix86_expand_fp_vcond (operands);
> > > diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > new file mode 100644
> > > index 00000000000..8bf01b7cb4a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > @@ -0,0 +1,34 @@
> > > +/* PR target/103861 */
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > > +/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
> > > +/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
> > > +/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
> > > +/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
> > > +typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
> > > +typedef short __attribute__((__vector_size__ (4))) __v2hi;
> > > +
> > > +typedef unsigned short  __attribute__((__vector_size__ (8))) __v4hu;
> > > +typedef short __attribute__((__vector_size__ (8))) __v4hi;
> > > +
> > > +typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> > > +typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> > > +
> > > +
> > > +__v2hu au, bu;
> > > +__v2hi as, bs;
> > > +__v2hf af, bf;
> > > +
> > > +__v4hu cu, du;
> > > +__v4hi cs, ds;
> > > +__v4hf cf, df;
> > > +
> > > +__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
> > > +__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
> > > +__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
> > > +__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
> > > +
> > > +__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
> > > +__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
> > > +__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
> > > +__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
> > > diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > new file mode 100644
> > > index 00000000000..ee8659395eb
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > @@ -0,0 +1,26 @@
> > > +/* { dg-do compile { target { ! ia32 } } } */
> > > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > > +/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
> > > +
> > > +typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
> > > +typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
> > > +
> > > +
> > > +#define VCMPMN(type, op, name) \
> > > +type  \
> > > +__attribute__ ((noinline, noclone)) \
> > > +vec_cmp_##type##type##name (type a, type b) \
> > > +{ \
> > > +  return a op b;  \
> > > +}
> > > +
> > > +VCMPMN (v4hf, <, lt)
> > > +VCMPMN (v2hf, <, lt)
> > > +VCMPMN (v4hf, <=, le)
> > > +VCMPMN (v2hf, <=, le)
> > > +VCMPMN (v4hf, >, gt)
> > > +VCMPMN (v2hf, >, gt)
> > > +VCMPMN (v4hf, >=, ge)
> > > +VCMPMN (v2hf, >=, ge)
> > > +VCMPMN (v4hf, ==, eq)
> > > +VCMPMN (v2hf, ==, eq)
> > > --
> > > 2.31.1
> > >
>
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.
  2023-10-24  5:23     ` Hongtao Liu
@ 2023-10-24  5:44       ` Hongtao Liu
  2023-10-24  7:13         ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: Hongtao Liu @ 2023-10-24  5:44 UTC (permalink / raw)
  To: Richard Biener; +Cc: liuhongt, gcc-patches, hjl.tools

On Tue, Oct 24, 2023 at 1:23 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Oct 24, 2023 at 10:53 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Mon, Oct 23, 2023 at 8:35 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Mon, Oct 23, 2023 at 10:48 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > > Ready push to trunk.
> > >
> > > vcond<mode> and vcondeq<mode> shouldn't be necessary if there's
> > > vcond_mask<mode> and vcmp<mode> support which is the "modern"
> > > way of handling vcond<mode>.  Unless the ISA really can do
> > > compare and select with a single instruction.
> > For testcase
> >
> > typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> > typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> >
> >
> > __v4hf cf, df;
> >
> > __v4hf cfu (__v4hf c, __v4hf d) { return (c > d) ? cf : df; }
> >
> > The data_mode passes to ix86_get_mask_mode is v4hi, not v4hf since
> >
> >   /* Always construct signed integer vector type.  */
> >   intt = c_common_type_for_size
> >     (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type0))), 0);
> >   if (!intt)
> >     {
> >       if (complain & tf_error)
> > error_at (location, "could not find an integer type "
> >   "of the same size as %qT", TREE_TYPE (type0));
> >       return error_mark_node;
> >     }
> >   result_type = build_opaque_vector_type (intt,
> >   TYPE_VECTOR_SUBPARTS (type0));
> >   return build_vec_cmp (resultcode, result_type, op0, op1);
> >
> > The backend can't distinguish whether it's a vector fp16 comparison or
> > a vector hi comparison.
> > the former requires -mavx512fp16, the latter requires -mavx512bw
> Should we pass type0 instead of result_type here?
 6335 @deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (machine_mode @var{mode})
 6336 Return the mode to use for a vector mask that holds one boolean
 6337 result for each element of vector mode @var{mode}.  The returned mask mode
 6338 can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of
 6339 booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class
 6340 @code{MODE_INT}).  Return an empty @code{opt_machine_mode} if no such
 6341 mask mode exists.

Looks like it's on purpose, v2hi is exactly what we needed here.

So we have to use either kmask or v4hi as the mask mode for both v4hf and
v4hi comparisons; we can't use v4hi for v4hi comparisons but kmask for v4hf
comparisons.
> > >
> > > Richard.
> > >
> > > > gcc/ChangeLog:
> > > >
> > > >         PR target/103861
> > > >         * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
> > > >         V2HF/V2BF/V4HF/V4BFmode.
> > > >         * config/i386/mmx.md (vec_cmpv4hfqi): New expander.
> > > >         (vcond<mode>v4hf): Ditto.
> > > >         (vcond<mode>v4hi): Ditto.
> > > >         (vcondu<mode>v4hi): Ditto.
> > > >         (vcond_mask_<mode>v4hi): Ditto.
> > > >         (vcond_mask_<mode>qi): Ditto.
> > > >         (vec_cmpv2hfqi): Ditto.
> > > >         (vcond<mode>v2hf): Ditto.
> > > >         (vcond<mode>v2hi): Ditto.
> > > >         (vcondu<mode>v2hi): Ditto.
> > > >         (vcond_mask_<mode>v2hi): Ditto.
> > > >         * config/i386/sse.md (vcond<mode><mode>): Merge this with ..
> > > >         (vcond<sseintvecmodelower><mode>): .. this into ..
> > > >         (vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
> > > >         and extend to V8BF/V16BF/V32BFmode.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         * g++.target/i386/part-vect-vcondhf.C: New test.
> > > >         * gcc.target/i386/part-vect-vec_cmphf.c: New test.
> > > > ---
> > > >  gcc/config/i386/i386-expand.cc                |   4 +
> > > >  gcc/config/i386/mmx.md                        | 237 +++++++++++++++++-
> > > >  gcc/config/i386/sse.md                        |  25 +-
> > > >  .../g++.target/i386/part-vect-vcondhf.C       |  34 +++
> > > >  .../gcc.target/i386/part-vect-vec_cmphf.c     |  26 ++
> > > >  5 files changed, 304 insertions(+), 22 deletions(-)
> > > >  create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > >
> > > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > > index 1eae9d7c78c..9658f9c5a2d 100644
> > > > --- a/gcc/config/i386/i386-expand.cc
> > > > +++ b/gcc/config/i386/i386-expand.cc
> > > > @@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> > > >        break;
> > > >      case E_V8QImode:
> > > >      case E_V4HImode:
> > > > +    case E_V4HFmode:
> > > > +    case E_V4BFmode:
> > > >      case E_V2SImode:
> > > >        if (TARGET_SSE4_1)
> > > >         {
> > > > @@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> > > >        break;
> > > >      case E_V4QImode:
> > > >      case E_V2HImode:
> > > > +    case E_V2HFmode:
> > > > +    case E_V2BFmode:
> > > >        if (TARGET_SSE4_1)
> > > >         {
> > > >           gen = gen_mmx_pblendvb_v4qi;
> > > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > > > index 491a0a51272..b9617e9d8c6 100644
> > > > --- a/gcc/config/i386/mmx.md
> > > > +++ b/gcc/config/i386/mmx.md
> > > > @@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
> > > >  (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
> > > >
> > > >  (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
> > > > +(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
> > > > +(define_mode_iterator V4F_64 [V4HF V4BF])
> > > > +(define_mode_iterator V2F_32 [V2HF V2BF])
> > > >  ;; 4-byte integer vector modes
> > > >  (define_mode_iterator VI_32 [V4QI V2HI])
> > > >
> > > > @@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
> > > >    [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
> > > >
> > > >  (define_mode_attr mmxxmmmode
> > > > -  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
> > > > +  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
> > > > +   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
> > > >
> > > >  (define_mode_attr mmxxmmmodelower
> > > > -  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
> > > > +  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
> > > > +   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
> > > >
> > > >  (define_expand "movd_<mode>_to_sse"
> > > >    [(set (match_operand:<mmxxmmmode> 0 "register_operand")
> > > > @@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
> > > >    [(set (match_dup 0)
> > > >         (ior:<MODE> (match_dup 1) (match_dup 2)))])
> > > >
> > > > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > > +;;
> > > > +;; Parallel half-precision floating point comparisons
> > > > +;;
> > > > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > > +
> > > > +(define_expand "vec_cmpv4hfqi"
> > > > +  [(set (match_operand:QI 0 "register_operand")
> > > > +       (match_operator:QI 1 ""
> > > > +         [(match_operand:V4HF 2 "nonimmediate_operand")
> > > > +          (match_operand:V4HF 3 "nonimmediate_operand")]))]
> > > > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > +   && ix86_partial_vec_fp_math"
> > > > +{
> > > > +  rtx ops[4];
> > > > +  ops[3] = gen_reg_rtx (V8HFmode);
> > > > +  ops[2] = gen_reg_rtx (V8HFmode);
> > > > +
> > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
> > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
> > > > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond<mode>v4hf"
> > > > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > > > +       (if_then_else:V4FI_64
> > > > +         (match_operator 3 ""
> > > > +           [(match_operand:V4HF 4 "nonimmediate_operand")
> > > > +            (match_operand:V4HF 5 "nonimmediate_operand")])
> > > > +         (match_operand:V4FI_64 1 "general_operand")
> > > > +         (match_operand:V4FI_64 2 "general_operand")))]
> > > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > +  && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
> > > > +{
> > > > +  rtx ops[6];
> > > > +  ops[5] = gen_reg_rtx (V8HFmode);
> > > > +  ops[4] = gen_reg_rtx (V8HFmode);
> > > > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > > > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > > > +                          force_reg (<MODE>mode, operands[1]),
> > > > +                          <MODE>mode);
> > > > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > > > +                          force_reg (<MODE>mode, operands[2]),
> > > > +                          <MODE>mode);
> > > > +  ops[3] = operands[3];
> > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
> > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
> > > > +  bool ok = ix86_expand_fp_vcond (ops);
> > > > +  gcc_assert (ok);
> > > > +
> > > > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > > > +                                              <mmxxmmmode>mode));
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond<mode>v4hi"
> > > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > > +       (if_then_else:V4F_64
> > > > +         (match_operator 3 ""
> > > > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > > > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > > > +         (match_operand:V4F_64 1 "general_operand")
> > > > +         (match_operand:V4F_64 2 "general_operand")))]
> > > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > > +{
> > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > +  gcc_assert (ok);
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcondu<mode>v4hi"
> > > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > > +       (if_then_else:V4F_64
> > > > +         (match_operator 3 ""
> > > > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > > > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > > > +         (match_operand:V4F_64 1 "general_operand")
> > > > +         (match_operand:V4F_64 2 "general_operand")))]
> > > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > > +{
> > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > +  gcc_assert (ok);
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond_mask_<mode>v4hi"
> > > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > > +       (vec_merge:V4F_64
> > > > +         (match_operand:V4F_64 1 "register_operand")
> > > > +         (match_operand:V4F_64 2 "register_operand")
> > > > +         (match_operand:V4HI 3  "register_operand")))]
> > > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > > +{
> > > > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > > > +                        operands[1], operands[2]);
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond_mask_<mode>qi"
> > > > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > > > +       (vec_merge:V4FI_64
> > > > +         (match_operand:V4FI_64 1 "register_operand")
> > > > +         (match_operand:V4FI_64 2 "register_operand")
> > > > +         (match_operand:QI 3 "register_operand")))]
> > > > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
> > > > +{
> > > > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > > > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > > > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > > > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > > > +                                                operands[2], operands[3]));
> > > > +  emit_move_insn (operands[0],
> > > > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vec_cmpv2hfqi"
> > > > +  [(set (match_operand:QI 0 "register_operand")
> > > > +       (match_operator:QI 1 ""
> > > > +         [(match_operand:V2HF 2 "nonimmediate_operand")
> > > > +          (match_operand:V2HF 3 "nonimmediate_operand")]))]
> > > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > +   && ix86_partial_vec_fp_math"
> > > > +{
> > > > +  rtx ops[4];
> > > > +  ops[3] = gen_reg_rtx (V8HFmode);
> > > > +  ops[2] = gen_reg_rtx (V8HFmode);
> > > > +
> > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
> > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
> > > > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond<mode>v2hf"
> > > > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > > > +       (if_then_else:V2FI_32
> > > > +         (match_operator 3 ""
> > > > +           [(match_operand:V2HF 4 "nonimmediate_operand")
> > > > +            (match_operand:V2HF 5 "nonimmediate_operand")])
> > > > +         (match_operand:V2FI_32 1 "general_operand")
> > > > +         (match_operand:V2FI_32 2 "general_operand")))]
> > > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > +   && ix86_partial_vec_fp_math"
> > > > +{
> > > > +  rtx ops[6];
> > > > +  ops[5] = gen_reg_rtx (V8HFmode);
> > > > +  ops[4] = gen_reg_rtx (V8HFmode);
> > > > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > > > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > > > +                          force_reg (<MODE>mode, operands[1]),
> > > > +                          <MODE>mode);
> > > > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > > > +                          force_reg (<MODE>mode, operands[2]),
> > > > +                          <MODE>mode);
> > > > +  ops[3] = operands[3];
> > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
> > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
> > > > +  bool ok = ix86_expand_fp_vcond (ops);
> > > > +  gcc_assert (ok);
> > > > +
> > > > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > > > +                                              <mmxxmmmode>mode));
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond<mode>v2hi"
> > > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > > +       (if_then_else:V2F_32
> > > > +         (match_operator 3 ""
> > > > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > > > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > > > +         (match_operand:V2F_32 1 "general_operand")
> > > > +         (match_operand:V2F_32 2 "general_operand")))]
> > > > +  "TARGET_SSE4_1"
> > > > +{
> > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > +  gcc_assert (ok);
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcondu<mode>v2hi"
> > > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > > +       (if_then_else:V2F_32
> > > > +         (match_operator 3 ""
> > > > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > > > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > > > +         (match_operand:V2F_32 1 "general_operand")
> > > > +         (match_operand:V2F_32 2 "general_operand")))]
> > > > +  "TARGET_SSE4_1"
> > > > +{
> > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > +  gcc_assert (ok);
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond_mask_<mode>v2hi"
> > > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > > +       (vec_merge:V2F_32
> > > > +         (match_operand:V2F_32 1 "register_operand")
> > > > +         (match_operand:V2F_32 2 "register_operand")
> > > > +         (match_operand:V2HI 3 "register_operand")))]
> > > > +  "TARGET_SSE4_1"
> > > > +{
> > > > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > > > +                        operands[1], operands[2]);
> > > > +  DONE;
> > > > +})
> > > > +
> > > > +(define_expand "vcond_mask_<mode>qi"
> > > > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > > > +       (vec_merge:V2FI_32
> > > > +         (match_operand:V2FI_32 1 "register_operand")
> > > > +         (match_operand:V2FI_32 2 "register_operand")
> > > > +         (match_operand:QI 3 "register_operand")))]
> > > > +  "TARGET_AVX512BW && TARGET_AVX512VL"
> > > > +{
> > > > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > > > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > > > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > > > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > > > +                                                operands[2], operands[3]));
> > > > +  emit_move_insn (operands[0],
> > > > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > > > +  DONE;
> > > > +})
> > > > +
> > > >  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > >  ;;
> > > >  ;; Parallel half-precision floating point rounding operations.
> > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > > > index c988935d4df..e2a7cbeb722 100644
> > > > --- a/gcc/config/i386/sse.md
> > > > +++ b/gcc/config/i386/sse.md
> > > > @@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
> > > >    DONE;
> > > >  })
> > > >
> > > > -(define_expand "vcond<mode><mode>"
> > > > -  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
> > > > -       (if_then_else:VHF_AVX512VL
> > > > -         (match_operator 3 ""
> > > > -           [(match_operand:VHF_AVX512VL 4 "vector_operand")
> > > > -            (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > > > -         (match_operand:VHF_AVX512VL 1 "general_operand")
> > > > -         (match_operand:VHF_AVX512VL 2 "general_operand")))]
> > > > -  "TARGET_AVX512FP16"
> > > > -{
> > > > -  bool ok = ix86_expand_fp_vcond (operands);
> > > > -  gcc_assert (ok);
> > > > -  DONE;
> > > > -})
> > > > -
> > > > -(define_expand "vcond<sseintvecmodelower><mode>"
> > > > -  [(set (match_operand:<sseintvecmode> 0 "register_operand")
> > > > -       (if_then_else:<sseintvecmode>
> > > > +(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
> > > > +  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
> > > > +       (if_then_else:VI2HFBF_AVX512VL
> > > >           (match_operator 3 ""
> > > >             [(match_operand:VHF_AVX512VL 4 "vector_operand")
> > > >              (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > > > -         (match_operand:<sseintvecmode> 1 "general_operand")
> > > > -         (match_operand:<sseintvecmode> 2 "general_operand")))]
> > > > +         (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
> > > > +         (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
> > > >    "TARGET_AVX512FP16"
> > > >  {
> > > >    bool ok = ix86_expand_fp_vcond (operands);
> > > > diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > > new file mode 100644
> > > > index 00000000000..8bf01b7cb4a
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > > @@ -0,0 +1,34 @@
> > > > +/* PR target/103861 */
> > > > +/* { dg-do compile { target { ! ia32 } } } */
> > > > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > > > +/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
> > > > +/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
> > > > +/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
> > > > +/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
> > > > +typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
> > > > +typedef short __attribute__((__vector_size__ (4))) __v2hi;
> > > > +
> > > > +typedef unsigned short  __attribute__((__vector_size__ (8))) __v4hu;
> > > > +typedef short __attribute__((__vector_size__ (8))) __v4hi;
> > > > +
> > > > +typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> > > > +typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> > > > +
> > > > +
> > > > +__v2hu au, bu;
> > > > +__v2hi as, bs;
> > > > +__v2hf af, bf;
> > > > +
> > > > +__v4hu cu, du;
> > > > +__v4hi cs, ds;
> > > > +__v4hf cf, df;
> > > > +
> > > > +__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
> > > > +__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
> > > > +__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
> > > > +__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
> > > > +
> > > > +__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
> > > > +__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
> > > > +__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
> > > > +__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
> > > > diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > > new file mode 100644
> > > > index 00000000000..ee8659395eb
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > > @@ -0,0 +1,26 @@
> > > > +/* { dg-do compile { target { ! ia32 } } } */
> > > > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > > > +/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
> > > > +
> > > > +typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
> > > > +typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
> > > > +
> > > > +
> > > > +#define VCMPMN(type, op, name) \
> > > > +type  \
> > > > +__attribute__ ((noinline, noclone)) \
> > > > +vec_cmp_##type##type##name (type a, type b) \
> > > > +{ \
> > > > +  return a op b;  \
> > > > +}
> > > > +
> > > > +VCMPMN (v4hf, <, lt)
> > > > +VCMPMN (v2hf, <, lt)
> > > > +VCMPMN (v4hf, <=, le)
> > > > +VCMPMN (v2hf, <=, le)
> > > > +VCMPMN (v4hf, >, gt)
> > > > +VCMPMN (v2hf, >, gt)
> > > > +VCMPMN (v4hf, >=, ge)
> > > > +VCMPMN (v2hf, >=, ge)
> > > > +VCMPMN (v4hf, ==, eq)
> > > > +VCMPMN (v2hf, ==, eq)
> > > > --
> > > > 2.31.1
> > > >
> >
> >
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.
  2023-10-24  5:44       ` Hongtao Liu
@ 2023-10-24  7:13         ` Richard Biener
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Biener @ 2023-10-24  7:13 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: liuhongt, gcc-patches, hjl.tools

On Tue, Oct 24, 2023 at 7:44 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Oct 24, 2023 at 1:23 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Tue, Oct 24, 2023 at 10:53 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > On Mon, Oct 23, 2023 at 8:35 PM Richard Biener
> > > <richard.guenther@gmail.com> wrote:
> > > >
> > > > On Mon, Oct 23, 2023 at 10:48 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > > >
> > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > > > Ready to push to trunk.
> > > >
> > > > vcond<mode> and vcondeq<mode> shouldn't be necessary if there's
> > > > vcond_mask<mode> and vcmp<mode> support which is the "modern"
> > > > way of handling vcond<mode>.  Unless the ISA really can do
> > > > compare and select with a single instruction.
> > > For testcase
> > >
> > > typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> > > typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> > >
> > >
> > > __v4hf cf, df;
> > >
> > > __v4hf cfu (__v4hf c, __v4hf d) { return (c > d) ? cf : df; }
> > >
> > > The data_mode passed to ix86_get_mask_mode is v4hi, not v4hf, since
> > >
> > >   /* Always construct signed integer vector type.  */
> > >   intt = c_common_type_for_size
> > >     (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type0))), 0);
> > >   if (!intt)
> > >     {
> > >       if (complain & tf_error)
> > > error_at (location, "could not find an integer type "
> > >   "of the same size as %qT", TREE_TYPE (type0));
> > >       return error_mark_node;
> > >     }
> > >   result_type = build_opaque_vector_type (intt,
> > >   TYPE_VECTOR_SUBPARTS (type0));
> > >   return build_vec_cmp (resultcode, result_type, op0, op1);
> > >
> > > The backend can't distinguish whether it's a vector fp16 comparison or
> > > a vector hi comparison.
> > > The former requires -mavx512fp16; the latter requires -mavx512bw.
> > Should we pass type0 instead of result_type here?
>  6335  @deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (machine_mode @var{mode})
>  6336  Return the mode to use for a vector mask that holds one boolean
>  6337  result for each element of vector mode @var{mode}.  The returned mask mode
>  6338  can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of
>  6339  booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class
>  6340  @code{MODE_INT}).  Return an empty @code{opt_machine_mode} if no such
>  6341  mask mode exists.
>
> Looks like it's on purpose, v2hi is exactly what we needed here.
>
> So we have to use either kmask or v4hi for both the v4hf and the v4hi
> comparison; we can't use v4hi for the v4hi comparison and kmask for the
> v4hf comparison.

I think it's indeed on purpose that the result of v1 < v2 is a signed
integer vector type.
But build_vec_cmp should not derive the truth type of the comparison from
the result type; it should use the truth type for the type of the compared
operands instead, so something like:

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 112d28fd656..01dea608980 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -11986,7 +11986,7 @@ build_vec_cmp (tree_code code, tree type,
 {
   tree zero_vec = build_zero_cst (type);
   tree minus_one_vec = build_minus_one_cst (type);
-  tree cmp_type = truth_type_for (type);
+  tree cmp_type = truth_type_for (TREE_TYPE (arg0));
   tree cmp = build2 (code, cmp_type, arg0, arg1);
   return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
 }


> > > >
> > > > Richard.
> > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >         PR target/103861
> > > > >         * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
> > > > >         V2HF/V2BF/V4HF/V4BFmode.
> > > > >         * config/i386/mmx.md (vec_cmpv4hfqi): New expander.
> > > > >         (vcond<mode>v4hf): Ditto.
> > > > >         (vcond<mode>v4hi): Ditto.
> > > > >         (vcondu<mode>v4hi): Ditto.
> > > > >         (vcond_mask_<mode>v4hi): Ditto.
> > > > >         (vcond_mask_<mode>qi): Ditto.
> > > > >         (vec_cmpv2hfqi): Ditto.
> > > > >         (vcond<mode>v2hf): Ditto.
> > > > >         (vcond<mode>v2hi): Ditto.
> > > > >         (vcondu<mode>v2hi): Ditto.
> > > > >         (vcond_mask_<mode>v2hi): Ditto.
> > > > >         * config/i386/sse.md (vcond<mode><mode>): Merge this with ..
> > > > >         (vcond<sseintvecmodelower><mode>): .. this into ..
> > > > >         (vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
> > > > >         and extend to V8BF/V16BF/V32BFmode.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >         * g++.target/i386/part-vect-vcondhf.C: New test.
> > > > >         * gcc.target/i386/part-vect-vec_cmphf.c: New test.
> > > > > ---
> > > > >  gcc/config/i386/i386-expand.cc                |   4 +
> > > > >  gcc/config/i386/mmx.md                        | 237 +++++++++++++++++-
> > > > >  gcc/config/i386/sse.md                        |  25 +-
> > > > >  .../g++.target/i386/part-vect-vcondhf.C       |  34 +++
> > > > >  .../gcc.target/i386/part-vect-vec_cmphf.c     |  26 ++
> > > > >  5 files changed, 304 insertions(+), 22 deletions(-)
> > > > >  create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > > >
> > > > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > > > > index 1eae9d7c78c..9658f9c5a2d 100644
> > > > > --- a/gcc/config/i386/i386-expand.cc
> > > > > +++ b/gcc/config/i386/i386-expand.cc
> > > > > @@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> > > > >        break;
> > > > >      case E_V8QImode:
> > > > >      case E_V4HImode:
> > > > > +    case E_V4HFmode:
> > > > > +    case E_V4BFmode:
> > > > >      case E_V2SImode:
> > > > >        if (TARGET_SSE4_1)
> > > > >         {
> > > > > @@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> > > > >        break;
> > > > >      case E_V4QImode:
> > > > >      case E_V2HImode:
> > > > > +    case E_V2HFmode:
> > > > > +    case E_V2BFmode:
> > > > >        if (TARGET_SSE4_1)
> > > > >         {
> > > > >           gen = gen_mmx_pblendvb_v4qi;
> > > > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > > > > index 491a0a51272..b9617e9d8c6 100644
> > > > > --- a/gcc/config/i386/mmx.md
> > > > > +++ b/gcc/config/i386/mmx.md
> > > > > @@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
> > > > >  (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
> > > > >
> > > > >  (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
> > > > > +(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
> > > > > +(define_mode_iterator V4F_64 [V4HF V4BF])
> > > > > +(define_mode_iterator V2F_32 [V2HF V2BF])
> > > > >  ;; 4-byte integer vector modes
> > > > >  (define_mode_iterator VI_32 [V4QI V2HI])
> > > > >
> > > > > @@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
> > > > >    [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
> > > > >
> > > > >  (define_mode_attr mmxxmmmode
> > > > > -  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
> > > > > +  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
> > > > > +   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
> > > > >
> > > > >  (define_mode_attr mmxxmmmodelower
> > > > > -  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
> > > > > +  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
> > > > > +   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
> > > > >
> > > > >  (define_expand "movd_<mode>_to_sse"
> > > > >    [(set (match_operand:<mmxxmmmode> 0 "register_operand")
> > > > > @@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
> > > > >    [(set (match_dup 0)
> > > > >         (ior:<MODE> (match_dup 1) (match_dup 2)))])
> > > > >
> > > > > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > > > +;;
> > > > > +;; Parallel half-precision floating point comparisons
> > > > > +;;
> > > > > +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > > > +
> > > > > +(define_expand "vec_cmpv4hfqi"
> > > > > +  [(set (match_operand:QI 0 "register_operand")
> > > > > +       (match_operator:QI 1 ""
> > > > > +         [(match_operand:V4HF 2 "nonimmediate_operand")
> > > > > +          (match_operand:V4HF 3 "nonimmediate_operand")]))]
> > > > > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > > +   && ix86_partial_vec_fp_math"
> > > > > +{
> > > > > +  rtx ops[4];
> > > > > +  ops[3] = gen_reg_rtx (V8HFmode);
> > > > > +  ops[2] = gen_reg_rtx (V8HFmode);
> > > > > +
> > > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
> > > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
> > > > > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond<mode>v4hf"
> > > > > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > > > > +       (if_then_else:V4FI_64
> > > > > +         (match_operator 3 ""
> > > > > +           [(match_operand:V4HF 4 "nonimmediate_operand")
> > > > > +            (match_operand:V4HF 5 "nonimmediate_operand")])
> > > > > +         (match_operand:V4FI_64 1 "general_operand")
> > > > > +         (match_operand:V4FI_64 2 "general_operand")))]
> > > > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > > +  && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
> > > > > +{
> > > > > +  rtx ops[6];
> > > > > +  ops[5] = gen_reg_rtx (V8HFmode);
> > > > > +  ops[4] = gen_reg_rtx (V8HFmode);
> > > > > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > > > > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > > > > +                          force_reg (<MODE>mode, operands[1]),
> > > > > +                          <MODE>mode);
> > > > > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > > > > +                          force_reg (<MODE>mode, operands[2]),
> > > > > +                          <MODE>mode);
> > > > > +  ops[3] = operands[3];
> > > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
> > > > > +  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
> > > > > +  bool ok = ix86_expand_fp_vcond (ops);
> > > > > +  gcc_assert (ok);
> > > > > +
> > > > > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > > > > +                                              <mmxxmmmode>mode));
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond<mode>v4hi"
> > > > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > > > +       (if_then_else:V4F_64
> > > > > +         (match_operator 3 ""
> > > > > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > > > > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > > > > +         (match_operand:V4F_64 1 "general_operand")
> > > > > +         (match_operand:V4F_64 2 "general_operand")))]
> > > > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > > > +{
> > > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > > +  gcc_assert (ok);
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcondu<mode>v4hi"
> > > > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > > > +       (if_then_else:V4F_64
> > > > > +         (match_operator 3 ""
> > > > > +           [(match_operand:V4HI 4 "nonimmediate_operand")
> > > > > +            (match_operand:V4HI 5 "nonimmediate_operand")])
> > > > > +         (match_operand:V4F_64 1 "general_operand")
> > > > > +         (match_operand:V4F_64 2 "general_operand")))]
> > > > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > > > +{
> > > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > > +  gcc_assert (ok);
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond_mask_<mode>v4hi"
> > > > > +  [(set (match_operand:V4F_64 0 "register_operand")
> > > > > +       (vec_merge:V4F_64
> > > > > +         (match_operand:V4F_64 1 "register_operand")
> > > > > +         (match_operand:V4F_64 2 "register_operand")
> > > > > +         (match_operand:V4HI 3  "register_operand")))]
> > > > > +  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
> > > > > +{
> > > > > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > > > > +                        operands[1], operands[2]);
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond_mask_<mode>qi"
> > > > > +  [(set (match_operand:V4FI_64 0 "register_operand")
> > > > > +       (vec_merge:V4FI_64
> > > > > +         (match_operand:V4FI_64 1 "register_operand")
> > > > > +         (match_operand:V4FI_64 2 "register_operand")
> > > > > +         (match_operand:QI 3 "register_operand")))]
> > > > > +  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
> > > > > +{
> > > > > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > > > > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > > > > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > > > > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > > > > +                                                operands[2], operands[3]));
> > > > > +  emit_move_insn (operands[0],
> > > > > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vec_cmpv2hfqi"
> > > > > +  [(set (match_operand:QI 0 "register_operand")
> > > > > +       (match_operator:QI 1 ""
> > > > > +         [(match_operand:V2HF 2 "nonimmediate_operand")
> > > > > +          (match_operand:V2HF 3 "nonimmediate_operand")]))]
> > > > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > > +   && ix86_partial_vec_fp_math"
> > > > > +{
> > > > > +  rtx ops[4];
> > > > > +  ops[3] = gen_reg_rtx (V8HFmode);
> > > > > +  ops[2] = gen_reg_rtx (V8HFmode);
> > > > > +
> > > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
> > > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
> > > > > +  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond<mode>v2hf"
> > > > > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > > > > +       (if_then_else:V2FI_32
> > > > > +         (match_operator 3 ""
> > > > > +           [(match_operand:V2HF 4 "nonimmediate_operand")
> > > > > +            (match_operand:V2HF 5 "nonimmediate_operand")])
> > > > > +         (match_operand:V2FI_32 1 "general_operand")
> > > > > +         (match_operand:V2FI_32 2 "general_operand")))]
> > > > > +  "TARGET_AVX512FP16 && TARGET_AVX512VL
> > > > > +   && ix86_partial_vec_fp_math"
> > > > > +{
> > > > > +  rtx ops[6];
> > > > > +  ops[5] = gen_reg_rtx (V8HFmode);
> > > > > +  ops[4] = gen_reg_rtx (V8HFmode);
> > > > > +  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
> > > > > +  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
> > > > > +                          force_reg (<MODE>mode, operands[1]),
> > > > > +                          <MODE>mode);
> > > > > +  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
> > > > > +                          force_reg (<MODE>mode, operands[2]),
> > > > > +                          <MODE>mode);
> > > > > +  ops[3] = operands[3];
> > > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
> > > > > +  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
> > > > > +  bool ok = ix86_expand_fp_vcond (ops);
> > > > > +  gcc_assert (ok);
> > > > > +
> > > > > +  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
> > > > > +                                              <mmxxmmmode>mode));
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond<mode>v2hi"
> > > > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > > > +       (if_then_else:V2F_32
> > > > > +         (match_operator 3 ""
> > > > > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > > > > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > > > > +         (match_operand:V2F_32 1 "general_operand")
> > > > > +         (match_operand:V2F_32 2 "general_operand")))]
> > > > > +  "TARGET_SSE4_1"
> > > > > +{
> > > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > > +  gcc_assert (ok);
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcondu<mode>v2hi"
> > > > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > > > +       (if_then_else:V2F_32
> > > > > +         (match_operator 3 ""
> > > > > +           [(match_operand:V2HI 4 "nonimmediate_operand")
> > > > > +            (match_operand:V2HI 5 "nonimmediate_operand")])
> > > > > +         (match_operand:V2F_32 1 "general_operand")
> > > > > +         (match_operand:V2F_32 2 "general_operand")))]
> > > > > +  "TARGET_SSE4_1"
> > > > > +{
> > > > > +  bool ok = ix86_expand_int_vcond (operands);
> > > > > +  gcc_assert (ok);
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond_mask_<mode>v2hi"
> > > > > +  [(set (match_operand:V2F_32 0 "register_operand")
> > > > > +       (vec_merge:V2F_32
> > > > > +         (match_operand:V2F_32 1 "register_operand")
> > > > > +         (match_operand:V2F_32 2 "register_operand")
> > > > > +         (match_operand:V2HI 3 "register_operand")))]
> > > > > +  "TARGET_SSE4_1"
> > > > > +{
> > > > > +  ix86_expand_sse_movcc (operands[0], operands[3],
> > > > > +                        operands[1], operands[2]);
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > > +(define_expand "vcond_mask_<mode>qi"
> > > > > +  [(set (match_operand:V2FI_32 0 "register_operand")
> > > > > +       (vec_merge:V2FI_32
> > > > > +         (match_operand:V2FI_32 1 "register_operand")
> > > > > +         (match_operand:V2FI_32 2 "register_operand")
> > > > > +         (match_operand:QI 3 "register_operand")))]
> > > > > +  "TARGET_AVX512BW && TARGET_AVX512VL"
> > > > > +{
> > > > > +  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
> > > > > +  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
> > > > > +  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
> > > > > +  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
> > > > > +                                                operands[2], operands[3]));
> > > > > +  emit_move_insn (operands[0],
> > > > > +                 lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
> > > > > +  DONE;
> > > > > +})
> > > > > +
> > > > >  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> > > > >  ;;
> > > > >  ;; Parallel half-precision floating point rounding operations.
> > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > > > > index c988935d4df..e2a7cbeb722 100644
> > > > > --- a/gcc/config/i386/sse.md
> > > > > +++ b/gcc/config/i386/sse.md
> > > > > @@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
> > > > >    DONE;
> > > > >  })
> > > > >
> > > > > -(define_expand "vcond<mode><mode>"
> > > > > -  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
> > > > > -       (if_then_else:VHF_AVX512VL
> > > > > -         (match_operator 3 ""
> > > > > -           [(match_operand:VHF_AVX512VL 4 "vector_operand")
> > > > > -            (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > > > > -         (match_operand:VHF_AVX512VL 1 "general_operand")
> > > > > -         (match_operand:VHF_AVX512VL 2 "general_operand")))]
> > > > > -  "TARGET_AVX512FP16"
> > > > > -{
> > > > > -  bool ok = ix86_expand_fp_vcond (operands);
> > > > > -  gcc_assert (ok);
> > > > > -  DONE;
> > > > > -})
> > > > > -
> > > > > -(define_expand "vcond<sseintvecmodelower><mode>"
> > > > > -  [(set (match_operand:<sseintvecmode> 0 "register_operand")
> > > > > -       (if_then_else:<sseintvecmode>
> > > > > +(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
> > > > > +  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
> > > > > +       (if_then_else:VI2HFBF_AVX512VL
> > > > >           (match_operator 3 ""
> > > > >             [(match_operand:VHF_AVX512VL 4 "vector_operand")
> > > > >              (match_operand:VHF_AVX512VL 5 "vector_operand")])
> > > > > -         (match_operand:<sseintvecmode> 1 "general_operand")
> > > > > -         (match_operand:<sseintvecmode> 2 "general_operand")))]
> > > > > +         (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
> > > > > +         (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
> > > > >    "TARGET_AVX512FP16"
> > > > >  {
> > > > >    bool ok = ix86_expand_fp_vcond (operands);
> > > > > diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > > > new file mode 100644
> > > > > index 00000000000..8bf01b7cb4a
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
> > > > > @@ -0,0 +1,34 @@
> > > > > +/* PR target/103861 */
> > > > > +/* { dg-do compile { target { ! ia32 } } } */
> > > > > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > > > > +/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
> > > > > +/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
> > > > > +/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
> > > > > +/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
> > > > > +typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
> > > > > +typedef short __attribute__((__vector_size__ (4))) __v2hi;
> > > > > +
> > > > > +typedef unsigned short  __attribute__((__vector_size__ (8))) __v4hu;
> > > > > +typedef short __attribute__((__vector_size__ (8))) __v4hi;
> > > > > +
> > > > > +typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
> > > > > +typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
> > > > > +
> > > > > +
> > > > > +__v2hu au, bu;
> > > > > +__v2hi as, bs;
> > > > > +__v2hf af, bf;
> > > > > +
> > > > > +__v4hu cu, du;
> > > > > +__v4hi cs, ds;
> > > > > +__v4hf cf, df;
> > > > > +
> > > > > +__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
> > > > > +__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
> > > > > +__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
> > > > > +__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
> > > > > +
> > > > > +__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
> > > > > +__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
> > > > > +__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
> > > > > +__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
> > > > > diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > > > new file mode 100644
> > > > > index 00000000000..ee8659395eb
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
> > > > > @@ -0,0 +1,26 @@
> > > > > +/* { dg-do compile { target { ! ia32 } } } */
> > > > > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> > > > > +/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
> > > > > +
> > > > > +typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
> > > > > +typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
> > > > > +
> > > > > +
> > > > > +#define VCMPMN(type, op, name) \
> > > > > +type  \
> > > > > +__attribute__ ((noinline, noclone)) \
> > > > > +vec_cmp_##type##type##name (type a, type b) \
> > > > > +{ \
> > > > > +  return a op b;  \
> > > > > +}
> > > > > +
> > > > > +VCMPMN (v4hf, <, lt)
> > > > > +VCMPMN (v2hf, <, lt)
> > > > > +VCMPMN (v4hf, <=, le)
> > > > > +VCMPMN (v2hf, <=, le)
> > > > > +VCMPMN (v4hf, >, gt)
> > > > > +VCMPMN (v2hf, >, gt)
> > > > > +VCMPMN (v4hf, >=, ge)
> > > > > +VCMPMN (v2hf, >=, ge)
> > > > > +VCMPMN (v4hf, ==, eq)
> > > > > +VCMPMN (v2hf, ==, eq)
> > > > > --
> > > > > 2.31.1
> > > > >
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
> >
> >
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-10-24  7:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-23  8:48 [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf liuhongt
2023-10-23 12:32 ` Richard Biener
2023-10-24  2:53   ` Hongtao Liu
2023-10-24  5:23     ` Hongtao Liu
2023-10-24  5:44       ` Hongtao Liu
2023-10-24  7:13         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).