public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf.
@ 2023-10-23  8:48 liuhongt
  2023-10-23 12:32 ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: liuhongt @ 2023-10-23  8:48 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready push to trunk.

gcc/ChangeLog:

	PR target/103861
	* config/i386/i386-expand.cc (ix86_expand_sse_movcc): Handle
	V2HF/V2BF/V4HF/V4BFmode.
	* config/i386/mmx.md (vec_cmpv4hfqi): New expander.
	(vcond<mode>v4hf): Ditto.
	(vcond<mode>v4hi): Ditto.
	(vcondu<mode>v4hi): Ditto.
	(vcond_mask_<mode>v4hi): Ditto.
	(vcond_mask_<mode>qi): Ditto.
	(vec_cmpv2hfqi): Ditto.
	(vcond<mode>v2hf): Ditto.
	(vcond<mode>v2hi): Ditto.
	(vcondu<mode>v2hi): Ditto.
	(vcond_mask_<mode>v2hi): Ditto.
	* config/i386/sse.md (vcond<mode><mode>): Merge this with ..
	(vcond<sseintvecmodelower><mode>): .. this into ..
	(vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>): .. this,
	and extend to V8BF/V16BF/V32BFmode.

gcc/testsuite/ChangeLog:

	* g++.target/i386/part-vect-vcondhf.C: New test.
	* gcc.target/i386/part-vect-vec_cmphf.c: New test.
---
 gcc/config/i386/i386-expand.cc                |   4 +
 gcc/config/i386/mmx.md                        | 237 +++++++++++++++++-
 gcc/config/i386/sse.md                        |  25 +-
 .../g++.target/i386/part-vect-vcondhf.C       |  34 +++
 .../gcc.target/i386/part-vect-vec_cmphf.c     |  26 ++
 5 files changed, 304 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
 create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 1eae9d7c78c..9658f9c5a2d 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4198,6 +4198,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       break;
     case E_V8QImode:
     case E_V4HImode:
+    case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SImode:
       if (TARGET_SSE4_1)
 	{
@@ -4207,6 +4209,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       break;
     case E_V4QImode:
     case E_V2HImode:
+    case E_V2HFmode:
+    case E_V2BFmode:
       if (TARGET_SSE4_1)
 	{
 	  gen = gen_mmx_pblendvb_v4qi;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 491a0a51272..b9617e9d8c6 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -61,6 +61,9 @@ (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
 (define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
 
 (define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
+(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
+(define_mode_iterator V4F_64 [V4HF V4BF])
+(define_mode_iterator V2F_32 [V2HF V2BF])
 ;; 4-byte integer vector modes
 (define_mode_iterator VI_32 [V4QI V2HI])
 
@@ -1972,10 +1975,12 @@ (define_mode_attr mov_to_sse_suffix
   [(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
 
 (define_mode_attr mmxxmmmode
-  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")])
+  [(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
+   (V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
 
 (define_mode_attr mmxxmmmodelower
-  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")])
+  [(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
+   (V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
 
 (define_expand "movd_<mode>_to_sse"
   [(set (match_operand:<mmxxmmmode> 0 "register_operand")
@@ -2114,6 +2119,234 @@ (define_insn_and_split "*mmx_nabs<mode>2"
   [(set (match_dup 0)
 	(ior:<MODE> (match_dup 1) (match_dup 2)))])
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel half-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "vec_cmpv4hfqi"
+  [(set (match_operand:QI 0 "register_operand")
+	(match_operator:QI 1 ""
+	  [(match_operand:V4HF 2 "nonimmediate_operand")
+	   (match_operand:V4HF 3 "nonimmediate_operand")]))]
+  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
+   && ix86_partial_vec_fp_math"
+{
+  rtx ops[4];
+  ops[3] = gen_reg_rtx (V8HFmode);
+  ops[2] = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movq_v4hf_to_sse (ops[3], operands[3]));
+  emit_insn (gen_movq_v4hf_to_sse (ops[2], operands[2]));
+  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
+  DONE;
+})
+
+(define_expand "vcond<mode>v4hf"
+  [(set (match_operand:V4FI_64 0 "register_operand")
+	(if_then_else:V4FI_64
+	  (match_operator 3 ""
+	    [(match_operand:V4HF 4 "nonimmediate_operand")
+	     (match_operand:V4HF 5 "nonimmediate_operand")])
+	  (match_operand:V4FI_64 1 "general_operand")
+	  (match_operand:V4FI_64 2 "general_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL
+  && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
+{
+  rtx ops[6];
+  ops[5] = gen_reg_rtx (V8HFmode);
+  ops[4] = gen_reg_rtx (V8HFmode);
+  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
+  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[1]),
+			   <MODE>mode);
+  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[2]),
+			   <MODE>mode);
+  ops[3] = operands[3];
+  emit_insn (gen_movq_v4hf_to_sse (ops[4], operands[4]));
+  emit_insn (gen_movq_v4hf_to_sse (ops[5], operands[5]));
+  bool ok = ix86_expand_fp_vcond (ops);
+  gcc_assert (ok);
+
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
+					       <mmxxmmmode>mode));
+  DONE;
+})
+
+(define_expand "vcond<mode>v4hi"
+  [(set (match_operand:V4F_64 0 "register_operand")
+	(if_then_else:V4F_64
+	  (match_operator 3 ""
+	    [(match_operand:V4HI 4 "nonimmediate_operand")
+	     (match_operand:V4HI 5 "nonimmediate_operand")])
+	  (match_operand:V4F_64 1 "general_operand")
+	  (match_operand:V4F_64 2 "general_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcondu<mode>v4hi"
+  [(set (match_operand:V4F_64 0 "register_operand")
+	(if_then_else:V4F_64
+	  (match_operator 3 ""
+	    [(match_operand:V4HI 4 "nonimmediate_operand")
+	     (match_operand:V4HI 5 "nonimmediate_operand")])
+	  (match_operand:V4F_64 1 "general_operand")
+	  (match_operand:V4F_64 2 "general_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>v4hi"
+  [(set (match_operand:V4F_64 0 "register_operand")
+	(vec_merge:V4F_64
+	  (match_operand:V4F_64 1 "register_operand")
+	  (match_operand:V4F_64 2 "register_operand")
+	  (match_operand:V4HI 3  "register_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
+{
+  ix86_expand_sse_movcc (operands[0], operands[3],
+			 operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>qi"
+  [(set (match_operand:V4FI_64 0 "register_operand")
+	(vec_merge:V4FI_64
+	  (match_operand:V4FI_64 1 "register_operand")
+	  (match_operand:V4FI_64 2 "register_operand")
+	  (match_operand:QI 3 "register_operand")))]
+  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
+{
+  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
+  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
+  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
+  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
+						 operands[2], operands[3]));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
+  DONE;
+})
+
+(define_expand "vec_cmpv2hfqi"
+  [(set (match_operand:QI 0 "register_operand")
+	(match_operator:QI 1 ""
+	  [(match_operand:V2HF 2 "nonimmediate_operand")
+	   (match_operand:V2HF 3 "nonimmediate_operand")]))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL
+   && ix86_partial_vec_fp_math"
+{
+  rtx ops[4];
+  ops[3] = gen_reg_rtx (V8HFmode);
+  ops[2] = gen_reg_rtx (V8HFmode);
+
+  emit_insn (gen_movd_v2hf_to_sse (ops[3], operands[3]));
+  emit_insn (gen_movd_v2hf_to_sse (ops[2], operands[2]));
+  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1], ops[2], ops[3]));
+  DONE;
+})
+
+(define_expand "vcond<mode>v2hf"
+  [(set (match_operand:V2FI_32 0 "register_operand")
+	(if_then_else:V2FI_32
+	  (match_operator 3 ""
+	    [(match_operand:V2HF 4 "nonimmediate_operand")
+	     (match_operand:V2HF 5 "nonimmediate_operand")])
+	  (match_operand:V2FI_32 1 "general_operand")
+	  (match_operand:V2FI_32 2 "general_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL
+   && ix86_partial_vec_fp_math"
+{
+  rtx ops[6];
+  ops[5] = gen_reg_rtx (V8HFmode);
+  ops[4] = gen_reg_rtx (V8HFmode);
+  ops[0] = gen_reg_rtx (<mmxxmmmode>mode);
+  ops[1] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[1]),
+			   <MODE>mode);
+  ops[2] = lowpart_subreg (<mmxxmmmode>mode,
+			   force_reg (<MODE>mode, operands[2]),
+			   <MODE>mode);
+  ops[3] = operands[3];
+  emit_insn (gen_movd_v2hf_to_sse (ops[4], operands[4]));
+  emit_insn (gen_movd_v2hf_to_sse (ops[5], operands[5]));
+  bool ok = ix86_expand_fp_vcond (ops);
+  gcc_assert (ok);
+
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
+					       <mmxxmmmode>mode));
+  DONE;
+})
+
+(define_expand "vcond<mode>v2hi"
+  [(set (match_operand:V2F_32 0 "register_operand")
+	(if_then_else:V2F_32
+	  (match_operator 3 ""
+	    [(match_operand:V2HI 4 "nonimmediate_operand")
+	     (match_operand:V2HI 5 "nonimmediate_operand")])
+	  (match_operand:V2F_32 1 "general_operand")
+	  (match_operand:V2F_32 2 "general_operand")))]
+  "TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcondu<mode>v2hi"
+  [(set (match_operand:V2F_32 0 "register_operand")
+	(if_then_else:V2F_32
+	  (match_operator 3 ""
+	    [(match_operand:V2HI 4 "nonimmediate_operand")
+	     (match_operand:V2HI 5 "nonimmediate_operand")])
+	  (match_operand:V2F_32 1 "general_operand")
+	  (match_operand:V2F_32 2 "general_operand")))]
+  "TARGET_SSE4_1"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>v2hi"
+  [(set (match_operand:V2F_32 0 "register_operand")
+	(vec_merge:V2F_32
+	  (match_operand:V2F_32 1 "register_operand")
+	  (match_operand:V2F_32 2 "register_operand")
+	  (match_operand:V2HI 3 "register_operand")))]
+  "TARGET_SSE4_1"
+{
+  ix86_expand_sse_movcc (operands[0], operands[3],
+			 operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "vcond_mask_<mode>qi"
+  [(set (match_operand:V2FI_32 0 "register_operand")
+	(vec_merge:V2FI_32
+	  (match_operand:V2FI_32 1 "register_operand")
+	  (match_operand:V2FI_32 2 "register_operand")
+	  (match_operand:QI 3 "register_operand")))]
+  "TARGET_AVX512BW && TARGET_AVX512VL"
+{
+  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);
+  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
+  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
+  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
+						 operands[2], operands[3]));
+  emit_move_insn (operands[0],
+		  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel half-precision floating point rounding operations.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c988935d4df..e2a7cbeb722 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4644,29 +4644,14 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
   DONE;
 })
 
-(define_expand "vcond<mode><mode>"
-  [(set (match_operand:VHF_AVX512VL 0 "register_operand")
-	(if_then_else:VHF_AVX512VL
-	  (match_operator 3 ""
-	    [(match_operand:VHF_AVX512VL 4 "vector_operand")
-	     (match_operand:VHF_AVX512VL 5 "vector_operand")])
-	  (match_operand:VHF_AVX512VL 1 "general_operand")
-	  (match_operand:VHF_AVX512VL 2 "general_operand")))]
-  "TARGET_AVX512FP16"
-{
-  bool ok = ix86_expand_fp_vcond (operands);
-  gcc_assert (ok);
-  DONE;
-})
-
-(define_expand "vcond<sseintvecmodelower><mode>"
-  [(set (match_operand:<sseintvecmode> 0 "register_operand")
-	(if_then_else:<sseintvecmode>
+(define_expand "vcond<VI2HFBF_AVX512VL:mode><VHF_AVX512VL:mode>"
+  [(set (match_operand:VI2HFBF_AVX512VL 0 "register_operand")
+	(if_then_else:VI2HFBF_AVX512VL
 	  (match_operator 3 ""
 	    [(match_operand:VHF_AVX512VL 4 "vector_operand")
 	     (match_operand:VHF_AVX512VL 5 "vector_operand")])
-	  (match_operand:<sseintvecmode> 1 "general_operand")
-	  (match_operand:<sseintvecmode> 2 "general_operand")))]
+	  (match_operand:VI2HFBF_AVX512VL 1 "general_operand")
+	  (match_operand:VI2HFBF_AVX512VL 2 "general_operand")))]
   "TARGET_AVX512FP16"
 {
   bool ok = ix86_expand_fp_vcond (operands);
diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
new file mode 100644
index 00000000000..8bf01b7cb4a
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
@@ -0,0 +1,34 @@
+/* PR target/103861 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpcmpeqw" 2 } } */
+/* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
+/* { dg-final { scan-assembler-times "vcmpph" 4 } } */
+/* { dg-final { scan-assembler-times "vpblendvb" 4 } } */
+typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
+typedef short __attribute__((__vector_size__ (4))) __v2hi;
+
+typedef unsigned short  __attribute__((__vector_size__ (8))) __v4hu;
+typedef short __attribute__((__vector_size__ (8))) __v4hi;
+
+typedef _Float16 __attribute__((__vector_size__ (4))) __v2hf;
+typedef _Float16 __attribute__((__vector_size__ (8))) __v4hf;
+
+
+__v2hu au, bu;
+__v2hi as, bs;
+__v2hf af, bf;
+
+__v4hu cu, du;
+__v4hi cs, ds;
+__v4hf cf, df;
+
+__v2hf auf (__v2hu a, __v2hu b) { return (a > b) ? af : bf; }
+__v2hf asf (__v2hi a, __v2hi b) { return (a > b) ? af : bf; }
+__v2hu afu (__v2hf a, __v2hf b) { return (a > b) ? au : bu; }
+__v2hi afs (__v2hf a, __v2hf b) { return (a > b) ? as : bs; }
+
+__v4hf cuf (__v4hu c, __v4hu d) { return (c > d) ? cf : df; }
+__v4hf csf (__v4hi c, __v4hi d) { return (c > d) ? cf : df; }
+__v4hu cfu (__v4hf c, __v4hf d) { return (c > d) ? cu : du; }
+__v4hi cfs (__v4hf c, __v4hf d) { return (c > d) ? cs : ds; }
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
new file mode 100644
index 00000000000..ee8659395eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmphf.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vcmpph" 10 } } */
+
+typedef _Float16 __attribute__((__vector_size__ (4))) v2hf;
+typedef _Float16 __attribute__((__vector_size__ (8))) v4hf;
+
+
+#define VCMPMN(type, op, name)	\
+type  \
+__attribute__ ((noinline, noclone)) \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+  return a op b;  \
+}
+
+VCMPMN (v4hf, <, lt)
+VCMPMN (v2hf, <, lt)
+VCMPMN (v4hf, <=, le)
+VCMPMN (v2hf, <=, le)
+VCMPMN (v4hf, >, gt)
+VCMPMN (v2hf, >, gt)
+VCMPMN (v4hf, >=, ge)
+VCMPMN (v2hf, >=, ge)
+VCMPMN (v4hf, ==, eq)
+VCMPMN (v2hf, ==, eq)
-- 
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-10-24  7:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-23  8:48 [PATCH] Support vec_cmpmn/vcondmn for v2hf/v4hf liuhongt
2023-10-23 12:32 ` Richard Biener
2023-10-24  2:53   ` Hongtao Liu
2023-10-24  5:23     ` Hongtao Liu
2023-10-24  5:44       ` Hongtao Liu
2023-10-24  7:13         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).