public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-5025] Fix wrong code due to incorrect define_split
@ 2023-10-31  3:25 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2023-10-31  3:25 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f5d33d0c790a00cb5240a966dd99df9961bf57a0

commit r14-5025-gf5d33d0c790a00cb5240a966dd99df9961bf57a0
Author: liuhongt <hongtao.liu@intel.com>
Date:   Mon Oct 30 14:05:25 2023 +0800

    Fix wrong code due to incorrect define_split
    
    -(define_split
    -  [(set (match_operand:V2HI 0 "register_operand")
    -        (eq:V2HI
    -          (eq:V2HI
    -            (us_minus:V2HI
    -              (match_operand:V2HI 1 "register_operand")
    -              (match_operand:V2HI 2 "register_operand"))
    -            (match_operand:V2HI 3 "const0_operand"))
    -          (match_operand:V2HI 4 "const0_operand")))]
    -  "TARGET_SSE4_1"
    -  [(set (match_dup 0)
    -        (umin:V2HI (match_dup 1) (match_dup 2)))
    -   (set (match_dup 0)
    -        (eq:V2HI (match_dup 0) (match_dup 2)))])
    
    The splitter is wrong when op1 == op2: the original pattern returns 0,
    but after the split it returns 1.  So remove the splitter.
    
    Also change another define_split into a define_insn_and_split to handle
    the pattern below:
    
    494(set (reg:V4QI 112)
    495    (unspec:V4QI [
    496            (subreg:V4QI (reg:V2HF 111 [ bf ]) 0)
    497            (subreg:V4QI (reg:V2HF 110 [ af ]) 0)
    498            (subreg:V4QI (eq:V2HI (eq:V2HI (reg:V2HI 105)
    499                        (const_vector:V2HI [
    500                                (const_int 0 [0]) repeated x2
    501                            ]))
    502                    (const_vector:V2HI [
    503                            (const_int 0 [0]) repeated x2
    504                        ])) 0)
    505        ] UNSPEC_BLENDV))
    
    define_split doesn't work since pass_combine assumes it produces at
    most 2 insns after the split, but here it produces 3 since we need to
    move const0_rtx (V2HImode) into a register.  The move insn can be
    eliminated later.
    
    gcc/ChangeLog:
    
            PR target/112276
            * config/i386/mmx.md (*mmx_pblendvb_v8qi_1): Change
            define_split to define_insn_and_split to handle
            immediate_operand for comparison.
            (*mmx_pblendvb_v8qi_2): Ditto.
            (*mmx_pblendvb_<mode>_1): Ditto.
            (*mmx_pblendvb_v4qi_2): Ditto.
            (<code><mode>3): Remove define_split after it.
            (<code>v8qi3): Ditto.
            (<code><mode>3): Ditto.
            (<code>v2hi3): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * g++.target/i386/part-vect-vcondhf.C: Adjust testcase.
            * gcc.target/i386/pr112276.c: New test.

Diff:
---
 gcc/config/i386/mmx.md                            | 112 +++++++---------------
 gcc/testsuite/g++.target/i386/part-vect-vcondhf.C |   1 -
 gcc/testsuite/gcc.target/i386/pr112276.c          |  36 +++++++
 3 files changed, 70 insertions(+), 79 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e3d0fb5b1075..2b97bb8fa98d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3360,21 +3360,6 @@
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
 
-(define_split
-  [(set (match_operand:V4HI 0 "register_operand")
-	(eq:V4HI
-	  (eq:V4HI
-	    (us_minus:V4HI
-	      (match_operand:V4HI 1 "register_operand")
-	      (match_operand:V4HI 2 "register_operand"))
-	    (match_operand:V4HI 3 "const0_operand"))
-	  (match_operand:V4HI 4 "const0_operand")))]
-  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
-  [(set (match_dup 0)
-	(umin:V4HI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V4HI (match_dup 0) (match_dup 2)))])
-
 (define_expand "mmx_<code>v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
         (umaxmin:V8QI
@@ -3408,21 +3393,6 @@
 	  (match_operand:V8QI 2 "register_operand")))]
   "TARGET_MMX_WITH_SSE")
 
-(define_split
-  [(set (match_operand:V8QI 0 "register_operand")
-	(eq:V8QI
-	  (eq:V8QI
-	    (us_minus:V8QI
-	      (match_operand:V8QI 1 "register_operand")
-	      (match_operand:V8QI 2 "register_operand"))
-	    (match_operand:V8QI 3 "const0_operand"))
-	  (match_operand:V8QI 4 "const0_operand")))]
-  "TARGET_MMX_WITH_SSE"
-  [(set (match_dup 0)
-	(umin:V8QI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V8QI (match_dup 0) (match_dup 2)))])
-
 (define_insn "<code><mode>3"
   [(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw")
         (umaxmin:VI1_16_32
@@ -3436,21 +3406,6 @@
    (set_attr "type" "sseiadd")
    (set_attr "mode" "TI")])
 
-(define_split
-  [(set (match_operand:V4QI 0 "register_operand")
-	(eq:V4QI
-	  (eq:V4QI
-	    (us_minus:V4QI
-	      (match_operand:V4QI 1 "register_operand")
-	      (match_operand:V4QI 2 "register_operand"))
-	    (match_operand:V4QI 3 "const0_operand"))
-	  (match_operand:V4QI 4 "const0_operand")))]
-  "TARGET_SSE2"
-  [(set (match_dup 0)
-	(umin:V4QI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V4QI (match_dup 0) (match_dup 2)))])
-
 (define_insn "<code>v2hi3"
   [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yv")
 	(umaxmin:V2HI
@@ -3467,21 +3422,6 @@
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
 
-(define_split
-  [(set (match_operand:V2HI 0 "register_operand")
-	(eq:V2HI
-	  (eq:V2HI
-	    (us_minus:V2HI
-	      (match_operand:V2HI 1 "register_operand")
-	      (match_operand:V2HI 2 "register_operand"))
-	    (match_operand:V2HI 3 "const0_operand"))
-	  (match_operand:V2HI 4 "const0_operand")))]
-  "TARGET_SSE4_1"
-  [(set (match_dup 0)
-	(umin:V2HI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V2HI (match_dup 0) (match_dup 2)))])
-
 (define_insn "ssse3_abs<mode>2"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
 	(abs:MMXMODEI
@@ -3954,7 +3894,7 @@
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "TI")])
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v8qi_1"
   [(set (match_operand:V8QI 0 "register_operand")
        (unspec:V8QI
 	  [(match_operand:V8QI 1 "register_operand")
@@ -3962,21 +3902,26 @@
 	   (eq:V8QI
 	     (eq:V8QI
 		(match_operand:V8QI 3 "register_operand")
-		(match_operand:V8QI 4 "register_operand"))
+		(match_operand:V8QI 4 "nonmemory_operand"))
 	     (match_operand:V8QI 5 "const0_operand"))]
 	   UNSPEC_BLENDV))]
-  "TARGET_MMX_WITH_SSE"
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:V8QI (match_dup 3) (match_dup 4)))
+	(eq:V8QI (match_dup 3) (match_dup 7)))
    (set (match_dup 0)
 	(unspec:V8QI
 	  [(match_dup 2)
 	   (match_dup 1)
 	   (match_dup 6)]
 	  UNSPEC_BLENDV))]
-  "operands[6] = gen_reg_rtx (V8QImode);")
+{
+  operands[6] = gen_reg_rtx (V8QImode);
+  operands[7] = force_reg (V8QImode, operands[4]);
+})
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v8qi_2"
   [(set (match_operand:V8QI 0 "register_operand")
        (unspec:V8QI
 	  [(match_operand:V8QI 1 "register_operand")
@@ -3985,12 +3930,14 @@
 	     (eq:MMXMODE24
 	       (eq:MMXMODE24
 		 (match_operand:MMXMODE24 3 "register_operand")
-		 (match_operand:MMXMODE24 4 "register_operand"))
+		 (match_operand:MMXMODE24 4 "nonmemory_operand"))
 	     (match_operand:MMXMODE24 5 "const0_operand")) 0)]
 	   UNSPEC_BLENDV))]
-  "TARGET_MMX_WITH_SSE"
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:MMXMODE24 (match_dup 3) (match_dup 4)))
+	(eq:MMXMODE24 (match_dup 3) (match_dup 8)))
    (set (match_dup 0)
 	(unspec:V8QI
 	  [(match_dup 2)
@@ -4000,6 +3947,7 @@
 {
   operands[6] = gen_reg_rtx (<MODE>mode);
   operands[7] = lowpart_subreg (V8QImode, operands[6], <MODE>mode);
+  operands[8] = force_reg (<MODE>mode, operands[4]);
 })
 
 (define_insn "mmx_pblendvb_<mode>"
@@ -4022,7 +3970,7 @@
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "TI")])
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_<mode>_1"
   [(set (match_operand:VI_16_32 0 "register_operand")
 	(unspec:VI_16_32
 	  [(match_operand:VI_16_32 1 "register_operand")
@@ -4030,21 +3978,26 @@
 	   (eq:VI_16_32
 	     (eq:VI_16_32
 		(match_operand:VI_16_32 3 "register_operand")
-		(match_operand:VI_16_32 4 "register_operand"))
+		(match_operand:VI_16_32 4 "nonmemory_operand"))
 	     (match_operand:VI_16_32 5 "const0_operand"))]
 	   UNSPEC_BLENDV))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:VI_16_32 (match_dup 3) (match_dup 4)))
+	(eq:VI_16_32 (match_dup 3) (match_dup 7)))
    (set (match_dup 0)
 	(unspec:VI_16_32
 	  [(match_dup 2)
 	   (match_dup 1)
 	   (match_dup 6)]
 	  UNSPEC_BLENDV))]
-  "operands[6] = gen_reg_rtx (<MODE>mode);")
+{
+  operands[6] = gen_reg_rtx (<MODE>mode);
+  operands[7] = force_reg (<MODE>mode, operands[4]);
+})
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v4qi_2"
   [(set (match_operand:V4QI 0 "register_operand")
        (unspec:V4QI
 	  [(match_operand:V4QI 1 "register_operand")
@@ -4053,12 +4006,14 @@
 	     (eq:V2HI
 	       (eq:V2HI
 		 (match_operand:V2HI 3 "register_operand")
-		 (match_operand:V2HI 4 "register_operand"))
+		 (match_operand:V2HI 4 "nonmemory_operand"))
 	     (match_operand:V2HI 5 "const0_operand")) 0)]
 	   UNSPEC_BLENDV))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:V2HI (match_dup 3) (match_dup 4)))
+	(eq:V2HI (match_dup 3) (match_dup 8)))
    (set (match_dup 0)
 	(unspec:V4QI
 	  [(match_dup 2)
@@ -4068,6 +4023,7 @@
 {
   operands[6] = gen_reg_rtx (V2HImode);
   operands[7] = lowpart_subreg (V4QImode, operands[6], V2HImode);
+  operands[8] = force_reg (V2HImode, operands[4]);
 })
 
 ;; XOP parallel XMM conditional moves
diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
index f19727816cf5..e623e6cde794 100644
--- a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
+++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
@@ -3,7 +3,6 @@
 /* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
 /* { dg-final { scan-assembler-times "vpcmpeqw" 6 } } */
 /* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
-/* { dg-final { scan-assembler-times "vpminuw" 2 } } */
 /* { dg-final { scan-assembler-times "vcmpph" 8 } } */
 /* { dg-final { scan-assembler-times "vpblendvb" 8 } } */
 typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
diff --git a/gcc/testsuite/gcc.target/i386/pr112276.c b/gcc/testsuite/gcc.target/i386/pr112276.c
new file mode 100644
index 000000000000..5365313f4c2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112276.c
@@ -0,0 +1,36 @@
+/* { dg-do run  { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+
+#include "sse4_1-check.h"
+
+typedef unsigned short __attribute__((__vector_size__ (8))) U4;
+typedef unsigned short __attribute__((__vector_size__ (4))) U2;
+
+U4
+__attribute__((noipa))
+foo4 (U4 a, U4 b)
+{
+  return a > b;
+}
+
+U2
+__attribute__((noipa))
+foo2 (U2 a, U2 b)
+{
+  return a > b;
+}
+
+static void
+sse4_1_test ()
+{
+  U4 a = __extension__(U4) {1, 1, 1, 1};
+  U4 b = foo4 (a, a);
+  if (b[0] || b[1] || b[2] || b[3]) __builtin_abort();
+
+  U2 c = __extension__(U2) {1, 1};
+  U2 d = foo2 (c, c);
+  if (d[0] || d[1]) __builtin_abort();
+
+  return;
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-31  3:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-31  3:25 [gcc r14-5025] Fix wrong code due to incorrect define_split hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).