public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Uros Bizjak <ubizjak@gmail.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Richard Biener <rguenther@suse.de>
Subject: [PATCH, i386]; Fix PR 66560, Fails to generate ADDSUBPS
Date: Tue, 23 Jun 2015 08:59:00 -0000	[thread overview]
Message-ID: <CAFULd4ZEoZeyHEYAh_iLLvr8jU+PzXOm_mfFarr6vGCi9x1sEg@mail.gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1402 bytes --]

Hello!

Attached patch introduces combiner splitters to handle every possible
ADDSUB permutation of vec_merge and vec_select/vec_concat operands.
These combiners handle swapped PLUS and MINUS operators, and account
for commutative operands of PLUS RTX. As shown in the attached
testcases, there are quite a few ways to create ADDSUB.

2015-06-23  Uros Bizjak  <ubizjak@gmail.com>

    PR target/66560
    * config/i386/predicates.md (addsub_vm_operator): New predicate.
    (addsub_vs_operator): Ditto.
    (addsub_vs_parallel): Ditto.
    * config/i386/sse.md (ssedoublemode): Add V4SF and V2DF modes.
    (avx_addsubv4df3, avx_addsubv8sf3, sse3_addsubv2df3, sse3_addsubv4sf3):
    Put minus RTX before plus and adjust vec_merge selector.
    (*avx_addsubv4df3_1, *avx_addsubv4df3_1s, *sse3_addsubv2df3_1)
    (*sse3_addsubv2df3_1s, *avx_addsubv8sf3_1, *avx_addsubv8sf3_1s)
    (*sse3_addsubv4sf3_1, *sse3_addsubv4sf3_1s): Remove insn patterns.
    (addsub vec_merge splitters): New combiner splitters.
    (addsub vec_select/vec_concat splitters): Ditto.

testsuite/ChangeLog:

2015-06-23  Uros Bizjak  <ubizjak@gmail.com>

    PR target/66560
    * gcc.target/i386/pr66560-1.c: New test.
    * gcc.target/i386/pr66560-2.c: Ditto.
    * gcc.target/i386/pr66560-3.c: Ditto.
    * gcc.target/i386/pr66560-4.c: Ditto.

Patch was tested on x86_64-linux-gnu {,-m32} and was committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 19704 bytes --]

Index: testsuite/gcc.target/i386/pr66560-1.c
===================================================================
--- testsuite/gcc.target/i386/pr66560-1.c	(revision 0)
+++ testsuite/gcc.target/i386/pr66560-1.c	(revision 0)
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4" } */
+
+typedef float v4sf __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+v4sf foo1 (v4sf x, v4sf y)
+{
+  v4sf tem0 = x - y;
+  v4sf tem1 = x + y;
+  return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
+}
+
+v4sf foo2 (v4sf x, v4sf y)
+{
+  v4sf tem0 = x - y;
+  v4sf tem1 = y + x;
+  return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
+}
+
+v4sf foo3 (v4sf x, v4sf y)
+{
+  v4sf tem0 = x + y;
+  v4sf tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
+}
+
+v4sf foo4 (v4sf x, v4sf y)
+{
+  v4sf tem0 = y + x;
+  v4sf tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
+}
+
+/* { dg-final { scan-assembler-times "addsubps" 4 } } */
Index: testsuite/gcc.target/i386/pr66560-2.c
===================================================================
--- testsuite/gcc.target/i386/pr66560-2.c	(revision 0)
+++ testsuite/gcc.target/i386/pr66560-2.c	(revision 0)
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+v2df foo1 (v2df x, v2df y)
+{
+  v2df tem0 = x - y;
+  v2df tem1 = x + y;
+  return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
+}
+
+v2df foo2 (v2df x, v2df y)
+{
+  v2df tem0 = x - y;
+  v2df tem1 = y + x;
+  return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
+}
+
+v2df foo3 (v2df x, v2df y)
+{
+  v2df tem0 = x + y;
+  v2df tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
+}
+
+v2df foo4 (v2df x, v2df y)
+{
+  v2df tem0 = y + x;
+  v2df tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
+}
+
+/* { dg-final { scan-assembler-times "addsubpd" 4 } } */
Index: testsuite/gcc.target/i386/pr66560-3.c
===================================================================
--- testsuite/gcc.target/i386/pr66560-3.c	(revision 0)
+++ testsuite/gcc.target/i386/pr66560-3.c	(revision 0)
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef float v8sf __attribute__((vector_size(32)));
+typedef int v8si __attribute__((vector_size(32)));
+v8sf foo1 (v8sf x, v8sf y)
+{
+  v8sf tem0 = x - y;
+  v8sf tem1 = x + y;
+  return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
+}
+
+v8sf foo2 (v8sf x, v8sf y)
+{
+  v8sf tem0 = x - y;
+  v8sf tem1 = y + x;
+  return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
+}
+
+v8sf foo3 (v8sf x, v8sf y)
+{
+  v8sf tem0 = x + y;
+  v8sf tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
+}
+
+v8sf foo4 (v8sf x, v8sf y)
+{
+  v8sf tem0 = y + x;
+  v8sf tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
+}
+
+/* { dg-final { scan-assembler-times "vaddsubps" 4 } } */
Index: testsuite/gcc.target/i386/pr66560-4.c
===================================================================
--- testsuite/gcc.target/i386/pr66560-4.c	(revision 0)
+++ testsuite/gcc.target/i386/pr66560-4.c	(revision 0)
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef double v4df __attribute__((vector_size(32)));
+typedef long long v4di __attribute__((vector_size(32)));
+v4df foo1 (v4df x, v4df y)
+{
+  v4df tem0 = x - y;
+  v4df tem1 = x + y;
+  return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
+}
+
+v4df foo2 (v4df x, v4df y)
+{
+  v4df tem0 = x - y;
+  v4df tem1 = y + x;
+  return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
+}
+
+v4df foo3 (v4df x, v4df y)
+{
+  v4df tem0 = x + y;
+  v4df tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
+}
+
+v4df foo4 (v4df x, v4df y)
+{
+  v4df tem0 = y + x;
+  v4df tem1 = x - y;
+  return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
+}
+
+/* { dg-final { scan-assembler-times "vaddsubpd" 4 } } */
Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md	(revision 224769)
+++ config/i386/predicates.md	(working copy)
@@ -1426,8 +1426,105 @@
   (and (match_code "unspec_volatile")
        (match_test "XINT (op, 1) == UNSPECV_VZEROUPPER")))
 
+;; Return true if OP is an addsub vec_merge operation
+(define_predicate "addsub_vm_operator"
+  (match_code "vec_merge")
+{
+  rtx op0, op1;
+  int swapped;
+  HOST_WIDE_INT mask;
+  int nunits, elt;
+
+  op0 = XEXP (op, 0);
+  op1 = XEXP (op, 1);
+
+  /* Sanity check.  */
+  if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
+    swapped = 0;
+  else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
+    swapped = 1;
+  else
+    gcc_unreachable ();
+
+  mask = INTVAL (XEXP (op, 2));
+  nunits = GET_MODE_NUNITS (mode);
+
+  for (elt = 0; elt < nunits; elt++)
+    {
+      /* bit clear: take from op0, set: take from op1  */
+      int bit = !(mask & (HOST_WIDE_INT_1U << elt));
+
+      if (bit != ((elt & 1) ^ swapped))
+	return false;
+    }
+
+  return true;
+})
+
+;; Return true if OP is an addsub vec_select/vec_concat operation
+(define_predicate "addsub_vs_operator"
+  (and (match_code "vec_select")
+       (match_code "vec_concat" "0"))
+{
+  rtx op0, op1;
+  bool swapped;
+  int nunits, elt;
+
+  op0 = XEXP (XEXP (op, 0), 0);
+  op1 = XEXP (XEXP (op, 0), 1);
+
+  /* Sanity check.  */
+  if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
+    swapped = false;
+  else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
+    swapped = true;
+  else
+    gcc_unreachable ();
+
+  nunits = GET_MODE_NUNITS (mode);
+  if (XVECLEN (XEXP (op, 1), 0) != nunits)
+    return false;
+
+  /* We already checked that permutation is suitable for addsub,
+     so only look at the first element of the parallel.  */
+  elt = INTVAL (XVECEXP (XEXP (op, 1), 0, 0));
+
+  return elt == (swapped ? nunits : 0);
+})
+
+;; Return true if OP is a parallel for an addsub vec_select.
+(define_predicate "addsub_vs_parallel"
+  (and (match_code "parallel")
+       (match_code "const_int" "a"))
+{
+  int nelt = XVECLEN (op, 0);
+  int elt, i;
+  
+  if (nelt < 2)
+    return false;
+
+  /* Check that the permutation is suitable for addsub.
+     For example, { 0 9 2 11 4 13 6 15 } or { 8 1 10 3 12 5 14 7 }.  */
+  elt = INTVAL (XVECEXP (op, 0, 0));
+  if (elt == 0)
+    {
+      for (i = 1; i < nelt; ++i)
+	if (INTVAL (XVECEXP (op, 0, i)) != (i + (i & 1) * nelt))
+	  return false;
+    }
+  else if (elt == nelt)
+    {
+      for (i = 1; i < nelt; ++i)
+	if (INTVAL (XVECEXP (op, 0, i)) != (elt + i - (i & 1) * nelt))
+	  return false;
+    }
+  else
+    return false;
+
+  return true;
+})
+
 ;; Return true if OP is a parallel for a vbroadcast permute.
-
 (define_predicate "avx_vbroadcast_operand"
   (and (match_code "parallel")
        (match_code "const_int" "a"))
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 224769)
+++ config/i386/sse.md	(working copy)
@@ -487,10 +487,12 @@
    (V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])
 
 (define_mode_attr ssedoublemode
-  [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
-   (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
-   (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
-   (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
+  [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
+   (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
+   (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
+   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
+   (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
+   (V4DI "V8DI") (V8DI "V16DI")])
 
 (define_mode_attr ssebytemode
   [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
@@ -2021,57 +2023,25 @@
 (define_insn "avx_addsubv4df3"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
 	(vec_merge:V4DF
-	  (plus:V4DF
+	  (minus:V4DF
 	    (match_operand:V4DF 1 "register_operand" "x")
 	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
-	  (minus:V4DF (match_dup 1) (match_dup 2))
-	  (const_int 10)))]
+	  (plus:V4DF (match_dup 1) (match_dup 2))
+	  (const_int 5)))]
   "TARGET_AVX"
   "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
-(define_insn "*avx_addsubv4df3_1"
-  [(set (match_operand:V4DF 0 "register_operand" "=x")
-  	(vec_select:V4DF
-	  (vec_concat:V8DF
-	    (minus:V4DF
-	      (match_operand:V4DF 1 "register_operand" "x")
-	      (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
-	    (plus:V4DF (match_dup 1) (match_dup 2)))
-	  (parallel [(const_int 0) (const_int 5)
-		     (const_int 2) (const_int 7)])))]
-  "TARGET_AVX"
-  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "*avx_addsubv4df3_1s"
-  [(set (match_operand:V4DF 0 "register_operand" "=x")
-  	(vec_select:V4DF
-	  (vec_concat:V8DF
-	    (minus:V4DF
-	      (match_operand:V4DF 1 "register_operand" "x")
-	      (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
-	    (plus:V4DF (match_dup 2) (match_dup 1)))
-	  (parallel [(const_int 0) (const_int 5)
-		     (const_int 2) (const_int 7)])))]
-  "TARGET_AVX"
-  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V4DF")])
-
 (define_insn "sse3_addsubv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
 	(vec_merge:V2DF
-	  (plus:V2DF
+	  (minus:V2DF
 	    (match_operand:V2DF 1 "register_operand" "0,x")
 	    (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
-	  (minus:V2DF (match_dup 1) (match_dup 2))
-	  (const_int 2)))]
+	  (plus:V2DF (match_dup 1) (match_dup 2))
+	  (const_int 1)))]
   "TARGET_SSE3"
   "@
    addsubpd\t{%2, %0|%0, %2}
@@ -2082,102 +2052,28 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "V2DF")])
 
-(define_insn "*sse3_addsubv2df3_1"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-	(vec_select:V2DF
-	  (vec_concat:V4DF
-	    (minus:V2DF
-	      (match_operand:V2DF 1 "register_operand" "0,x")
-	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
-	    (plus:V2DF (match_dup 1) (match_dup 2)))
-	  (parallel [(const_int 0) (const_int 3)])))]
-  "TARGET_SSE3"
-  "@
-   addsubpd\t{%2, %0|%0, %2}
-   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "*sse3_addsubv2df3_1s"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-	(vec_select:V2DF
-	  (vec_concat:V4DF
-	    (minus:V2DF
-	      (match_operand:V2DF 1 "register_operand" "0,x")
-	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
-	    (plus:V2DF (match_dup 2) (match_dup 1)))
-	  (parallel [(const_int 0) (const_int 3)])))]
-  "TARGET_SSE3"
-  "@
-   addsubpd\t{%2, %0|%0, %2}
-   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseadd")
-   (set_attr "atom_unit" "complex")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "mode" "V2DF")])
-
 (define_insn "avx_addsubv8sf3"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
 	(vec_merge:V8SF
-	  (plus:V8SF
+	  (minus:V8SF
 	    (match_operand:V8SF 1 "register_operand" "x")
 	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
-	  (minus:V8SF (match_dup 1) (match_dup 2))
-	  (const_int 170)))]
+	  (plus:V8SF (match_dup 1) (match_dup 2))
+	  (const_int 85)))]
   "TARGET_AVX"
   "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "*avx_addsubv8sf3_1"
-  [(set (match_operand:V8SF 0 "register_operand" "=x")
-	(vec_select:V8SF
-	  (vec_concat:V16SF
-	    (minus:V8SF
-	      (match_operand:V8SF 1 "register_operand" "x")
-	      (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
-	    (plus:V8SF (match_dup 1) (match_dup 2)))
-	  (parallel [(const_int 0) (const_int 9)
-		     (const_int 2) (const_int 11)
-		     (const_int 4) (const_int 13)
-		     (const_int 6) (const_int 15)])))]
-  "TARGET_AVX"
-  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
-
-(define_insn "*avx_addsubv8sf3_1s"
-  [(set (match_operand:V8SF 0 "register_operand" "=x")
-	(vec_select:V8SF
-	  (vec_concat:V16SF
-	    (minus:V8SF
-	      (match_operand:V8SF 1 "register_operand" "x")
-	      (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
-	    (plus:V8SF (match_dup 2) (match_dup 1)))
-	  (parallel [(const_int 0) (const_int 9)
-		     (const_int 2) (const_int 11)
-		     (const_int 4) (const_int 13)
-		     (const_int 6) (const_int 15)])))]
-  "TARGET_AVX"
-  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
-
 (define_insn "sse3_addsubv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x")
 	(vec_merge:V4SF
-	  (plus:V4SF
+	  (minus:V4SF
 	    (match_operand:V4SF 1 "register_operand" "0,x")
 	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
-	  (minus:V4SF (match_dup 1) (match_dup 2))
-	  (const_int 10)))]
+	  (plus:V4SF (match_dup 1) (match_dup 2))
+	  (const_int 5)))]
   "TARGET_SSE3"
   "@
    addsubps\t{%2, %0|%0, %2}
@@ -2188,46 +2084,124 @@
    (set_attr "prefix_rep" "1,*")
    (set_attr "mode" "V4SF")])
 
-(define_insn "*sse3_addsubv4sf3_1"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (minus:V4SF
-	      (match_operand:V4SF 1 "register_operand" "0,x")
-	      (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
-	    (plus:V4SF (match_dup 1) (match_dup 2)))
-	  (parallel [(const_int 0) (const_int 5)
-		     (const_int 2) (const_int 7)])))]
-  "TARGET_SSE3"
-  "@
-   addsubps\t{%2, %0|%0, %2}
-   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseadd")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "prefix_rep" "1,*")
-   (set_attr "mode" "V4SF")])
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+	(match_operator:VF_128_256 6 "addsub_vm_operator"
+	  [(minus:VF_128_256
+	     (match_operand:VF_128_256 1 "register_operand")
+	     (match_operand:VF_128_256 2 "nonimmediate_operand"))
+	   (plus:VF_128_256
+	     (match_operand:VF_128_256 3 "nonimmediate_operand")
+	     (match_operand:VF_128_256 4 "nonimmediate_operand"))
+	   (match_operand 5 "const_int_operand")]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+	&& rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+	   && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+	(vec_merge:VF_128_256
+	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
+	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
+	  (match_dup 5)))])
 
-(define_insn "*sse3_addsubv4sf3_1s"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-	(vec_select:V4SF
-	  (vec_concat:V8SF
-	    (minus:V4SF
-	      (match_operand:V4SF 1 "register_operand" "0,x")
-	      (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
-	    (plus:V4SF (match_dup 2) (match_dup 1)))
-	  (parallel [(const_int 0) (const_int 5)
-		     (const_int 2) (const_int 7)])))]
-  "TARGET_SSE3"
-  "@
-   addsubps\t{%2, %0|%0, %2}
-   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "sseadd")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "prefix_rep" "1,*")
-   (set_attr "mode" "V4SF")])
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+	(match_operator:VF_128_256 6 "addsub_vm_operator"
+	  [(plus:VF_128_256
+	     (match_operand:VF_128_256 1 "nonimmediate_operand")
+	     (match_operand:VF_128_256 2 "nonimmediate_operand"))
+	   (minus:VF_128_256
+	     (match_operand:VF_128_256 3 "register_operand")
+	     (match_operand:VF_128_256 4 "nonimmediate_operand"))
+	   (match_operand 5 "const_int_operand")]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+	&& rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+	   && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+	(vec_merge:VF_128_256
+	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
+	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
+	  (match_dup 5)))]
+{
+  /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
+  operands[5]
+    = GEN_INT (~INTVAL (operands[5])
+	       & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
+})
 
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+	(match_operator:VF_128_256 7 "addsub_vs_operator"
+	  [(vec_concat:<ssedoublemode>
+	     (minus:VF_128_256
+	       (match_operand:VF_128_256 1 "register_operand")
+	       (match_operand:VF_128_256 2 "nonimmediate_operand"))
+	     (plus:VF_128_256
+	       (match_operand:VF_128_256 3 "nonimmediate_operand")
+	       (match_operand:VF_128_256 4 "nonimmediate_operand")))
+	   (match_parallel 5 "addsub_vs_parallel"
+	     [(match_operand 6 "const_int_operand")])]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+	&& rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+	   && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+	(vec_merge:VF_128_256
+	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
+	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
+	  (match_dup 5)))]
+{
+  int i, nelt = XVECLEN (operands[5], 0);
+  HOST_WIDE_INT ival = 0;
+
+  for (i = 0; i < nelt; i++)
+    if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
+      ival |= HOST_WIDE_INT_1 << i;
+
+  operands[5] = GEN_INT (ival);
+})
+
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+	(match_operator:VF_128_256 7 "addsub_vs_operator"
+	  [(vec_concat:<ssedoublemode>
+	     (plus:VF_128_256
+	       (match_operand:VF_128_256 1 "nonimmediate_operand")
+	       (match_operand:VF_128_256 2 "nonimmediate_operand"))
+	     (minus:VF_128_256
+	       (match_operand:VF_128_256 3 "register_operand")
+	       (match_operand:VF_128_256 4 "nonimmediate_operand")))
+	   (match_parallel 5 "addsub_vs_parallel"
+	     [(match_operand 6 "const_int_operand")])]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+	&& rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+	   && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+	(vec_merge:VF_128_256
+	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
+	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
+	  (match_dup 5)))]
+{
+  int i, nelt = XVECLEN (operands[5], 0);
+  HOST_WIDE_INT ival = 0;
+
+  for (i = 0; i < nelt; i++)
+    if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
+      ival |= HOST_WIDE_INT_1 << i;
+
+  operands[5] = GEN_INT (ival);
+})
+
 (define_insn "avx_h<plusminus_insn>v4df3"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
 	(vec_concat:V4DF

                 reply	other threads:[~2015-06-23  8:59 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAFULd4ZEoZeyHEYAh_iLLvr8jU+PzXOm_mfFarr6vGCi9x1sEg@mail.gmail.com \
    --to=ubizjak@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=rguenther@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).