public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Add peephole to eliminate redundant comparison after cmpccxadd.
@ 2023-07-11  9:13 liuhongt
  2023-07-17  6:51 ` Hongtao Liu
  0 siblings, 1 reply; 5+ messages in thread
From: liuhongt @ 2023-07-11  9:13 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak

Similar to what we did for CMPXCHG, but extended to all
ix86_comparison_int_operator codes, since CMPCCXADD sets EFLAGS exactly
the same way as CMP.

When the operand order in the CMP insn is the same as that in CMPCCXADD,
the CMP insn can be eliminated directly.

When the operand order is swapped in the CMP insn, only optimize
cmpccxadd + cmpl + jcc/setcc to cmpccxadd + jcc/setcc when FLAGS_REG is dead
after the jcc/setcc, and swap the condition code of the jcc/setcc accordingly.

gcc/ChangeLog:

	PR target/110591
	* config/i386/sync.md (cmpccxadd_<mode>): Adjust the pattern
	to explicitly set FLAGS_REG like *cmp<mode>_1, and add three
	define_peephole2 patterns after it.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110591.c: New test.
	* gcc.target/i386/pr110591-2.c: New test.
---
 gcc/config/i386/sync.md                    | 160 ++++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/pr110591-2.c |  90 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr110591.c   |  66 +++++++++
 3 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110591-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index e1fa1504deb..e84226cf895 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>"
 	  UNSPECV_CMPCCXADD))
    (set (match_dup 1)
 	(unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
-   (clobber (reg:CC FLAGS_REG))]
+   (set (reg:CC FLAGS_REG)
+	(compare:CC (match_dup 1)
+		    (match_dup 2)))]
   "TARGET_CMPCCXADD && TARGET_64BIT"
 {
   char buf[128];
@@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>"
   output_asm_insn (buf, operands);
   return "";
 })
+
+(define_peephole2
+  [(set (match_operand:SWI48x 0 "register_operand")
+	(match_operand:SWI48x 1 "x86_64_general_operand"))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_operand:SWI48x 2 "memory_operand")
+		      (match_dup 0)
+		      (match_operand:SWI48x 3 "register_operand")
+		      (match_operand:SI 4 "const_int_operand")]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 2)
+			       (match_dup 0)))])
+   (set (reg FLAGS_REG)
+	(compare (match_operand:SWI48x 5 "register_operand")
+		 (match_operand:SWI48x 6 "x86_64_general_operand")))]
+  "TARGET_CMPCCXADD && TARGET_64BIT
+   && rtx_equal_p (operands[0], operands[5])
+   && rtx_equal_p (operands[1], operands[6])"
+  [(set (match_dup 0)
+	(match_dup 1))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_dup 2)
+		      (match_dup 0)
+		      (match_dup 3)
+		      (match_dup 4)]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 2)
+			       (match_dup 0)))])
+   (set (match_dup 7)
+	(match_op_dup 8
+	  [(match_dup 9) (const_int 0)]))])
+
+(define_peephole2
+  [(set (match_operand:SWI48x 0 "register_operand")
+	(match_operand:SWI48x 1 "x86_64_general_operand"))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_operand:SWI48x 2 "memory_operand")
+		      (match_dup 0)
+		      (match_operand:SWI48x 3 "register_operand")
+		      (match_operand:SI 4 "const_int_operand")]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 2)
+			       (match_dup 0)))])
+   (set (reg FLAGS_REG)
+	(compare (match_operand:SWI48x 5 "register_operand")
+		 (match_operand:SWI48x 6 "x86_64_general_operand")))
+   (set (match_operand:QI 7 "nonimmediate_operand")
+	(match_operator:QI 8 "ix86_comparison_int_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))]
+  "TARGET_CMPCCXADD && TARGET_64BIT
+   && rtx_equal_p (operands[0], operands[6])
+   && rtx_equal_p (operands[1], operands[5])
+   && peep2_regno_dead_p (4, FLAGS_REG)"
+  [(set (match_dup 0)
+	(match_dup 1))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_dup 2)
+		      (match_dup 0)
+		      (match_dup 3)
+		      (match_dup 4)]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 2)
+			       (match_dup 0)))])
+   (set (match_dup 7)
+	(match_op_dup 8
+	  [(match_dup 9) (const_int 0)]))]
+{
+  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG);
+  if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8]))
+     {
+       operands[8] = shallow_copy_rtx (operands[8]);
+       enum rtx_code ccode = swap_condition (GET_CODE (operands[8]));
+       PUT_CODE (operands[8], ccode);
+       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
+						  operands[6],
+						  operands[5]),
+				   FLAGS_REG);
+     }
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48x 0 "register_operand")
+	(match_operand:SWI48x 1 "x86_64_general_operand"))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_operand:SWI48x 2 "memory_operand")
+		      (match_dup 0)
+		      (match_operand:SWI48x 3 "register_operand")
+		      (match_operand:SI 4 "const_int_operand")]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 2)
+			       (match_dup 0)))])
+   (set (reg FLAGS_REG)
+	(compare (match_operand:SWI48x 5 "register_operand")
+		 (match_operand:SWI48x 6 "x86_64_general_operand")))
+   (set (pc)
+	(if_then_else (match_operator 7 "ix86_comparison_int_operator"
+	  [(reg FLAGS_REG) (const_int 0)])
+	  (label_ref (match_operand 8))
+	  (pc)))]
+  "TARGET_CMPCCXADD && TARGET_64BIT
+   && rtx_equal_p (operands[0], operands[6])
+   && rtx_equal_p (operands[1], operands[5])
+   && peep2_regno_dead_p (4, FLAGS_REG)"
+  [(set (match_dup 0)
+	(match_dup 1))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_dup 2)
+		      (match_dup 0)
+		      (match_dup 3)
+		      (match_dup 4)]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 2)
+			       (match_dup 0)))])
+   (set (pc)
+	(if_then_else
+	 (match_op_dup 7
+	  [(match_dup 9) (const_int 0)])
+	  (label_ref (match_dup 8))
+	  (pc)))]
+{
+  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG);
+  if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7]))
+     {
+       operands[7] = shallow_copy_rtx (operands[7]);
+       enum rtx_code ccode = swap_condition (GET_CODE (operands[7]));
+       PUT_CODE (operands[7], ccode);
+       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
+						  operands[6],
+						  operands[5]),
+				   FLAGS_REG);
+     }
+})
diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c b/gcc/testsuite/gcc.target/i386/pr110591-2.c
new file mode 100644
index 00000000000..92ffdb97d62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c
@@ -0,0 +1,90 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } */
+/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
+/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
+
+#include <immintrin.h>
+
+int foo_jg (int *ptr, int v)
+{
+  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v)
+    return 100;
+  return 200;
+}
+
+int foo_jl (int *ptr, int v)
+{
+  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v)
+    return 300;
+  return 100;
+}
+
+int foo_je(int *ptr, int v)
+{
+  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v)
+    return 123;
+  return 134;
+}
+
+int foo_jne(int *ptr, int v)
+{
+  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v)
+    return 111;
+  return 12;
+}
+
+int foo_jge(int *ptr, int v)
+{
+  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v)
+    return 413;
+  return 23;
+}
+
+int foo_jle(int *ptr, int v)
+{
+  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v)
+    return 3141;
+  return 341;
+}
+
+int fooq_jg (long long *ptr, long long v)
+{
+  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v)
+    return 123;
+  return 3;
+}
+
+int fooq_jl (long long *ptr, long long v)
+{
+  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v)
+    return 313;
+  return 5;
+}
+
+int fooq_je(long long *ptr, long long v)
+{
+  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v)
+    return 1313;
+  return 13;
+}
+
+int fooq_jne(long long *ptr, long long v)
+{
+  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v)
+    return 1314;
+  return 132;
+}
+
+int fooq_jge(long long *ptr, long long v)
+{
+  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v)
+    return 14314;
+  return 434;
+}
+
+int fooq_jle(long long *ptr, long long v)
+{
+  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v)
+    return 14414;
+  return 43;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c
new file mode 100644
index 00000000000..32a515b429e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110591.c
@@ -0,0 +1,66 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mcmpccxadd -O2" } */
+/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
+/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
+
+#include <immintrin.h>
+
+_Bool foo_setg (int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v;
+}
+
+_Bool foo_setl (int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v;
+}
+
+_Bool foo_sete(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v;
+}
+
+_Bool foo_setne(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v;
+}
+
+_Bool foo_setge(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v;
+}
+
+_Bool foo_setle(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v;
+}
+
+_Bool fooq_setg (long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v;
+}
+
+_Bool fooq_setl (long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v;
+}
+
+_Bool fooq_sete(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v;
+}
+
+_Bool fooq_setne(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v;
+}
+
+_Bool fooq_setge(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v;
+}
+
+_Bool fooq_setle(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add peephole to eliminate redundant comparison after cmpccxadd.
  2023-07-11  9:13 [PATCH] Add peephole to eliminate redundant comparison after cmpccxadd liuhongt
@ 2023-07-17  6:51 ` Hongtao Liu
  2023-07-17 11:20   ` Uros Bizjak
  0 siblings, 1 reply; 5+ messages in thread
From: Hongtao Liu @ 2023-07-17  6:51 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, ubizjak

Ping.

On Tue, Jul 11, 2023 at 5:16 PM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Similar like we did for CMPXCHG, but extended to all
> ix86_comparison_int_operator since CMPCCXADD set EFLAGS exactly same
> as CMP.
>
> When operand order in CMP insn is same as that in CMPCCXADD,
> CMP insn can be eliminated directly.
>
> When operand order is swapped in CMP insn, only optimize
> cmpccxadd + cmpl + jcc/setcc to cmpccxadd + jcc/setcc when FLAGS_REG is dead
> after jcc/setcc plus adjusting code for jcc/setcc.
>
> gcc/ChangeLog:
>
>         PR target/110591
>         * config/i386/sync.md (cmpccxadd_<mode>): Adjust the pattern
>         to explicitly set FLAGS_REG like *cmp<mode>_1, also add extra
>         3 define_peephole2 after the pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110591.c: New test.
>         * gcc.target/i386/pr110591-2.c: New test.
> ---
>  gcc/config/i386/sync.md                    | 160 ++++++++++++++++++++-
>  gcc/testsuite/gcc.target/i386/pr110591-2.c |  90 ++++++++++++
>  gcc/testsuite/gcc.target/i386/pr110591.c   |  66 +++++++++
>  3 files changed, 315 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110591-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c
>
> diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
> index e1fa1504deb..e84226cf895 100644
> --- a/gcc/config/i386/sync.md
> +++ b/gcc/config/i386/sync.md
> @@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>"
>           UNSPECV_CMPCCXADD))
>     (set (match_dup 1)
>         (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> -   (clobber (reg:CC FLAGS_REG))]
> +   (set (reg:CC FLAGS_REG)
> +       (compare:CC (match_dup 1)
> +                   (match_dup 2)))]
>    "TARGET_CMPCCXADD && TARGET_64BIT"
>  {
>    char buf[128];
> @@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>"
>    output_asm_insn (buf, operands);
>    return "";
>  })
> +
> +(define_peephole2
> +  [(set (match_operand:SWI48x 0 "register_operand")
> +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_operand:SWI48x 2 "memory_operand")
> +                     (match_dup 0)
> +                     (match_operand:SWI48x 3 "register_operand")
> +                     (match_operand:SI 4 "const_int_operand")]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 2)
> +                              (match_dup 0)))])
> +   (set (reg FLAGS_REG)
> +       (compare (match_operand:SWI48x 5 "register_operand")
> +                (match_operand:SWI48x 6 "x86_64_general_operand")))]
> +  "TARGET_CMPCCXADD && TARGET_64BIT
> +   && rtx_equal_p (operands[0], operands[5])
> +   && rtx_equal_p (operands[1], operands[6])"
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_dup 2)
> +                     (match_dup 0)
> +                     (match_dup 3)
> +                     (match_dup 4)]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 2)
> +                              (match_dup 0)))])
> +   (set (match_dup 7)
> +       (match_op_dup 8
> +         [(match_dup 9) (const_int 0)]))])
> +
> +(define_peephole2
> +  [(set (match_operand:SWI48x 0 "register_operand")
> +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_operand:SWI48x 2 "memory_operand")
> +                     (match_dup 0)
> +                     (match_operand:SWI48x 3 "register_operand")
> +                     (match_operand:SI 4 "const_int_operand")]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 2)
> +                              (match_dup 0)))])
> +   (set (reg FLAGS_REG)
> +       (compare (match_operand:SWI48x 5 "register_operand")
> +                (match_operand:SWI48x 6 "x86_64_general_operand")))
> +   (set (match_operand:QI 7 "nonimmediate_operand")
> +       (match_operator:QI 8 "ix86_comparison_int_operator"
> +         [(reg FLAGS_REG) (const_int 0)]))]
> +  "TARGET_CMPCCXADD && TARGET_64BIT
> +   && rtx_equal_p (operands[0], operands[6])
> +   && rtx_equal_p (operands[1], operands[5])
> +   && peep2_regno_dead_p (4, FLAGS_REG)"
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_dup 2)
> +                     (match_dup 0)
> +                     (match_dup 3)
> +                     (match_dup 4)]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 2)
> +                              (match_dup 0)))])
> +   (set (match_dup 7)
> +       (match_op_dup 8
> +         [(match_dup 9) (const_int 0)]))]
> +{
> +  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG);
> +  if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8]))
> +     {
> +       operands[8] = shallow_copy_rtx (operands[8]);
> +       enum rtx_code ccode = swap_condition (GET_CODE (operands[8]));
> +       PUT_CODE (operands[8], ccode);
> +       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
> +                                                 operands[6],
> +                                                 operands[5]),
> +                                  FLAGS_REG);
> +     }
> +})
> +
> +(define_peephole2
> +  [(set (match_operand:SWI48x 0 "register_operand")
> +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_operand:SWI48x 2 "memory_operand")
> +                     (match_dup 0)
> +                     (match_operand:SWI48x 3 "register_operand")
> +                     (match_operand:SI 4 "const_int_operand")]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 2)
> +                              (match_dup 0)))])
> +   (set (reg FLAGS_REG)
> +       (compare (match_operand:SWI48x 5 "register_operand")
> +                (match_operand:SWI48x 6 "x86_64_general_operand")))
> +   (set (pc)
> +       (if_then_else (match_operator 7 "ix86_comparison_int_operator"
> +         [(reg FLAGS_REG) (const_int 0)])
> +         (label_ref (match_operand 8))
> +         (pc)))]
> +  "TARGET_CMPCCXADD && TARGET_64BIT
> +   && rtx_equal_p (operands[0], operands[6])
> +   && rtx_equal_p (operands[1], operands[5])
> +   && peep2_regno_dead_p (4, FLAGS_REG)"
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_dup 2)
> +                     (match_dup 0)
> +                     (match_dup 3)
> +                     (match_dup 4)]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (set (reg:CC FLAGS_REG)
> +                  (compare:CC (match_dup 2)
> +                              (match_dup 0)))])
> +   (set (pc)
> +       (if_then_else
> +        (match_op_dup 7
> +         [(match_dup 9) (const_int 0)])
> +         (label_ref (match_dup 8))
> +         (pc)))]
> +{
> +  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG);
> +  if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7]))
> +     {
> +       operands[7] = shallow_copy_rtx (operands[7]);
> +       enum rtx_code ccode = swap_condition (GET_CODE (operands[7]));
> +       PUT_CODE (operands[7], ccode);
> +       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
> +                                                 operands[6],
> +                                                 operands[5]),
> +                                  FLAGS_REG);
> +     }
> +})
> diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c b/gcc/testsuite/gcc.target/i386/pr110591-2.c
> new file mode 100644
> index 00000000000..92ffdb97d62
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c
> @@ -0,0 +1,90 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } */
> +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
> +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
> +
> +#include <immintrin.h>
> +
> +int foo_jg (int *ptr, int v)
> +{
> +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v)
> +    return 100;
> +  return 200;
> +}
> +
> +int foo_jl (int *ptr, int v)
> +{
> +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v)
> +    return 300;
> +  return 100;
> +}
> +
> +int foo_je(int *ptr, int v)
> +{
> +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v)
> +    return 123;
> +  return 134;
> +}
> +
> +int foo_jne(int *ptr, int v)
> +{
> +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v)
> +    return 111;
> +  return 12;
> +}
> +
> +int foo_jge(int *ptr, int v)
> +{
> +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v)
> +    return 413;
> +  return 23;
> +}
> +
> +int foo_jle(int *ptr, int v)
> +{
> +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v)
> +    return 3141;
> +  return 341;
> +}
> +
> +int fooq_jg (long long *ptr, long long v)
> +{
> +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v)
> +    return 123;
> +  return 3;
> +}
> +
> +int fooq_jl (long long *ptr, long long v)
> +{
> +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v)
> +    return 313;
> +  return 5;
> +}
> +
> +int fooq_je(long long *ptr, long long v)
> +{
> +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v)
> +    return 1313;
> +  return 13;
> +}
> +
> +int fooq_jne(long long *ptr, long long v)
> +{
> +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v)
> +    return 1314;
> +  return 132;
> +}
> +
> +int fooq_jge(long long *ptr, long long v)
> +{
> +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v)
> +    return 14314;
> +  return 434;
> +}
> +
> +int fooq_jle(long long *ptr, long long v)
> +{
> +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v)
> +    return 14414;
> +  return 43;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c
> new file mode 100644
> index 00000000000..32a515b429e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110591.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mcmpccxadd -O2" } */
> +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
> +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
> +
> +#include <immintrin.h>
> +
> +_Bool foo_setg (int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v;
> +}
> +
> +_Bool foo_setl (int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v;
> +}
> +
> +_Bool foo_sete(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v;
> +}
> +
> +_Bool foo_setne(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v;
> +}
> +
> +_Bool foo_setge(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v;
> +}
> +
> +_Bool foo_setle(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v;
> +}
> +
> +_Bool fooq_setg (long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v;
> +}
> +
> +_Bool fooq_setl (long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v;
> +}
> +
> +_Bool fooq_sete(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v;
> +}
> +
> +_Bool fooq_setne(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v;
> +}
> +
> +_Bool fooq_setge(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v;
> +}
> +
> +_Bool fooq_setle(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add peephole to eliminate redundant comparison after cmpccxadd.
  2023-07-17  6:51 ` Hongtao Liu
@ 2023-07-17 11:20   ` Uros Bizjak
  0 siblings, 0 replies; 5+ messages in thread
From: Uros Bizjak @ 2023-07-17 11:20 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: liuhongt, gcc-patches

On Mon, Jul 17, 2023 at 8:44 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Ping.
>
> On Tue, Jul 11, 2023 at 5:16 PM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Similar to what we did for CMPXCHG, but extended to all
> > ix86_comparison_int_operator codes, since CMPCCXADD sets EFLAGS exactly
> > the same way as CMP.
> >
> > When operand order in CMP insn is same as that in CMPCCXADD,
> > CMP insn can be eliminated directly.
> >
> > When the operand order is swapped in the CMP insn, only optimize
> > cmpccxadd + cmpl + jcc/setcc to cmpccxadd + jcc/setcc when FLAGS_REG is dead
> > after the jcc/setcc, and adjust the condition code of the jcc/setcc to match.
> >
> > gcc/ChangeLog:
> >
> >         PR target/110591
> >         * config/i386/sync.md (cmpccxadd_<mode>): Adjust the pattern
> >         to explicitly set FLAGS_REG like *cmp<mode>_1, also add extra
> >         3 define_peephole2 after the pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr110591.c: New test.
> >         * gcc.target/i386/pr110591-2.c: New test.

LGTM.

Thanks,
Uros.

> > ---
> >  gcc/config/i386/sync.md                    | 160 ++++++++++++++++++++-
> >  gcc/testsuite/gcc.target/i386/pr110591-2.c |  90 ++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr110591.c   |  66 +++++++++
> >  3 files changed, 315 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110591-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c
> >
> > diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
> > index e1fa1504deb..e84226cf895 100644
> > --- a/gcc/config/i386/sync.md
> > +++ b/gcc/config/i386/sync.md
> > @@ -1093,7 +1093,9 @@ (define_insn "cmpccxadd_<mode>"
> >           UNSPECV_CMPCCXADD))
> >     (set (match_dup 1)
> >         (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > -   (clobber (reg:CC FLAGS_REG))]
> > +   (set (reg:CC FLAGS_REG)
> > +       (compare:CC (match_dup 1)
> > +                   (match_dup 2)))]
> >    "TARGET_CMPCCXADD && TARGET_64BIT"
> >  {
> >    char buf[128];
> > @@ -1105,3 +1107,159 @@ (define_insn "cmpccxadd_<mode>"
> >    output_asm_insn (buf, operands);
> >    return "";
> >  })
> > +
> > +(define_peephole2
> > +  [(set (match_operand:SWI48x 0 "register_operand")
> > +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_operand:SWI48x 2 "memory_operand")
> > +                     (match_dup 0)
> > +                     (match_operand:SWI48x 3 "register_operand")
> > +                     (match_operand:SI 4 "const_int_operand")]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (reg FLAGS_REG)
> > +       (compare (match_operand:SWI48x 5 "register_operand")
> > +                (match_operand:SWI48x 6 "x86_64_general_operand")))]
> > +  "TARGET_CMPCCXADD && TARGET_64BIT
> > +   && rtx_equal_p (operands[0], operands[5])
> > +   && rtx_equal_p (operands[1], operands[6])"
> > +  [(set (match_dup 0)
> > +       (match_dup 1))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_dup 2)
> > +                     (match_dup 0)
> > +                     (match_dup 3)
> > +                     (match_dup 4)]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (match_dup 7)
> > +       (match_op_dup 8
> > +         [(match_dup 9) (const_int 0)]))])
> > +
> > +(define_peephole2
> > +  [(set (match_operand:SWI48x 0 "register_operand")
> > +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_operand:SWI48x 2 "memory_operand")
> > +                     (match_dup 0)
> > +                     (match_operand:SWI48x 3 "register_operand")
> > +                     (match_operand:SI 4 "const_int_operand")]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (reg FLAGS_REG)
> > +       (compare (match_operand:SWI48x 5 "register_operand")
> > +                (match_operand:SWI48x 6 "x86_64_general_operand")))
> > +   (set (match_operand:QI 7 "nonimmediate_operand")
> > +       (match_operator:QI 8 "ix86_comparison_int_operator"
> > +         [(reg FLAGS_REG) (const_int 0)]))]
> > +  "TARGET_CMPCCXADD && TARGET_64BIT
> > +   && rtx_equal_p (operands[0], operands[6])
> > +   && rtx_equal_p (operands[1], operands[5])
> > +   && peep2_regno_dead_p (4, FLAGS_REG)"
> > +  [(set (match_dup 0)
> > +       (match_dup 1))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_dup 2)
> > +                     (match_dup 0)
> > +                     (match_dup 3)
> > +                     (match_dup 4)]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (match_dup 7)
> > +       (match_op_dup 8
> > +         [(match_dup 9) (const_int 0)]))]
> > +{
> > +  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG);
> > +  if (swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8]))
> > +     {
> > +       operands[8] = shallow_copy_rtx (operands[8]);
> > +       enum rtx_code ccode = swap_condition (GET_CODE (operands[8]));
> > +       PUT_CODE (operands[8], ccode);
> > +       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
> > +                                                 operands[6],
> > +                                                 operands[5]),
> > +                                  FLAGS_REG);
> > +     }
> > +})
> > +
> > +(define_peephole2
> > +  [(set (match_operand:SWI48x 0 "register_operand")
> > +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_operand:SWI48x 2 "memory_operand")
> > +                     (match_dup 0)
> > +                     (match_operand:SWI48x 3 "register_operand")
> > +                     (match_operand:SI 4 "const_int_operand")]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (reg FLAGS_REG)
> > +       (compare (match_operand:SWI48x 5 "register_operand")
> > +                (match_operand:SWI48x 6 "x86_64_general_operand")))
> > +   (set (pc)
> > +       (if_then_else (match_operator 7 "ix86_comparison_int_operator"
> > +         [(reg FLAGS_REG) (const_int 0)])
> > +         (label_ref (match_operand 8))
> > +         (pc)))]
> > +  "TARGET_CMPCCXADD && TARGET_64BIT
> > +   && rtx_equal_p (operands[0], operands[6])
> > +   && rtx_equal_p (operands[1], operands[5])
> > +   && peep2_regno_dead_p (4, FLAGS_REG)"
> > +  [(set (match_dup 0)
> > +       (match_dup 1))
> > +   (parallel [(set (match_dup 0)
> > +                  (unspec_volatile:SWI48x
> > +                    [(match_dup 2)
> > +                     (match_dup 0)
> > +                     (match_dup 3)
> > +                     (match_dup 4)]
> > +                    UNSPECV_CMPCCXADD))
> > +             (set (match_dup 2)
> > +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> > +             (set (reg:CC FLAGS_REG)
> > +                  (compare:CC (match_dup 2)
> > +                              (match_dup 0)))])
> > +   (set (pc)
> > +       (if_then_else
> > +        (match_op_dup 7
> > +         [(match_dup 9) (const_int 0)])
> > +         (label_ref (match_dup 8))
> > +         (pc)))]
> > +{
> > +  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[7], 0)), FLAGS_REG);
> > +  if (swap_condition (GET_CODE (operands[7])) != GET_CODE (operands[7]))
> > +     {
> > +       operands[7] = shallow_copy_rtx (operands[7]);
> > +       enum rtx_code ccode = swap_condition (GET_CODE (operands[7]));
> > +       PUT_CODE (operands[7], ccode);
> > +       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
> > +                                                 operands[6],
> > +                                                 operands[5]),
> > +                                  FLAGS_REG);
> > +     }
> > +})
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110591-2.c b/gcc/testsuite/gcc.target/i386/pr110591-2.c
> > new file mode 100644
> > index 00000000000..92ffdb97d62
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110591-2.c
> > @@ -0,0 +1,90 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-mcmpccxadd -O2 -fno-if-conversion -fno-if-conversion2" } */
> > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
> > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
> > +
> > +#include <immintrin.h>
> > +
> > +int foo_jg (int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v)
> > +    return 100;
> > +  return 200;
> > +}
> > +
> > +int foo_jl (int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v)
> > +    return 300;
> > +  return 100;
> > +}
> > +
> > +int foo_je(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v)
> > +    return 123;
> > +  return 134;
> > +}
> > +
> > +int foo_jne(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v)
> > +    return 111;
> > +  return 12;
> > +}
> > +
> > +int foo_jge(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v)
> > +    return 413;
> > +  return 23;
> > +}
> > +
> > +int foo_jle(int *ptr, int v)
> > +{
> > +  if (_cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v)
> > +    return 3141;
> > +  return 341;
> > +}
> > +
> > +int fooq_jg (long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v)
> > +    return 123;
> > +  return 3;
> > +}
> > +
> > +int fooq_jl (long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v)
> > +    return 313;
> > +  return 5;
> > +}
> > +
> > +int fooq_je(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v)
> > +    return 1313;
> > +  return 13;
> > +}
> > +
> > +int fooq_jne(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v)
> > +    return 1314;
> > +  return 132;
> > +}
> > +
> > +int fooq_jge(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v)
> > +    return 14314;
> > +  return 434;
> > +}
> > +
> > +int fooq_jle(long long *ptr, long long v)
> > +{
> > +  if (_cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v)
> > +    return 14414;
> > +  return 43;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c
> > new file mode 100644
> > index 00000000000..32a515b429e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110591.c
> > @@ -0,0 +1,66 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-mcmpccxadd -O2" } */
> > +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
> > +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
> > +
> > +#include <immintrin.h>
> > +
> > +_Bool foo_setg (int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v;
> > +}
> > +
> > +_Bool foo_setl (int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v;
> > +}
> > +
> > +_Bool foo_sete(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v;
> > +}
> > +
> > +_Bool foo_setne(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v;
> > +}
> > +
> > +_Bool foo_setge(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v;
> > +}
> > +
> > +_Bool foo_setle(int *ptr, int v)
> > +{
> > +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v;
> > +}
> > +
> > +_Bool fooq_setg (long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v;
> > +}
> > +
> > +_Bool fooq_setl (long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v;
> > +}
> > +
> > +_Bool fooq_sete(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v;
> > +}
> > +
> > +_Bool fooq_setne(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v;
> > +}
> > +
> > +_Bool fooq_setge(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v;
> > +}
> > +
> > +_Bool fooq_setle(long long *ptr, long long v)
> > +{
> > +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
>
>
> --
> BR,
> Hongtao

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Add peephole to eliminate redundant comparison after cmpccxadd.
  2023-07-11  3:13 liuhongt
@ 2023-07-11  3:23 ` Hongtao Liu
  0 siblings, 0 replies; 5+ messages in thread
From: Hongtao Liu @ 2023-07-11  3:23 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, ubizjak

Please ignore this patch; I'm testing another patch that separates out the
non-swapped-operands case, where a setcc is not needed in the peephole2.

On Tue, Jul 11, 2023 at 11:14 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Similar to what we did for cmpxchg, but extended to all
> ix86_comparison_int_operator codes, since cmpccxadd sets EFLAGS exactly
> the same way as CMP.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,},
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/110591
>         * config/i386/sync.md (cmpccxadd_<mode>): Add a new
>         define_peephole2 after the pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110591.c: New test.
> ---
>  gcc/config/i386/sync.md                  | 56 ++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr110591.c | 66 ++++++++++++++++++++++++
>  2 files changed, 122 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c
>
> diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
> index e1fa1504deb..43f6421bcb8 100644
> --- a/gcc/config/i386/sync.md
> +++ b/gcc/config/i386/sync.md
> @@ -1105,3 +1105,59 @@ (define_insn "cmpccxadd_<mode>"
>    output_asm_insn (buf, operands);
>    return "";
>  })
> +
> +(define_peephole2
> +  [(set (match_operand:SWI48x 0 "register_operand")
> +       (match_operand:SWI48x 1 "x86_64_general_operand"))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_operand:SWI48x 2 "memory_operand")
> +                     (match_dup 0)
> +                     (match_operand:SWI48x 3 "register_operand")
> +                     (match_operand:SI 4 "const_int_operand")]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (clobber (reg:CC FLAGS_REG))])
> +   (set (reg FLAGS_REG)
> +       (compare (match_operand:SWI48x 5 "register_operand")
> +                (match_operand:SWI48x 6 "x86_64_general_operand")))
> +   (set (match_operand:QI 7 "nonimmediate_operand")
> +       (match_operator:QI 8 "ix86_comparison_int_operator"
> +         [(reg FLAGS_REG) (const_int 0)]))]
> +  "TARGET_CMPCCXADD && TARGET_64BIT
> +   && ((rtx_equal_p (operands[0], operands[5])
> +       && rtx_equal_p (operands[1], operands[6]))
> +       || ((rtx_equal_p (operands[0], operands[6])
> +           && rtx_equal_p (operands[1], operands[5]))
> +          && peep2_regno_dead_p (4, FLAGS_REG)))"
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +   (parallel [(set (match_dup 0)
> +                  (unspec_volatile:SWI48x
> +                    [(match_dup 2)
> +                     (match_dup 0)
> +                     (match_dup 3)
> +                     (match_dup 4)]
> +                    UNSPECV_CMPCCXADD))
> +             (set (match_dup 2)
> +                  (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +             (clobber (reg:CC FLAGS_REG))])
> +   (set (match_dup 7)
> +       (match_op_dup 8
> +         [(match_dup 9) (const_int 0)]))]
> +{
> +  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG);
> +  if (rtx_equal_p (operands[0], operands[6])
> +     && rtx_equal_p (operands[1], operands[5])
> +     && swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8]))
> +     {
> +       operands[8] = shallow_copy_rtx (operands[8]);
> +       enum rtx_code ccode = swap_condition (GET_CODE (operands[8]));
> +       PUT_CODE (operands[8], ccode);
> +       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
> +                                                 operands[6],
> +                                                 operands[5]),
> +                                  FLAGS_REG);
> +     }
> +})
> diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c
> new file mode 100644
> index 00000000000..32a515b429e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110591.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mcmpccxadd -O2" } */
> +/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
> +/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
> +
> +#include <immintrin.h>
> +
> +_Bool foo_setg (int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v;
> +}
> +
> +_Bool foo_setl (int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v;
> +}
> +
> +_Bool foo_sete(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v;
> +}
> +
> +_Bool foo_setne(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v;
> +}
> +
> +_Bool foo_setge(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v;
> +}
> +
> +_Bool foo_setle(int *ptr, int v)
> +{
> +    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v;
> +}
> +
> +_Bool fooq_setg (long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v;
> +}
> +
> +_Bool fooq_setl (long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v;
> +}
> +
> +_Bool fooq_sete(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v;
> +}
> +
> +_Bool fooq_setne(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v;
> +}
> +
> +_Bool fooq_setge(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v;
> +}
> +
> +_Bool fooq_setle(long long *ptr, long long v)
> +{
> +    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] Add peephole to eliminate redundant comparison after cmpccxadd.
@ 2023-07-11  3:13 liuhongt
  2023-07-11  3:23 ` Hongtao Liu
  0 siblings, 1 reply; 5+ messages in thread
From: liuhongt @ 2023-07-11  3:13 UTC (permalink / raw)
  To: gcc-patches; +Cc: ubizjak

Similar to what we did for cmpxchg, but extended to all
ix86_comparison_int_operator codes, since cmpccxadd sets EFLAGS exactly
the same way as CMP.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,},
Ok for trunk?

gcc/ChangeLog:

	PR target/110591
	* config/i386/sync.md (cmpccxadd_<mode>): Add a new
	define_peephole2 after the pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110591.c: New test.
---
 gcc/config/i386/sync.md                  | 56 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110591.c | 66 ++++++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110591.c

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index e1fa1504deb..43f6421bcb8 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -1105,3 +1105,59 @@ (define_insn "cmpccxadd_<mode>"
   output_asm_insn (buf, operands);
   return "";
 })
+
+(define_peephole2
+  [(set (match_operand:SWI48x 0 "register_operand")
+	(match_operand:SWI48x 1 "x86_64_general_operand"))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_operand:SWI48x 2 "memory_operand")
+		      (match_dup 0)
+		      (match_operand:SWI48x 3 "register_operand")
+		      (match_operand:SI 4 "const_int_operand")]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (reg FLAGS_REG)
+	(compare (match_operand:SWI48x 5 "register_operand")
+		 (match_operand:SWI48x 6 "x86_64_general_operand")))
+   (set (match_operand:QI 7 "nonimmediate_operand")
+	(match_operator:QI 8 "ix86_comparison_int_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))]
+  "TARGET_CMPCCXADD && TARGET_64BIT
+   && ((rtx_equal_p (operands[0], operands[5])
+	&& rtx_equal_p (operands[1], operands[6]))
+       || ((rtx_equal_p (operands[0], operands[6])
+	    && rtx_equal_p (operands[1], operands[5]))
+	   && peep2_regno_dead_p (4, FLAGS_REG)))"
+  [(set (match_dup 0)
+	(match_dup 1))
+   (parallel [(set (match_dup 0)
+		   (unspec_volatile:SWI48x
+		     [(match_dup 2)
+		      (match_dup 0)
+		      (match_dup 3)
+		      (match_dup 4)]
+		     UNSPECV_CMPCCXADD))
+	      (set (match_dup 2)
+		   (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 7)
+	(match_op_dup 8
+	  [(match_dup 9) (const_int 0)]))]
+{
+  operands[9] = gen_rtx_REG (GET_MODE (XEXP (operands[8], 0)), FLAGS_REG);
+  if (rtx_equal_p (operands[0], operands[6])
+     && rtx_equal_p (operands[1], operands[5])
+     && swap_condition (GET_CODE (operands[8])) != GET_CODE (operands[8]))
+     {
+       operands[8] = shallow_copy_rtx (operands[8]);
+       enum rtx_code ccode = swap_condition (GET_CODE (operands[8]));
+       PUT_CODE (operands[8], ccode);
+       operands[9] = gen_rtx_REG (SELECT_CC_MODE (ccode,
+						  operands[6],
+						  operands[5]),
+				   FLAGS_REG);
+     }
+})
diff --git a/gcc/testsuite/gcc.target/i386/pr110591.c b/gcc/testsuite/gcc.target/i386/pr110591.c
new file mode 100644
index 00000000000..32a515b429e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110591.c
@@ -0,0 +1,66 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mcmpccxadd -O2" } */
+/* { dg-final { scan-assembler-not {cmp[lq]?[ \t]+} } } */
+/* { dg-final { scan-assembler-times {cmpoxadd[ \t]+} 12 } } */
+
+#include <immintrin.h>
+
+_Bool foo_setg (int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) > v;
+}
+
+_Bool foo_setl (int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) < v;
+}
+
+_Bool foo_sete(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) == v;
+}
+
+_Bool foo_setne(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) != v;
+}
+
+_Bool foo_setge(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) >= v;
+}
+
+_Bool foo_setle(int *ptr, int v)
+{
+    return _cmpccxadd_epi32(ptr, v, 1, _CMPCCX_O) <= v;
+}
+
+_Bool fooq_setg (long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) > v;
+}
+
+_Bool fooq_setl (long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) < v;
+}
+
+_Bool fooq_sete(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) == v;
+}
+
+_Bool fooq_setne(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) != v;
+}
+
+_Bool fooq_setge(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) >= v;
+}
+
+_Bool fooq_setle(long long *ptr, long long v)
+{
+    return _cmpccxadd_epi64(ptr, v, 1, _CMPCCX_O) <= v;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-07-17 11:21 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-11  9:13 [PATCH] Add peephole to eliminate redundant comparison after cmpccxadd liuhongt
2023-07-17  6:51 ` Hongtao Liu
2023-07-17 11:20   ` Uros Bizjak
  -- strict thread matches above, loose matches on Subject: below --
2023-07-11  3:13 liuhongt
2023-07-11  3:23 ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).