From: Tamar Christina <Tamar.Christina@arm.com>
To: Richard Biener <rguenther@suse.de>
Cc: Andrew Pinski <pinskia@gmail.com>, nd <nd@arm.com>,
"gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: RE: [PATCH]middle-end simplify complex if expressions where comparisons are inverse of one another.
Date: Mon, 31 Oct 2022 11:42:31 +0000 [thread overview]
Message-ID: <VI1PR08MB532528C7619B621263E65E28FF379@VI1PR08MB5325.eurprd08.prod.outlook.com> (raw)
In-Reply-To: <nycvar.YFH.7.77.849.2209261040460.6652@jbgna.fhfr.qr>
[-- Attachment #1: Type: text/plain, Size: 6706 bytes --]
Hi,
This is a cleaned up version addressing all feedback.
Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* match.pd: Add new rule.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/if-compare_1.c: New test.
* gcc.target/aarch64/if-compare_2.c: New test.
--- inline copy of patch ---
diff --git a/gcc/match.pd b/gcc/match.pd
index 297c4d7a2355c2faa2afb7cfa0b4e171a161ef39..463132bfd35cf61f50381cedd07d7617c638034b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1903,6 +1903,61 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (INTEGRAL_TYPE_P (type))
(bit_and @0 @1)))
+(for cmp (tcc_comparison)
+ icmp (inverted_tcc_comparison)
+ /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1. */
+ (simplify
+ (bit_ior
+ (bit_and:c (convert? (cmp@0 @01 @02)) @3)
+ (bit_and:c (convert? (icmp@4 @01 @02)) @5))
+ (if (INTEGRAL_TYPE_P (type)
+ /* The scalar version has to be canonicalized after vectorization
+ because it makes unconditional loads conditional ones, which
+ means we lose vectorization because the loads may trap. */
+ && canonicalize_math_after_vectorization_p ())
+ (bit_and (cond @0 @3 @5) { build_one_cst (type); })))
+
+ /* Fold ((-(a < b) & c) | (-(a >= b) & d)) into a < b ? c : d. This is
+ canonicalized further and we recognize the conditional form:
+ (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. */
+ (simplify
+ (bit_ior
+ (cond (cmp@0 @01 @02) @3 zerop)
+ (cond (icmp@4 @01 @02) @5 zerop))
+ (if (INTEGRAL_TYPE_P (type)
+ /* The scalar version has to be canonicalized after vectorization
+ because it makes unconditional loads conditional ones, which
+ means we lose vectorization because the loads may trap. */
+ && canonicalize_math_after_vectorization_p ())
+ (cond @0 @3 @5)))
+
+ /* Vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d.
+ and ((~(a < b) & c) | (~(a >= b) & d)) into a < b ? c : d. */
+ (simplify
+ (bit_ior
+ (bit_and:c (vec_cond:s (cmp@0 @6 @7) @4 @5) @2)
+ (bit_and:c (vec_cond:s (icmp@1 @6 @7) @4 @5) @3))
+ (if (integer_zerop (@5))
+ (switch
+ (if (integer_onep (@4))
+ (bit_and (vec_cond @0 @2 @3) @4))
+ (if (integer_minus_onep (@4))
+ (vec_cond @0 @2 @3)))
+ (if (integer_zerop (@4))
+ (switch
+ (if (integer_onep (@5))
+ (bit_and (vec_cond @0 @3 @2) @5))
+ (if (integer_minus_onep (@5))
+ (vec_cond @0 @3 @2))))))
+
+ /* Scalar Vectorized Fold ((-(a < b) & c) | (-(a >= b) & d))
+ into a < b ? d : c. */
+ (simplify
+ (bit_ior
+ (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop)
+ (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop))
+ (vec_cond @0 @2 @3)))
+
/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */
(simplify
(bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_1.c b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..53bbd779a30e1a30e0ce0e4e5eaf589bfaf570fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+extern void abort ();
+
+/*
+**zoo1:
+** cmp w0, w1
+** csel w0, w2, w3, lt
+** and w0, w0, 1
+** ret
+*/
+__attribute((noipa, noinline))
+int zoo1 (int a, int b, int c, int d)
+{
+ return ((a < b) & c) | ((a >= b) & d);
+}
+
+/*
+**zoo2:
+** cmp w0, w1
+** csel w0, w2, w3, lt
+** ret
+*/
+__attribute((noipa, noinline))
+int zoo2 (int a, int b, int c, int d)
+{
+ return (-(a < b) & c) | (-(a >= b) & d);
+}
+
+int main ()
+{
+ if (zoo1 (-3, 3, 5, 8) != 1)
+ abort ();
+
+ if (zoo1 (3, -3, 5, 8) != 0)
+ abort ();
+
+ if (zoo2 (-3, 3, 5, 8) != 5)
+ abort ();
+
+ if (zoo2 (3, -3, 5, 8) != 8)
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_2.c b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..14988abac45989578b198f28c7c0ea203959c08b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -std=c99 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+/*
+**foo1:
+** cmgt v0.4s, v1.4s, v0.4s
+** bsl v0.16b, v2.16b, v3.16b
+** ret
+*/
+v4si foo1 (v4si a, v4si b, v4si c, v4si d) {
+ return ((a < b) & c) | ((a >= b) & d);
+}
+
+/*
+**foo2:
+** cmgt v0.4s, v1.4s, v0.4s
+** bsl v0.16b, v3.16b, v2.16b
+** ret
+*/
+v4si foo2 (v4si a, v4si b, v4si c, v4si d) {
+ return (~(a < b) & c) | (~(a >= b) & d);
+}
+
+
+/**
+**bar1:
+**...
+** cmge v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** bsl v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+** and v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar1 (int * restrict a, int * restrict b, int * restrict c,
+ int * restrict d, int * restrict res, int n)
+{
+ for (int i = 0; i < (n & -4); i++)
+ res[i] = ((a[i] < b[i]) & c[i]) | ((a[i] >= b[i]) & d[i]);
+}
+
+/**
+**bar2:
+**...
+** cmge v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** bsl v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar2 (int * restrict a, int * restrict b, int * restrict c,
+ int * restrict d, int * restrict res, int n)
+{
+ for (int i = 0; i < (n & -4); i++)
+ res[i] = (-(a[i] < b[i]) & c[i]) | (-(a[i] >= b[i]) & d[i]);
+}
+
+extern void abort ();
+
+int main ()
+{
+
+ v4si a = { -3, -3, -3, -3 };
+ v4si b = { 3, 3, 3, 3 };
+ v4si c = { 5, 5, 5, 5 };
+ v4si d = { 8, 8, 8, 8 };
+
+ v4si res1 = foo1 (a, b, c, d);
+ if (memcmp (&res1, &c, 16UL) != 0)
+ abort ();
+
+ v4si res2 = foo2 (a, b, c, d);
+ if (memcmp (&res2, &d, 16UL) != 0)
+ abort ();
+
+ int ar[4] = { -3, -3, -3, -3 };
+ int br[4] = { 3, 3, 3, 3 };
+ int cr[4] = { 5, 5, 5, 5 };
+ int dr[4] = { 8, 8, 8, 8 };
+
+ int exp1[4] = { 1, 1, 1, 1 };
+ int res3[4];
+ bar1 ((int*)&ar, (int*)&br, (int*)&cr, (int*)&dr, (int*)&res3, 4);
+ if (memcmp (&res3, &exp1, 16UL) != 0)
+ abort ();
+
+ int res4[4];
+ bar2 ((int*)&ar, (int*)&br, (int*)&cr, (int*)&dr, (int*)&res4, 4);
+ if (memcmp (&res4, &cr, 16UL) != 0)
+ abort ();
+
+ return 0;
+}
[-- Attachment #2: rb15679.patch --]
[-- Type: application/octet-stream, Size: 6088 bytes --]
diff --git a/gcc/match.pd b/gcc/match.pd
index 297c4d7a2355c2faa2afb7cfa0b4e171a161ef39..463132bfd35cf61f50381cedd07d7617c638034b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1903,6 +1903,61 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (INTEGRAL_TYPE_P (type))
(bit_and @0 @1)))
+(for cmp (tcc_comparison)
+ icmp (inverted_tcc_comparison)
+ /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1. */
+ (simplify
+ (bit_ior
+ (bit_and:c (convert? (cmp@0 @01 @02)) @3)
+ (bit_and:c (convert? (icmp@4 @01 @02)) @5))
+ (if (INTEGRAL_TYPE_P (type)
+ /* The scalar version has to be canonicalized after vectorization
+ because it makes unconditional loads conditional ones, which
+ means we lose vectorization because the loads may trap. */
+ && canonicalize_math_after_vectorization_p ())
+ (bit_and (cond @0 @3 @5) { build_one_cst (type); })))
+
+ /* Fold ((-(a < b) & c) | (-(a >= b) & d)) into a < b ? c : d. This is
+ canonicalized further and we recognize the conditional form:
+ (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. */
+ (simplify
+ (bit_ior
+ (cond (cmp@0 @01 @02) @3 zerop)
+ (cond (icmp@4 @01 @02) @5 zerop))
+ (if (INTEGRAL_TYPE_P (type)
+ /* The scalar version has to be canonicalized after vectorization
+ because it makes unconditional loads conditional ones, which
+ means we lose vectorization because the loads may trap. */
+ && canonicalize_math_after_vectorization_p ())
+ (cond @0 @3 @5)))
+
+ /* Vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d.
+ and ((~(a < b) & c) | (~(a >= b) & d)) into a < b ? c : d. */
+ (simplify
+ (bit_ior
+ (bit_and:c (vec_cond:s (cmp@0 @6 @7) @4 @5) @2)
+ (bit_and:c (vec_cond:s (icmp@1 @6 @7) @4 @5) @3))
+ (if (integer_zerop (@5))
+ (switch
+ (if (integer_onep (@4))
+ (bit_and (vec_cond @0 @2 @3) @4))
+ (if (integer_minus_onep (@4))
+ (vec_cond @0 @2 @3)))
+ (if (integer_zerop (@4))
+ (switch
+ (if (integer_onep (@5))
+ (bit_and (vec_cond @0 @3 @2) @5))
+ (if (integer_minus_onep (@5))
+ (vec_cond @0 @3 @2))))))
+
+ /* Scalar Vectorized Fold ((-(a < b) & c) | (-(a >= b) & d))
+ into a < b ? d : c. */
+ (simplify
+ (bit_ior
+ (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop)
+ (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop))
+ (vec_cond @0 @2 @3)))
+
/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */
(simplify
(bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_1.c b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..53bbd779a30e1a30e0ce0e4e5eaf589bfaf570fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+extern void abort ();
+
+/*
+**zoo1:
+** cmp w0, w1
+** csel w0, w2, w3, lt
+** and w0, w0, 1
+** ret
+*/
+__attribute((noipa, noinline))
+int zoo1 (int a, int b, int c, int d)
+{
+ return ((a < b) & c) | ((a >= b) & d);
+}
+
+/*
+**zoo2:
+** cmp w0, w1
+** csel w0, w2, w3, lt
+** ret
+*/
+__attribute((noipa, noinline))
+int zoo2 (int a, int b, int c, int d)
+{
+ return (-(a < b) & c) | (-(a >= b) & d);
+}
+
+int main ()
+{
+ if (zoo1 (-3, 3, 5, 8) != 1)
+ abort ();
+
+ if (zoo1 (3, -3, 5, 8) != 0)
+ abort ();
+
+ if (zoo2 (-3, 3, 5, 8) != 5)
+ abort ();
+
+ if (zoo2 (3, -3, 5, 8) != 8)
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_2.c b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..14988abac45989578b198f28c7c0ea203959c08b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -std=c99 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+/*
+**foo1:
+** cmgt v0.4s, v1.4s, v0.4s
+** bsl v0.16b, v2.16b, v3.16b
+** ret
+*/
+v4si foo1 (v4si a, v4si b, v4si c, v4si d) {
+ return ((a < b) & c) | ((a >= b) & d);
+}
+
+/*
+**foo2:
+** cmgt v0.4s, v1.4s, v0.4s
+** bsl v0.16b, v3.16b, v2.16b
+** ret
+*/
+v4si foo2 (v4si a, v4si b, v4si c, v4si d) {
+ return (~(a < b) & c) | (~(a >= b) & d);
+}
+
+
+/**
+**bar1:
+**...
+** cmge v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** bsl v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+** and v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar1 (int * restrict a, int * restrict b, int * restrict c,
+ int * restrict d, int * restrict res, int n)
+{
+ for (int i = 0; i < (n & -4); i++)
+ res[i] = ((a[i] < b[i]) & c[i]) | ((a[i] >= b[i]) & d[i]);
+}
+
+/**
+**bar2:
+**...
+** cmge v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+** bsl v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar2 (int * restrict a, int * restrict b, int * restrict c,
+ int * restrict d, int * restrict res, int n)
+{
+ for (int i = 0; i < (n & -4); i++)
+ res[i] = (-(a[i] < b[i]) & c[i]) | (-(a[i] >= b[i]) & d[i]);
+}
+
+extern void abort ();
+
+int main ()
+{
+
+ v4si a = { -3, -3, -3, -3 };
+ v4si b = { 3, 3, 3, 3 };
+ v4si c = { 5, 5, 5, 5 };
+ v4si d = { 8, 8, 8, 8 };
+
+ v4si res1 = foo1 (a, b, c, d);
+ if (memcmp (&res1, &c, 16UL) != 0)
+ abort ();
+
+ v4si res2 = foo2 (a, b, c, d);
+ if (memcmp (&res2, &d, 16UL) != 0)
+ abort ();
+
+ int ar[4] = { -3, -3, -3, -3 };
+ int br[4] = { 3, 3, 3, 3 };
+ int cr[4] = { 5, 5, 5, 5 };
+ int dr[4] = { 8, 8, 8, 8 };
+
+ int exp1[4] = { 1, 1, 1, 1 };
+ int res3[4];
+ bar1 ((int*)&ar, (int*)&br, (int*)&cr, (int*)&dr, (int*)&res3, 4);
+ if (memcmp (&res3, &exp1, 16UL) != 0)
+ abort ();
+
+ int res4[4];
+ bar2 ((int*)&ar, (int*)&br, (int*)&cr, (int*)&dr, (int*)&res4, 4);
+ if (memcmp (&res4, &cr, 16UL) != 0)
+ abort ();
+
+ return 0;
+}
next prev parent reply other threads:[~2022-10-31 11:42 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-16 13:31 Tamar Christina
2022-06-20 8:56 ` Richard Biener
2022-07-05 15:15 ` Tamar Christina
2022-07-06 2:09 ` Andrew Pinski
2022-07-06 16:06 ` Tamar Christina
2022-07-06 19:37 ` Andrew Pinski
2022-07-07 7:48 ` Tamar Christina
2022-07-08 11:03 ` Richard Biener
2022-09-23 9:16 ` Tamar Christina
2022-09-23 13:10 ` Richard Biener
2022-09-23 13:27 ` Tamar Christina
2022-09-26 10:42 ` Richard Biener
2022-10-31 11:42 ` Tamar Christina [this message]
2022-10-31 21:23 ` Jeff Law
2022-07-07 16:07 ` Jeff Law
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=VI1PR08MB532528C7619B621263E65E28FF379@VI1PR08MB5325.eurprd08.prod.outlook.com \
--to=tamar.christina@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=nd@arm.com \
--cc=pinskia@gmail.com \
--cc=rguenther@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).