public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Tamar Christina <tamar.christina@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: nd@arm.com, rguenther@suse.de
Subject: [PATCH 5/7]middle-end Convert bitclear <imm> + cmp<cc> #0 into cm<cc2> <imm2>
Date: Wed, 29 Sep 2021 17:21:08 +0100	[thread overview]
Message-ID: <20210929162106.GA5336@arm.com> (raw)
In-Reply-To: <patch-14899-tamar@arm.com>

[-- Attachment #1: Type: text/plain, Size: 16236 bytes --]

Hi All,

This optimizes the case where a mask Y which fulfills ~Y + 1 == pow2 is used to
clear a some bits and then compared against 0 into one without the masking and
a compare against a different bit immediate.

We can do this for all unsigned compares and for signed we can do it for
comparisons of EQ and NE:

(x & (~255)) == 0 becomes x <= 255. Which for leaves it to the target to
optimally deal with the comparison.

This transformation has to be done in the mid-end because in RTL you don't have
the signs of the comparison operands and if the target needs an immediate this
should be floated outside of the loop.

The RTL loop invariant hoisting is done before split1.

i.e.

void fun1(int32_t *x, int n)
{
    for (int i = 0; i < (n & -16); i++)
      x[i] = (x[i]&(~255)) == 0;
}

now generates:

.L3:
        ldr     q0, [x0]
        cmhs    v0.4s, v2.4s, v0.4s
        and     v0.16b, v1.16b, v0.16b
        str     q0, [x0], 16
        cmp     x0, x1
        bne     .L3

and floats the immediate out of the loop.

instead of:

.L3:
        ldr     q0, [x0]
        bic     v0.4s, #255
        cmeq    v0.4s, v0.4s, #0
        and     v0.16b, v1.16b, v0.16b
        str     q0, [x0], 16
        cmp     x0, x1
        bne     .L3

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* match.pd: New bitmask compare pattern.

gcc/testsuite/ChangeLog:

	* gcc.dg/bic-bitmask-10.c: New test.
	* gcc.dg/bic-bitmask-11.c: New test.
	* gcc.dg/bic-bitmask-12.c: New test.
	* gcc.dg/bic-bitmask-2.c: New test.
	* gcc.dg/bic-bitmask-3.c: New test.
	* gcc.dg/bic-bitmask-4.c: New test.
	* gcc.dg/bic-bitmask-5.c: New test.
	* gcc.dg/bic-bitmask-6.c: New test.
	* gcc.dg/bic-bitmask-7.c: New test.
	* gcc.dg/bic-bitmask-8.c: New test.
	* gcc.dg/bic-bitmask-9.c: New test.
	* gcc.dg/bic-bitmask.h: New test.
	* gcc.target/aarch64/bic-bitmask-1.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/match.pd b/gcc/match.pd
index 0fcfd0ea62c043dc217d0d560ce5b7e569b70e7d..df9212cb27d172856b9d43b0875262f96e8993c4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4288,6 +4288,56 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (if (ic == ncmp)
      (ncmp @0 @1))))))
 
+/* Transform comparisons of the form (X & Y) CMP 0 to X CMP2 Z
+   where ~Y + 1 == pow2 and Z = ~Y.  */
+(for cmp (simple_comparison)
+ (simplify
+  (cmp (bit_and:c @0 VECTOR_CST@1) integer_zerop)
+   (if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (@1))
+	&& uniform_vector_p (@1))
+    (with { tree elt = vector_cst_elt (@1, 0); }
+     (switch
+      (if (TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_uhwi_p (elt))
+	(with { unsigned HOST_WIDE_INT diff = tree_to_uhwi (elt);
+	        tree tdiff = wide_int_to_tree (TREE_TYPE (elt), (~diff) + 1);
+		tree newval = wide_int_to_tree (TREE_TYPE (elt), ~diff);
+		tree newmask = build_uniform_cst (TREE_TYPE (@1), newval); }
+	 (if (integer_pow2p (tdiff))
+	  (switch
+	   /* ((mask & x) < 0) -> 0.  */
+	   (if (cmp == LT_EXPR)
+	    { build_zero_cst (TREE_TYPE (@1)); })
+	   /* ((mask & x) <= 0) -> x < mask.  */
+	   (if (cmp == LE_EXPR)
+	    (lt @0 { newmask; }))
+	   /* ((mask & x) == 0) -> x < mask.  */
+	   (if (cmp == EQ_EXPR)
+	    (le @0 { newmask; }))
+	   /* ((mask & x) != 0) -> x > mask.  */
+	   (if (cmp == NE_EXPR)
+	    (gt @0 { newmask; }))
+	   /* ((mask & x) >= 0) -> x <= mask.  */
+	   (if (cmp == GE_EXPR)
+	    (le @0 { newmask; }))
+	    /* ((mask & x) > 0) -> x < mask.  */
+	   (if (cmp == GT_EXPR)
+	    (lt @0 { newmask; }))))))
+      (if (!TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_shwi_p (elt))
+	(with { unsigned HOST_WIDE_INT diff = tree_to_shwi (elt);
+		tree ustype = unsigned_type_for (TREE_TYPE (elt));
+		tree uvtype = unsigned_type_for (TREE_TYPE (@1));
+	        tree tdiff = wide_int_to_tree (ustype, (~diff) + 1);
+	        tree udiff = wide_int_to_tree (ustype, ~diff);
+		tree cst = build_uniform_cst (uvtype, udiff); }
+	 (if (integer_pow2p (tdiff))
+	  (switch
+	    /* ((mask & x) == 0) -> x < mask.  */
+	    (if (cmp == EQ_EXPR)
+	     (le (convert:uvtype @0) { cst; }))
+	    /* ((mask & x) != 0) -> x > mask.  */
+	    (if (cmp == NE_EXPR)
+	     (gt (convert:uvtype @0) { cst; })))))))))))
+
 /* Transform comparisons of the form X - Y CMP 0 to X CMP Y.
    ??? The transformation is valid for the other operators if overflow
    is undefined for the type, but performing it here badly interacts
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-10.c b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
new file mode 100644
index 0000000000000000000000000000000000000000..76a22a2313137a2a75dd711c2c15c2d3a34e15aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#define TYPE int32_t
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-11.c b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
new file mode 100644
index 0000000000000000000000000000000000000000..32553d7ba2f823f7a21237451990d0a216d2f912
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {>\s*.+\{ 255,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-12.c b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
new file mode 100644
index 0000000000000000000000000000000000000000..e10cbf7fabe2dbf7ce436cdf37b0f8b207c58408
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+typedef unsigned int v4si __attribute__ ((vector_size (16)));
+
+__attribute__((noinline, noipa))
+void fun(v4si *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-2.c b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-3.c b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-4.c b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bcf23ccf1447d6c8c999ed1eb25ba0a450028e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {=\s*.+\{ 1,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-5.c b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..6e5a2fca9992efbc01f8dbbc6f95936e86643028
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&`s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-6.c b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
new file mode 100644
index 0000000000000000000000000000000000000000..018e7a4348c9fc461106c3d9d01291325d3406c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-7.c b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
new file mode 100644
index 0000000000000000000000000000000000000000..798678fb7555052c93abc4ca34f617d640f73bb4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {__builtin_memset} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-8.c b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
new file mode 100644
index 0000000000000000000000000000000000000000..1dabe834ed57dfa0be48c1dc3dbb226092c79a1a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s*.+\{ 1,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967294,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-9.c b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
new file mode 100644
index 0000000000000000000000000000000000000000..9c1f8ee0adfc45d1b9fc212138ea26bb6b693e49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-not {<=\s*.+\{ 4294967289,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask.h b/gcc/testsuite/gcc.dg/bic-bitmask.h
new file mode 100644
index 0000000000000000000000000000000000000000..2b94065c025e0cbf71a21ac9b9d6314e24b0c2d9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask.h
@@ -0,0 +1,43 @@
+#include <stdio.h>
+
+#ifndef N
+#define N 50
+#endif
+
+#ifndef TYPE
+#define TYPE uint32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+int main ()
+{
+  TYPE a[N];
+  TYPE b[N];
+
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = BASE + i * 13;
+      b[i] = BASE + i * 13;
+      if (DEBUG)
+        printf ("%d: 0x%x\n", i, a[i]);
+    }
+
+  fun1 (a, N);
+  fun2 (b, N);
+
+  for (int i = 0; i < N; ++i)
+    {
+      if (DEBUG)
+        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
+
+      if (a[i] != b[i])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..568c1ffc8bc4148efaeeba7a45a75ecbd3a7a3dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -save-temps" } */
+
+#include <arm_neon.h>
+
+uint32x4_t foo (int32x4_t a)
+{
+  int32x4_t cst = vdupq_n_s32 (255);
+  int32x4_t zero = vdupq_n_s32 (0);
+  return vceqq_s32 (vbicq_s32 (a, cst), zero);
+}
+
+/* { dg-final { scan-assembler-not {\tbic\t} { xfail { aarch64*-*-* } } } } */


-- 

[-- Attachment #2: rb14896.patch --]
[-- Type: text/x-diff, Size: 14260 bytes --]

diff --git a/gcc/match.pd b/gcc/match.pd
index 0fcfd0ea62c043dc217d0d560ce5b7e569b70e7d..df9212cb27d172856b9d43b0875262f96e8993c4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4288,6 +4288,56 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (if (ic == ncmp)
      (ncmp @0 @1))))))
 
+/* Transform comparisons of the form (X & Y) CMP 0 to X CMP2 Z
+   where ~Y + 1 == pow2 and Z = ~Y.  */
+(for cmp (simple_comparison)
+ (simplify
+  (cmp (bit_and:c @0 VECTOR_CST@1) integer_zerop)
+   (if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (@1))
+	&& uniform_vector_p (@1))
+    (with { tree elt = vector_cst_elt (@1, 0); }
+     (switch
+      (if (TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_uhwi_p (elt))
+	(with { unsigned HOST_WIDE_INT diff = tree_to_uhwi (elt);
+	        tree tdiff = wide_int_to_tree (TREE_TYPE (elt), (~diff) + 1);
+		tree newval = wide_int_to_tree (TREE_TYPE (elt), ~diff);
+		tree newmask = build_uniform_cst (TREE_TYPE (@1), newval); }
+	 (if (integer_pow2p (tdiff))
+	  (switch
+	   /* ((mask & x) < 0) -> 0.  */
+	   (if (cmp == LT_EXPR)
+	    { build_zero_cst (TREE_TYPE (@1)); })
+	   /* ((mask & x) <= 0) -> x < mask.  */
+	   (if (cmp == LE_EXPR)
+	    (lt @0 { newmask; }))
+	   /* ((mask & x) == 0) -> x < mask.  */
+	   (if (cmp == EQ_EXPR)
+	    (le @0 { newmask; }))
+	   /* ((mask & x) != 0) -> x > mask.  */
+	   (if (cmp == NE_EXPR)
+	    (gt @0 { newmask; }))
+	   /* ((mask & x) >= 0) -> x <= mask.  */
+	   (if (cmp == GE_EXPR)
+	    (le @0 { newmask; }))
+	    /* ((mask & x) > 0) -> x < mask.  */
+	   (if (cmp == GT_EXPR)
+	    (lt @0 { newmask; }))))))
+      (if (!TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_shwi_p (elt))
+	(with { unsigned HOST_WIDE_INT diff = tree_to_shwi (elt);
+		tree ustype = unsigned_type_for (TREE_TYPE (elt));
+		tree uvtype = unsigned_type_for (TREE_TYPE (@1));
+	        tree tdiff = wide_int_to_tree (ustype, (~diff) + 1);
+	        tree udiff = wide_int_to_tree (ustype, ~diff);
+		tree cst = build_uniform_cst (uvtype, udiff); }
+	 (if (integer_pow2p (tdiff))
+	  (switch
+	    /* ((mask & x) == 0) -> x < mask.  */
+	    (if (cmp == EQ_EXPR)
+	     (le (convert:uvtype @0) { cst; }))
+	    /* ((mask & x) != 0) -> x > mask.  */
+	    (if (cmp == NE_EXPR)
+	     (gt (convert:uvtype @0) { cst; })))))))))))
+
 /* Transform comparisons of the form X - Y CMP 0 to X CMP Y.
    ??? The transformation is valid for the other operators if overflow
    is undefined for the type, but performing it here badly interacts
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-10.c b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
new file mode 100644
index 0000000000000000000000000000000000000000..76a22a2313137a2a75dd711c2c15c2d3a34e15aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#define TYPE int32_t
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-11.c b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
new file mode 100644
index 0000000000000000000000000000000000000000..32553d7ba2f823f7a21237451990d0a216d2f912
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {>\s*.+\{ 255,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-12.c b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
new file mode 100644
index 0000000000000000000000000000000000000000..e10cbf7fabe2dbf7ce436cdf37b0f8b207c58408
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+typedef unsigned int v4si __attribute__ ((vector_size (16)));
+
+__attribute__((noinline, noipa))
+void fun(v4si *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-2.c b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-3.c b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-4.c b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bcf23ccf1447d6c8c999ed1eb25ba0a450028e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {=\s*.+\{ 1,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-5.c b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..6e5a2fca9992efbc01f8dbbc6f95936e86643028
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&`s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-6.c b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
new file mode 100644
index 0000000000000000000000000000000000000000..018e7a4348c9fc461106c3d9d01291325d3406c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-7.c b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
new file mode 100644
index 0000000000000000000000000000000000000000..798678fb7555052c93abc4ca34f617d640f73bb4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {__builtin_memset} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-8.c b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
new file mode 100644
index 0000000000000000000000000000000000000000..1dabe834ed57dfa0be48c1dc3dbb226092c79a1a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s*.+\{ 1,.+\}} 1 dce7 } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967294,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-9.c b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
new file mode 100644
index 0000000000000000000000000000000000000000..9c1f8ee0adfc45d1b9fc212138ea26bb6b693e49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-not {<=\s*.+\{ 4294967289,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump {&\s*.+\{ 4294967290,.+\}} dce7 } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask.h b/gcc/testsuite/gcc.dg/bic-bitmask.h
new file mode 100644
index 0000000000000000000000000000000000000000..2b94065c025e0cbf71a21ac9b9d6314e24b0c2d9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask.h
@@ -0,0 +1,43 @@
+#include <stdio.h>
+
+#ifndef N
+#define N 50
+#endif
+
+#ifndef TYPE
+#define TYPE uint32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+int main ()
+{
+  TYPE a[N];
+  TYPE b[N];
+
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = BASE + i * 13;
+      b[i] = BASE + i * 13;
+      if (DEBUG)
+        printf ("%d: 0x%x\n", i, a[i]);
+    }
+
+  fun1 (a, N);
+  fun2 (b, N);
+
+  for (int i = 0; i < N; ++i)
+    {
+      if (DEBUG)
+        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
+
+      if (a[i] != b[i])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..568c1ffc8bc4148efaeeba7a45a75ecbd3a7a3dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -save-temps" } */
+
+#include <arm_neon.h>
+
+uint32x4_t foo (int32x4_t a)
+{
+  int32x4_t cst = vdupq_n_s32 (255);
+  int32x4_t zero = vdupq_n_s32 (0);
+  return vceqq_s32 (vbicq_s32 (a, cst), zero);
+}
+
+/* { dg-final { scan-assembler-not {\tbic\t} { xfail { aarch64*-*-* } } } } */


  parent reply	other threads:[~2021-09-29 16:21 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-29 16:19 [PATCH 0/7]AArch64 Optimize truncation, shifts and bitmask comparisons Tamar Christina
2021-09-29 16:19 ` [PATCH 1/7]AArch64 Add combine patterns for right shift and narrow Tamar Christina
2021-09-30  8:50   ` Kyrylo Tkachov
2021-10-06 14:32     ` Richard Sandiford
2021-10-12 16:18       ` Tamar Christina
2021-10-12 16:35         ` Kyrylo Tkachov
2021-09-29 16:19 ` [PATCH 2/7]AArch64 Add combine patterns for narrowing shift of half top bits (shuffle) Tamar Christina
2021-09-30  8:54   ` Kyrylo Tkachov
2021-10-12 16:23     ` Tamar Christina
2021-10-12 16:36       ` Kyrylo Tkachov
2021-09-29 16:20 ` [PATCH 3/7]AArch64 Add pattern for sshr to cmlt Tamar Christina
2021-09-30  9:27   ` Kyrylo Tkachov
2021-10-11 19:56     ` Andrew Pinski
2021-10-12 12:19       ` Kyrylo Tkachov
2021-10-12 16:20         ` Tamar Christina
2021-09-29 16:20 ` [PATCH 4/7]AArch64 Add pattern xtn+xtn2 to uzp2 Tamar Christina
2021-09-30  9:28   ` Kyrylo Tkachov
2021-10-12 16:25     ` Tamar Christina
2021-10-12 16:39       ` Kyrylo Tkachov
2021-10-13 11:05         ` Tamar Christina
2021-10-13 12:52           ` Kyrylo Tkachov
2021-09-29 16:21 ` Tamar Christina [this message]
2021-09-30  6:17   ` [PATCH 5/7]middle-end Convert bitclear <imm> + cmp<cc> #0 into cm<cc2> <imm2> Richard Biener
2021-09-30  9:56     ` Tamar Christina
2021-09-30 10:26       ` Richard Biener
2021-10-05 12:55         ` Tamar Christina
2021-10-13 12:17           ` Richard Biener
2021-09-29 16:21 ` [PATCH 6/7]AArch64 Add neg + cmle into cmgt Tamar Christina
2021-09-30  9:34   ` Kyrylo Tkachov
2021-09-29 16:21 ` [PATCH 7/7]AArch64 Combine cmeq 0 + not into cmtst Tamar Christina
2021-09-30  9:35   ` Kyrylo Tkachov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210929162106.GA5336@arm.com \
    --to=tamar.christina@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=nd@arm.com \
    --cc=rguenther@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).