public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5489] middle-end: Convert bitclear <imm> + cmp<cc> #0 into cm<cc2> <imm2>
@ 2021-11-24  6:40 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2021-11-24  6:40 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0888d6bbe97e10de0e624f4ab46acc276e5ee1d7

commit r12-5489-g0888d6bbe97e10de0e624f4ab46acc276e5ee1d7
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Wed Nov 24 06:38:18 2021 +0000

    middle-end: Convert bitclear <imm> + cmp<cc> #0 into cm<cc2> <imm2>
    
    This optimizes the case where a mask Y which fulfills ~Y + 1 == pow2 is used to
    clear some bits and the result is then compared against 0.  The sequence is
    turned into a single compare, without the masking, against a different immediate.
    
    We can do this for all unsigned compares and for signed we can do it for
    comparisons of EQ and NE:
    
    (x & (~255)) == 0 becomes x <= 255 (as an unsigned compare), which leaves it
    to the target to deal with the comparison optimally.
    
    This transformation has to be done in the mid-end because in RTL you don't have
    the signs of the comparison operands.  In addition, if the target needs an
    immediate, it should be floated outside of the loop.
    
    The RTL loop invariant hoisting is done before split1.
    
    i.e.
    
    void fun1(int32_t *x, int n)
    {
        for (int i = 0; i < (n & -16); i++)
          x[i] = (x[i]&(~255)) == 0;
    }
    
    now generates:
    
    .L3:
            ldr     q0, [x0]
            cmhs    v0.4s, v2.4s, v0.4s
            and     v0.16b, v1.16b, v0.16b
            str     q0, [x0], 16
            cmp     x0, x1
            bne     .L3
    
    and floats the immediate out of the loop.
    
    instead of:
    
    .L3:
            ldr     q0, [x0]
            bic     v0.4s, #255
            cmeq    v0.4s, v0.4s, #0
            and     v0.16b, v1.16b, v0.16b
            str     q0, [x0], 16
            cmp     x0, x1
            bne     .L3
    
    In order to not break IVopts and CSE I have added a
    requirement for the scalar version to be single use.
    
    gcc/ChangeLog:
    
            * tree.c (bitmask_inv_cst_vector_p): New.
            * tree.h (bitmask_inv_cst_vector_p): New.
            * match.pd: Use it in new bitmask compare pattern.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/bic-bitmask-10.c: New test.
            * gcc.dg/bic-bitmask-11.c: New test.
            * gcc.dg/bic-bitmask-12.c: New test.
            * gcc.dg/bic-bitmask-13.c: New test.
            * gcc.dg/bic-bitmask-14.c: New test.
            * gcc.dg/bic-bitmask-15.c: New test.
            * gcc.dg/bic-bitmask-16.c: New test.
            * gcc.dg/bic-bitmask-17.c: New test.
            * gcc.dg/bic-bitmask-18.c: New test.
            * gcc.dg/bic-bitmask-19.c: New test.
            * gcc.dg/bic-bitmask-2.c: New test.
            * gcc.dg/bic-bitmask-20.c: New test.
            * gcc.dg/bic-bitmask-21.c: New test.
            * gcc.dg/bic-bitmask-22.c: New test.
            * gcc.dg/bic-bitmask-23.c: New test.
            * gcc.dg/bic-bitmask-3.c: New test.
            * gcc.dg/bic-bitmask-4.c: New test.
            * gcc.dg/bic-bitmask-5.c: New test.
            * gcc.dg/bic-bitmask-6.c: New test.
            * gcc.dg/bic-bitmask-7.c: New test.
            * gcc.dg/bic-bitmask-8.c: New test.
            * gcc.dg/bic-bitmask-9.c: New test.
            * gcc.dg/bic-bitmask.h: New test.
            * gcc.target/aarch64/bic-bitmask-1.c: New test.

Diff:
---
 gcc/match.pd                                     | 21 +++++++++-
 gcc/testsuite/gcc.dg/bic-bitmask-10.c            | 26 ++++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-11.c            | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-12.c            | 17 ++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-13.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-14.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-15.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-16.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-17.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-18.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-19.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-2.c             | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-20.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-21.c            | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-22.c            | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-23.c            | 16 +++++++
 gcc/testsuite/gcc.dg/bic-bitmask-3.c             | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-4.c             | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-5.c             | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-6.c             | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-7.c             | 24 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-8.c             | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask-9.c             | 25 +++++++++++
 gcc/testsuite/gcc.dg/bic-bitmask.h               | 43 +++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c | 13 ++++++
 gcc/tree.c                                       | 53 ++++++++++++++++++++++++
 gcc/tree.h                                       |  5 +++
 27 files changed, 658 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 60b4ad5f706..3e54e2cf5a6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
    uniform_integer_cst_p
    HONOR_NANS
    uniform_vector_p
-   expand_vec_cmp_expr_p)
+   expand_vec_cmp_expr_p
+   bitmask_inv_cst_vector_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -5207,6 +5208,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (eqcmp (bit_and @1 { wide_int_to_tree (ty, mask - rhs); })
 	     { build_zero_cst (ty); }))))))
 
+/* Transform comparisons of the form (X & Y) CMP 0 to X CMP2 Z
+   where ~Y + 1 == pow2 and Z = ~Y.  */
+(for cst (VECTOR_CST INTEGER_CST)
+ (for cmp (le eq ne ge gt)
+      icmp (le le gt le gt)
+ (simplify
+  (cmp (bit_and:c@2 @0 cst@1) integer_zerop)
+   (with { tree csts = bitmask_inv_cst_vector_p (@1); }
+     (switch
+      (if (csts && TYPE_UNSIGNED (TREE_TYPE (@1))
+	   && (VECTOR_TYPE_P (TREE_TYPE (@1)) || single_use (@2)))
+       (icmp @0 { csts; }))
+      (if (csts && !TYPE_UNSIGNED (TREE_TYPE (@1))
+	   && (cmp == EQ_EXPR || cmp == NE_EXPR)
+	   && (VECTOR_TYPE_P (TREE_TYPE (@1)) || single_use (@2)))
+       (with { tree utype = unsigned_type_for (TREE_TYPE (@1)); }
+	(icmp (convert:utype @0) { csts; }))))))))
+
 /* -A CMP -B -> B CMP A.  */
 (for cmp (tcc_comparison)
      scmp (swapped_tcc_comparison)
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-10.c b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
new file mode 100644
index 00000000000..0d0416028eb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-10.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#define TYPE int32_t
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-11.c b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
new file mode 100644
index 00000000000..0e589c96290
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-11.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {>\s*.+\{ 255,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-12.c b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
new file mode 100644
index 00000000000..50eb563f3b1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-12.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+typedef unsigned int v4si __attribute__ ((vector_size (16)));
+
+__attribute__((noinline, noipa))
+void fun(v4si *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-13.c b/gcc/testsuite/gcc.dg/bic-bitmask-13.c
new file mode 100644
index 00000000000..bac86c2cfce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-13.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s* 255} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-14.c b/gcc/testsuite/gcc.dg/bic-bitmask-14.c
new file mode 100644
index 00000000000..ec3bd6a7e04
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-14.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s* 255} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-15.c b/gcc/testsuite/gcc.dg/bic-bitmask-15.c
new file mode 100644
index 00000000000..8bdf1ea4eb2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-15.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {=\s* 1} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-16.c b/gcc/testsuite/gcc.dg/bic-bitmask-16.c
new file mode 100644
index 00000000000..cfea925b591
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-16.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s* 255} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-17.c b/gcc/testsuite/gcc.dg/bic-bitmask-17.c
new file mode 100644
index 00000000000..86873b97f27
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-17.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s* 255} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967040} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-18.c b/gcc/testsuite/gcc.dg/bic-bitmask-18.c
new file mode 100644
index 00000000000..9d11b3bf4a4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-18.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {= 0} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-19.c b/gcc/testsuite/gcc.dg/bic-bitmask-19.c
new file mode 100644
index 00000000000..c4620dfaad3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-19.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s* 1} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967294} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-2.c b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
new file mode 100644
index 00000000000..59ba9a414ae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-2.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-20.c b/gcc/testsuite/gcc.dg/bic-bitmask-20.c
new file mode 100644
index 00000000000..a114122e075
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-20.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-not {<=\s* 4294967289} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump {&\s* 4294967290} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-21.c b/gcc/testsuite/gcc.dg/bic-bitmask-21.c
new file mode 100644
index 00000000000..bd12a58da1e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-21.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#define TYPE int32_t
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {<=\s* 255} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967290} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-22.c b/gcc/testsuite/gcc.dg/bic-bitmask-22.c
new file mode 100644
index 00000000000..a9f0867b5cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-22.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O0")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump {>\s* 255} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s* 4294967290} dce7 { target vect_int } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-23.c b/gcc/testsuite/gcc.dg/bic-bitmask-23.c
new file mode 100644
index 00000000000..b41651b962f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-23.c
@@ -0,0 +1,16 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+typedef unsigned int v4si __attribute__ ((vector_size (16)));
+
+__attribute__((noinline, noipa))
+v4si fun(v4si x)
+{
+    v4si mask = { 255, 15, 1, 0xFFFF };
+    v4si zeros = {0};
+    return (x & ~mask) == zeros;
+}
+
+/* { dg-final { scan-tree-dump {<=\s*.+\{ 255, 15, 1, 65535 \}} dce7 { target vect_int } } } */
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-3.c b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
new file mode 100644
index 00000000000..59ba9a414ae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-3.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-4.c b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
new file mode 100644
index 00000000000..7e0614d458c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-4.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) >= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {=\s*.+\{ 1,.+\}} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-5.c b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
new file mode 100644
index 00000000000..e71b17d1272
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-5.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) > 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s*.+\{ 255,.+\}} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-6.c b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
new file mode 100644
index 00000000000..a48a226efc1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-6.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~255)) <= 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-7.c b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
new file mode 100644
index 00000000000..bc49f299aac
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-7.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) < 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {__builtin_memset} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-8.c b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
new file mode 100644
index 00000000000..cd06e0ce7be
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-8.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~1)) != 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-times {>\s*.+\{ 1,.+\}} 1 dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967294,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-9.c b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
new file mode 100644
index 00000000000..3d88b74787a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask-9.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */
+
+#include <stdint.h>
+
+__attribute__((noinline, noipa))
+void fun1(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+__attribute__((noinline, noipa, optimize("O1")))
+void fun2(uint32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (x[i]&(~5)) == 0;
+}
+
+#include "bic-bitmask.h"
+
+/* { dg-final { scan-tree-dump-not {<=\s*.+\{ 4294967289,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump {&\s*.+\{ 4294967290,.+\}} dce7 { target vect_int } } } */
+/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/bic-bitmask.h b/gcc/testsuite/gcc.dg/bic-bitmask.h
new file mode 100644
index 00000000000..faf80b974db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/bic-bitmask.h
@@ -0,0 +1,43 @@
+#include <stdio.h>
+
+#ifndef N
+#define N 65
+#endif
+
+#ifndef TYPE
+#define TYPE uint32_t
+#endif
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+
+#define BASE ((TYPE) -1 < 0 ? -126 : 4)
+
+int main ()
+{
+  TYPE a[N];
+  TYPE b[N];
+
+  for (int i = 0; i < N; ++i)
+    {
+      a[i] = BASE + i * 13;
+      b[i] = BASE + i * 13;
+      if (DEBUG)
+        printf ("%d: 0x%x\n", i, a[i]);
+    }
+
+  fun1 (a, N);
+  fun2 (b, N);
+
+  for (int i = 0; i < N; ++i)
+    {
+      if (DEBUG)
+        printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]);
+
+      if (a[i] != b[i])
+        __builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
new file mode 100644
index 00000000000..568c1ffc8bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -save-temps" } */
+
+#include <arm_neon.h>
+
+uint32x4_t foo (int32x4_t a)
+{
+  int32x4_t cst = vdupq_n_s32 (255);
+  int32x4_t zero = vdupq_n_s32 (0);
+  return vceqq_s32 (vbicq_s32 (a, cst), zero);
+}
+
+/* { dg-final { scan-assembler-not {\tbic\t} { xfail { aarch64*-*-* } } } } */
diff --git a/gcc/tree.c b/gcc/tree.c
index 62d9d78671a..910fb06d6f5 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10273,6 +10273,59 @@ uniform_integer_cst_p (tree t)
   return NULL_TREE;
 }
 
+/* Checks to see if T is a constant or a constant vector and if each element E
+   adheres to ~E + 1 == pow2 then return ~E otherwise NULL_TREE.  */
+
+tree
+bitmask_inv_cst_vector_p (tree t)
+{
+
+  tree_code code = TREE_CODE (t);
+  tree type = TREE_TYPE (t);
+
+  if (!INTEGRAL_TYPE_P (type)
+      && !VECTOR_INTEGER_TYPE_P (type))
+    return NULL_TREE;
+
+  unsigned HOST_WIDE_INT nelts = 1;
+  tree cst;
+  unsigned int idx = 0;
+  bool uniform = uniform_integer_cst_p (t);
+  tree newtype = unsigned_type_for (type);
+  tree_vector_builder builder;
+  if (code == INTEGER_CST)
+    cst = t;
+  else
+    {
+      if (!VECTOR_CST_NELTS (t).is_constant (&nelts))
+	return NULL_TREE;
+
+      cst = vector_cst_elt (t, 0);
+      builder.new_vector (newtype, nelts, 1);
+    }
+
+  tree ty = unsigned_type_for (TREE_TYPE (cst));
+
+  do {
+    if (idx > 0)
+      cst = vector_cst_elt (t, idx);
+    wide_int icst = wi::to_wide (cst);
+    wide_int inv =  wi::bit_not (icst);
+    icst = wi::add (1, inv);
+    if (wi::popcount (icst) != 1)
+      return NULL_TREE;
+
+    tree newcst = wide_int_to_tree (ty, inv);
+
+    if (uniform)
+      return build_uniform_cst (newtype, newcst);
+
+    builder.quick_push (newcst);
+  } while (++idx < nelts);
+
+  return builder.build ();
+}
+
 /* If VECTOR_CST T has a single nonzero element, return the index of that
    element, otherwise return -1.  */
 
diff --git a/gcc/tree.h b/gcc/tree.h
index 03719b18c67..f0e72b55abe 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4922,6 +4922,11 @@ extern bool integer_minus_onep (const_tree);
 
 extern bool integer_pow2p (const_tree);
 
+/* Checks to see if T is a constant or a constant vector and if each element E
+   adheres to ~E + 1 == pow2 then return ~E otherwise NULL_TREE.  */
+
+extern tree bitmask_inv_cst_vector_p (tree);
+
 /* integer_nonzerop (tree x) is nonzero if X is an integer constant
    with a nonzero value.  */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-24  6:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-24  6:40 [gcc r12-5489] middle-end: Convert bitclear <imm> + cmp<cc> #0 into cm<cc2> <imm2> Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).