diff --git a/gcc/match.pd b/gcc/match.pd index 0fcfd0ea62c043dc217d0d560ce5b7e569b70e7d..df9212cb27d172856b9d43b0875262f96e8993c4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4288,6 +4288,56 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (ic == ncmp) (ncmp @0 @1)))))) +/* Transform comparisons of the form (X & Y) CMP 0 to X CMP2 Z + where ~Y + 1 == pow2 and Z = ~Y. */ +(for cmp (simple_comparison) + (simplify + (cmp (bit_and:c @0 VECTOR_CST@1) integer_zerop) + (if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (@1)) + && uniform_vector_p (@1)) + (with { tree elt = vector_cst_elt (@1, 0); } + (switch + (if (TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_uhwi_p (elt)) + (with { unsigned HOST_WIDE_INT diff = tree_to_uhwi (elt); + tree tdiff = wide_int_to_tree (TREE_TYPE (elt), (~diff) + 1); + tree newval = wide_int_to_tree (TREE_TYPE (elt), ~diff); + tree newmask = build_uniform_cst (TREE_TYPE (@1), newval); } + (if (integer_pow2p (tdiff)) + (switch + /* ((mask & x) < 0) -> 0. */ + (if (cmp == LT_EXPR) + { build_zero_cst (TREE_TYPE (@1)); }) + /* ((mask & x) <= 0) -> x < mask. */ + (if (cmp == LE_EXPR) + (lt @0 { newmask; })) + /* ((mask & x) == 0) -> x < mask. */ + (if (cmp == EQ_EXPR) + (le @0 { newmask; })) + /* ((mask & x) != 0) -> x > mask. */ + (if (cmp == NE_EXPR) + (gt @0 { newmask; })) + /* ((mask & x) >= 0) -> x <= mask. */ + (if (cmp == GE_EXPR) + (le @0 { newmask; })) + /* ((mask & x) > 0) -> x < mask. */ + (if (cmp == GT_EXPR) + (lt @0 { newmask; })))))) + (if (!TYPE_UNSIGNED (TREE_TYPE (@1)) && tree_fits_shwi_p (elt)) + (with { unsigned HOST_WIDE_INT diff = tree_to_shwi (elt); + tree ustype = unsigned_type_for (TREE_TYPE (elt)); + tree uvtype = unsigned_type_for (TREE_TYPE (@1)); + tree tdiff = wide_int_to_tree (ustype, (~diff) + 1); + tree udiff = wide_int_to_tree (ustype, ~diff); + tree cst = build_uniform_cst (uvtype, udiff); } + (if (integer_pow2p (tdiff)) + (switch + /* ((mask & x) == 0) -> x < mask. */ + (if (cmp == EQ_EXPR) + (le (convert:uvtype @0) { cst; })) + /* ((mask & x) != 0) -> x > mask. */ + (if (cmp == NE_EXPR) + (gt (convert:uvtype @0) { cst; }))))))))))) + /* Transform comparisons of the form X - Y CMP 0 to X CMP Y. ??? The transformation is valid for the other operators if overflow is undefined for the type, but performing it here badly interacts diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-10.c b/gcc/testsuite/gcc.dg/bic-bitmask-10.c new file mode 100644 index 0000000000000000000000000000000000000000..76a22a2313137a2a75dd711c2c15c2d3a34e15aa --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-10.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) == 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) == 0; +} + +#define TYPE int32_t +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-11.c b/gcc/testsuite/gcc.dg/bic-bitmask-11.c new file mode 100644 index 0000000000000000000000000000000000000000..32553d7ba2f823f7a21237451990d0a216d2f912 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-11.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) != 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) != 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump {>\s*.+\{ 255,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-12.c b/gcc/testsuite/gcc.dg/bic-bitmask-12.c new file mode 100644 index 0000000000000000000000000000000000000000..e10cbf7fabe2dbf7ce436cdf37b0f8b207c58408 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-12.c @@ -0,0 +1,17 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 -fdump-tree-dce" } */ + +#include + +typedef unsigned int v4si __attribute__ ((vector_size (16))); + +__attribute__((noinline, noipa)) +void fun(v4si *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) == 0; +} + +/* { dg-final { scan-tree-dump {<=\s*.+\{ 255,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967290,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-2.c b/gcc/testsuite/gcc.dg/bic-bitmask-2.c new file mode 100644 index 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-2.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) == 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) == 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-3.c b/gcc/testsuite/gcc.dg/bic-bitmask-3.c new file mode 100644 index 0000000000000000000000000000000000000000..da30fad89f6c8239baa4395b3ffaec0be577e13f --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-3.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) == 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) == 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-4.c b/gcc/testsuite/gcc.dg/bic-bitmask-4.c new file mode 100644 index 0000000000000000000000000000000000000000..1bcf23ccf1447d6c8c999ed1eb25ba0a450028e1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-4.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) >= 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) >= 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-times {=\s*.+\{ 1,.+\}} 1 dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-5.c b/gcc/testsuite/gcc.dg/bic-bitmask-5.c new file mode 100644 index 0000000000000000000000000000000000000000..6e5a2fca9992efbc01f8dbbc6f95936e86643028 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-5.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) > 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) > 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-times {>\s*.+\{ 255,.+\}} 1 dce7 } } */ +/* { dg-final { scan-tree-dump-not {&`s*.+\{ 4294967040,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-6.c b/gcc/testsuite/gcc.dg/bic-bitmask-6.c new file mode 100644 index 0000000000000000000000000000000000000000..018e7a4348c9fc461106c3d9d01291325d3406c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-6.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) <= 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~255)) <= 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-times {<=\s*.+\{ 255,.+\}} 1 dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967040,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-7.c b/gcc/testsuite/gcc.dg/bic-bitmask-7.c new file mode 100644 index 0000000000000000000000000000000000000000..798678fb7555052c93abc4ca34f617d640f73bb4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-7.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~1)) < 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~1)) < 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-times {__builtin_memset} 1 dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-8.c b/gcc/testsuite/gcc.dg/bic-bitmask-8.c new file mode 100644 index 0000000000000000000000000000000000000000..1dabe834ed57dfa0be48c1dc3dbb226092c79a1a --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-8.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~1)) != 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~1)) != 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-times {>\s*.+\{ 1,.+\}} 1 dce7 } } */ +/* { dg-final { scan-tree-dump-not {&\s*.+\{ 4294967294,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask-9.c b/gcc/testsuite/gcc.dg/bic-bitmask-9.c new file mode 100644 index 0000000000000000000000000000000000000000..9c1f8ee0adfc45d1b9fc212138ea26bb6b693e49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask-9.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fdump-tree-dce" } */ + +#include + +__attribute__((noinline, noipa)) +void fun1(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~5)) == 0; +} + +__attribute__((noinline, noipa, optimize("O1"))) +void fun2(uint32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (x[i]&(~5)) == 0; +} + +#include "bic-bitmask.h" + +/* { dg-final { scan-tree-dump-not {<=\s*.+\{ 4294967289,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump {&\s*.+\{ 4294967290,.+\}} dce7 } } */ +/* { dg-final { scan-tree-dump-not {\s+bic\s+} dce7 { target { aarch64*-*-* } } } } */ + diff --git a/gcc/testsuite/gcc.dg/bic-bitmask.h b/gcc/testsuite/gcc.dg/bic-bitmask.h new file mode 100644 index 0000000000000000000000000000000000000000..2b94065c025e0cbf71a21ac9b9d6314e24b0c2d9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bic-bitmask.h @@ -0,0 +1,43 @@ +#include + +#ifndef N +#define N 50 +#endif + +#ifndef TYPE +#define TYPE uint32_t +#endif + +#ifndef DEBUG +#define DEBUG 0 +#endif + +#define BASE ((TYPE) -1 < 0 ? -126 : 4) + +int main () +{ + TYPE a[N]; + TYPE b[N]; + + for (int i = 0; i < N; ++i) + { + a[i] = BASE + i * 13; + b[i] = BASE + i * 13; + if (DEBUG) + printf ("%d: 0x%x\n", i, a[i]); + } + + fun1 (a, N); + fun2 (b, N); + + for (int i = 0; i < N; ++i) + { + if (DEBUG) + printf ("%d = 0x%x == 0x%x\n", i, a[i], b[i]); + + if (a[i] != b[i]) + __builtin_abort (); + } + return 0; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c new file mode 100644 index 0000000000000000000000000000000000000000..568c1ffc8bc4148efaeeba7a45a75ecbd3a7a3dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/bic-bitmask-1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -save-temps" } */ + +#include + +uint32x4_t foo (int32x4_t a) +{ + int32x4_t cst = vdupq_n_s32 (255); + int32x4_t zero = vdupq_n_s32 (0); + return vceqq_s32 (vbicq_s32 (a, cst), zero); +} + +/* { dg-final { scan-assembler-not {\tbic\t} { xfail { aarch64*-*-* } } } } */