diff --git a/gcc/match.pd b/gcc/match.pd index a052c9e3dbc..f9297fcadbe 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3436,20 +3436,66 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (integer_zerop (@0)) @2))) -/* Sink unary operations to constant branches, but only if we do fold it to - constants. */ +#if GIMPLE +/* Sink unary operations to branches, but only if we do fold both. */ (for op (negate bit_not abs absu) (simplify - (op (vec_cond @0 VECTOR_CST@1 VECTOR_CST@2)) - (with - { - tree cst1, cst2; - cst1 = const_unop (op, type, @1); - if (cst1) - cst2 = const_unop (op, type, @2); - } - (if (cst1 && cst2) - (vec_cond @0 { cst1; } { cst2; }))))) + (op (vec_cond:s @0 @1 @2)) + (vec_cond @0 (op! @1) (op! @2)))) + +/* Sink binary operation to branches, but only if we can fold it. */ +(for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor + rdiv trunc_div ceil_div floor_div round_div + trunc_mod ceil_mod floor_mod round_mod min max) +/* (c ? a : b) op (c ? d : e) --> c ? (a op d) : (b op e) */ + (simplify + (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4)) + (vec_cond @0 (op! @1 @3) (op! @2 @4))) + +/* (c ? a : b) op d --> c ? (a op d) : (b op d), and symmetrically d op (c ? a : b) --> c ? (d op a) : (d op b) */ + (simplify + (op (vec_cond:s @0 @1 @2) @3) + (vec_cond @0 (op! @1 @3) (op! @2 @3))) + (simplify + (op @3 (vec_cond:s @0 @1 @2)) + (vec_cond @0 (op! @3 @1) (op! @3 @2)))) +#endif + +/* (v ? w : 0) ? a : b is just (v & w) ? a : b. Likewise (v ? all-ones : w) ? a : b is (v | w) ? a : b, (v ? 0 : w) ? a : b is (v | ~w) ? b : a, and (v ? w : all-ones) ? a : b is (v & ~w) ? b : a; note the swapped arms in the last two. Each rewrite is applied only when the types of v and w match. */ +(simplify + (vec_cond (vec_cond:s @0 @3 integer_zerop) @1 @2) + (if (types_match (@0, @3)) + (vec_cond (bit_and @0 @3) @1 @2))) +(simplify + (vec_cond (vec_cond:s @0 integer_all_onesp @3) @1 @2) + (if (types_match (@0, @3)) + (vec_cond (bit_ior @0 @3) @1 @2))) +(simplify + (vec_cond (vec_cond:s @0 integer_zerop @3) @1 @2) + (if (types_match (@0, @3)) + (vec_cond (bit_ior @0 (bit_not @3)) @2 @1))) +(simplify + (vec_cond (vec_cond:s @0 @3 integer_all_onesp) @1 @2) + (if (types_match (@0, @3)) + (vec_cond (bit_and @0 (bit_not @3)) @2 @1))) + +/* c1 ? c2 ? a : b : b --> (c1 & c2) ? 
a : b. The three patterns after it cover the other placements of the shared arm: c1 ? a : (c2 ? a : b) --> (c1 | c2) ? a : b, c1 ? (c2 ? a : b) : a --> (~c1 | c2) ? a : b, and c1 ? b : (c2 ? a : b) --> (~c1 & c2) ? a : b. Each rewrite is applied only when the types of c1 and c2 match. */ +(simplify + (vec_cond @0 (vec_cond:s @1 @2 @3) @3) + (if (types_match (@0, @1)) + (vec_cond (bit_and @0 @1) @2 @3))) +(simplify + (vec_cond @0 @2 (vec_cond:s @1 @2 @3)) + (if (types_match (@0, @1)) + (vec_cond (bit_ior @0 @1) @2 @3))) +(simplify + (vec_cond @0 (vec_cond:s @1 @2 @3) @2) + (if (types_match (@0, @1)) + (vec_cond (bit_ior (bit_not @0) @1) @2 @3))) +(simplify + (vec_cond @0 @3 (vec_cond:s @1 @2 @3)) + (if (types_match (@0, @1)) + (vec_cond (bit_and (bit_not @0) @1) @2 @3))) /* Simplification moved from fold_cond_expr_with_comparison. It may also be extended. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/andnot-2.c b/gcc/testsuite/gcc.dg/tree-ssa/andnot-2.c new file mode 100644 index 00000000000..e0955ce3ffd --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/andnot-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */ + +typedef long vec __attribute__((vector_size(16))); +vec f(vec x){ + vec y = x < 10; + return y & (y == 0); +} + +/* { dg-final { scan-tree-dump-not "_expr" "forwprop3" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c new file mode 100644 index 00000000000..3d820a58e93 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */ + +// FIXME: this should further optimize to a MAX_EXPR +typedef signed char v16i8 __attribute__((vector_size(16))); +v16i8 f(v16i8 a, v16i8 b) +{ + v16i8 cmp = (a > b); + return (cmp & a) | (~cmp & b); +} + +/* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */ +/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr70314.c b/gcc/testsuite/gcc.target/i386/pr70314.c new file mode 100644 index 00000000000..aad8dd9b57e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr70314.c @@ -0,0 +1,15 @@ +/* { dg-do 
compile } */ +/* { dg-options "-march=skylake-avx512 -O2" } */ +/* { dg-final { scan-assembler-times "cmp" 2 } } */ +/* { dg-final { scan-assembler-not "and" } } */ + +typedef long vec __attribute__((vector_size(16))); +vec f(vec x, vec y){ + return (x < 5) & (y < 8); +} + +/* On x86_64, this currently compiles to: + vpcmpq $2, .LC1(%rip), %xmm1, %k1 + vpcmpq $2, .LC0(%rip), %xmm0, %k0{%k1} + vpmovm2q %k0, %xmm0 +*/