> > This moves the pattern detection to match.pd instead. > > where's the other copy and is it possible to remove it with this patch? > It looks like it's spread over various passes. Starting with forwardprop. > > > + (simplify > > + (bit_ior:c > > + (mult:c @0 (convert (convert2? (op@4 @2 @3)))) > > + (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 > > + @2 @3)))))) > > can you add a comment with what you match here as well? You are using > (op@4 @2 @3) twice, the point of the @4 capture is that the second > occurance could be just @4. I wonder how we arrived at the multiplication > here? That seems somewhat premature :/ That's being done by forwardprop. To remove the various converts I broke down the operations into several Smaller steps that on their own are sound optimizations. When taken all together it reduced the versions with casts down to the final optimized version. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * match.pd: New cond select pattern. (inverted_simple_comparison): New. gcc/testsuite/ChangeLog: * gcc.dg/select_cond_1.c: New test. * gcc.dg/select_cond_2.c: New test. * gcc.dg/select_cond_3.c: New test. --- inline copy of patch --- diff --git a/gcc/match.pd b/gcc/match.pd index 45f8941396ffd843f1bb23ab638c3b8c6d0d54b6..84d931c5d559ef1a605ea1121fbe655fcf870195 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -51,8 +51,9 @@ along with GCC; see the file COPYING3. If not see unge ungt ne eq unlt unle ordered unordered ge gt le lt ltgt uneq) (define_operator_list swapped_tcc_comparison gt ge eq ne le lt unordered ordered ungt unge unlt unle uneq ltgt) -(define_operator_list simple_comparison lt le eq ne ge gt) -(define_operator_list swapped_simple_comparison gt ge eq ne le lt) +(define_operator_list simple_comparison lt le eq ne ge gt) +(define_operator_list inverted_simple_comparison ge gt ne eq lt le) +(define_operator_list swapped_simple_comparison gt ge eq ne le lt) #include "cfn-operators.pd" @@ -2081,7 +2082,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (!canonicalize_math_p ()) (for cmp (gt lt ge le) (simplify - (mult (convert (cmp @0 @1)) @2) + (mult:c (convert (cmp @0 @1)) @2) (cond (cmp @0 @1) @2 { build_zero_cst (type); })))) /* For integral types with undefined overflow and C != 0 fold @@ -3551,6 +3552,63 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1) (max @2 @1)) +/* (a & ((c `cmp` d) - 1)) | (b & ~((c `cmp` d) - 1)) -> c `cmp` d ? a : b. + by matching the canonicanized xor form. This relies on: 0 ^ a == a and + (a ^ b) ^ a == b. So (((a ^ b) & -((unsigned T)(c `cmp` d))) ^ b) into + c `cmp` d ? a : b. */ +(for cmp (simple_comparison) + icmp (inverted_simple_comparison) + (simplify + (bit_xor:c + @0 + (bit_and:c + (bit_xor:c @1 @0) + (negate (cmp@4 @2 @3)))) + (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4)) + (convert:type (cond @4 @1 @0)))) + /* ((unsigned T)(((signed T) (a `cmp` b)) + -1)) -> a `icmp` b ? -1 : 0. */ + (simplify + (convert (plus:c integer_minus_onep (convert2?@1 (cmp@2 @3 @4)))) + (if (tree_zero_one_valued_p (@2) + && INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (type) + && !TYPE_UNSIGNED (TREE_TYPE (@1)) + && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@1)), type)) + (cond (icmp @3 @4) { build_all_ones_cst (type); } + { build_zero_cst (type); }))) + /* ((-(unsigned T)(a `cmp` b)) & c) -> a `cmp` b ? c : 0. */ + (simplify + (bit_and:c + (convert2?@5 (negate@4 (convert (cmp@0 @1 @2)))) @3) + (if (tree_zero_one_valued_p (@0) + && INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@4)) + && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@5)), + TREE_TYPE (@4))) + (cond (cmp @1 @2) @3 { build_zero_cst (type); }))) + /* (a `cmp` b) ? d : (a `icmp` b) ? c : e -> a `cmp` b ? d : c. */ + (simplify + (cond (cmp@0 @1 @2) @4 + (cond (icmp @1 @2) @3 @5)) + (cond @0 @4 @3)) + + /* (a `cmp` b) ? ((a `icmp` b) ? : c) : d -> a `cmp` b ? c : d. */ + (simplify + (cond (cmp@0 @1 @2) + (cond (icmp @1 @2) @3 @4) @5) + (cond @0 @4 @5))) + +(for op (bit_ior bit_and bit_xor) + /* (a `op` (b ? 0 : c) -> b ? a `op` 0 : a `op` c + and (a `op` (b ? c : 0) -> b ? a `op` c : a `op` 0 + we only fold 0 because one operand is guaranteed to simplify. */ + (simplify + (op:c (cond @0 zerop@3 @1) @2) + (cond @0 (op @3 @2) (op @1 @2))) + (simplify + (op:c (cond @0 @1 zerop@3) @2) + (cond @0 (op @1 @2) (op @3 @2)))) + /* Simplifications of shift and rotates. */ (for rotate (lrotate rrotate) diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c new file mode 100644 index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_1.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include + +__attribute__((noipa, noinline)) +uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a <= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a > b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a < b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a == b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a != b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max1_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min1_32u (5, 3, 7 , 8) != 8) + abort (); + + if (max1_32u (5, 3, 7 , 8) != 7) + abort (); + + if (min2_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max2_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min2_32u (5, 3, 7 , 8) != 8) + abort (); + + if (max2_32u (5, 3, 7 , 8) != 7) + abort (); + + if (min3_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max3_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min3_32u (5, 3, 7 , 8) != 7) + abort (); + + if (max3_32u (5, 3, 7 , 8) != 8) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c new file mode 100644 index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_2.c @@ -0,0 +1,99 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include + +__attribute__((noipa, noinline)) +uint32_t min1_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max1_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a <= b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min2_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a > b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max2_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a < b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min3_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a == b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max3_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a != b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */ +/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u (7 , 8) != 7) + abort (); + + if (max1_32u (7 , 8) != 8) + abort (); + + if (min1_32u (7 , 8) != 7) + abort (); + + if (max1_32u (7, 8) != 8) + abort (); + + if (min2_32u (7 , 8) != 7) + abort (); + + if (max2_32u (7 , 8) != 8) + abort (); + + if (min2_32u (7 , 8) != 7) + abort (); + + if (max2_32u (7 , 8) != 8) + abort (); + + if (min3_32u (7 , 8) != 7) + abort (); + + if (max3_32u (7 , 8) != 8) + abort (); + + if (min3_32u (7 , 8) != 7) + abort (); + + if (max3_32u (7 , 8) != 8) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c new file mode 100644 index 0000000000000000000000000000000000000000..80087d4a0bd37066dbd44335aa3d1bf13fe3fa95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_3.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include + +__attribute__((noipa, noinline)) +int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? [^ ]+ : [^ ]+;} 8 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u1 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u2 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u3 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u4 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u5 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u6 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u7 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u8 (3, 5, 7 , 8) != 7) + abort (); + + return 0; +}