On Mon, Sep 16, 2019 at 08:56:58AM +0200, Richard Biener wrote:
> > As mentioned in the PR, the sqrt (x) < c optimization into x < c*c
> > sometimes breaks the boundary case, if c2=c*c is inexact then in some cases
> > we need to optimize it into x <= c*c rather than x < c*c.  The original
> > bugreport is when c is small and c2 is 0.0, then obviously we need <= 0.0
> > rather than < 0.0, but the testcase includes another example where it makes
> > a difference, plus has a >= testcase too.
> >
> > Bootstrapped/regtested on powerpc64le-linux, ok for trunk?
>
> I was hoping Joseph might chime in here... anyway, does this assume
> round-to-nearest or does it work with round to +-Inf as well?  I
> realize this all is under flag_unsafe_math_optimizations, but
> this flag is notoriously underspecified...  So the question is
> whether we should disable the transform if c*c isn't exact and
> flag_rounding_math?  The transform also doesn't seem to guard
> against isnan (c) (-funsafe-math-optimizations sets
> -fno-trapping-math and -fno-signed-zeros but not -ffinite-math-only
> or disables itself on -frounding-math)

Here is an updated patch, which on top of the previous patch:
1) punts for -frounding-math
2) punts for sqrt comparisons against a NaN constant
3) for the c*c inexact case also handles the other two comparisons that
   apparently need to be handled too
4) for all 4 comparisons also checks nexttoward (c2, 0.0) or
   nexttoward (c2, inf), depending on the comparison kind, because as
   Joseph correctly noted, with rounding to nearest up to 3 different
   floating point values can have the same sqrt result, and if c2 is the
   middle one of them, we need to use the value 1 ulp smaller or larger
   in the comparison
5) had to adjust the testcase, because while it worked fine on powerpc64le,
   on x86_64 if the test is linked with -ffast-math/-Ofast etc.,
   crtfastmath.o is linked in and subnormals are flushed to zero, which is
   not what we want for the testcase (at least for a subset of the tests).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

BTW, I've used attached programs to look for the problematic cases on
random floats/doubles, and the cases the patch handles seem to be the only
problematic ones; there is never a need to go further than one nexttoward
towards 0 or inf.
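The attachments themselves aren't reproduced here; the following is only an
illustrative sketch of that kind of search for the < case (not the actual
attached programs).  It picks random float constants c and checks whether
the naive rewrite of sqrtf (x) < c into x < c*c disagrees with the original
predicate for x within a couple of ulps of c*c, which by the above is the
only place the two can differ.  It has to be compiled without -ffast-math
(e.g. plain "gcc search.c -lm", the file name is of course arbitrary) so
that sqrtf and the comparisons keep IEEE semantics:

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  srand (1);
  for (int i = 0; i < 10000000; i++)
    {
      /* Random positive c spread over most of the float exponent range.  */
      float s = (float) rand () / RAND_MAX + 1.0f;
      float c = ldexpf (s, rand () % 250 - 125);
      float c2 = c * c;   /* What the naive rewrite would compare against.  */
      if (isinf (c2))
        continue;
      /* Only x at or near the boundary c*c can expose a difference, so it
         is enough to check c*c itself and a few neighbouring floats.  */
      float x = nextafterf (nextafterf (c2, 0.0f), 0.0f);
      for (int j = 0; j < 5; j++, x = nextafterf (x, INFINITY))
        if ((sqrtf (x) < c) != (x < c2))
          {
            printf ("c = %a: sqrtf (%a) < c is %d, but %a < c*c (= %a) is %d\n",
                    c, x, sqrtf (x) < c, x, c2, x < c2);
            break;
          }
    }
  return 0;
}

The original PR case shows up immediately: for tiny c, c*c rounds to 0.0f,
and x = 0.0f then satisfies sqrtf (x) < c but not x < c*c, which is exactly
why < has to become <= there.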
2019-09-21  Jakub Jelinek

	PR tree-optimization/91734
	* generic-match-head.c: Include fold-const-call.h.
	* match.pd (sqrt(x) cmp c): Check the boundary value and
	in case inexact computation of c*c affects comparison of the boundary,
	turn LT_EXPR into LE_EXPR, GE_EXPR into GT_EXPR, LE_EXPR into LT_EXPR
	or GT_EXPR into GE_EXPR.  Punt for sqrt comparisons against NaN and
	for -frounding-math.  For c2, try the next smaller or larger floating
	point constant depending on comparison code and if it has the same
	sqrt as c2, use it instead of c2.

	* gcc.dg/pr91734.c: New test.

--- gcc/generic-match-head.c.jj	2019-09-20 12:24:56.376189996 +0200
+++ gcc/generic-match-head.c	2019-09-20 12:43:08.017273166 +0200
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
 #include "cgraph.h"
 #include "vec-perm-indices.h"
 #include "fold-const.h"
+#include "fold-const-call.h"
 #include "stor-layout.h"
 #include "tree-dfa.h"
 #include "builtins.h"
--- gcc/match.pd.jj	2019-09-20 12:25:27.323710388 +0200
+++ gcc/match.pd	2019-09-20 17:20:22.974316837 +0200
@@ -3711,8 +3711,7 @@ (define_operator_list COND_TERNARY
      (cmp { tem; } @1)))))
 
 /* Fold comparisons against built-in math functions.  */
- (if (flag_unsafe_math_optimizations
-      && ! flag_errno_math)
+ (if (flag_unsafe_math_optimizations && ! flag_errno_math)
  (for sq (SQRT)
   (simplify
    (cmp (sq @0) REAL_CST@1)
@@ -3747,56 +3746,108 @@ (define_operator_list COND_TERNARY
 	    if x is negative or NaN.  Due to -funsafe-math-optimizations,
 	    the results for other x follow from natural arithmetic.  */
 	 (cmp @0 @1)))
-     (if (cmp == GT_EXPR || cmp == GE_EXPR)
+     (if ((cmp == LT_EXPR
+	   || cmp == LE_EXPR
+	   || cmp == GT_EXPR
+	   || cmp == GE_EXPR)
+	  && !REAL_VALUE_ISNAN (TREE_REAL_CST (@1))
+	  /* Give up for -frounding-math.  */
+	  && !HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (@0)))
       (with
       {
-	REAL_VALUE_TYPE c2;
+	REAL_VALUE_TYPE c2;
+	enum tree_code ncmp = cmp;
+	const real_format *fmt
+	  = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (@0)));
	real_arithmetic (&c2, MULT_EXPR,
			 &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
-	real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
+	real_convert (&c2, fmt, &c2);
+	/* See PR91734: if c2 is inexact and sqrt(c2) < c (or sqrt(c2) >= c),
+	   then change LT_EXPR into LE_EXPR or GE_EXPR into GT_EXPR.  */
+	if (!REAL_VALUE_ISINF (c2))
+	  {
+	    tree c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+				       build_real (TREE_TYPE (@0), c2));
+	    if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+	      ncmp = ERROR_MARK;
+	    else if ((cmp == LT_EXPR || cmp == GE_EXPR)
+		     && real_less (&TREE_REAL_CST (c3), &TREE_REAL_CST (@1)))
+	      ncmp = cmp == LT_EXPR ? LE_EXPR : GT_EXPR;
+	    else if ((cmp == LE_EXPR || cmp == GT_EXPR)
+		     && real_less (&TREE_REAL_CST (@1), &TREE_REAL_CST (c3)))
+	      ncmp = cmp == LE_EXPR ? LT_EXPR : GE_EXPR;
+	    else
+	      {
+		/* With rounding to even, sqrt of up to 3 different values
+		   gives the same normal result, so in some cases c2 needs
+		   to be adjusted.  */
+		REAL_VALUE_TYPE c2alt, tow;
+		if (cmp == LT_EXPR || cmp == GE_EXPR)
+		  tow = dconst0;
+		else
+		  real_inf (&tow);
+		real_nextafter (&c2alt, fmt, &c2, &tow);
+		real_convert (&c2alt, fmt, &c2alt);
+		if (REAL_VALUE_ISINF (c2alt))
+		  ncmp = ERROR_MARK;
+		else
+		  {
+		    c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+					  build_real (TREE_TYPE (@0), c2alt));
+		    if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+		      ncmp = ERROR_MARK;
+		    else if (real_equal (&TREE_REAL_CST (c3),
+					 &TREE_REAL_CST (@1)))
+		      c2 = c2alt;
+		  }
+	      }
+	  }
       }
-      (if (REAL_VALUE_ISINF (c2))
-       /* sqrt(x) > y is x == +Inf, when y is very large.  */
-       (if (HONOR_INFINITIES (@0))
-	(eq @0 { build_real (TREE_TYPE (@0), c2); })
-	{ constant_boolean_node (false, type); })
-       /* sqrt(x) > c is the same as x > c*c.  */
-       (cmp @0 { build_real (TREE_TYPE (@0), c2); }))))
-     (if (cmp == LT_EXPR || cmp == LE_EXPR)
-      (with
-      {
-	REAL_VALUE_TYPE c2;
-	real_arithmetic (&c2, MULT_EXPR,
-			 &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
-	real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
-      }
-      (if (REAL_VALUE_ISINF (c2))
-       (switch
-	/* sqrt(x) < y is always true, when y is a very large
-	   value and we don't care about NaNs or Infinities.  */
-	(if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
-	 { constant_boolean_node (true, type); })
-	/* sqrt(x) < y is x != +Inf when y is very large and we
-	   don't care about NaNs.  */
-	(if (! HONOR_NANS (@0))
-	 (ne @0 { build_real (TREE_TYPE (@0), c2); }))
-	/* sqrt(x) < y is x >= 0 when y is very large and we
-	   don't care about Infinities.  */
-	(if (! HONOR_INFINITIES (@0))
-	 (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
-	/* sqrt(x) < y is x >= 0 && x != +Inf, when y is large.  */
-	(if (GENERIC)
-	 (truth_andif
-	  (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
-	  (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
-       /* sqrt(x) < c is the same as x < c*c, if we ignore NaNs.  */
-       (if (! HONOR_NANS (@0))
-	(cmp @0 { build_real (TREE_TYPE (@0), c2); })
-	/* sqrt(x) < c is the same as x >= 0 && x < c*c.  */
-	(if (GENERIC)
-	 (truth_andif
-	  (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
-	  (cmp @0 { build_real (TREE_TYPE (@0), c2); })))))))))
+      (if (cmp == GT_EXPR || cmp == GE_EXPR)
+       (if (REAL_VALUE_ISINF (c2))
+	/* sqrt(x) > y is x == +Inf, when y is very large.  */
+	(if (HONOR_INFINITIES (@0))
+	 (eq @0 { build_real (TREE_TYPE (@0), c2); })
+	 { constant_boolean_node (false, type); })
+	/* sqrt(x) > c is the same as x > c*c.  */
+	(if (ncmp != ERROR_MARK)
+	 (if (ncmp == GE_EXPR)
+	  (ge @0 { build_real (TREE_TYPE (@0), c2); })
+	  (gt @0 { build_real (TREE_TYPE (@0), c2); }))))
+       /* else if (cmp == LT_EXPR || cmp == LE_EXPR)  */
+       (if (REAL_VALUE_ISINF (c2))
+	(switch
+	 /* sqrt(x) < y is always true, when y is a very large
+	    value and we don't care about NaNs or Infinities.  */
+	 (if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
+	  { constant_boolean_node (true, type); })
+	 /* sqrt(x) < y is x != +Inf when y is very large and we
+	    don't care about NaNs.  */
+	 (if (! HONOR_NANS (@0))
+	  (ne @0 { build_real (TREE_TYPE (@0), c2); }))
+	 /* sqrt(x) < y is x >= 0 when y is very large and we
+	    don't care about Infinities.  */
+	 (if (! HONOR_INFINITIES (@0))
+	  (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
+	 /* sqrt(x) < y is x >= 0 && x != +Inf, when y is large.  */
+	 (if (GENERIC)
+	  (truth_andif
+	   (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+	   (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
+	/* sqrt(x) < c is the same as x < c*c, if we ignore NaNs.  */
+	(if (ncmp != ERROR_MARK && ! HONOR_NANS (@0))
+	 (if (ncmp == LT_EXPR)
+	  (lt @0 { build_real (TREE_TYPE (@0), c2); })
+	  (le @0 { build_real (TREE_TYPE (@0), c2); }))
+	 /* sqrt(x) < c is the same as x >= 0 && x < c*c.  */
+	 (if (ncmp != ERROR_MARK && GENERIC)
+	  (if (ncmp == LT_EXPR)
+	   (truth_andif
+	    (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+	    (lt @0 { build_real (TREE_TYPE (@0), c2); }))
+	   (truth_andif
+	    (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+	    (le @0 { build_real (TREE_TYPE (@0), c2); })))))))))))
 
  /* Transform sqrt(x) cmp sqrt(y) -> x cmp y.  */
  (simplify
   (cmp (sq @0) (sq @1))
--- gcc/testsuite/gcc.dg/pr91734.c.jj	2019-09-20 12:43:08.019273135 +0200
+++ gcc/testsuite/gcc.dg/pr91734.c	2019-09-21 07:57:26.102273700 +0200
@@ -0,0 +1,97 @@
+/* PR tree-optimization/91734 */
+/* { dg-do run } */
+/* { dg-add-options ieee } */
+/* { dg-additional-options "-O2 -std=gnu99" } */
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f1 (float x)
+{
+  return __builtin_sqrtf (x) < __FLT_MIN__;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f2 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.2dd3d0p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f3 (float x)
+{
+  return __builtin_sqrtf (x) >= 0x1.2dd3d0p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f4 (float x)
+{
+  return __builtin_sqrtf (x) >= 0x1.5642e6p+54f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f5 (float x)
+{
+  return __builtin_sqrtf (x) > 0x1.5642e6p+54f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f6 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.4da1cp-19f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f7 (float x)
+{
+  return __builtin_sqrtf (x) <= 0x1.4da1cp-19f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f8 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.50cb62p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f9 (float x)
+{
+  return __builtin_sqrtf (x) <= 0x1.4fc00cp-73f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f10 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.001002p+0f;
+}
+
+int
+main ()
+{
+  if (__FLT_RADIX__ != 2
+      || __FLT_MANT_DIG__ != 24
+      || __FLT_MIN_EXP__ != -125
+      || __FLT_MAX_EXP__ != 128
+      || __FLT_HAS_DENORM__ != 1
+      || __FLT_HAS_INFINITY__ != 1)
+    return 0;
+  if (!f1 (0.0f) || f1 (0x1.0p-149f))
+    __builtin_abort ();
+  if (!f2 (0x1.63dbc0p-130f))
+    __builtin_abort ();
+  if (f3 (0x1.63dbc0p-130f))
+    __builtin_abort ();
+  if (!f4 (0x1.c996d0p+108f) || !f4 (0x1.c996cep+108f) || f4 (0x1.c996ccp+108f))
+    __builtin_abort ();
+  if (f5 (0x1.c996d0p+108f) || f5 (0x1.c996d2p+108f) || !f5 (0x1.c996d4p+108f))
+    __builtin_abort ();
+  if (!f6 (0x1.b2ce3p-38f) || f6 (0x1.b2ce32p-38f) || f6 (0x1.b2ce34p-38f))
+    __builtin_abort ();
+  if (!f7 (0x1.b2ce3p-38f) || !f7 (0x1.b2ce34p-38f) || !f7 (0x1.b2ce36p-38f) || f7 (0x1.b2ce38p-38f))
+    __builtin_abort ();
+  if (!f8 (0x1.bb166p-130f) || !f8 (0x1.bb168p-130f) || f8 (0x1.bb16ap-130f) || f8 (0x1.bb16cp-130f))
+    __builtin_abort ();
+  if (!f9 (0x1.8p-146f) || !f9 (0x1.ap-146f) || f9 (0x1.cp-146f) || f9 (0x1.ep-146f))
+    __builtin_abort ();
+  if (f10 (0x1.002004p+0f))
+    __builtin_abort ();
+  return 0;
+}

	Jakub