On Mon, Sep 16, 2019 at 08:56:58AM +0200, Richard Biener wrote:
> > As mentioned in the PR, the sqrt (x) < c optimization into x < c*c
> > sometimes breaks the boundary case, if c2=c*c is inexact then in some cases
> > we need to optimize it into x <= c*c rather than x < c*c.  The original
> > bugreport is when c is small and c2 is 0.0, then obviously we need <= 0.0
> > rather than < 0.0, but the testcase includes another example where it makes
> > a difference, plus has a >= testcase too.
> >
> > Bootstrapped/regtested on powerpc64le-linux, ok for trunk?
>
> I was hoping Joseph might chime in here... anyway, does this assume
> round-to-nearest or does it work with round to +-Inf as well?  I
> realize this all is under flag_unsafe_math_optimizations, but
> this flag is notoriously underspecified...  So the question is
> whether we should disable the transform if c*c isn't exact and
> flag_rounding_math?  The transform also doesn't seem to guard
> against isnan (c) (-funsafe-math-optimizations sets
> -fno-trapping-math and -fno-signed-zeros but not -ffinite-math-only
> or disables itself on -frounding-math)

Here is an updated patch, which on top of the previous patch:
1) punts for -frounding-math
2) punts for sqrt comparisons against a NaN constant
3) for the c*c inexact case also handles the other two comparisons that
   apparently need to be handled too
4) for all 4 comparisons also checks nexttoward (c2, 0.0) or
   nexttoward (c2, inf), depending on the comparison kind, because as
   Joseph correctly noted, with rounding to nearest up to 3 different
   floating point values can have the same sqrt result, and if c2 is the
   middle one of them, we need to use the value 1 ulp smaller or larger
   in the comparison
5) had to adjust the testcase, because while it worked fine on powerpc64le,
   on x86_64 if the test is linked with -ffast-math/-Ofast etc.,
   crtfastmath.o is linked in and subnormals are flushed to zero, which is
   not what we want for the testcase (at least for a subset of the tests).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

BTW, I've used attached programs to look for the problematic cases on
random floats/doubles, and the cases the patch handles seem to be the only
problematic ones; there is never a need to go further than one nexttoward
towards 0 or inf.
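The attachments themselves aren't reproduced here; the following is only an
illustrative sketch of that kind of search for the < case (not the actual
attached programs).  It picks random float constants c and checks whether
the naive rewrite of sqrtf (x) < c into x < c*c disagrees with the original
predicate for x within a couple of ulps of c*c, which by the above is the
only place the two can differ.  It has to be compiled without -ffast-math
(e.g. plain "gcc search.c -lm", the file name is of course arbitrary) so
that sqrtf and the comparisons keep IEEE semantics:

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  srand (1);
  for (int i = 0; i < 10000000; i++)
    {
      /* Random positive c spread over most of the float exponent range.  */
      float s = (float) rand () / RAND_MAX + 1.0f;
      float c = ldexpf (s, rand () % 250 - 125);
      float c2 = c * c;   /* What the naive rewrite would compare against.  */
      if (isinf (c2))
        continue;
      /* Only x at or near the boundary c*c can expose a difference, so it
         is enough to check c*c itself and a few neighbouring floats.  */
      float x = nextafterf (nextafterf (c2, 0.0f), 0.0f);
      for (int j = 0; j < 5; j++, x = nextafterf (x, INFINITY))
        if ((sqrtf (x) < c) != (x < c2))
          {
            printf ("c = %a: sqrtf (%a) < c is %d, but %a < c*c (= %a) is %d\n",
                    c, x, sqrtf (x) < c, x, c2, x < c2);
            break;
          }
    }
  return 0;
}

The original PR case shows up immediately: for tiny c, c*c rounds to 0.0f,
and x = 0.0f then satisfies sqrtf (x) < c but not x < c*c, which is exactly
why < has to become <= there.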
2019-09-21  Jakub Jelinek

	PR tree-optimization/91734
	* generic-match-head.c: Include fold-const-call.h.
	* match.pd (sqrt(x) cmp c): Check the boundary value and
	in case inexact computation of c*c affects comparison of the boundary,
	turn LT_EXPR into LE_EXPR, GE_EXPR into GT_EXPR, LE_EXPR into LT_EXPR
	or GT_EXPR into GE_EXPR.  Punt for sqrt comparisons against NaN and
	for -frounding-math.  For c2, try the next smaller or larger floating
	point constant depending on comparison code and if it has the same
	sqrt as c2, use it instead of c2.

	* gcc.dg/pr91734.c: New test.

--- gcc/generic-match-head.c.jj	2019-09-20 12:24:56.376189996 +0200
+++ gcc/generic-match-head.c	2019-09-20 12:43:08.017273166 +0200
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
 #include "cgraph.h"
 #include "vec-perm-indices.h"
 #include "fold-const.h"
+#include "fold-const-call.h"
 #include "stor-layout.h"
 #include "tree-dfa.h"
 #include "builtins.h"
--- gcc/match.pd.jj	2019-09-20 12:25:27.323710388 +0200
+++ gcc/match.pd	2019-09-20 17:20:22.974316837 +0200
@@ -3711,8 +3711,7 @@ (define_operator_list COND_TERNARY
      (cmp { tem; } @1)))))
 
 /* Fold comparisons against built-in math functions.  */
- (if (flag_unsafe_math_optimizations
-      && ! flag_errno_math)
+ (if (flag_unsafe_math_optimizations && ! flag_errno_math)
  (for sq (SQRT)
   (simplify
    (cmp (sq @0) REAL_CST@1)
@@ -3747,56 +3746,108 @@ (define_operator_list COND_TERNARY
 	    if x is negative or NaN.  Due to -funsafe-math-optimizations,
 	    the results for other x follow from natural arithmetic.  */
 	 (cmp @0 @1)))
-     (if (cmp == GT_EXPR || cmp == GE_EXPR)
+     (if ((cmp == LT_EXPR
+	   || cmp == LE_EXPR
+	   || cmp == GT_EXPR
+	   || cmp == GE_EXPR)
+	  && !REAL_VALUE_ISNAN (TREE_REAL_CST (@1))
+	  /* Give up for -frounding-math.  */
+	  && !HONOR_SIGN_DEPENDENT_ROUNDING (TREE_TYPE (@0)))
       (with
       {
-	REAL_VALUE_TYPE c2;
+	REAL_VALUE_TYPE c2;
+	enum tree_code ncmp = cmp;
+	const real_format *fmt
+	  = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (@0)));
	real_arithmetic (&c2, MULT_EXPR,
			 &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
-	real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
+	real_convert (&c2, fmt, &c2);
+	/* See PR91734: if c2 is inexact and sqrt(c2) < c (or sqrt(c2) >= c),
+	   then change LT_EXPR into LE_EXPR or GE_EXPR into GT_EXPR.  */
+	if (!REAL_VALUE_ISINF (c2))
+	  {
+	    tree c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+				       build_real (TREE_TYPE (@0), c2));
+	    if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+	      ncmp = ERROR_MARK;
+	    else if ((cmp == LT_EXPR || cmp == GE_EXPR)
+		     && real_less (&TREE_REAL_CST (c3), &TREE_REAL_CST (@1)))
+	      ncmp = cmp == LT_EXPR ? LE_EXPR : GT_EXPR;
+	    else if ((cmp == LE_EXPR || cmp == GT_EXPR)
+		     && real_less (&TREE_REAL_CST (@1), &TREE_REAL_CST (c3)))
+	      ncmp = cmp == LE_EXPR ? LT_EXPR : GE_EXPR;
+	    else
+	      {
+		/* With rounding to even, sqrt of up to 3 different values
+		   gives the same normal result, so in some cases c2 needs
+		   to be adjusted.  */
+		REAL_VALUE_TYPE c2alt, tow;
+		if (cmp == LT_EXPR || cmp == GE_EXPR)
+		  tow = dconst0;
+		else
+		  real_inf (&tow);
+		real_nextafter (&c2alt, fmt, &c2, &tow);
+		real_convert (&c2alt, fmt, &c2alt);
+		if (REAL_VALUE_ISINF (c2alt))
+		  ncmp = ERROR_MARK;
+		else
+		  {
+		    c3 = fold_const_call (CFN_SQRT, TREE_TYPE (@0),
+					  build_real (TREE_TYPE (@0), c2alt));
+		    if (c3 == NULL_TREE || TREE_CODE (c3) != REAL_CST)
+		      ncmp = ERROR_MARK;
+		    else if (real_equal (&TREE_REAL_CST (c3),
+					 &TREE_REAL_CST (@1)))
+		      c2 = c2alt;
+		  }
+	      }
+	  }
       }
-      (if (REAL_VALUE_ISINF (c2))
-       /* sqrt(x) > y is x == +Inf, when y is very large.  */
-       (if (HONOR_INFINITIES (@0))
-	(eq @0 { build_real (TREE_TYPE (@0), c2); })
-	{ constant_boolean_node (false, type); })
-       /* sqrt(x) > c is the same as x > c*c.  */
-       (cmp @0 { build_real (TREE_TYPE (@0), c2); }))))
-     (if (cmp == LT_EXPR || cmp == LE_EXPR)
-      (with
-      {
-	REAL_VALUE_TYPE c2;
-	real_arithmetic (&c2, MULT_EXPR,
-			 &TREE_REAL_CST (@1), &TREE_REAL_CST (@1));
-	real_convert (&c2, TYPE_MODE (TREE_TYPE (@0)), &c2);
-      }
-      (if (REAL_VALUE_ISINF (c2))
-       (switch
-	/* sqrt(x) < y is always true, when y is a very large
-	   value and we don't care about NaNs or Infinities.  */
-	(if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
-	 { constant_boolean_node (true, type); })
-	/* sqrt(x) < y is x != +Inf when y is very large and we
-	   don't care about NaNs.  */
-	(if (! HONOR_NANS (@0))
-	 (ne @0 { build_real (TREE_TYPE (@0), c2); }))
-	/* sqrt(x) < y is x >= 0 when y is very large and we
-	   don't care about Infinities.  */
-	(if (! HONOR_INFINITIES (@0))
-	 (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
-	/* sqrt(x) < y is x >= 0 && x != +Inf, when y is large.  */
-	(if (GENERIC)
-	 (truth_andif
-	  (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
-	  (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
-       /* sqrt(x) < c is the same as x < c*c, if we ignore NaNs.  */
-       (if (! HONOR_NANS (@0))
-	(cmp @0 { build_real (TREE_TYPE (@0), c2); })
-	/* sqrt(x) < c is the same as x >= 0 && x < c*c.  */
-	(if (GENERIC)
-	 (truth_andif
-	  (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
-	  (cmp @0 { build_real (TREE_TYPE (@0), c2); })))))))))
+      (if (cmp == GT_EXPR || cmp == GE_EXPR)
+       (if (REAL_VALUE_ISINF (c2))
+	/* sqrt(x) > y is x == +Inf, when y is very large.  */
+	(if (HONOR_INFINITIES (@0))
+	 (eq @0 { build_real (TREE_TYPE (@0), c2); })
+	 { constant_boolean_node (false, type); })
+	/* sqrt(x) > c is the same as x > c*c.  */
+	(if (ncmp != ERROR_MARK)
+	 (if (ncmp == GE_EXPR)
+	  (ge @0 { build_real (TREE_TYPE (@0), c2); })
+	  (gt @0 { build_real (TREE_TYPE (@0), c2); }))))
+       /* else if (cmp == LT_EXPR || cmp == LE_EXPR)  */
+       (if (REAL_VALUE_ISINF (c2))
+	(switch
+	 /* sqrt(x) < y is always true, when y is a very large
+	    value and we don't care about NaNs or Infinities.  */
+	 (if (! HONOR_NANS (@0) && ! HONOR_INFINITIES (@0))
+	  { constant_boolean_node (true, type); })
+	 /* sqrt(x) < y is x != +Inf when y is very large and we
+	    don't care about NaNs.  */
+	 (if (! HONOR_NANS (@0))
+	  (ne @0 { build_real (TREE_TYPE (@0), c2); }))
+	 /* sqrt(x) < y is x >= 0 when y is very large and we
+	    don't care about Infinities.  */
+	 (if (! HONOR_INFINITIES (@0))
+	  (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))
+	 /* sqrt(x) < y is x >= 0 && x != +Inf, when y is large.  */
+	 (if (GENERIC)
+	  (truth_andif
+	   (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+	   (ne @0 { build_real (TREE_TYPE (@0), c2); }))))
+	/* sqrt(x) < c is the same as x < c*c, if we ignore NaNs.  */
+	(if (ncmp != ERROR_MARK && ! HONOR_NANS (@0))
+	 (if (ncmp == LT_EXPR)
+	  (lt @0 { build_real (TREE_TYPE (@0), c2); })
+	  (le @0 { build_real (TREE_TYPE (@0), c2); }))
+	 /* sqrt(x) < c is the same as x >= 0 && x < c*c.  */
+	 (if (ncmp != ERROR_MARK && GENERIC)
+	  (if (ncmp == LT_EXPR)
+	   (truth_andif
+	    (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+	    (lt @0 { build_real (TREE_TYPE (@0), c2); }))
+	   (truth_andif
+	    (ge @0 { build_real (TREE_TYPE (@0), dconst0); })
+	    (le @0 { build_real (TREE_TYPE (@0), c2); })))))))))))
 
  /* Transform sqrt(x) cmp sqrt(y) -> x cmp y.  */
  (simplify
   (cmp (sq @0) (sq @1))
--- gcc/testsuite/gcc.dg/pr91734.c.jj	2019-09-20 12:43:08.019273135 +0200
+++ gcc/testsuite/gcc.dg/pr91734.c	2019-09-21 07:57:26.102273700 +0200
@@ -0,0 +1,97 @@
+/* PR tree-optimization/91734 */
+/* { dg-do run } */
+/* { dg-add-options ieee } */
+/* { dg-additional-options "-O2 -std=gnu99" } */
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f1 (float x)
+{
+  return __builtin_sqrtf (x) < __FLT_MIN__;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f2 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.2dd3d0p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f3 (float x)
+{
+  return __builtin_sqrtf (x) >= 0x1.2dd3d0p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f4 (float x)
+{
+  return __builtin_sqrtf (x) >= 0x1.5642e6p+54f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f5 (float x)
+{
+  return __builtin_sqrtf (x) > 0x1.5642e6p+54f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f6 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.4da1cp-19f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f7 (float x)
+{
+  return __builtin_sqrtf (x) <= 0x1.4da1cp-19f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f8 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.50cb62p-65f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f9 (float x)
+{
+  return __builtin_sqrtf (x) <= 0x1.4fc00cp-73f;
+}
+
+__attribute__((noipa, optimize ("Ofast"))) int
+f10 (float x)
+{
+  return __builtin_sqrtf (x) < 0x1.001002p+0f;
+}
+
+int
+main ()
+{
+  if (__FLT_RADIX__ != 2
+      || __FLT_MANT_DIG__ != 24
+      || __FLT_MIN_EXP__ != -125
+      || __FLT_MAX_EXP__ != 128
+      || __FLT_HAS_DENORM__ != 1
+      || __FLT_HAS_INFINITY__ != 1)
+    return 0;
+  if (!f1 (0.0f) || f1 (0x1.0p-149f))
+    __builtin_abort ();
+  if (!f2 (0x1.63dbc0p-130f))
+    __builtin_abort ();
+  if (f3 (0x1.63dbc0p-130f))
+    __builtin_abort ();
+  if (!f4 (0x1.c996d0p+108f) || !f4 (0x1.c996cep+108f) || f4 (0x1.c996ccp+108f))
+    __builtin_abort ();
+  if (f5 (0x1.c996d0p+108f) || f5 (0x1.c996d2p+108f) || !f5 (0x1.c996d4p+108f))
+    __builtin_abort ();
+  if (!f6 (0x1.b2ce3p-38f) || f6 (0x1.b2ce32p-38f) || f6 (0x1.b2ce34p-38f))
+    __builtin_abort ();
+  if (!f7 (0x1.b2ce3p-38f) || !f7 (0x1.b2ce34p-38f) || !f7 (0x1.b2ce36p-38f) || f7 (0x1.b2ce38p-38f))
+    __builtin_abort ();
+  if (!f8 (0x1.bb166p-130f) || !f8 (0x1.bb168p-130f) || f8 (0x1.bb16ap-130f) || f8 (0x1.bb16cp-130f))
+    __builtin_abort ();
+  if (!f9 (0x1.8p-146f) || !f9 (0x1.ap-146f) || f9 (0x1.cp-146f) || f9 (0x1.ep-146f))
+    __builtin_abort ();
+  if (f10 (0x1.002004p+0f))
+    __builtin_abort ();
+  return 0;
+}

	Jakub