From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id A55BC3858D35; Wed,  9 Nov 2022 20:24:54 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A55BC3858D35
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1668025494;
	bh=MaQpZ6cYwSWIoc9nMI2sh7b/TALJdQiX+wM6J+El9gI=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=QKdkV18gp3FYPdZxnBXujAPpronaQICMysIoE5RMdw9KSG3FShgQWFZWhnUVAkJNs
	 XIZB/BM4wbPJIkyCBaMT6TX0aGXUZ985/Q/Fdii8m20Z0EYj+Yv87CD9awM3AiUtOW
	 98w9Fpo8HbEVGb/t7UetxnPjLXGX3xFVOvbiQfoQ=
From: "jakub at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/107591] range-op{,-float}.cc for x * x
Date: Wed, 09 Nov 2022 20:24:53 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: tree-optimization
X-Bugzilla-Version: 13.0
X-Bugzilla-Keywords: missed-optimization
X-Bugzilla-Severity: normal
X-Bugzilla-Who: jakub at gcc dot gnu.org
X-Bugzilla-Status: NEW
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-107591-4-q4wMe5f4xH@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-107591-4@http.gcc.gnu.org/bugzilla/>
References: <bug-107591-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D107591
--- Comment #14 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Incremental patch on top of the
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D107569#c18 patch, which
optimizes the floating point x * x:

--- gcc/range-op-float.cc.jj    2022-11-09 19:06:11.075716000 +0100
+++ gcc/range-op-float.cc       2022-11-09 21:11:52.468256045 +0100
@@ -51,7 +51,7 @@ along with GCC; see the file COPYING3.
 bool
 range_operator_float::fold_range (frange &r, tree type,
                                  const frange &op1, const frange &op2,
-                                 relation_trio) const
+                                 relation_trio trio) const
 {
   if (empty_range_varying (r, type, op1, op2))
     return true;
@@ -65,7 +65,7 @@ range_operator_float::fold_range (frange
   bool maybe_nan;
   rv_fold (lb, ub, maybe_nan, type,
           op1.lower_bound (), op1.upper_bound (),
-          op2.lower_bound (), op2.upper_bound ());
+          op2.lower_bound (), op2.upper_bound (), trio);

   // Handle possible NANs by saturating to the appropriate INF if only
   // one end is a NAN.  If both ends are a NAN, just return a NAN.
@@ -103,8 +103,8 @@ range_operator_float::rv_fold (REAL_VALU
                               const REAL_VALUE_TYPE &lh_lb ATTRIBUTE_UNUSE=
D,
                               const REAL_VALUE_TYPE &lh_ub ATTRIBUTE_UNUSE=
D,
                               const REAL_VALUE_TYPE &rh_lb ATTRIBUTE_UNUSE=
D,
-                              const REAL_VALUE_TYPE &rh_ub ATTRIBUTE_UNUSE=
D)
-  const
+                              const REAL_VALUE_TYPE &rh_ub ATTRIBUTE_UNUSE=
D,
+                              relation_trio) const
 {
   lb =3D dconstninf;
   ub =3D dconstinf;
@@ -1868,7 +1868,8 @@ class foperator_plus : public range_oper
                const REAL_VALUE_TYPE &lh_lb,
                const REAL_VALUE_TYPE &lh_ub,
                const REAL_VALUE_TYPE &rh_lb,
-               const REAL_VALUE_TYPE &rh_ub) const final override
+               const REAL_VALUE_TYPE &rh_ub,
+               relation_trio) const final override
   {
     frange_arithmetic (PLUS_EXPR, type, lb, lh_lb, rh_lb, dconstninf);
     frange_arithmetic (PLUS_EXPR, type, ub, lh_ub, rh_ub, dconstinf);
@@ -1892,7 +1893,8 @@ class foperator_minus : public range_ope
                const REAL_VALUE_TYPE &lh_lb,
                const REAL_VALUE_TYPE &lh_ub,
                const REAL_VALUE_TYPE &rh_lb,
-               const REAL_VALUE_TYPE &rh_ub) const final override
+               const REAL_VALUE_TYPE &rh_ub,
+               relation_trio) const final override
   {
     frange_arithmetic (MINUS_EXPR, type, lb, lh_lb, rh_ub, dconstninf);
     frange_arithmetic (MINUS_EXPR, type, ub, lh_ub, rh_lb, dconstinf);
@@ -1910,7 +1912,7 @@ class foperator_minus : public range_ope

 /* Wrapper around frange_arithmetics, that computes the result
    if inexact rounded to both directions.  Also, if one of the
-   operands is +-0.0 and another +-inf, return +-0.0 rather than
+   operands is +-0.0 and another +-INF, return +-0.0 rather than
    NAN.  */

 static void
@@ -1945,13 +1947,42 @@ class foperator_mult : public range_oper
                const REAL_VALUE_TYPE &lh_lb,
                const REAL_VALUE_TYPE &lh_ub,
                const REAL_VALUE_TYPE &rh_lb,
-               const REAL_VALUE_TYPE &rh_ub) const final override
+               const REAL_VALUE_TYPE &rh_ub,
+               relation_trio trio) const final override
   {
     REAL_VALUE_TYPE cp[8];
+    bool is_square
+      =3D (trio.op1_op2 () =3D=3D VREL_EQ
+        && real_equal (&lh_lb, &rh_lb)
+        && real_equal (&lh_ub, &rh_ub)
+        && real_isneg (&lh_lb) =3D=3D real_isneg (&rh_lb)
+        && real_isneg (&lh_ub) =3D=3D real_isneg (&rh_ub));
     // Do a cross-product.
     frange_mult (type, cp[0], cp[4], lh_lb, rh_lb);
-    frange_mult (type, cp[1], cp[5], lh_lb, rh_ub);
-    frange_mult (type, cp[2], cp[6], lh_ub, rh_lb);
+    if (is_square)
+      {
+       // For x * x we can just do max (lh_lb * lh_lb, lh_ub * lh_ub)
+       // as maximum and -0.0 as minimum if 0.0 is in the range,
+       // otherwise min (lh_lb * lh_lb, lh_ub * lh_ub).
+       // -0.0 rather than 0.0 because VREL_EQ doesn't prove that
+       // x and y are bitwise equal, just that they compare equal.
+       if (real_compare (LE_EXPR, &lh_lb, &dconst0)
+           && real_compare (GE_EXPR, &lh_ub, &dconst0))
+         {
+           cp[1] =3D dconst0;
+           real_value_negate (&cp[1]);
+         }
+       else
+         cp[1] =3D cp[0];
+       cp[2] =3D cp[0];
+       cp[5] =3D cp[4];
+       cp[6] =3D cp[4];
+      }
+    else
+      {
+       frange_mult (type, cp[1], cp[5], lh_lb, rh_ub);
+       frange_mult (type, cp[2], cp[6], lh_ub, rh_lb);
+      }
     frange_mult (type, cp[3], cp[7], lh_ub, rh_ub);
     for (int i =3D 1; i < 3; ++i)
       {
@@ -1965,18 +1996,27 @@ class foperator_mult : public range_oper
     lb =3D cp[0];
     ub =3D cp[4];

-    // [+-0, +-0] * [+INF,+INF] (or [-INF,-INF] or swapped is a known NaN.
-    if ((real_iszero (&lh_lb) && real_iszero (&lh_ub)
-        && real_isinf (&rh_lb) && real_isinf (&rh_ub, real_isneg (&rh_lb)))
-       || (real_iszero (&rh_lb) && real_iszero (&rh_ub)
-           && real_isinf (&lh_lb) && real_isinf (&lh_ub, real_isneg
(&lh_lb))))
+    // If both operands are the same, then we know it can be +-0.0, or +-I=
NF,
+    // but not both at the same time, so it will never be invalid unless
+    // operand was already NAN.
+    if (is_square)
+      maybe_nan =3D false;
+    // [+-0, +-0] * [+INF,+INF] (or [-INF,-INF] or swapped is a known NAN.
+    else if ((real_iszero (&lh_lb)
+             && real_iszero (&lh_ub)
+             && real_isinf (&rh_lb)
+             && real_isinf (&rh_ub, real_isneg (&rh_lb)))
+            || (real_iszero (&rh_lb)
+                && real_iszero (&rh_ub)
+                && real_isinf (&lh_lb)
+                && real_isinf (&lh_ub, real_isneg (&lh_lb))))
       {
        real_nan (&lb, NULL, 0, TYPE_MODE (type));
        ub =3D lb;
        maybe_nan =3D true;
       }
     // Otherwise, if one range includes zero and the other ends with +-INF,
-    // it is a maybe NaN.
+    // it is a maybe NAN.
     else if (real_compare (LE_EXPR, &lh_lb, &dconst0)
             && real_compare (GE_EXPR, &lh_ub, &dconst0)
             && (real_isinf (&rh_lb) || real_isinf (&rh_ub)))
--- gcc/range-op.h.jj   2022-11-09 11:22:42.867624633 +0100
+++ gcc/range-op.h      2022-11-09 20:20:02.266964633 +0100
@@ -123,7 +123,8 @@ public:
                        const REAL_VALUE_TYPE &lh_lb,
                        const REAL_VALUE_TYPE &lh_ub,
                        const REAL_VALUE_TYPE &rh_lb,
-                       const REAL_VALUE_TYPE &rh_ub) const;
+                       const REAL_VALUE_TYPE &rh_ub,
+                       relation_trio) const;
   // Unary operations have the range of the LHS as op2.
   virtual bool fold_range (irange &r, tree type,
                           const frange &lh,

We determine the right range (I think), but that only helps to optimize
away the call to the sqrt function, not the actual comparison (I bet out
of fear that an sNaN could appear there).
If frange also tracked maybe sNaN (and cleared it, say, on all binops or
unops other than the operations that might not trigger an exception or
quiet it), perhaps we could optimize that.  Or, without frange's help,
just by assuming that, say, the result of a binary floating point
operation can't ever be an sNaN.=