From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id 9F609385843A; Wed,  6 Mar 2024 02:47:35 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 9F609385843A
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1709693255;
	bh=AKgr6k2LKRQyc3Y5hpa92X8VHDX0HcQJkWKr84755Vg=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=Oq2cekXTyLdA+p44CZBplwWn/uZuQYmI0hjPUMI+PTgqHvGKXVtMcVJAwGYI+NeFf
	 QS7I8jOyl9Nvb5cDoO86SBre/UdFBZxq0xhmHdsC1oearZmgcTUKMVeHf56Qrndmed
	 PMj4p7pyasgPzo3b7xUqhdD69jns1tw6uvvCFjUU=
From: "g.peterhoff@t-online.de" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug libstdc++/77776] C++17 std::hypot implementation is poor
Date: Wed, 06 Mar 2024 02:47:33 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: libstdc++
X-Bugzilla-Version: 7.0
X-Bugzilla-Keywords: 
X-Bugzilla-Severity: normal
X-Bugzilla-Who: g.peterhoff@t-online.de
X-Bugzilla-Status: ASSIGNED
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: emsr at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-77776-4-Wv0fOXiifc@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-77776-4@http.gcc.gnu.org/bugzilla/>
References: <bug-77776-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D77776
--- Comment #19 from g.peterhoff@t-online.de ---
> So, no need to use frexp/ldexp, just comparisons of hi above against sqrt=
 of
> (max finite / 3), in that case scale by multiplying all 3 args by some
> appropriate scale constant, and similarly otherwise if lo1 is too small by
> some large scale.

I don't really know. With frexp/ldexp you probably get the highest accuracy
(even if it is probably slower) compared with scaling manually. The problem
with manual scaling is to determine suitable scaling factors and to adjust
the return value accordingly. I have implemented both cases.

Error
* In the case (x=3D=3Dy && y=3D=3Dz), x*std::sqrt(T(3)) must not simply be =
returned, as
this can lead to an overflow (inf).

Generally
* Instead of using fmin/fmax to determine the values hi,lo1,lo0, it is bett=
er
to sort x,y,z. This is faster and clearer and no additional variables need =
to
be introduced.
* It also makes sense to consider the case (x=3D=3D0 && y=3D=3D0 && z=3D=3D=
0).

Optimizations
* You were probably wondering why I wrote "if (std::isinf(x) |
std::isinf(y) | std::isinf(z))", for example. This is intentional. The
problem is that gcc almost always produces branching code for the logical
operators, i.e. *a lot* of conditional jumps. By using the bitwise
operators instead (| and & rather than || and &&), I can get it to
generate only the conditional jumps or cmoves that are actually
necessary. Branch-free code is always better.


// Euclidean norm sqrt(x*x + y*y + z*z) of three floating-point values.
//
// The operands are normalised with the binary exponent of the largest
// magnitude (std::frexp / std::ldexp), so every square lies in [0, 1)
// and the sum cannot overflow; the exponent is re-applied to the root.
//
// T        floating-point type; integer types are rejected at compile
//          time because the normalised operands would be truncated.
// x, y, z  operands; only their magnitudes are used.
// returns  +infinity if any operand is infinite (even if another one
//          is NaN), otherwise NaN if any operand is NaN, otherwise the
//          norm. The result is still +infinity when the norm itself
//          exceeds the largest finite T.
//
// Written without [[unlikely]] and without std::swap inside a constexpr
// lambda (both need C++20), so that it also builds as C++17.
template <typename T>
constexpr T     hypot3_exp(T x, T y, T z) noexcept
{
        using limits = std::numeric_limits<T>;

        static_assert(limits::is_specialized && !limits::is_integer,
                      "hypot3_exp requires a floating-point type");

        x = std::abs(x);
        y = std::abs(y);
        z = std::abs(z);

        // Bitwise | instead of || is deliberate: all three tests are
        // cheap and side-effect free, and combining them this way lets
        // the compiler emit a single conditional jump.
        // Infinity is tested first so that it wins over NaN.
        if (std::isinf(x) | std::isinf(y) | std::isinf(z))
                return limits::infinity();
        if (std::isnan(x) | std::isnan(y) | std::isnan(z))
                return limits::quiet_NaN();

        // Three-element sorting network; afterwards x <= y <= z.
        // No NaN can reach this point, so the comparisons are total.
        if (x > y) { const T t = x; x = y; y = t; }
        if (y > z) { const T t = y; y = z; z = t; }
        if (x > y) { const T t = x; x = y; y = t; }

        // If the middle value is zero then so is the smallest one, and
        // the norm is exactly z (this also covers the all-zero case).
        if (!(y > T(0)))
                return z;

        // z becomes its mantissa in [0.5, 1); x and y are divided by the
        // same power of two, so the ratios between the operands are kept.
        int exp = 0;
        z = std::frexp(z, &exp);
        y = std::ldexp(y, -exp);
        x = std::ldexp(x, -exp);

        // Add the two smaller squares first to limit rounding error.
        T sum = x*x + y*y;
        sum += z*z;

        return std::ldexp(std::sqrt(sum), exp);
}

// Euclidean norm sqrt(x*x + y*y + z*z) of three floating-point values,
// using one multiplication by a power of two instead of frexp/ldexp.
//
// The largest magnitude z selects one of three ranges:
//   z >= big    all operands are scaled down so that 3*z*z stays finite;
//   z <  tiny   all operands are scaled up so that the squares that
//               matter do not underflow;
//   otherwise   the operands are used as they are.
// All scale factors are powers of two, so scaling and un-scaling add
// no rounding error of their own (except where the final result is
// subnormal or overflows).
//
// T        floating-point type; integer types are rejected at compile
//          time.
// x, y, z  operands; only their magnitudes are used.
// returns  +infinity if any operand is infinite (even if another one
//          is NaN), otherwise NaN if any operand is NaN, otherwise the
//          norm. The result is still +infinity when the norm itself
//          exceeds the largest finite T.
template <typename T>
constexpr T     hypot3_scale(T x, T y, T z) noexcept
{
        using limits = std::numeric_limits<T>;

        static_assert(limits::is_specialized && !limits::is_integer,
                      "hypot3_scale requires a floating-point type");

        // 2^e by repeated doubling / halving. Unlike std::exp2 or
        // std::sqrt this is a real constant expression on every
        // compiler, and each step is exact.
        constexpr auto pow2 = [](int e) constexpr noexcept -> T
        {
                T r = 1;
                for (; e > 0; --e) r *= 2;
                for (; e < 0; ++e) r /= 2;
                return r;
        };

        // big:  below it, z*z is at most a quarter of the largest
        //       power of two, so the sum of three squares is finite.
        // tiny: at or above it, z*z is far enough above the subnormal
        //       range that an underflowing x*x or y*y is negligible.
        constexpr int
                big_exp  = limits::max_exponent / 2 - 1,
                tiny_exp = limits::min_exponent / 2 + limits::digits;

        // scale_down maps [big, max] below big; scale_up maps the
        // range below tiny (subnormals included) to just below big.
        constexpr T
                big        = pow2(big_exp),
                tiny       = pow2(tiny_exp),
                scale_down = pow2(big_exp - limits::max_exponent),
                scale_up   = pow2(big_exp - tiny_exp);

        x = std::abs(x);
        y = std::abs(y);
        z = std::abs(z);

        // Bitwise | instead of || is deliberate: all three tests are
        // cheap and side-effect free, and combining them this way lets
        // the compiler emit a single conditional jump.
        // Infinity is tested first so that it wins over NaN.
        if (std::isinf(x) | std::isinf(y) | std::isinf(z))
                return limits::infinity();
        if (std::isnan(x) | std::isnan(y) | std::isnan(z))
                return limits::quiet_NaN();

        // Three-element sorting network; afterwards x <= y <= z.
        // No NaN can reach this point, so the comparisons are total.
        if (x > y) { const T t = x; x = y; y = t; }
        if (y > z) { const T t = y; y = z; z = t; }
        if (x > y) { const T t = x; x = y; y = t; }

        // If the middle value is zero then so is the smallest one, and
        // the norm is exactly z (this also covers the all-zero case).
        if (!(y > T(0)))
                return z;

        const T
                scale = (z >= big) ? scale_down
                      : (z < tiny) ? scale_up
                      : T(1);

        x *= scale;
        y *= scale;
        z *= scale;

        // Add the two smaller squares first to limit rounding error.
        T sum = x*x + y*y;
        sum += z*z;

        return std::sqrt(sum) / scale;
}


regards
Gero=