From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id AF9C53894C18; Wed, 10 Apr 2024 18:08:20 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org AF9C53894C18
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1712772500;
	bh=eTe2BW5AqVmafHxmCfrqr7eZDsaj8QvKqu7ulXXAZ98=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=l4CvGBxDmLlE1rtfDTVlCNAwaOLLkeOgwfblGHLtvLnFLdnTDCtGh4+Z7q2T0dSSZ
	 NLKg6CtolPFpr+3bSUVRKuBxTPBn5OgXr8JjHXSbqOiXLp1lSLPY6GnszBUUP53j2b
	 FlHt7jrHWyXwjKVZR7hv0ZuOSBZyLb/ybL0zA2ns=
From: "g.peterhoff@t-online.de" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug libstdc++/77776] C++17 std::hypot implementation is poor
Date: Wed, 10 Apr 2024 18:08:18 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: libstdc++
X-Bugzilla-Version: 7.0
X-Bugzilla-Keywords: 
X-Bugzilla-Severity: normal
X-Bugzilla-Who: g.peterhoff@t-online.de
X-Bugzilla-Status: ASSIGNED
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: emsr at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: 
Message-ID: <bug-77776-4-Gt6hPS09CM@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-77776-4@http.gcc.gnu.org/bugzilla/>
References: <bug-77776-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D77776
--- Comment #29 from g.peterhoff@t-online.de ---
(In reply to Jakub Jelinek from comment #28)
> As long as the scale is a power of two or 1.0 / power of two, I don't see
> why any version wouldn't be inaccurate.

Yes, but the constant scale_up is incorrectly selected.
scale_up =3D std::exp2(Type(limits::max_exponent-1)) --> ok
scale_up =3D std::exp2(Type(limits::max_exponent/2)) --> error
scale_up =3D prev_power2(sqrt_max) --> error

scale_down =3D std::exp2(Type(limits::min_exponent-1))
also seems to me to be more favorable.

PS:
There seems to be a problem with random numbers and std::float16_t, which is
why I use std::uniform_real_distribution<std::float128_t>. I have not yet f=
ound
out exactly where the error lies.

thx
Gero


template <std::floating_point Type>
inline constexpr Type   hypot_exp(Type x, Type y, Type z)       noexcept
{
        using limits =3D std::numeric_limits<Type>;

        constexpr Type
                zero =3D 0;

        x =3D std::abs(x);
        y =3D std::abs(y);
        z =3D std::abs(z);

        if (!(std::isnormal(x) && std::isnormal(y) && std::isnormal(z)))
[[unlikely]]
        {
                if              (std::isinf(x) | std::isinf(y) | std::isinf=
(z))
return limits::infinity();
                else if (std::isnan(x) | std::isnan(y) | std::isnan(z)) ret=
urn
limits::quiet_NaN();
                else
                {
                        const bool
                                xz{x =3D=3D zero},
                                yz{y =3D=3D zero},
                                zz{z =3D=3D zero};

                        if (xz)
                        {
                                if (yz)                 return zz ? zero : =
z;
                                else if (zz)    return y;
                        }
                        else if (yz && zz)      return x;
                }
        }

        if (x > z) std::swap(x, z);
        if (y > z) std::swap(y, z);

        int
                exp;

        z =3D std::frexp(z, &exp);
        y =3D std::ldexp(y, -exp);
        x =3D std::ldexp(x, -exp);
        return std::ldexp(std::sqrt(__builtin_assoc_barrier(x*x + y*y) + z*=
z),
exp);
}

template <std::floating_point Type>
inline constexpr Type   hypot_gp(Type x, Type y, Type z)        noexcept
{
        using limits =3D std::numeric_limits<Type>;

        constexpr Type
                sqrt_min        =3D std::sqrt(limits::min()),
                sqrt_max        =3D std::sqrt(limits::max()),
                scale_up        =3D std::exp2(Type(limits::max_exponent-1)),
                scale_down      =3D std::exp2(Type(limits::min_exponent-1)),
                zero            =3D 0;

        x =3D std::abs(x);
        y =3D std::abs(y);
        z =3D std::abs(z);

        if (!(std::isnormal(x) && std::isnormal(y) && std::isnormal(z)))
[[unlikely]]
        {
                if              (std::isinf(x) | std::isinf(y) | std::isinf=
(z))
return limits::infinity();
                else if (std::isnan(x) | std::isnan(y) | std::isnan(z)) ret=
urn
limits::quiet_NaN();
                else
                {
                        const bool
                                xz{x =3D=3D zero},
                                yz{y =3D=3D zero},
                                zz{z =3D=3D zero};

                        if (xz)
                        {
                                if (yz)                 return zz ? zero : =
z;
                                else if (zz)    return y;
                        }
                        else if (yz && zz)      return x;
                }
        }

        if (x > z) std::swap(x, z);
        if (y > z) std::swap(y, z);

        if (const bool b{z>=3Dsqrt_min}; b && z<=3Dsqrt_max) [[likely]]
        {
                //      no scale
                return std::sqrt(__builtin_assoc_barrier(x*x + y*y) + z*z);
        }
        else
        {
                const Type
                        scale =3D b ? scale_down : scale_up;

                x *=3D scale;
                y *=3D scale;
                z *=3D scale;
                return std::sqrt(__builtin_assoc_barrier(x*x + y*y) + z*z) /
scale;
        }
}

template <std::floating_point Type>
void    test(const size_t count, const Type min, const Type max, const Type
factor)
{
        std::random_device rd{};
        std::mt19937 gen{rd()};
        std::uniform_real_distribution<std::float128_t> dis{min, max};

        auto rnd =3D [&]() noexcept -> Type { return Type(dis(gen) * factor=
); };

        for (size_t i=3D0; i<count; ++i)
        {
                const Type
                        x =3D rnd(),
                        y =3D rnd(),
                        z =3D rnd(),
                        r0=3D hypot_exp(x, y, z),
                        r1=3D hypot_gp(x, y, z);

                if (r0 !=3D r1) [[unlikely]]
                {
                        std::cout << "error\n";
                        return;
                }
        }
        std::cout << "ok\n";
}

int main()
{
        using value_type =3D std::float64_t;
        using limits =3D std::numeric_limits<value_type>;

        test<value_type>(1024*1024, 0.5, 1, limits::max());
        test<value_type>(1024*1024, 0, 1, limits::min());

        return EXIT_SUCCESS;
}=