From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=E+i4=BE=gmail.com=crazylht@sourceware.org>
Received: from mail-yb1-xb35.google.com (mail-yb1-xb35.google.com [IPv6:2607:f8b0:4864:20::b35])
	by sourceware.org (Postfix) with ESMTPS id 300B73857710
	for <gcc-patches@gcc.gnu.org>; Mon, 15 May 2023 01:21:41 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 300B73857710
Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com
Received: by mail-yb1-xb35.google.com with SMTP id 3f1490d57ef6-ba76528fe31so6553867276.1
        for <gcc-patches@gcc.gnu.org>; Sun, 14 May 2023 18:21:41 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=gmail.com; s=20221208; t=1684113700; x=1686705700;
        h=content-transfer-encoding:cc:to:subject:message-id:date:from
         :in-reply-to:references:mime-version:from:to:cc:subject:date
         :message-id:reply-to;
        bh=QkpjEOri7xNAjI1w9QgLVs0mBEjdWtCMiQdeJOkRypE=;
        b=ahdIKO95sWz3+RBwlpG4M/IbdKysywHNV5dqV/SgoIL8S28JMP90tjzZkHw2/4CkSf
         ZOtwKB0Z9lUSq0zsXphz4rJW5XeoaSPf/tqOvAoVIckODz//iVE2h+oqZ6+F1PTbs/1Y
         FIi5a0kRULgFboxc1tuzmkNyC1vTSiMAE9jnmLuagsIg9UKE3N70pk2U1UOGQlMsN0SM
         WxGRCNJc/Jnm5deTLM1WQjDKdzYIyLft2KBnPYDffbdT/G0VmpSc/YzEXBhPtemtIQ2a
         JYgLJsjTdH9gJ0I35HJYF8IoYuPZFRtHFg1ZT6yqqa868vX7oRSeffDUK4JV3sLiL/ag
         lzOw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=1e100.net; s=20221208; t=1684113700; x=1686705700;
        h=content-transfer-encoding:cc:to:subject:message-id:date:from
         :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc
         :subject:date:message-id:reply-to;
        bh=QkpjEOri7xNAjI1w9QgLVs0mBEjdWtCMiQdeJOkRypE=;
        b=AOrW3+3amOUuMy6Aj5kYrsFfZF3EF0sV39B0tZ6ikbZflWZ1vox/coUUZmS0pRV5dy
         kVYozSBZfGz2JEQsRnFCiww31M6cOle7WfrmlvksZjBzJdNnYTUl4PEcn3DX2aOZog+A
         LkmImc63S1NT4FTwSaQavSVZsdg/T6nkjsi/3rnyvGNhYYu8GwCWJ5ag7Mf8dK6Y4Ayd
         TizZRtDvZAu5Xwu3LamwKPp+s/iBc04aQYnzuHBxcni+wrkZ9EojQU1GT4JR332zP2w1
         dZVCdee378YcAI2xgyCXFtNFZFGj+tqdomhQxGOzNIsoitKV5Rb2pUR4W+Hts17twgam
         PFaw==
X-Gm-Message-State: AC+VfDy234WkrOg2J4ZuJLMGLAGHgHd9ufYAK1/QR7Yk5syG979+K84d
	RSrDv95MltgihwuUgSGG5UDQdJgN6wMJWPbq1YYj1ciHAsg=
X-Google-Smtp-Source: ACHHUZ4TU4ru/aPkOnCPUnMc7AjOc1nlgPiVszs+7K4GR31plRaFnutAi9AD1Ds6anw1MX91gNzPSIZgOTy1oBTGwgg=
X-Received: by 2002:a81:1b03:0:b0:55a:776e:95f3 with SMTP id
 b3-20020a811b03000000b0055a776e95f3mr27264413ywb.25.1684113699807; Sun, 14
 May 2023 18:21:39 -0700 (PDT)
MIME-Version: 1.0
References: <ZEEtov14Ou3YuC0s@tucnak> <20230421135347.2519452-1-hongtao.liu@intel.com>
 <20230421135347.2519452-2-hongtao.liu@intel.com>
In-Reply-To: <20230421135347.2519452-2-hongtao.liu@intel.com>
From: Hongtao Liu <crazylht@gmail.com>
Date: Mon, 15 May 2023 09:21:29 +0800
Message-ID: <CAMZc-bwkPGVE=k+qbdG0TKVVDYLkvnXg_qegW1R69rhKXU41VA@mail.gmail.com>
Subject: Re: [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and
 __STDCPP_BFLOAT16_T__ for target attribute/pragmas.
To: liuhongt <hongtao.liu@intel.com>
Cc: gcc-patches@gcc.gnu.org, hjl.tools@gmail.com
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Spam-Status: No, score=-7.6 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org
List-Id: <gcc-patches.gcc.gnu.org>

ping

On Fri, Apr 21, 2023 at 9:55=E2=80=AFPM liuhongt <hongtao.liu@intel.com> wr=
ote:
>
> > But for the C++23 macros, more importantly I think we really should
> > also in ix86_target_macros_internal add
> >   if (c_dialect_cxx ()
> >       && cxx_dialect > cxx20
> >       && (isa_flag & OPTION_MASK_ISA_SSE2))
> >     {
> >       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> >       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> >     }
> > plus associated libstdc++ changes.  It can be done incrementally though=
.
> Changed, except for one place in libsupc++/compare: it is inside a
> function, where the pragma cannot be added. Not sure if this
> inconsistency will cause any issue.
>
> #ifdef __STDCPP_BFLOAT16_T__
>           if constexpr (__is_same(_Tp, decltype(0.0bf16)))
>             return _Bfloat16;
> #endif
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Successfully cross-build i686-linux-gnu.
> Ok for trunk?
>
> def_or_undef target macros based on the currently active ISA in pragmas,
> and do the same for __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for
> C++. Also change libstdc++ so that on x86, similarly to the x86 intrin
> headers, the std::float16_t/std::bfloat16_t code is wrapped in
> #pragma GCC target("sse2") when __SSE2__ is not defined.
>
> gcc/ChangeLog:
>
>         PR target/109504
>         * config/i386/i386-c.cc (ix86_target_macros_internal):
>         def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__.
>
> libstdc++-v3/ChangeLog:
>
>         * include/bits/c++config: Add #pragma GCC target("sse2") for
>         _Float16 and bfloat16_t when __SSE2__ is not available.
>         * include/bits/cpp_type_traits.h: Ditto.
>         * include/bits/std_abs.h: Ditto.
>         * include/c_global/cmath: Ditto.
>         * include/ext/type_traits.h: Ditto.
>         * include/std/atomic: Ditto.
>         * include/std/charconv: Ditto.
>         * include/std/complex: Ditto.
>         * include/std/istream: Ditto.
>         * include/std/limits: Ditto.
>         * include/std/numbers: Ditto.
>         * include/std/ostream: Ditto.
>         * include/std/stdfloat: Ditto.
>         * include/std/type_traits: Ditto.
> ---
>  gcc/config/i386/i386-c.cc                   |   9 +-
>  libstdc++-v3/include/bits/c++config         |  11 +
>  libstdc++-v3/include/bits/cpp_type_traits.h |  27 +-
>  libstdc++-v3/include/bits/std_abs.h         |  23 +-
>  libstdc++-v3/include/c_global/cmath         | 733 +++++++++++---------
>  libstdc++-v3/include/ext/type_traits.h      |  23 +-
>  libstdc++-v3/include/std/atomic             |  43 +-
>  libstdc++-v3/include/std/charconv           |  90 ++-
>  libstdc++-v3/include/std/complex            | 227 +++---
>  libstdc++-v3/include/std/istream            |  61 +-
>  libstdc++-v3/include/std/limits             |  37 +-
>  libstdc++-v3/include/std/numbers            |  11 +
>  libstdc++-v3/include/std/ostream            |  29 +-
>  libstdc++-v3/include/std/stdfloat           |  19 +-
>  libstdc++-v3/include/std/type_traits        |  23 +-
>  15 files changed, 809 insertions(+), 557 deletions(-)
>
> diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> index 2f83c9981e1..bcc17263e28 100644
> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -492,7 +492,14 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
>    if (isa_flag & OPTION_MASK_ISA_SSE)
>      def_or_undef (parse_in, "__SSE__");
>    if (isa_flag & OPTION_MASK_ISA_SSE2)
> -    def_or_undef (parse_in, "__SSE2__");
> +    {
> +      def_or_undef (parse_in, "__SSE2__");
> +      if (c_dialect_cxx () && cxx_dialect > cxx20)
> +       {
> +         def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> +         def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> +       }
> +    }
>    if (isa_flag & OPTION_MASK_ISA_SSE3)
>      def_or_undef (parse_in, "__SSE3__");
>    if (isa_flag & OPTION_MASK_ISA_SSSE3)
> diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/b=
its/c++config
> index 13892787e09..c858497fc6e 100644
> --- a/libstdc++-v3/include/bits/c++config
> +++ b/libstdc++-v3/include/bits/c++config
> @@ -820,6 +820,12 @@ namespace std
>  # define _GLIBCXX_LDOUBLE_IS_IEEE_BINARY128 1
>  #endif
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_BFLOAT16_T__
>  namespace __gnu_cxx
>  {
> @@ -827,6 +833,11 @@ namespace __gnu_cxx
>  }
>  #endif
>
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __has_builtin
>  # ifdef __is_identifier
>  // Intel and older Clang require !__is_identifier for some built-ins:
> diff --git a/libstdc++-v3/include/bits/cpp_type_traits.h b/libstdc++-v3/i=
nclude/bits/cpp_type_traits.h
> index 4312f32a4e0..cadd5ca4fde 100644
> --- a/libstdc++-v3/include/bits/cpp_type_traits.h
> +++ b/libstdc++-v3/include/bits/cpp_type_traits.h
> @@ -315,6 +315,12 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
>        typedef __true_type __type;
>      };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __is_floating<_Float16>
> @@ -324,36 +330,41 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT32_T__
> +#ifdef __STDCPP_BFLOAT16_T__
>    template<>
> -    struct __is_floating<_Float32>
> +    struct __is_floating<__gnu_cxx::__bfloat16_t>
>      {
>        enum { __value =3D 1 };
>        typedef __true_type __type;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT64_T__
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
> +#ifdef __STDCPP_FLOAT32_T__
>    template<>
> -    struct __is_floating<_Float64>
> +    struct __is_floating<_Float32>
>      {
>        enum { __value =3D 1 };
>        typedef __true_type __type;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT128_T__
> +#ifdef __STDCPP_FLOAT64_T__
>    template<>
> -    struct __is_floating<_Float128>
> +    struct __is_floating<_Float64>
>      {
>        enum { __value =3D 1 };
>        typedef __true_type __type;
>      };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> +#ifdef __STDCPP_FLOAT128_T__
>    template<>
> -    struct __is_floating<__gnu_cxx::__bfloat16_t>
> +    struct __is_floating<_Float128>
>      {
>        enum { __value =3D 1 };
>        typedef __true_type __type;
> diff --git a/libstdc++-v3/include/bits/std_abs.h b/libstdc++-v3/include/b=
its/std_abs.h
> index 1bb7ffbc2da..0423909e8c8 100644
> --- a/libstdc++-v3/include/bits/std_abs.h
> +++ b/libstdc++-v3/include/bits/std_abs.h
> @@ -97,12 +97,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    abs(__GLIBCXX_TYPE_INT_N_3 __x) { return __x >=3D 0 ? __x : -__x; }
>  #endif
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    constexpr _Float16
>    abs(_Float16 __x)
>    { return _Float16(__builtin_fabsf(__x)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  constexpr __gnu_cxx::__bfloat16_t
> +  abs(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    constexpr _Float32
>    abs(_Float32 __x)
> @@ -125,12 +142,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_fabsf128(__x); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  constexpr __gnu_cxx::__bfloat16_t
> -  abs(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> -#endif
> -
>  #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
>    __extension__ inline _GLIBCXX_CONSTEXPR
>    __float128
> diff --git a/libstdc++-v3/include/c_global/cmath b/libstdc++-v3/include/c=
_global/cmath
> index 568eb354c2d..6bf3a5eade2 100644
> --- a/libstdc++-v3/include/c_global/cmath
> +++ b/libstdc++-v3/include/c_global/cmath
> @@ -515,6 +515,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      tanh(_Tp __x)
>      { return __builtin_tanh(__x); }
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    constexpr _Float16
>    acos(_Float16 __x)
> @@ -609,6 +615,105 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return _Float16(__builtin_tanhf(__x)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  constexpr __gnu_cxx::__bfloat16_t
> +  acos(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  asin(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  atan(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  ceil(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  cos(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  cosh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  exp(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fabs(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  floor(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log10(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
> +  {
> +    float __i, __ret =3D __builtin_modff(__x, &__i);
> +    *__iptr =3D __gnu_cxx::__bfloat16_t(__i);
> +    return __gnu_cxx::__bfloat16_t(__ret);
> +  }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  sin(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  sinh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  sqrt(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  tan(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  tanh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    constexpr _Float32
>    acos(_Float32 __x)
> @@ -979,100 +1084,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_tanhf128(__x); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  constexpr __gnu_cxx::__bfloat16_t
> -  acos(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  asin(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  atan(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  ceil(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  cos(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  cosh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  exp(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fabs(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  floor(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log10(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
> -  {
> -    float __i, __ret =3D __builtin_modff(__x, &__i);
> -    *__iptr =3D __gnu_cxx::__bfloat16_t(__i);
> -    return __gnu_cxx::__bfloat16_t(__ret);
> -  }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  sin(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  sinh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  sqrt(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  tan(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  tanh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
> -#endif
> -
>  #if _GLIBCXX_USE_C99_MATH
>  #if !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC
>
> @@ -1507,6 +1518,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>  #endif // C++11
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    constexpr int
>    fpclassify(_Float16 __x)
> @@ -1558,6 +1575,62 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_isunordered(__x, __y); }
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  constexpr int
> +  fpclassify(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
> +                               FP_SUBNORMAL, FP_ZERO, __x); }
> +
> +  constexpr bool
> +  isfinite(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isfinite(__x); }
> +
> +  constexpr bool
> +  isinf(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isinf(__x); }
> +
> +  constexpr bool
> +  isnan(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isnan(__x); }
> +
> +  constexpr bool
> +  isnormal(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isnormal(__x); }
> +
> +  constexpr bool
> +  signbit(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_signbit(__x); }
> +
> +  constexpr bool
> +  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_isgreater(__x, __y); }
> +
> +  constexpr bool
> +  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __=
y)
> +  { return __builtin_isgreaterequal(__x, __y); }
> +
> +  constexpr bool
> +  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_isless(__x, __y); }
> +
> +  constexpr bool
> +  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_islessequal(__x, __y); }
> +
> +  constexpr bool
> +  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y=
)
> +  { return __builtin_islessgreater(__x, __y); }
> +
> +  constexpr bool
> +  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_isunordered(__x, __y); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    constexpr int
>    fpclassify(_Float32 __x)
> @@ -1711,59 +1784,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_isunordered(__x, __y); }
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  constexpr int
> -  fpclassify(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
> -                               FP_SUBNORMAL, FP_ZERO, __x); }
> -
> -  constexpr bool
> -  isfinite(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isfinite(__x); }
> -
> -  constexpr bool
> -  isinf(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isinf(__x); }
> -
> -  constexpr bool
> -  isnan(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isnan(__x); }
> -
> -  constexpr bool
> -  isnormal(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isnormal(__x); }
> -
> -  constexpr bool
> -  signbit(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_signbit(__x); }
> -
> -  constexpr bool
> -  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_isgreater(__x, __y); }
> -
> -  constexpr bool
> -  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __=
y)
> -  { return __builtin_isgreaterequal(__x, __y); }
> -
> -  constexpr bool
> -  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_isless(__x, __y); }
> -
> -  constexpr bool
> -  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_islessequal(__x, __y); }
> -
> -  constexpr bool
> -  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y=
)
> -  { return __builtin_islessgreater(__x, __y); }
> -
> -  constexpr bool
> -  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_isunordered(__x, __y); }
> -#endif
> -
> -#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
> -#endif /* _GLIBCXX_USE_C99_MATH */
> +#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
> +#endif /* _GLIBCXX_USE_C99_MATH */
>
>  #if __cplusplus >=3D 201103L
>
> @@ -2657,6 +2679,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      { return __builtin_trunc(__x); }
>  #endif
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    constexpr _Float16
>    acosh(_Float16 __x)
> @@ -2837,6 +2865,191 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return _Float16(__builtin_truncf(__x)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  constexpr __gnu_cxx::__bfloat16_t
> +  acosh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  asinh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  atanh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  cbrt(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  erf(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  erfc(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  exp2(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  expm1(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cx=
x::__bfloat16_t __z)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
> +
> +  constexpr int
> +  ilogb(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  lgamma(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
> +
> +  constexpr long long
> +  llrint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
> +
> +  constexpr long long
> +  llround(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log1p(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
> +
> +  // DR 568.
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log2(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  logb(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
> +
> +  constexpr long
> +  lrint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
> +
> +  constexpr long
> +  lround(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  nearbyint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  {
> +    if (std::__is_constant_evaluated())
> +      return __builtin_nextafterf16b(__x, __y);
> +#ifdef __INT16_TYPE__
> +    using __bfloat16_int_type =3D __INT16_TYPE__;
> +#else
> +    using __bfloat16_int_type =3D short int;
> +#endif
> +    __bfloat16_int_type __hx, __hy, __ix, __iy;
> +    __builtin_memcpy(&__hx, &__x, sizeof(__x));
> +    __builtin_memcpy(&__hy, &__y, sizeof(__x));
> +    __ix =3D __hx & 0x7fff;      // |x|
> +    __iy =3D __hy & 0x7fff;      // |y|
> +    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
> +      return __x + __y;
> +    if (__x =3D=3D __y)
> +      return __y;              // x =3D=3D y, return y
> +    if (__ix =3D=3D 0)             // x =3D=3D 0
> +      {
> +       __hy =3D (__hy & 0x8000) | 1;     // return +-__BFLT16_DENORM_MIN=
__
> +       __builtin_memcpy(&__x, &__hy, sizeof(__x));
> +       __builtin_nextafterf(0.0f, 1.0f);       // raise underflow
> +       return __x;
> +      }
> +    if (__hx >=3D 0)             // x > 0
> +      {
> +       if (__hx > __hy)        // x > y, x -=3D ulp
> +         --__hx;
> +       else                    // x < y, x +=3D ulp
> +         ++__hx;
> +      }
> +    else                       // x < 0
> +      {
> +       if (__hy >=3D 0 || __hx > __hy)   // x < y, x -=3D ulp
> +         --__hx;
> +       else                    // x > y, x +=3D ulp
> +         ++__hx;
> +      }
> +    __hy =3D __hx & 0x7f80;
> +    if (__hy >=3D 0x7f80)
> +      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());     // overfl=
ow
> +    else if (__hy < 0x0080)
> +      __builtin_nextafterf(__FLT_MIN__, 0.0f); // underflow
> +    __builtin_memcpy(&__x, &__hx, sizeof(__x));
> +    return __x;
> +  }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* =
__pquo)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo));=
 }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  rint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  round(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  tgamma(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  trunc(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    constexpr _Float32
>    acosh(_Float32 __x)
> @@ -3375,186 +3588,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_truncf128(__x); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  constexpr __gnu_cxx::__bfloat16_t
> -  acosh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  asinh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  atanh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  cbrt(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  erf(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  erfc(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  exp2(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  expm1(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cx=
x::__bfloat16_t __z)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
> -
> -  constexpr int
> -  ilogb(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  lgamma(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
> -
> -  constexpr long long
> -  llrint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
> -
> -  constexpr long long
> -  llround(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log1p(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
> -
> -  // DR 568.
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log2(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  logb(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
> -
> -  constexpr long
> -  lrint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
> -
> -  constexpr long
> -  lround(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  nearbyint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  {
> -    if (std::__is_constant_evaluated())
> -      return __builtin_nextafterf16b(__x, __y);
> -#ifdef __INT16_TYPE__
> -    using __bfloat16_int_type =3D __INT16_TYPE__;
> -#else
> -    using __bfloat16_int_type =3D short int;
> -#endif
> -    __bfloat16_int_type __hx, __hy, __ix, __iy;
> -    __builtin_memcpy(&__hx, &__x, sizeof(__x));
> -    __builtin_memcpy(&__hy, &__y, sizeof(__x));
> -    __ix =3D __hx & 0x7fff;      // |x|
> -    __iy =3D __hy & 0x7fff;      // |y|
> -    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
> -      return __x + __y;
> -    if (__x =3D=3D __y)
> -      return __y;              // x =3D=3D y, return y
> -    if (__ix =3D=3D 0)             // x =3D=3D 0
> -      {
> -       __hy =3D (__hy & 0x8000) | 1;     // return +-__BFLT16_DENORM_MIN=
__
> -       __builtin_memcpy(&__x, &__hy, sizeof(__x));
> -       __builtin_nextafterf(0.0f, 1.0f);       // raise underflow
> -       return __x;
> -      }
> -    if (__hx >=3D 0)             // x > 0
> -      {
> -       if (__hx > __hy)        // x > y, x -=3D ulp
> -         --__hx;
> -       else                    // x < y, x +=3D ulp
> -         ++__hx;
> -      }
> -    else                       // x < 0
> -      {
> -       if (__hy >=3D 0 || __hx > __hy)   // x < y, x -=3D ulp
> -         --__hx;
> -       else                    // x > y, x +=3D ulp
> -         ++__hx;
> -      }
> -    __hy =3D __hx & 0x7f80;
> -    if (__hy >=3D 0x7f80)
> -      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());     // overfl=
ow
> -    else if (__hy < 0x0080)
> -      __builtin_nextafterf(__FLT_MIN__, 0.0f); // underflow
> -    __builtin_memcpy(&__x, &__hx, sizeof(__x));
> -    return __x;
> -  }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* =
__pquo)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo));=
 }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  rint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  round(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  tgamma(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  trunc(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
> -#endif
> -
>
>  #endif // _GLIBCXX_USE_C99_MATH_TR1
>  #endif // C++11
> @@ -3599,12 +3632,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        return std::__hypot3<__type>(__x, __y, __z);
>      }
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline _Float16
>    hypot(_Float16 __x, _Float16 __y, _Float16 __z)
>    { return std::__hypot3<_Float16>(__x, __y, __z); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  inline __gnu_cxx::__bfloat16_t
> +  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_=
cxx::__bfloat16_t __z)
> +  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline _Float32
>    hypot(_Float32 __x, _Float32 __y, _Float32 __z)
> @@ -3625,12 +3675,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return std::__hypot3<_Float128>(__x, __y, __z); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  inline __gnu_cxx::__bfloat16_t
> -  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_=
cxx::__bfloat16_t __z)
> -  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> -#endif
> -
>  #endif // C++17
>
>  #if __cplusplus >=3D 202002L
> @@ -3675,12 +3719,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        return std::__lerp<__type>(__x, __y, __z);
>      }
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline _Float16
>    lerp(_Float16 __x, _Float16 __y, _Float16 __z) noexcept
>    { return std::__lerp<_Float16>(__x, __y, __z); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  inline __gnu_cxx::__bfloat16_t
> +  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_c=
xx::__bfloat16_t __z) noexcept
> +  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline _Float32
>    lerp(_Float32 __x, _Float32 __y, _Float32 __z) noexcept
> @@ -3701,12 +3762,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return std::__lerp<_Float128>(__x, __y, __z); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  inline __gnu_cxx::__bfloat16_t
> -  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_c=
xx::__bfloat16_t __z) noexcept
> -  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> -#endif
> -
>  #endif // C++20
>
>  _GLIBCXX_END_NAMESPACE_VERSION
> diff --git a/libstdc++-v3/include/ext/type_traits.h b/libstdc++-v3/includ=
e/ext/type_traits.h
> index 4466c6712c3..823b9710e0c 100644
> --- a/libstdc++-v3/include/ext/type_traits.h
> +++ b/libstdc++-v3/include/ext/type_traits.h
> @@ -190,12 +190,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      struct __promote<float>
>      { typedef float __type; };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __promote<_Float16>
>      { typedef _Float16 __type; };
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  template<>
> +    struct __promote<__gnu_cxx::__bfloat16_t>
> +  { typedef __gnu_cxx::__bfloat16_t __type; };
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    template<>
>      struct __promote<_Float32>
> @@ -214,12 +231,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      { typedef _Float128 __type; };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  template<>
> -    struct __promote<__gnu_cxx::__bfloat16_t>
> -    { typedef __gnu_cxx::__bfloat16_t __type; };
> -#endif
> -
>  #if __cpp_fold_expressions
>
>    template<typename... _Tp>
> diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/a=
tomic
> index 96e87ded864..5e9e9959270 100644
> --- a/libstdc++-v3/include/std/atomic
> +++ b/libstdc++-v3/include/std/atomic
> @@ -1664,6 +1664,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        using __atomic_float<long double>::operator=3D;
>      };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct atomic<_Float16> : __atomic_float<_Float16>
> @@ -1681,71 +1687,76 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT32_T__
> +#ifdef __STDCPP_BFLOAT16_T__
>    template<>
> -    struct atomic<_Float32> : __atomic_float<_Float32>
> +    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::_=
_bfloat16_t>
>      {
>        atomic() noexcept =3D default;
>
>        constexpr
> -      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
> +      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__g=
nu_cxx::__bfloat16_t>(__fp)
>        { }
>
>        atomic& operator=3D(const atomic&) volatile =3D delete;
>        atomic& operator=3D(const atomic&) =3D delete;
>
> -      using __atomic_float<_Float32>::operator=3D;
> +      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=3D;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT64_T__
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
> +#ifdef __STDCPP_FLOAT32_T__
>    template<>
> -    struct atomic<_Float64> : __atomic_float<_Float64>
> +    struct atomic<_Float32> : __atomic_float<_Float32>
>      {
>        atomic() noexcept =3D default;
>
>        constexpr
> -      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
> +      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
>        { }
>
>        atomic& operator=3D(const atomic&) volatile =3D delete;
>        atomic& operator=3D(const atomic&) =3D delete;
>
> -      using __atomic_float<_Float64>::operator=3D;
> +      using __atomic_float<_Float32>::operator=3D;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT128_T__
> +#ifdef __STDCPP_FLOAT64_T__
>    template<>
> -    struct atomic<_Float128> : __atomic_float<_Float128>
> +    struct atomic<_Float64> : __atomic_float<_Float64>
>      {
>        atomic() noexcept =3D default;
>
>        constexpr
> -      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
> +      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
>        { }
>
>        atomic& operator=3D(const atomic&) volatile =3D delete;
>        atomic& operator=3D(const atomic&) =3D delete;
>
> -      using __atomic_float<_Float128>::operator=3D;
> +      using __atomic_float<_Float64>::operator=3D;
>      };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> +#ifdef __STDCPP_FLOAT128_T__
>    template<>
> -    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::_=
_bfloat16_t>
> +    struct atomic<_Float128> : __atomic_float<_Float128>
>      {
>        atomic() noexcept =3D default;
>
>        constexpr
> -      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__g=
nu_cxx::__bfloat16_t>(__fp)
> +      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
>        { }
>
>        atomic& operator=3D(const atomic&) volatile =3D delete;
>        atomic& operator=3D(const atomic&) =3D delete;
>
> -      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=3D;
> +      using __atomic_float<_Float128>::operator=3D;
>      };
>  #endif
>
> diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std=
/charconv
> index b34d672f5bd..451fb4cba47 100644
> --- a/libstdc++-v3/include/std/charconv
> +++ b/libstdc++-v3/include/std/charconv
> @@ -689,6 +689,12 @@ namespace __detail
>                           float& __value,
>                           chars_format __fmt =3D chars_format::general) n=
oexcept;
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32) \
>      && defined(__cpp_lib_to_chars)
>    inline from_chars_result
> @@ -704,6 +710,27 @@ namespace __detail
>    }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32) \
> +    && defined(__cpp_lib_to_chars)
> +  inline from_chars_result
> +  from_chars(const char* __first, const char* __last,
> +            __gnu_cxx::__bfloat16_t & __value,
> +            chars_format __fmt =3D chars_format::general) noexcept
> +  {
> +    float __val;
> +    from_chars_result __res
> +      =3D __from_chars_bfloat16_t(__first, __last, __val, __fmt);
> +    if (__res.ec =3D=3D errc{})
> +      __value =3D __val;
> +    return __res;
> +  }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline from_chars_result
>    from_chars(const char* __first, const char* __last, _Float32& __value,
> @@ -763,22 +790,6 @@ namespace __detail
>              chars_format __fmt =3D chars_format::general) noexcept;
>  #endif
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32) \
> -    && defined(__cpp_lib_to_chars)
> -  inline from_chars_result
> -  from_chars(const char* __first, const char* __last,
> -            __gnu_cxx::__bfloat16_t & __value,
> -            chars_format __fmt =3D chars_format::general) noexcept
> -  {
> -    float __val;
> -    from_chars_result __res
> -      =3D __from_chars_bfloat16_t(__first, __last, __val, __fmt);
> -    if (__res.ec =3D=3D errc{})
> -      __value =3D __val;
> -    return __res;
> -  }
> -#endif
>  #endif
>
>  #if defined __cpp_lib_to_chars
> @@ -815,6 +826,12 @@ namespace __detail
>                                         float __value,
>                                         chars_format __fmt) noexcept;
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline to_chars_result
>    to_chars(char* __first, char* __last, _Float16 __value) noexcept
> @@ -832,6 +849,29 @@ namespace __detail
>    { return to_chars(__first, __last, float(__value), __fmt, __precision)=
; }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  inline to_chars_result
> +  to_chars(char* __first, char* __last,
> +          __gnu_cxx::__bfloat16_t __value) noexcept
> +  {
> +    return __to_chars_bfloat16_t(__first, __last, float(__value),
> +                                chars_format{});
> +  }
> +  inline to_chars_result
> +  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> +          chars_format __fmt) noexcept
> +  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt)=
; }
> +  inline to_chars_result
> +  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> +          chars_format __fmt, int __precision) noexcept
> +  { return to_chars(__first, __last, float(__value), __fmt, __precision)=
; }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline to_chars_result
>    to_chars(char* __first, char* __last, _Float32 __value) noexcept
> @@ -920,24 +960,6 @@ namespace __detail
>                            chars_format __fmt, int __precision) noexcept;
>  #endif
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  inline to_chars_result
> -  to_chars(char* __first, char* __last,
> -          __gnu_cxx::__bfloat16_t __value) noexcept
> -  {
> -    return __to_chars_bfloat16_t(__first, __last, float(__value),
> -                                chars_format{});
> -  }
> -  inline to_chars_result
> -  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> -          chars_format __fmt) noexcept
> -  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt)=
; }
> -  inline to_chars_result
> -  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> -          chars_format __fmt, int __precision) noexcept
> -  { return to_chars(__first, __last, float(__value), __fmt, __precision)=
; }
> -#endif
>  #endif
>
>  _GLIBCXX_END_NAMESPACE_VERSION
> diff --git a/libstdc++-v3/include/std/complex b/libstdc++-v3/include/std/=
complex
> index 0f5f14c3ddb..2f47036e472 100644
> --- a/libstdc++-v3/include/std/complex
> +++ b/libstdc++-v3/include/std/complex
> @@ -599,6 +599,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  #endif
>
>  #if _GLIBCXX_USE_C99_COMPLEX
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline _Float16
>    __complex_abs(__complex__ _Float16 __z)
> @@ -649,6 +656,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return static_cast<__complex__ _Float16>(__builtin_cpowf(__x, __y));=
 }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  inline __gnu_cxx::__bfloat16_t
> +  __complex_abs(__complex__ decltype(0.0bf16) __z)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  __complex_arg(__complex__ decltype(0.0bf16) __z)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_cos(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__=
z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_cosh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(_=
_z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_exp(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__=
z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_log(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__=
z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_sin(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__=
z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_sinh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(_=
_z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(_=
_z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_tan(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__=
z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_tanh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(_=
_z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_pow(__complex__ decltype(0.0bf16) __x,
> +               __complex__ decltype(0.0bf16) __y)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__=
x,
> +                                                                     __y=
)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline _Float32
>    __complex_abs(__complex__ _Float32 __z) { return __builtin_cabsf(__z);=
 }
> @@ -802,58 +866,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    __complex_pow(__complex__ _Float128 __x, __complex__ _Float128 __y)
>    { return __builtin_cpowf128(__x, __y); }
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  inline __gnu_cxx::__bfloat16_t
> -  __complex_abs(__complex__ decltype(0.0bf16) __z)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  __complex_arg(__complex__ decltype(0.0bf16) __z)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_cos(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__=
z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_cosh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(_=
_z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_exp(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__=
z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_log(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__=
z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_sin(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__=
z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_sinh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(_=
_z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(_=
_z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_tan(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__=
z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_tanh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(_=
_z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_pow(__complex__ decltype(0.0bf16) __x,
> -               __complex__ decltype(0.0bf16) __y)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__=
x,
> -                                                                     __y=
)); }
> -#endif
>  #endif
>
>    // 26.2.7/3 abs(__z):  Returns the magnitude of __z.
> @@ -1804,12 +1816,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      struct __complex_type
>      { };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __complex_type<_Float16>
>      { typedef __complex__ _Float16 type; };
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  template<>
> +    struct __complex_type<__gnu_cxx::__bfloat16_t>
> +    { typedef __complex__ decltype(0.0bf16) type; };
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    template<>
>      struct __complex_type<_Float32>
> @@ -1828,12 +1857,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      { typedef __complex__ _Float128 type; };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  template<>
> -    struct __complex_type<__gnu_cxx::__bfloat16_t>
> -    { typedef __complex__ decltype(0.0bf16) type; };
> -#endif
> -
>    template<typename _Tp>
>      requires requires { typename __complex_type<_Tp>::type; }
>      class complex<_Tp>
> @@ -2022,6 +2045,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      }
>
>  #if _GLIBCXX_USE_C99_COMPLEX_TR1
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline __complex__ _Float16
>    __complex_acos(__complex__ _Float16 __z)
> @@ -2048,6 +2078,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return static_cast<__complex__ _Float16>(__builtin_catanhf(__z)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_acos(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(_=
_z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_asin(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(_=
_z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_atan(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(_=
_z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_acosh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(=
__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_asinh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(=
__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_atanh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(=
__z)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline __complex__ _Float32
>    __complex_acos(__complex__ _Float32 __z)
> @@ -2149,32 +2210,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    __complex_atanh(__complex__ _Float128 __z)
>    { return __builtin_catanhf128(__z); }
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_acos(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(_=
_z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_asin(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(_=
_z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_atan(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(_=
_z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_acosh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(=
__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_asinh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(=
__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_atanh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(=
__z)); }
> -#endif
>  #endif
>
>  #if _GLIBCXX_USE_C99_COMPLEX_TR1
> @@ -2493,12 +2528,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_cprojl(__z.__rep()); }
>
>  #if __cplusplus > 202002L
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline __complex__ _Float16
>    __complex_proj(__complex__ _Float16 __z)
>    { return static_cast<__complex__ _Float16>(__builtin_cprojf(__z)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_proj(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(_=
_z)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>    inline __complex__ _Float32
>    __complex_proj(__complex__ _Float32 __z)
> @@ -2521,12 +2574,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_cprojf128(__z); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_proj(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(_=
_z)); }
> -#endif
> -
>    template<typename _Tp>
>      requires requires { typename __complex_type<_Tp>::type; }
>      inline complex<_Tp>
> diff --git a/libstdc++-v3/include/std/istream b/libstdc++-v3/include/std/=
istream
> index 25d36973f4b..27893a505dd 100644
> --- a/libstdc++-v3/include/std/istream
> +++ b/libstdc++-v3/include/std/istream
> @@ -225,6 +225,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        { return _M_extract(__f); }
>        ///@}
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>        __attribute__((__always_inline__))
>        __istream_type&
> @@ -251,6 +256,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> +      __attribute__((__always_inline__))
> +      __istream_type&
> +      operator>>(__gnu_cxx::__bfloat16_t & __f)
> +      {
> +       float __flt;
> +       __istream_type& __ret =3D _M_extract(__flt);
> +       ios_base::iostate __err =3D ios_base::goodbit;
> +       if (__flt < -__BFLT16_MAX__)
> +         {
> +           __f =3D -__BFLT16_MAX__;
> +           __err =3D ios_base::failbit;
> +         }
> +       else if (__flt > __BFLT16_MAX__)
> +         {
> +           __f =3D __BFLT16_MAX__;
> +           __err =3D ios_base::failbit;
> +         }
> +       else
> +         __f =3D static_cast<__gnu_cxx::__bfloat16_t>(__flt);
> +       if (__err)
> +         this->setstate(__err);
> +       return __ret;
> +      }
> +#endif
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINA=
RY32)
>        __attribute__((__always_inline__))
>        __istream_type&
> @@ -287,32 +322,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BIN=
ARY32)
> -      __attribute__((__always_inline__))
> -      __istream_type&
> -      operator>>(__gnu_cxx::__bfloat16_t & __f)
> -      {
> -       float __flt;
> -       __istream_type& __ret =3D _M_extract(__flt);
> -       ios_base::iostate __err =3D ios_base::goodbit;
> -       if (__flt < -__BFLT16_MAX__)
> -         {
> -           __f =3D -__BFLT16_MAX__;
> -           __err =3D ios_base::failbit;
> -         }
> -       else if (__flt > __BFLT16_MAX__)
> -         {
> -           __f =3D __BFLT16_MAX__;
> -           __err =3D ios_base::failbit;
> -         }
> -       else
> -         __f =3D static_cast<__gnu_cxx::__bfloat16_t>(__flt);
> -       if (__err)
> -         this->setstate(__err);
> -       return __ret;
> -      }
> -#endif
> -
>        /**
>         *  @brief  Basic arithmetic extractors
>         *  @param  __p A variable of pointer type.
> diff --git a/libstdc++-v3/include/std/limits b/libstdc++-v3/include/std/l=
imits
> index 8bafd6fb972..e715cec7dd9 100644
> --- a/libstdc++-v3/include/std/limits
> +++ b/libstdc++-v3/include/std/limits
> @@ -1980,21 +1980,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>         =3D round_to_nearest;                                            =
 \
>      };                                                                  =
       \
>
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>  __glibcxx_float_n(16)
>  #endif
> -#ifdef __STDCPP_FLOAT32_T__
> -__glibcxx_float_n(32)
> -#endif
> -#ifdef __STDCPP_FLOAT64_T__
> -__glibcxx_float_n(64)
> -#endif
> -#ifdef __STDCPP_FLOAT128_T__
> -__glibcxx_float_n(128)
> -#endif
> -#undef __glibcxx_float_n
> -#undef __glibcxx_concat3
> -#undef __glibcxx_concat3_
>
>  #ifdef __STDCPP_BFLOAT16_T__
>    __extension__
> @@ -2071,6 +2066,24 @@ __glibcxx_float_n(128)
>      };
>  #endif
>
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
> +#ifdef __STDCPP_FLOAT32_T__
> +__glibcxx_float_n(32)
> +#endif
> +#ifdef __STDCPP_FLOAT64_T__
> +__glibcxx_float_n(64)
> +#endif
> +#ifdef __STDCPP_FLOAT128_T__
> +__glibcxx_float_n(128)
> +#endif
> +#undef __glibcxx_float_n
> +#undef __glibcxx_concat3
> +#undef __glibcxx_concat3_
> +
>  #endif
>
>  _GLIBCXX_END_NAMESPACE_VERSION
> diff --git a/libstdc++-v3/include/std/numbers b/libstdc++-v3/include/std/=
numbers
> index d9d202f5392..39de869dd0e 100644
> --- a/libstdc++-v3/include/std/numbers
> +++ b/libstdc++-v3/include/std/numbers
> @@ -199,10 +199,21 @@ namespace numbers
>      inline constexpr TYPE phi_v<TYPE>                  \
>        =3D 1.618033988749894848204586834365638118##SUFFIX
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>  __glibcxx_numbers (_Float16, F16);
>  #endif
>
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>  __glibcxx_numbers (_Float32, F32);
>  #endif
> diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/=
ostream
> index 4711b8a3d96..6365fe7649b 100644
> --- a/libstdc++-v3/include/std/ostream
> +++ b/libstdc++-v3/include/std/ostream
> @@ -235,6 +235,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        { return _M_insert(__f); }
>        ///@}
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BIN=
ARY64)
>        __attribute__((__always_inline__))
>        __ostream_type&
> @@ -244,6 +250,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BI=
NARY64)
> +      __attribute__((__always_inline__))
> +      __ostream_type&
> +      operator<<(__gnu_cxx::__bfloat16_t __f)
> +      {
> +       return _M_insert(static_cast<double>(__f));
> +      }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BIN=
ARY64)
>        __attribute__((__always_inline__))
>        __ostream_type&
> @@ -271,15 +291,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BI=
NARY64)
> -      __attribute__((__always_inline__))
> -      __ostream_type&
> -      operator<<(__gnu_cxx::__bfloat16_t __f)
> -      {
> -       return _M_insert(static_cast<double>(__f));
> -      }
> -#endif
> -
>        /**
>         *  @brief  Pointer arithmetic inserters
>         *  @param  __p A variable of pointer type.
> diff --git a/libstdc++-v3/include/std/stdfloat b/libstdc++-v3/include/std=
/stdfloat
> index c39dbb64904..3ea582e1f5d 100644
> --- a/libstdc++-v3/include/std/stdfloat
> +++ b/libstdc++-v3/include/std/stdfloat
> @@ -36,10 +36,25 @@ namespace std
>  {
>  _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>    #ifdef __STDCPP_FLOAT16_T__
>    using float16_t =3D _Float16;
>    #endif
>
> +  #ifdef __STDCPP_BFLOAT16_T__
> +  using bfloat16_t =3D __gnu_cxx::__bfloat16_t;
> +  #endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>    #ifdef __STDCPP_FLOAT32_T__
>    using float32_t =3D _Float32;
>    #endif
> @@ -52,10 +67,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    using float128_t =3D _Float128;
>    #endif
>
> -  #ifdef __STDCPP_BFLOAT16_T__
> -  using bfloat16_t =3D __gnu_cxx::__bfloat16_t;
> -  #endif
> -
>  _GLIBCXX_END_NAMESPACE_VERSION
>  } // namespace std
>  #endif // C++23
> diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/=
std/type_traits
> index 2bd607a8b8f..549d6485708 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -459,12 +459,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      struct __is_floating_point_helper<long double>
>      : public true_type { };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __is_floating_point_helper<_Float16>
>      : public true_type { };
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  template<>
> +    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
> +    : public true_type { };
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    template<>
>      struct __is_floating_point_helper<_Float32>
> @@ -483,12 +500,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      : public true_type { };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  template<>
> -    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
> -    : public true_type { };
> -#endif
> -
>  #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
>    template<>
>      struct __is_floating_point_helper<__float128>
> --
> 2.39.1.388.g2fc9e9ca3c
>


--=20
BR,
Hongtao