From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=p+bv=AM=intel.com=hongtao.liu@sourceware.org>
Received: from mga04.intel.com (mga04.intel.com [192.55.52.120])
	by sourceware.org (Postfix) with ESMTPS id B4C553858C50
	for <gcc-patches@gcc.gnu.org>; Fri, 21 Apr 2023 13:55:57 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org B4C553858C50
Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=intel.com
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=intel.com
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple;
  d=intel.com; i=@intel.com; q=dns/txt; s=Intel;
  t=1682085358; x=1713621358;
  h=from:to:cc:subject:date:message-id:in-reply-to:
   references:mime-version:content-transfer-encoding;
  bh=sYf7xYReuARkci1WvjomS7IYexdzD3pVRfcosvmvS0c=;
  b=nUYNXnXWbPHL/U9E5ABMS27K6VZkfcQFOBR52A5ZONGa03xzM0r2gMFW
   a1kDjVY1mOW6vqx4mLqkQe4Iv2QCPWQSKsj0Z4q4YXh6vTDvRnPtLFvWb
   x2Hdr5PQwNO6DvEZwvS1bIAB8Mu4RI6jLRgWqxLFfVVjsdZR2Uu6tHpm2
   Z8qj0rvtBX7a5Qxg48G4P9N5Dn2v/11Dm+eLKByZJ+JUn6Q6xgBL3QP75
   dh6E31l1LyMvvJm7xVz4mPHkJT8GVnEN8LrXpVOvcFmAMzmQMokdb0D3P
   0V3dmPRsHohNskZnJRV3olYHGMeiAnF75tIwvh7lR45MlAvduJZQEKGPr
   A==;
X-IronPort-AV: E=McAfee;i="6600,9927,10686"; a="344751199"
X-IronPort-AV: E=Sophos;i="5.99,214,1677571200"; 
   d="scan'208";a="344751199"
Received: from orsmga007.jf.intel.com ([10.7.209.58])
  by fmsmga104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 21 Apr 2023 06:55:56 -0700
X-ExtLoop1: 1
X-IronPort-AV: E=McAfee;i="6600,9927,10686"; a="685734041"
X-IronPort-AV: E=Sophos;i="5.99,214,1677571200"; 
   d="scan'208";a="685734041"
Received: from shvmail03.sh.intel.com ([10.239.245.20])
  by orsmga007.jf.intel.com with ESMTP; 21 Apr 2023 06:55:48 -0700
Received: from shliclel4217.sh.intel.com (shliclel4217.sh.intel.com [10.239.240.127])
	by shvmail03.sh.intel.com (Postfix) with ESMTP id 631F6100568A;
	Fri, 21 Apr 2023 21:55:47 +0800 (CST)
From: liuhongt <hongtao.liu@intel.com>
To: gcc-patches@gcc.gnu.org
Cc: crazylht@gmail.com,
	hjl.tools@gmail.com,
	jakub@redhat.com
Subject: [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for target attribute/pragmas.
Date: Fri, 21 Apr 2023 21:53:47 +0800
Message-Id: <20230421135347.2519452-2-hongtao.liu@intel.com>
X-Mailer: git-send-email 2.39.1.388.g2fc9e9ca3c
In-Reply-To: <20230421135347.2519452-1-hongtao.liu@intel.com>
References: <ZEEtov14Ou3YuC0s@tucnak>
 <20230421135347.2519452-1-hongtao.liu@intel.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Spam-Status: No, score=-12.1 required=5.0 tests=BAYES_00,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,SPF_HELO_NONE,SPF_NONE,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org
List-Id: <gcc-patches.gcc.gnu.org>

> But for the C++23 macros, more importantly I think we really should
> also in ix86_target_macros_internal add
>   if (c_dialect_cxx ()
>       && cxx_dialect > cxx20
>       && (isa_flag & OPTION_MASK_ISA_SSE2))
>     {
>       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
>       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
>     }
> plus associated libstdc++ changes.  It can be done incrementally though.
Changed except for one place in libsupc++/compare, it's inside a function
where pragma can be added. Not sure if this inconsistency will cause any
issue.

#ifdef __STDCPP_BFLOAT16_T__
          if constexpr (__is_same(_Tp, decltype(0.0bf16)))
            return _Bfloat16;
#endif

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Successfully cross-build i686-linux-gnu.
Ok for trunk?

def_or_undef  target macros based on currently active ISA in pragmas
to also do that for __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for
C++, and change libstdc++ such that for x86 it adds similarly to x86
intrin headers something like around std::float16_t/std::bfloat16_t stuff.

gcc/ChangeLog:

	PR target/109504
	* config/i386/i386-c.cc (ix86_target_macros_internal):
	def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__.

libstdc++-v3/ChangeLog:

	* include/bits/c++config: Add #pragma GCC target("sse2") for
	_Float16 and bfloat16_t when __SSE2__ is not available.
	* include/bits/cpp_type_traits.h: Ditto.
	* include/bits/std_abs.h: Ditto.
	* include/c_global/cmath: Ditto.
	* include/ext/type_traits.h: Ditto.
	* include/std/atomic: Ditto.
	* include/std/charconv: Ditto.
	* include/std/complex: Ditto.
	* include/std/istream: Ditto.
	* include/std/limits: Ditto.
	* include/std/numbers: Ditto.
	* include/std/ostream: Ditto.
	* include/std/stdfloat: Ditto.
	* include/std/type_traits: Ditto.
---
 gcc/config/i386/i386-c.cc                   |   9 +-
 libstdc++-v3/include/bits/c++config         |  11 +
 libstdc++-v3/include/bits/cpp_type_traits.h |  27 +-
 libstdc++-v3/include/bits/std_abs.h         |  23 +-
 libstdc++-v3/include/c_global/cmath         | 733 +++++++++++---------
 libstdc++-v3/include/ext/type_traits.h      |  23 +-
 libstdc++-v3/include/std/atomic             |  43 +-
 libstdc++-v3/include/std/charconv           |  90 ++-
 libstdc++-v3/include/std/complex            | 227 +++---
 libstdc++-v3/include/std/istream            |  61 +-
 libstdc++-v3/include/std/limits             |  37 +-
 libstdc++-v3/include/std/numbers            |  11 +
 libstdc++-v3/include/std/ostream            |  29 +-
 libstdc++-v3/include/std/stdfloat           |  19 +-
 libstdc++-v3/include/std/type_traits        |  23 +-
 15 files changed, 809 insertions(+), 557 deletions(-)

diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 2f83c9981e1..bcc17263e28 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -492,7 +492,14 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
   if (isa_flag & OPTION_MASK_ISA_SSE)
     def_or_undef (parse_in, "__SSE__");
   if (isa_flag & OPTION_MASK_ISA_SSE2)
-    def_or_undef (parse_in, "__SSE2__");
+    {
+      def_or_undef (parse_in, "__SSE2__");
+      if (c_dialect_cxx () && cxx_dialect > cxx20)
+	{
+	  def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
+	  def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
+	}
+    }
   if (isa_flag & OPTION_MASK_ISA_SSE3)
     def_or_undef (parse_in, "__SSE3__");
   if (isa_flag & OPTION_MASK_ISA_SSSE3)
diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config
index 13892787e09..c858497fc6e 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -820,6 +820,12 @@ namespace std
 # define _GLIBCXX_LDOUBLE_IS_IEEE_BINARY128 1
 #endif
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_BFLOAT16_T__
 namespace __gnu_cxx
 {
@@ -827,6 +833,11 @@ namespace __gnu_cxx
 }
 #endif
 
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __has_builtin
 # ifdef __is_identifier
 // Intel and older Clang require !__is_identifier for some built-ins:
diff --git a/libstdc++-v3/include/bits/cpp_type_traits.h b/libstdc++-v3/include/bits/cpp_type_traits.h
index 4312f32a4e0..cadd5ca4fde 100644
--- a/libstdc++-v3/include/bits/cpp_type_traits.h
+++ b/libstdc++-v3/include/bits/cpp_type_traits.h
@@ -315,6 +315,12 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
       typedef __true_type __type;
     };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __is_floating<_Float16>
@@ -324,36 +330,41 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
     };
 #endif
 
-#ifdef __STDCPP_FLOAT32_T__
+#ifdef __STDCPP_BFLOAT16_T__
   template<>
-    struct __is_floating<_Float32>
+    struct __is_floating<__gnu_cxx::__bfloat16_t>
     {
       enum { __value = 1 };
       typedef __true_type __type;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT64_T__
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
+#ifdef __STDCPP_FLOAT32_T__
   template<>
-    struct __is_floating<_Float64>
+    struct __is_floating<_Float32>
     {
       enum { __value = 1 };
       typedef __true_type __type;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT128_T__
+#ifdef __STDCPP_FLOAT64_T__
   template<>
-    struct __is_floating<_Float128>
+    struct __is_floating<_Float64>
     {
       enum { __value = 1 };
       typedef __true_type __type;
     };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
+#ifdef __STDCPP_FLOAT128_T__
   template<>
-    struct __is_floating<__gnu_cxx::__bfloat16_t>
+    struct __is_floating<_Float128>
     {
       enum { __value = 1 };
       typedef __true_type __type;
diff --git a/libstdc++-v3/include/bits/std_abs.h b/libstdc++-v3/include/bits/std_abs.h
index 1bb7ffbc2da..0423909e8c8 100644
--- a/libstdc++-v3/include/bits/std_abs.h
+++ b/libstdc++-v3/include/bits/std_abs.h
@@ -97,12 +97,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   abs(__GLIBCXX_TYPE_INT_N_3 __x) { return __x >= 0 ? __x : -__x; }
 #endif
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float16
   abs(_Float16 __x)
   { return _Float16(__builtin_fabsf(__x)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  constexpr __gnu_cxx::__bfloat16_t
+  abs(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float32
   abs(_Float32 __x)
@@ -125,12 +142,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_fabsf128(__x); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  constexpr __gnu_cxx::__bfloat16_t
-  abs(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
-#endif
-
 #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
   __extension__ inline _GLIBCXX_CONSTEXPR
   __float128
diff --git a/libstdc++-v3/include/c_global/cmath b/libstdc++-v3/include/c_global/cmath
index 568eb354c2d..6bf3a5eade2 100644
--- a/libstdc++-v3/include/c_global/cmath
+++ b/libstdc++-v3/include/c_global/cmath
@@ -515,6 +515,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     tanh(_Tp __x)
     { return __builtin_tanh(__x); }
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float16
   acos(_Float16 __x)
@@ -609,6 +615,105 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return _Float16(__builtin_tanhf(__x)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  constexpr __gnu_cxx::__bfloat16_t
+  acos(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  asin(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  atan(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  ceil(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  cos(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  cosh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  exp(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fabs(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  floor(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
+  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
+  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  log(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  log10(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
+  {
+    float __i, __ret = __builtin_modff(__x, &__i);
+    *__iptr = __gnu_cxx::__bfloat16_t(__i);
+    return __gnu_cxx::__bfloat16_t(__ret);
+  }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  sin(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  sinh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  sqrt(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  tan(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  tanh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float32
   acos(_Float32 __x)
@@ -979,100 +1084,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_tanhf128(__x); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  constexpr __gnu_cxx::__bfloat16_t
-  acos(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  asin(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  atan(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  ceil(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  cos(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  cosh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  exp(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fabs(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  floor(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
-  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
-  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  log(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  log10(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
-  {
-    float __i, __ret = __builtin_modff(__x, &__i);
-    *__iptr = __gnu_cxx::__bfloat16_t(__i);
-    return __gnu_cxx::__bfloat16_t(__ret);
-  }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  sin(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  sinh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  sqrt(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  tan(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  tanh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
-#endif
-
 #if _GLIBCXX_USE_C99_MATH
 #if !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC
 
@@ -1507,6 +1518,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #endif // C++11
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   constexpr int
   fpclassify(_Float16 __x)
@@ -1558,6 +1575,62 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_isunordered(__x, __y); }
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  constexpr int
+  fpclassify(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
+				FP_SUBNORMAL, FP_ZERO, __x); }
+
+  constexpr bool
+  isfinite(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isfinite(__x); }
+
+  constexpr bool
+  isinf(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isinf(__x); }
+
+  constexpr bool
+  isnan(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isnan(__x); }
+
+  constexpr bool
+  isnormal(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isnormal(__x); }
+
+  constexpr bool
+  signbit(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_signbit(__x); }
+
+  constexpr bool
+  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isgreater(__x, __y); }
+
+  constexpr bool
+  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isgreaterequal(__x, __y); }
+
+  constexpr bool
+  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isless(__x, __y); }
+
+  constexpr bool
+  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_islessequal(__x, __y); }
+
+  constexpr bool
+  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_islessgreater(__x, __y); }
+
+  constexpr bool
+  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isunordered(__x, __y); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   constexpr int
   fpclassify(_Float32 __x)
@@ -1711,59 +1784,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_isunordered(__x, __y); }
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  constexpr int
-  fpclassify(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
-				FP_SUBNORMAL, FP_ZERO, __x); }
-
-  constexpr bool
-  isfinite(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isfinite(__x); }
-
-  constexpr bool
-  isinf(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isinf(__x); }
-
-  constexpr bool
-  isnan(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isnan(__x); }
-
-  constexpr bool
-  isnormal(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isnormal(__x); }
-
-  constexpr bool
-  signbit(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_signbit(__x); }
-
-  constexpr bool
-  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isgreater(__x, __y); }
-
-  constexpr bool
-  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isgreaterequal(__x, __y); }
-
-  constexpr bool
-  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isless(__x, __y); }
-
-  constexpr bool
-  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_islessequal(__x, __y); }
-
-  constexpr bool
-  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_islessgreater(__x, __y); }
-
-  constexpr bool
-  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isunordered(__x, __y); }
-#endif
-
-#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
-#endif /* _GLIBCXX_USE_C99_MATH */
+#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
+#endif /* _GLIBCXX_USE_C99_MATH */
 
 #if __cplusplus >= 201103L
 
@@ -2657,6 +2679,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { return __builtin_trunc(__x); }
 #endif
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float16
   acosh(_Float16 __x)
@@ -2837,6 +2865,191 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return _Float16(__builtin_truncf(__x)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  constexpr __gnu_cxx::__bfloat16_t
+  acosh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  asinh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  atanh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  cbrt(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  erf(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  erfc(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  exp2(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  expm1(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
+
+  constexpr int
+  ilogb(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  lgamma(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
+
+  constexpr long long
+  llrint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
+
+  constexpr long long
+  llround(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  log1p(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
+
+  // DR 568.
+  constexpr __gnu_cxx::__bfloat16_t
+  log2(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  logb(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
+
+  constexpr long
+  lrint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
+
+  constexpr long
+  lround(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  nearbyint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  {
+    if (std::__is_constant_evaluated())
+      return __builtin_nextafterf16b(__x, __y);
+#ifdef __INT16_TYPE__
+    using __bfloat16_int_type = __INT16_TYPE__;
+#else
+    using __bfloat16_int_type = short int;
+#endif
+    __bfloat16_int_type __hx, __hy, __ix, __iy;
+    __builtin_memcpy(&__hx, &__x, sizeof(__x));
+    __builtin_memcpy(&__hy, &__y, sizeof(__x));
+    __ix = __hx & 0x7fff;	// |x|
+    __iy = __hy & 0x7fff;	// |y|
+    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
+      return __x + __y;
+    if (__x == __y)
+      return __y;		// x == y, return y
+    if (__ix == 0)		// x == 0
+      {
+	__hy = (__hy & 0x8000) | 1;	// return +-__BFLT16_DENORM_MIN__
+	__builtin_memcpy(&__x, &__hy, sizeof(__x));
+	__builtin_nextafterf(0.0f, 1.0f);	// raise underflow
+	return __x;
+      }
+    if (__hx >= 0)		// x > 0
+      {
+	if (__hx > __hy)	// x > y, x -= ulp
+	  --__hx;
+	else			// x < y, x += ulp
+	  ++__hx;
+      }
+    else			// x < 0
+      {
+	if (__hy >= 0 || __hx > __hy)	// x < y, x -= ulp
+	  --__hx;
+	else			// x > y, x += ulp
+	  ++__hx;
+      }
+    __hy = __hx & 0x7f80;
+    if (__hy >= 0x7f80)
+      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());	// overflow
+    else if (__hy < 0x0080)
+      __builtin_nextafterf(__FLT_MIN__, 0.0f);	// underflow
+    __builtin_memcpy(&__x, &__hx, sizeof(__x));
+    return __x;
+  }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* __pquo)
+  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  rint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  round(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
+  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
+  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  tgamma(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  trunc(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float32
   acosh(_Float32 __x)
@@ -3375,186 +3588,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_truncf128(__x); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  constexpr __gnu_cxx::__bfloat16_t
-  acosh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  asinh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  atanh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  cbrt(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  erf(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  erfc(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  exp2(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  expm1(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
-
-  constexpr int
-  ilogb(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  lgamma(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
-
-  constexpr long long
-  llrint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
-
-  constexpr long long
-  llround(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  log1p(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
-
-  // DR 568.
-  constexpr __gnu_cxx::__bfloat16_t
-  log2(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  logb(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
-
-  constexpr long
-  lrint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
-
-  constexpr long
-  lround(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  nearbyint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  {
-    if (std::__is_constant_evaluated())
-      return __builtin_nextafterf16b(__x, __y);
-#ifdef __INT16_TYPE__
-    using __bfloat16_int_type = __INT16_TYPE__;
-#else
-    using __bfloat16_int_type = short int;
-#endif
-    __bfloat16_int_type __hx, __hy, __ix, __iy;
-    __builtin_memcpy(&__hx, &__x, sizeof(__x));
-    __builtin_memcpy(&__hy, &__y, sizeof(__x));
-    __ix = __hx & 0x7fff;	// |x|
-    __iy = __hy & 0x7fff;	// |y|
-    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
-      return __x + __y;
-    if (__x == __y)
-      return __y;		// x == y, return y
-    if (__ix == 0)		// x == 0
-      {
-	__hy = (__hy & 0x8000) | 1;	// return +-__BFLT16_DENORM_MIN__
-	__builtin_memcpy(&__x, &__hy, sizeof(__x));
-	__builtin_nextafterf(0.0f, 1.0f);	// raise underflow
-	return __x;
-      }
-    if (__hx >= 0)		// x > 0
-      {
-	if (__hx > __hy)	// x > y, x -= ulp
-	  --__hx;
-	else			// x < y, x += ulp
-	  ++__hx;
-      }
-    else			// x < 0
-      {
-	if (__hy >= 0 || __hx > __hy)	// x < y, x -= ulp
-	  --__hx;
-	else			// x > y, x += ulp
-	  ++__hx;
-      }
-    __hy = __hx & 0x7f80;
-    if (__hy >= 0x7f80)
-      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());	// overflow
-    else if (__hy < 0x0080)
-      __builtin_nextafterf(__FLT_MIN__, 0.0f);	// underflow
-    __builtin_memcpy(&__x, &__hx, sizeof(__x));
-    return __x;
-  }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* __pquo)
-  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  rint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  round(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
-  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
-  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  tgamma(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  trunc(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
-#endif
-
 
 #endif // _GLIBCXX_USE_C99_MATH_TR1
 #endif // C++11
@@ -3599,12 +3632,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return std::__hypot3<__type>(__x, __y, __z);
     }
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float16
   hypot(_Float16 __x, _Float16 __y, _Float16 __z)
   { return std::__hypot3<_Float16>(__x, __y, __z); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __gnu_cxx::__bfloat16_t
+  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
+  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float32
   hypot(_Float32 __x, _Float32 __y, _Float32 __z)
@@ -3625,12 +3675,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return std::__hypot3<_Float128>(__x, __y, __z); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __gnu_cxx::__bfloat16_t
-  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
-  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
-#endif
-
 #endif // C++17
 
 #if __cplusplus >= 202002L
@@ -3675,12 +3719,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return std::__lerp<__type>(__x, __y, __z);
     }
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float16
   lerp(_Float16 __x, _Float16 __y, _Float16 __z) noexcept
   { return std::__lerp<_Float16>(__x, __y, __z); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __gnu_cxx::__bfloat16_t
+  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z) noexcept
+  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float32
   lerp(_Float32 __x, _Float32 __y, _Float32 __z) noexcept
@@ -3701,12 +3762,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return std::__lerp<_Float128>(__x, __y, __z); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __gnu_cxx::__bfloat16_t
-  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z) noexcept
-  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
-#endif
-
 #endif // C++20
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/ext/type_traits.h b/libstdc++-v3/include/ext/type_traits.h
index 4466c6712c3..823b9710e0c 100644
--- a/libstdc++-v3/include/ext/type_traits.h
+++ b/libstdc++-v3/include/ext/type_traits.h
@@ -190,12 +190,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     struct __promote<float>
     { typedef float __type; };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __promote<_Float16>
     { typedef _Float16 __type; };
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  template<>
+    struct __promote<__gnu_cxx::__bfloat16_t>
+  { typedef __gnu_cxx::__bfloat16_t __type; };
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   template<>
     struct __promote<_Float32>
@@ -214,12 +231,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { typedef _Float128 __type; };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  template<>
-    struct __promote<__gnu_cxx::__bfloat16_t>
-    { typedef __gnu_cxx::__bfloat16_t __type; };
-#endif
-
 #if __cpp_fold_expressions
 
   template<typename... _Tp>
diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/atomic
index 96e87ded864..5e9e9959270 100644
--- a/libstdc++-v3/include/std/atomic
+++ b/libstdc++-v3/include/std/atomic
@@ -1664,6 +1664,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       using __atomic_float<long double>::operator=;
     };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct atomic<_Float16> : __atomic_float<_Float16>
@@ -1681,71 +1687,76 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     };
 #endif
 
-#ifdef __STDCPP_FLOAT32_T__
+#ifdef __STDCPP_BFLOAT16_T__
   template<>
-    struct atomic<_Float32> : __atomic_float<_Float32>
+    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::__bfloat16_t>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
+      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__gnu_cxx::__bfloat16_t>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<_Float32>::operator=;
+      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT64_T__
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
+#ifdef __STDCPP_FLOAT32_T__
   template<>
-    struct atomic<_Float64> : __atomic_float<_Float64>
+    struct atomic<_Float32> : __atomic_float<_Float32>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
+      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<_Float64>::operator=;
+      using __atomic_float<_Float32>::operator=;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT128_T__
+#ifdef __STDCPP_FLOAT64_T__
   template<>
-    struct atomic<_Float128> : __atomic_float<_Float128>
+    struct atomic<_Float64> : __atomic_float<_Float64>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
+      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<_Float128>::operator=;
+      using __atomic_float<_Float64>::operator=;
     };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
+#ifdef __STDCPP_FLOAT128_T__
   template<>
-    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::__bfloat16_t>
+    struct atomic<_Float128> : __atomic_float<_Float128>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__gnu_cxx::__bfloat16_t>(__fp)
+      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=;
+      using __atomic_float<_Float128>::operator=;
     };
 #endif
 
diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std/charconv
index b34d672f5bd..451fb4cba47 100644
--- a/libstdc++-v3/include/std/charconv
+++ b/libstdc++-v3/include/std/charconv
@@ -689,6 +689,12 @@ namespace __detail
 			  float& __value,
 			  chars_format __fmt = chars_format::general) noexcept;
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
     && defined(__cpp_lib_to_chars)
   inline from_chars_result
@@ -704,6 +710,27 @@ namespace __detail
   }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
+    && defined(__cpp_lib_to_chars)
+  inline from_chars_result
+  from_chars(const char* __first, const char* __last,
+	     __gnu_cxx::__bfloat16_t & __value,
+	     chars_format __fmt = chars_format::general) noexcept
+  {
+    float __val;
+    from_chars_result __res
+      = __from_chars_bfloat16_t(__first, __last, __val, __fmt);
+    if (__res.ec == errc{})
+      __value = __val;
+    return __res;
+  }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline from_chars_result
   from_chars(const char* __first, const char* __last, _Float32& __value,
@@ -763,22 +790,6 @@ namespace __detail
 	     chars_format __fmt = chars_format::general) noexcept;
 #endif
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
-    && defined(__cpp_lib_to_chars)
-  inline from_chars_result
-  from_chars(const char* __first, const char* __last,
-	     __gnu_cxx::__bfloat16_t & __value,
-	     chars_format __fmt = chars_format::general) noexcept
-  {
-    float __val;
-    from_chars_result __res
-      = __from_chars_bfloat16_t(__first, __last, __val, __fmt);
-    if (__res.ec == errc{})
-      __value = __val;
-    return __res;
-  }
-#endif
 #endif
 
 #if defined __cpp_lib_to_chars
@@ -815,6 +826,12 @@ namespace __detail
 					float __value,
 					chars_format __fmt) noexcept;
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline to_chars_result
   to_chars(char* __first, char* __last, _Float16 __value) noexcept
@@ -832,6 +849,29 @@ namespace __detail
   { return to_chars(__first, __last, float(__value), __fmt, __precision); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline to_chars_result
+  to_chars(char* __first, char* __last,
+	   __gnu_cxx::__bfloat16_t __value) noexcept
+  {
+    return __to_chars_bfloat16_t(__first, __last, float(__value),
+				 chars_format{});
+  }
+  inline to_chars_result
+  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
+	   chars_format __fmt) noexcept
+  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt); }
+  inline to_chars_result
+  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
+	   chars_format __fmt, int __precision) noexcept
+  { return to_chars(__first, __last, float(__value), __fmt, __precision); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline to_chars_result
   to_chars(char* __first, char* __last, _Float32 __value) noexcept
@@ -920,24 +960,6 @@ namespace __detail
 			   chars_format __fmt, int __precision) noexcept;
 #endif
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline to_chars_result
-  to_chars(char* __first, char* __last,
-	   __gnu_cxx::__bfloat16_t __value) noexcept
-  {
-    return __to_chars_bfloat16_t(__first, __last, float(__value),
-				 chars_format{});
-  }
-  inline to_chars_result
-  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
-	   chars_format __fmt) noexcept
-  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt); }
-  inline to_chars_result
-  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
-	   chars_format __fmt, int __precision) noexcept
-  { return to_chars(__first, __last, float(__value), __fmt, __precision); }
-#endif
 #endif
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/std/complex b/libstdc++-v3/include/std/complex
index 0f5f14c3ddb..2f47036e472 100644
--- a/libstdc++-v3/include/std/complex
+++ b/libstdc++-v3/include/std/complex
@@ -599,6 +599,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
 #if _GLIBCXX_USE_C99_COMPLEX
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float16
   __complex_abs(__complex__ _Float16 __z)
@@ -649,6 +656,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return static_cast<__complex__ _Float16>(__builtin_cpowf(__x, __y)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __gnu_cxx::__bfloat16_t
+  __complex_abs(__complex__ decltype(0.0bf16) __z)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  __complex_arg(__complex__ decltype(0.0bf16) __z)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_cos(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_cosh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_exp(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_log(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_sin(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_sinh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_tan(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_tanh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_pow(__complex__ decltype(0.0bf16) __x,
+		__complex__ decltype(0.0bf16) __y)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__x,
+								      __y)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float32
   __complex_abs(__complex__ _Float32 __z) { return __builtin_cabsf(__z); }
@@ -802,58 +866,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __complex_pow(__complex__ _Float128 __x, __complex__ _Float128 __y)
   { return __builtin_cpowf128(__x, __y); }
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __gnu_cxx::__bfloat16_t
-  __complex_abs(__complex__ decltype(0.0bf16) __z)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  __complex_arg(__complex__ decltype(0.0bf16) __z)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_cos(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_cosh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_exp(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_log(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_sin(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_sinh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_tan(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_tanh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_pow(__complex__ decltype(0.0bf16) __x,
-		__complex__ decltype(0.0bf16) __y)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__x,
-								      __y)); }
-#endif
 #endif
 
   // 26.2.7/3 abs(__z):  Returns the magnitude of __z.
@@ -1804,12 +1816,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     struct __complex_type
     { };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __complex_type<_Float16>
     { typedef __complex__ _Float16 type; };
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  template<>
+    struct __complex_type<__gnu_cxx::__bfloat16_t>
+    { typedef __complex__ decltype(0.0bf16) type; };
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   template<>
     struct __complex_type<_Float32>
@@ -1828,12 +1857,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { typedef __complex__ _Float128 type; };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  template<>
-    struct __complex_type<__gnu_cxx::__bfloat16_t>
-    { typedef __complex__ decltype(0.0bf16) type; };
-#endif
-
   template<typename _Tp>
     requires requires { typename __complex_type<_Tp>::type; }
     class complex<_Tp>
@@ -2022,6 +2045,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
 #if _GLIBCXX_USE_C99_COMPLEX_TR1
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float16
   __complex_acos(__complex__ _Float16 __z)
@@ -2048,6 +2078,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return static_cast<__complex__ _Float16>(__builtin_catanhf(__z)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __complex__ decltype(0.0bf16)
+  __complex_acos(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_asin(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_atan(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_acosh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_asinh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_atanh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(__z)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float32
   __complex_acos(__complex__ _Float32 __z)
@@ -2149,32 +2210,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __complex_atanh(__complex__ _Float128 __z)
   { return __builtin_catanhf128(__z); }
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __complex__ decltype(0.0bf16)
-  __complex_acos(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_asin(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_atan(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_acosh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_asinh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_atanh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(__z)); }
-#endif
 #endif
 
 #if _GLIBCXX_USE_C99_COMPLEX_TR1
@@ -2493,12 +2528,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_cprojl(__z.__rep()); }
 
 #if __cplusplus > 202002L
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float16
   __complex_proj(__complex__ _Float16 __z)
   { return static_cast<__complex__ _Float16>(__builtin_cprojf(__z)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __complex__ decltype(0.0bf16)
+  __complex_proj(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(__z)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float32
   __complex_proj(__complex__ _Float32 __z)
@@ -2521,12 +2574,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_cprojf128(__z); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __complex__ decltype(0.0bf16)
-  __complex_proj(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(__z)); }
-#endif
-
   template<typename _Tp>
     requires requires { typename __complex_type<_Tp>::type; }
     inline complex<_Tp>
diff --git a/libstdc++-v3/include/std/istream b/libstdc++-v3/include/std/istream
index 25d36973f4b..27893a505dd 100644
--- a/libstdc++-v3/include/std/istream
+++ b/libstdc++-v3/include/std/istream
@@ -225,6 +225,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { return _M_extract(__f); }
       ///@}
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
       __attribute__((__always_inline__))
       __istream_type&
@@ -251,6 +256,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+      __attribute__((__always_inline__))
+      __istream_type&
+      operator>>(__gnu_cxx::__bfloat16_t & __f)
+      {
+	float __flt;
+	__istream_type& __ret = _M_extract(__flt);
+	ios_base::iostate __err = ios_base::goodbit;
+	if (__flt < -__BFLT16_MAX__)
+	  {
+	    __f = -__BFLT16_MAX__;
+	    __err = ios_base::failbit;
+	  }
+	else if (__flt > __BFLT16_MAX__)
+	  {
+	    __f = __BFLT16_MAX__;
+	    __err = ios_base::failbit;
+	  }
+	else
+	  __f = static_cast<__gnu_cxx::__bfloat16_t>(__flt);
+	if (__err)
+	  this->setstate(__err);
+	return __ret;
+      }
+#endif
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
       __attribute__((__always_inline__))
       __istream_type&
@@ -287,32 +322,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-      __attribute__((__always_inline__))
-      __istream_type&
-      operator>>(__gnu_cxx::__bfloat16_t & __f)
-      {
-	float __flt;
-	__istream_type& __ret = _M_extract(__flt);
-	ios_base::iostate __err = ios_base::goodbit;
-	if (__flt < -__BFLT16_MAX__)
-	  {
-	    __f = -__BFLT16_MAX__;
-	    __err = ios_base::failbit;
-	  }
-	else if (__flt > __BFLT16_MAX__)
-	  {
-	    __f = __BFLT16_MAX__;
-	    __err = ios_base::failbit;
-	  }
-	else
-	  __f = static_cast<__gnu_cxx::__bfloat16_t>(__flt);
-	if (__err)
-	  this->setstate(__err);
-	return __ret;
-      }
-#endif
-
       /**
        *  @brief  Basic arithmetic extractors
        *  @param  __p A variable of pointer type.
diff --git a/libstdc++-v3/include/std/limits b/libstdc++-v3/include/std/limits
index 8bafd6fb972..e715cec7dd9 100644
--- a/libstdc++-v3/include/std/limits
+++ b/libstdc++-v3/include/std/limits
@@ -1980,21 +1980,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	= round_to_nearest; 						\
     }; 									\
 
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
 __glibcxx_float_n(16)
 #endif
-#ifdef __STDCPP_FLOAT32_T__
-__glibcxx_float_n(32)
-#endif
-#ifdef __STDCPP_FLOAT64_T__
-__glibcxx_float_n(64)
-#endif
-#ifdef __STDCPP_FLOAT128_T__
-__glibcxx_float_n(128)
-#endif
-#undef __glibcxx_float_n
-#undef __glibcxx_concat3
-#undef __glibcxx_concat3_
 
 #ifdef __STDCPP_BFLOAT16_T__
   __extension__
@@ -2071,6 +2066,24 @@ __glibcxx_float_n(128)
     };
 #endif
 
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
+#ifdef __STDCPP_FLOAT32_T__
+__glibcxx_float_n(32)
+#endif
+#ifdef __STDCPP_FLOAT64_T__
+__glibcxx_float_n(64)
+#endif
+#ifdef __STDCPP_FLOAT128_T__
+__glibcxx_float_n(128)
+#endif
+#undef __glibcxx_float_n
+#undef __glibcxx_concat3
+#undef __glibcxx_concat3_
+
 #endif
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/std/numbers b/libstdc++-v3/include/std/numbers
index d9d202f5392..39de869dd0e 100644
--- a/libstdc++-v3/include/std/numbers
+++ b/libstdc++-v3/include/std/numbers
@@ -199,10 +199,21 @@ namespace numbers
     inline constexpr TYPE phi_v<TYPE>			\
       = 1.618033988749894848204586834365638118##SUFFIX
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
 __glibcxx_numbers (_Float16, F16);
 #endif
 
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
 __glibcxx_numbers (_Float32, F32);
 #endif
diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/ostream
index 4711b8a3d96..6365fe7649b 100644
--- a/libstdc++-v3/include/std/ostream
+++ b/libstdc++-v3/include/std/ostream
@@ -235,6 +235,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { return _M_insert(__f); }
       ///@}
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
       __attribute__((__always_inline__))
       __ostream_type&
@@ -244,6 +250,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
+      __attribute__((__always_inline__))
+      __ostream_type&
+      operator<<(__gnu_cxx::__bfloat16_t __f)
+      {
+	return _M_insert(static_cast<double>(__f));
+      }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
       __attribute__((__always_inline__))
       __ostream_type&
@@ -271,15 +291,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
-      __attribute__((__always_inline__))
-      __ostream_type&
-      operator<<(__gnu_cxx::__bfloat16_t __f)
-      {
-	return _M_insert(static_cast<double>(__f));
-      }
-#endif
-
       /**
        *  @brief  Pointer arithmetic inserters
        *  @param  __p A variable of pointer type.
diff --git a/libstdc++-v3/include/std/stdfloat b/libstdc++-v3/include/std/stdfloat
index c39dbb64904..3ea582e1f5d 100644
--- a/libstdc++-v3/include/std/stdfloat
+++ b/libstdc++-v3/include/std/stdfloat
@@ -36,10 +36,25 @@ namespace std
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
   #ifdef __STDCPP_FLOAT16_T__
   using float16_t = _Float16;
   #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  using bfloat16_t = __gnu_cxx::__bfloat16_t;
+  #endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
   #ifdef __STDCPP_FLOAT32_T__
   using float32_t = _Float32;
   #endif
@@ -52,10 +67,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using float128_t = _Float128;
   #endif
 
-  #ifdef __STDCPP_BFLOAT16_T__
-  using bfloat16_t = __gnu_cxx::__bfloat16_t;
-  #endif
-
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
 #endif // C++23
diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..549d6485708 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -459,12 +459,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     struct __is_floating_point_helper<long double>
     : public true_type { };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __is_floating_point_helper<_Float16>
     : public true_type { };
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  template<>
+    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
+    : public true_type { };
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   template<>
     struct __is_floating_point_helper<_Float32>
@@ -483,12 +500,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     : public true_type { };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  template<>
-    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
-    : public true_type { };
-#endif
-
 #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
   template<>
     struct __is_floating_point_helper<__float128>
-- 
2.39.1.388.g2fc9e9ca3c