[PATCH] [i386] Support type _Float16/__bf16 independent of SSE2.

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] [i386] Support type _Float16/__bf16 independent of SSE2.
@ 2023-04-19  7:15 liuhongt
  2023-04-19 16:43 ` Mike Stump
  2023-04-20 12:18 ` Jakub Jelinek
  0 siblings, 2 replies; 12+ messages in thread
From: liuhongt @ 2023-04-19  7:15 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, ubizjak, jakub

-----Jakub's comments----------
That said, these fundamental types whose presence/absence depends on ISA flags
are quite problematic IMHO, as they are incompatible with the target
attribute/pragmas. Whether they are available or not available depends on
whether in this case SSE2 is enabled during compiler initialization (aka after
parsing command line options) and then they are available or unavailable to
everything else based on that.
-----comments end----------

Enable _Float16 and __bf16 all the time but issue errors when the
types are used in conversion, unary operation, binary operation,
parameter passing or value return when TARGET_SSE2 is not available.

Also undef macros which are used by libgcc/libstdc++ to check the
backend support of the _Float16/__bf16 types when TARGET_SSE2 is not
available.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Also successfully cross-built target i686-linux-gnu.
Ok for trunk?

gcc/ChangeLog:

	PR target/109504
	* config/i386/i386-builtins.cc
	(ix86_register_float16_builtin_type): Remove TARGET_SSE2.
	(ix86_register_bf16_builtin_type): Ditto.
	* config/i386/i386-c.cc (ix86_target_macros): When TARGET_SSE2
	isn't available, undef the macros which are used to check the
	backend support of the _Float16/__bf16 types when building
	libstdc++ and libgcc.
	* config/i386/i386.cc (construct_container): Issue errors for
	HFmode/BFmode when TARGET_SSE2 is not available.
	(function_value_32): Ditto.
	(ix86_scalar_mode_supported_p): Remove TARGET_SSE2 for HFmode/BFmode.
	(ix86_libgcc_floating_mode_supported_p): Ditto.
	(ix86_emit_support_tinfos): Adjust codes.
	(ix86_invalid_conversion): New function.
	(ix86_invalid_unary_op): Ditto.
	(ix86_invalid_binary_op): Ditto.
	(TARGET_INVALID_CONVERSION): Defined.
	(TARGET_INVALID_UNARY_OP): Defined.
	(TARGET_INVALID_BINARY_OP): Defined.
	* config/i386/immintrin.h: Remove #ifdef __SSE2__ for fp16/bf16
	related intrinsics header files.
	* config/i386/i386.h (VALID_SSE2_TYPE_MODE): New macro.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr109054.c: New test.
	* gcc.target/i386/sse2-bfloat16-1.c: Adjust error info.
	* gcc.target/i386/sse2-float16-1.c: Ditto.
	* gcc.target/i386/sse2-float16-4.c: New test.
	* gcc.target/i386/sse2-float16-5.c: New test.
	* g++.target/i386/float16-1.C: Adjust error info.
---
 gcc/config/i386/i386-builtins.cc              |   4 +-
 gcc/config/i386/i386-c.cc                     |  37 ++++++
 gcc/config/i386/i386.cc                       | 117 ++++++++++++++++--
 gcc/config/i386/i386.h                        |   4 +
 gcc/config/i386/immintrin.h                   |   4 -
 gcc/testsuite/g++.target/i386/float16-1.C     |   8 +-
 gcc/testsuite/gcc.target/i386/pr109054.c      |   6 +
 .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +-
 .../gcc.target/i386/sse2-float16-1.c          |   8 +-
 .../gcc.target/i386/sse2-float16-4.c          |  25 ++++
 .../gcc.target/i386/sse2-float16-5.c          |  24 ++++
 11 files changed, 217 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109054.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-5.c

diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fc0c82b156e..1cdabfd3a0a 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -1367,7 +1367,7 @@ ix86_register_float16_builtin_type (void)
   else
     ix86_float16_type_node = float16_type_node;
 
-  if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
+  if (!maybe_get_identifier ("_Float16"))
     lang_hooks.types.register_builtin_type (ix86_float16_type_node,
 					    "_Float16");
 }
@@ -1385,7 +1385,7 @@ ix86_register_bf16_builtin_type (void)
   else
     ix86_bf16_type_node = bfloat16_type_node;
 
-  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
+  if (!maybe_get_identifier ("__bf16"))
     lang_hooks.types.register_builtin_type (ix86_bf16_type_node, "__bf16");
 }
 
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index e7bd7cc706c..eb77d0af226 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -817,6 +817,43 @@ ix86_target_macros (void)
   if (!TARGET_80387)
     cpp_define (parse_in, "_SOFT_FLOAT");
 
+  /* HFmode/BFmode is supported without depending on any ISA
+     in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
+     but according to psABI, they're really supported w/ SSE2 and above.
+     Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
+     for backend support of the types, undef the macros to avoid
+     build failure, see PR109504.  */
+  if (!TARGET_SSE2)
+    {
+      if (c_dialect_cxx ()
+	  && cxx_dialect > cxx20)
+	{
+	  cpp_undef (parse_in, "__STDCPP_FLOAT16_T__");
+	  cpp_undef (parse_in, "__STDCPP_BFLOAT16_T__");
+	}
+
+      if (flag_building_libgcc)
+	{
+	  /* libbid uses __LIBGCC_HAS_HF_MODE__ and __LIBGCC_HAS_BF_MODE__
+	     to check backend support of _Float16 and __bf16 type.  */
+	  cpp_undef (parse_in, "__LIBGCC_HAS_HF_MODE__");
+	  cpp_undef (parse_in, "__LIBGCC_HF_FUNC_EXT__");
+	  cpp_undef (parse_in, "__LIBGCC_HF_MANT_DIG__");
+	  cpp_undef (parse_in, "__LIBGCC_HF_EXCESS_PRECISION__");
+	  cpp_undef (parse_in, "__LIBGCC_HF_EPSILON__");
+	  cpp_undef (parse_in, "__LIBGCC_HF_MAX__");
+	  cpp_undef (parse_in, "__LIBGCC_HF_MIN__");
+
+	  cpp_undef (parse_in, "__LIBGCC_HAS_BF_MODE__");
+	  cpp_undef (parse_in, "__LIBGCC_BF_FUNC_EXT__");
+	  cpp_undef (parse_in, "__LIBGCC_BF_MANT_DIG__");
+	  cpp_undef (parse_in, "__LIBGCC_BF_EXCESS_PRECISION__");
+	  cpp_undef (parse_in, "__LIBGCC_BF_EPSILON__");
+	  cpp_undef (parse_in, "__LIBGCC_BF_MAX__");
+	  cpp_undef (parse_in, "__LIBGCC_BF_MIN__");
+	}
+    }
+
   if (TARGET_LONG_DOUBLE_64)
     cpp_define (parse_in, "__LONG_DOUBLE_64__");
 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fbd33a6bfd1..f31929b8752 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2651,7 +2651,10 @@ construct_container (machine_mode mode, machine_mode orig_mode,
 
   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
      some less clueful developer tries to use floating-point anyway.  */
-  if (needed_sseregs && !TARGET_SSE)
+  if (needed_sseregs
+      && (!TARGET_SSE
+	  || (VALID_SSE2_TYPE_MODE (mode)
+	      && !TARGET_SSE2)))
     {
       /* Return early if we shouldn't raise an error for invalid
 	 calls.  */
@@ -2661,13 +2664,19 @@ construct_container (machine_mode mode, machine_mode orig_mode,
 	{
 	  if (!issued_sse_ret_error)
 	    {
-	      error ("SSE register return with SSE disabled");
+	      if (VALID_SSE2_TYPE_MODE (mode))
+		error ("SSE register return with SSE2 disabled");
+	      else
+		error ("SSE register return with SSE disabled");
 	      issued_sse_ret_error = true;
 	    }
 	}
       else if (!issued_sse_arg_error)
 	{
-	  error ("SSE register argument with SSE disabled");
+	  if (VALID_SSE2_TYPE_MODE (mode))
+	    error ("SSE register argument with SSE2 disabled");
+	  else
+	    error ("SSE register argument with SSE disabled");
 	  issued_sse_arg_error = true;
 	}
       return NULL;
@@ -4022,13 +4031,26 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
 
   /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */
   if (mode == HFmode || mode == BFmode)
-    regno = FIRST_SSE_REG;
+    {
+      if (!TARGET_SSE2)
+	{
+	  error ("SSE register return with SSE2 disabled");
+	  regno = AX_REG;
+	}
+      else
+	regno = FIRST_SSE_REG;
+    }
+
   if (mode == HCmode)
     {
+      if (!TARGET_SSE2)
+	error ("SSE register return with SSE2 disabled");
+
       rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
       XVECEXP (ret, 0, 0)
 	= gen_rtx_EXPR_LIST (VOIDmode,
-			     gen_rtx_REG (SImode, FIRST_SSE_REG),
+			     gen_rtx_REG (SImode,
+					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
 			     GEN_INT (0));
       return ret;
     }
@@ -22459,7 +22481,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
     return default_decimal_float_supported_p ();
   else if (mode == TFmode)
     return true;
-  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
+  else if (mode == HFmode || mode == BFmode)
     return true;
   else
     return default_scalar_mode_supported_p (mode);
@@ -22475,7 +22497,7 @@ ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
      be defined by the C front-end for AVX512FP16 intrinsics.  We will
      issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
      enabled.  */
-  return (((mode == HFmode || mode == BFmode) && TARGET_SSE2)
+  return ((mode == HFmode || mode == BFmode)
 	  ? true
 	  : default_libgcc_floating_mode_supported_p (mode));
 }
@@ -22805,9 +22827,10 @@ ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
 
   if (!TARGET_SSE2)
     {
-      gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
-      float16_type_node = ix86_float16_type_node;
-      bfloat16_type_node = ix86_bf16_type_node;
+      float16_type_node
+	= float16_type_node ? float16_type_node : ix86_float16_type_node;
+      bfloat16_type_node
+	= bfloat16_type_node ? bfloat16_type_node : ix86_bf16_type_node;
       callback (float16_type_node);
       callback (bfloat16_type_node);
       float16_type_node = NULL_TREE;
@@ -24259,6 +24282,71 @@ ix86_init_libfuncs (void)
 #endif
 }
 
+/* Return the diagnostic message string if conversion from FROMTYPE to
+   TOTYPE is not allowed, NULL otherwise.  */
+
+static const char *
+ix86_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+  if (element_mode (fromtype) != element_mode (totype))
+    {
+      /* Do not allow conversions to/from BFmode/HFmode scalar types
+	 when TARGET_SSE2 is not available.  */
+      if ((TYPE_MODE (fromtype) == BFmode
+	   || TYPE_MODE (fromtype) == HFmode)
+	  && !TARGET_SSE2)
+	return N_("invalid conversion from type %<__bf16%> "
+		  "or %<_Float16%> without option %<-msse2%>");
+
+      if ((TYPE_MODE (totype) == BFmode
+	   || TYPE_MODE (totype) == HFmode)
+	  && !TARGET_SSE2)
+	return N_("invalid conversion to type %<__bf16%> "
+		  "or %<_Float16%> without option %<-msse2%>");
+    }
+
+  /* Conversion allowed.  */
+  return NULL;
+}
+
+/* Return the diagnostic message string if the unary operation OP is
+   not permitted on TYPE, NULL otherwise.  */
+
+static const char *
+ix86_invalid_unary_op (int op, const_tree type)
+{
+  /* Reject all single-operand operations on BFmode/HFmode except for &
+     when TARGET_SSE2 is not available.  */
+  if ((element_mode (type) == BFmode || element_mode (type) == HFmode)
+      && !TARGET_SSE2 && op != ADDR_EXPR)
+    return N_("operation not permitted on type %<__bf16%> "
+	      "or %<_Float16%> without option %<-msse2%>");
+
+  /* Operation allowed.  */
+  return NULL;
+}
+
+/* Return the diagnostic message string if the binary operation OP is
+   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
+
+static const char *
+ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
+			   const_tree type2)
+{
+  /* Reject all 2-operand operations on BFmode or HFmode
+     when TARGET_SSE2 is not available.  */
+  if ((element_mode (type1) == BFmode
+       || element_mode (type2) == BFmode
+       || element_mode (type1) == HFmode
+       || element_mode (type2) == HFmode)
+      && !TARGET_SSE2)
+    return N_("operation not permitted on type %<__bf16%> "
+	      "or %<_Float16%> without option %<-msse2%>");
+
+  /* Operation allowed.  */
+  return NULL;
+}
+
 /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
    FPU, assume that the fpcw is set to extended precision; when using
    only SSE, rounding is correct; when using both SSE and the FPU,
@@ -25248,6 +25336,15 @@ ix86_libgcc_floating_mode_supported_p
 #undef TARGET_MEMTAG_TAG_SIZE
 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
 
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
+
+#undef TARGET_INVALID_UNARY_OP
+#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
+
+#undef TARGET_INVALID_BINARY_OP
+#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
+
 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
 {
 #ifdef OPTION_GLIBC
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 1da6dce8e0b..7e839bc5c7e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1046,6 +1046,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 #define VALID_AVX512FP16_REG_MODE(MODE)					\
   ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
 
+#define VALID_SSE2_TYPE_MODE(MODE)		\
+  ((MODE) == HFmode || (MODE) == BFmode		\
+   || (MODE) == HCmode || (MODE) == BCmode)
+
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
    || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode	\
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index b220d871942..cc78df56940 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -98,11 +98,9 @@
 
 #include <avx512vp2intersectvlintrin.h>
 
-#ifdef __SSE2__
 #include <avx512fp16intrin.h>
 
 #include <avx512fp16vlintrin.h>
-#endif
 
 #include <shaintrin.h>
 
@@ -118,13 +116,11 @@
 
 #include <vpclmulqdqintrin.h>
 
-#ifdef __SSE2__
 #include <avx512bf16vlintrin.h>
 
 #include <avx512bf16intrin.h>
 
 #include <avxneconvertintrin.h>
-#endif
 
 #include <amxtileintrin.h>
 
diff --git a/gcc/testsuite/g++.target/i386/float16-1.C b/gcc/testsuite/g++.target/i386/float16-1.C
index f96b932b698..938852ee9ad 100644
--- a/gcc/testsuite/g++.target/i386/float16-1.C
+++ b/gcc/testsuite/g++.target/i386/float16-1.C
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mno-sse2" } */
 
-_Float16	/* { dg-error "expected unqualified-id before '_Float16'" } */
-foo (_Float16 x) 
+_Float16
+foo (_Float16 x)/* { dg-error "SSE register return with SSE2 disabled" } */ 
 {
-  return x;
-}		/* { dg-error "'_Float16' is not supported on this target" } */
+  return x;/* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr109054.c b/gcc/testsuite/gcc.target/i386/pr109054.c
new file mode 100644
index 00000000000..fe5bcda10ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109054.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-sse" } */
+
+#pragma GCC target("sse4.1")
+#include <immintrin.h>
+int main(){return 0;}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
index 612d55be826..717055bc9ad 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mno-sse2" } */
 
-__bf16/* { dg-error "unknown type name '__bf16'" } */
-foo (__bf16 x) /* { dg-error "unknown type name '__bf16'" } */
-{
-  return x;
+__bf16
+foo (__bf16 x)  /* { dg-error "SSE register return with SSE2 disabled" } */
+{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
+  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
index 1b645eb499d..faf818df75f 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mno-sse2" } */
 
-_Float16/* { dg-error "is not supported on this target" } */
-foo (_Float16 x) /* { dg-error "is not supported on this target" } */
-{
-  return x;
+_Float16
+foo (_Float16 x) /* { dg-error "SSE register return with SSE2 disabled" } */
+{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
+  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-4.c b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
new file mode 100644
index 00000000000..64baf92ff56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-sse2" } */
+
+_Float16 a;
+__bf16 c;
+_Complex _Float16 ac;
+
+void
+foo (_Float16* p)
+{
+  a = *p;
+}
+
+void
+foo1 (__bf16 *p)
+{
+  c = *p;
+}
+
+
+void
+foo2 (_Complex _Float16* p)
+{
+  ac = *p;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-5.c b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
new file mode 100644
index 00000000000..c3ed23b8ab3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target ia32} } */
+/* { dg-options "-O2 -mno-sse2" } */
+
+_Float16 a;
+__bf16 c;
+_Complex ac;
+void
+foo (_Float16 p)
+{
+  a = p;
+}
+
+void
+foo1 (__bf16 p)
+{
+  c = p;
+}
+
+
+void
+foo2 (_Complex p)
+{
+  ac = p;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-04-19  7:15 [PATCH] [i386] Support type _Float16/__bf16 independent of SSE2 liuhongt
@ 2023-04-19 16:43 ` Mike Stump
  2023-04-20 12:18 ` Jakub Jelinek
  1 sibling, 0 replies; 12+ messages in thread
From: Mike Stump @ 2023-04-19 16:43 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools, ubizjak, jakub

LLM, machine learning and AI like coding with data types that are weird: float16, bf16, 8-bit floats and 4-bit floats. Longer term, would be nice to natively support these everywhere. Would be nice to trial run them in the compiler, sort it all out, so that the implementation experience can drive language adoption. A little speculative and a little narrow focus for the field, but, AI isn't going away in the next 20 years I don't think. Anyway, I like the direction.

On Apr 19, 2023, at 12:15 AM, liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> That said, these fundamental types whose presence/absence depends on ISA flags
> are quite problematic IMHO, as they are incompatible with the target
> attribute/pragmas. Whether they are available or not available depends on
> whether in this case SSE2 is enabled during compiler initialization (aka after
> parsing command line options) and then they are available or unavailable to
> everything else based on that.
> -----comments end----------
> 
> Enable _Float16 and __bf16 all the time but issue errors when the

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-04-19  7:15 [PATCH] [i386] Support type _Float16/__bf16 independent of SSE2 liuhongt
  2023-04-19 16:43 ` Mike Stump
@ 2023-04-20 12:18 ` Jakub Jelinek
  2023-04-21 13:53   ` [PATCH 1/2] " liuhongt
  1 sibling, 1 reply; 12+ messages in thread
From: Jakub Jelinek @ 2023-04-20 12:18 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools, ubizjak

On Wed, Apr 19, 2023 at 03:15:51PM +0800, liuhongt wrote:
ChangeLog nits have been already reported earlier.

> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -817,6 +817,43 @@ ix86_target_macros (void)
>    if (!TARGET_80387)
>      cpp_define (parse_in, "_SOFT_FLOAT");
>  
> +  /* HFmode/BFmode is supported without depending any isa
> +     in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
> +     but according to psABI, they're really supported w/ SSE2 and above.
> +     Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
> +     for backend support of the types, undef the macros to avoid
> +     build failure, see PR109504.  */
> +  if (!TARGET_SSE2)
> +    {
> +      if (c_dialect_cxx ()
> +	  && cxx_dialect > cxx20)

Formatting, both conditions are short, so just put them on one line.

> +	{
> +	  cpp_undef (parse_in, "__STDCPP_FLOAT16_T__");
> +	  cpp_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> +	}

But for the C++23 macros, more importantly I think we really should
also in ix86_target_macros_internal add
  if (c_dialect_cxx ()
      && cxx_dialect > cxx20
      && (isa_flag & OPTION_MASK_ISA_SSE2))
    {
      def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
      def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
    }
plus associated libstdc++ changes.  It can be done incrementally though.

> +
> +      if (flag_building_libgcc)
> +	{
> +	  /* libbid uses __LIBGCC_HAS_HF_MODE__ and __LIBGCC_HAS_BF_MODE__
> +	     to check backend support of _Float16 and __bf16 type.  */

That is actually the case only for HFmode, but not for BFmode right now.
So, we need further work.  One is to add the BFmode support in there,
and another one is make sure the _Float16 <-> _Decimal* and __bf16 <->
_Decimal* conversions are compiled in also if not -msse2 by default.
One way to do that is wrap the HF and BF mode related functions on x86
#ifndef __SSE2__ into the pragmas like intrin headers use (but then
perhaps we don't need to undef this stuff here), another is not provide
the hf/bf support in that case from the TUs where they are provided now,
but from a different one which would be compiled with -msse2.

> +	  cpp_undef (parse_in, "__LIBGCC_HAS_HF_MODE__");
> +	  cpp_undef (parse_in, "__LIBGCC_HF_FUNC_EXT__");
> +	  cpp_undef (parse_in, "__LIBGCC_HF_MANT_DIG__");
> +	  cpp_undef (parse_in, "__LIBGCC_HF_EXCESS_PRECISION__");
> +	  cpp_undef (parse_in, "__LIBGCC_HF_EPSILON__");
> +	  cpp_undef (parse_in, "__LIBGCC_HF_MAX__");
> +	  cpp_undef (parse_in, "__LIBGCC_HF_MIN__");
> +
> +	  cpp_undef (parse_in, "__LIBGCC_HAS_BF_MODE__");
> +	  cpp_undef (parse_in, "__LIBGCC_BF_FUNC_EXT__");
> +	  cpp_undef (parse_in, "__LIBGCC_BF_MANT_DIG__");
> +	  cpp_undef (parse_in, "__LIBGCC_BF_EXCESS_PRECISION__");
> +	  cpp_undef (parse_in, "__LIBGCC_BF_EPSILON__");
> +	  cpp_undef (parse_in, "__LIBGCC_BF_MAX__");
> +	  cpp_undef (parse_in, "__LIBGCC_BF_MIN__");
> +	}
> +    }
> +

> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -2651,7 +2651,10 @@ construct_container (machine_mode mode, machine_mode orig_mode,
>  
>    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
>       some less clueful developer tries to use floating-point anyway.  */
> -  if (needed_sseregs && !TARGET_SSE)
> +  if (needed_sseregs
> +      && (!TARGET_SSE
> +	  || (VALID_SSE2_TYPE_MODE (mode)
> +	      && !TARGET_SSE2)))

Formatting, no need to split this up that much.
  if (needed_sseregs
      && (!TARGET_SSE
	  || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
or even better
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
will do it.

> @@ -22805,9 +22827,10 @@ ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
>  
>    if (!TARGET_SSE2)
>      {
> -      gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
> -      float16_type_node = ix86_float16_type_node;
> -      bfloat16_type_node = ix86_bf16_type_node;
> +      float16_type_node
> +	= float16_type_node ? float16_type_node : ix86_float16_type_node;
> +      bfloat16_type_node
> +	= bfloat16_type_node ? bfloat16_type_node : ix86_bf16_type_node;
>        callback (float16_type_node);
>        callback (bfloat16_type_node);

Instead of this, just use
      if (!float16_type_node)
	{
	  float16_type_node = ix86_float16_type_node;
	  callback (float16_type_node);
	  float16_type_node = NULL_TREE;
	}
      if (!bfloat16_type_node)
	{
	  bfloat16_type_node = ix86_bf16_type_node;
	  callback (bfloat16_type_node);
	  bfloat16_type_node = NULL_TREE;
	}
?
> +/* Return the diagnostic message string if conversion from FROMTYPE to
> +   TOTYPE is not allowed, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> +{
> +  if (element_mode (fromtype) != element_mode (totype))
> +    {
> +      /* Do no allow conversions to/from BFmode/HFmode scalar types
> +	 when TARGET_SSE2 is not available.  */
> +      if ((TYPE_MODE (fromtype) == BFmode
> +	   || TYPE_MODE (fromtype) == HFmode)
> +	  && !TARGET_SSE2)

First of all, not really sure if this should be purely about scalar
modes, not also complex and vector modes involving those inner modes.
Because complex or vector modes with BF/HF elements will be without
TARGET_SSE2 for sure lowered into scalar code and that can't be handled
either.
So if (!TARGET_SSE2 && GET_MODE_INNER (TYPE_MODE (fromtype)) == BFmode)
or even better
if (!TARGET_SSE2 && element_mode (fromtype) == BFmode)
?
Or even better remember the 2 modes above into machine_mode temporaries
and just use those in the != comparison and for the checks?

Also, I think it is weird to tell user %<__bf16%> or %<_Float16%> when
we know which one it is.  Just return separate messages?


> +	return N_("invalid conversion from type %<__bf16%> "
> +		  "or %<_Float16%> without option %<-msse2%>");
> +
> +      if ((TYPE_MODE (totype) == BFmode
> +	   || TYPE_MODE (totype) == HFmode)
> +	  && !TARGET_SSE2)
> +	return N_("invalid conversion to type %<__bf16%> "
> +		  "or %<_Float16%> without option %<-msse2%>");

Ditto.
> +    }
> +
> +  /* Conversion allowed.  */
> +  return NULL;
> +}
> +
> +/* Return the diagnostic message string if the unary operation OP is
> +   not permitted on TYPE, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_unary_op (int op, const_tree type)
> +{
> +  /* Reject all single-operand operations on BFmode/HFmode except for &
> +     when TARGET_SSE2 is not available.  */
> +  if ((element_mode (type) == BFmode || element_mode (type) == HFmode)
> +      && !TARGET_SSE2 && op != ADDR_EXPR)
> +    return N_("operation not permitted on type %<__bf16%> "
> +	      "or %<_Float16%> without option %<-msse2%>");

Similarly.  Also, check !TARGET_SSE2 first as inexpensive one.
> +
> +  /* Operation allowed.  */
> +  return NULL;
> +}
> +
> +/* Return the diagnostic message string if the binary operation OP is
> +   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
> +			   const_tree type2)
> +{
> +  /* Reject all 2-operand operations on BFmode or HFmode
> +     when TARGET_SSE2 is not available.  */
> +  if ((element_mode (type1) == BFmode
> +       || element_mode (type2) == BFmode
> +       || element_mode (type1) == HFmode
> +       || element_mode (type2) == HFmode)
> +      && !TARGET_SSE2)
> +    return N_("operation not permitted on type %<__bf16%> "
> +	      "or %<_Float16%> without option %<-msse2%>");

Similarly.

	Jakub


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-04-20 12:18 ` Jakub Jelinek
@ 2023-04-21 13:53   ` liuhongt
  2023-04-21 13:53     ` [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for target attribute/pragmas liuhongt
  2023-05-15  1:20     ` [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2 Hongtao Liu
  0 siblings, 2 replies; 12+ messages in thread
From: liuhongt @ 2023-04-21 13:53 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, jakub

> > +  if (!TARGET_SSE2)
> > +    {
> > +      if (c_dialect_cxx ()
> > +       && cxx_dialect > cxx20)
>
> Formatting, both conditions are short, so just put them on one line.
Changed.

> But for the C++23 macros, more importantly I think we really should
> also in ix86_target_macros_internal add
>   if (c_dialect_cxx ()
>       && cxx_dialect > cxx20
>       && (isa_flag & OPTION_MASK_ISA_SSE2))
>     {
>       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
>       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
>     }
> plus associated libstdc++ changes.  It can be done incrementally though.
Added in PATCH 2/2

> > +      if (flag_building_libgcc)
> > +     {
> > +       /* libbid uses __LIBGCC_HAS_HF_MODE__ and __LIBGCC_HAS_BF_MODE__
> > +          to check backend support of _Float16 and __bf16 type.  */
>
> That is actually the case only for HFmode, but not for BFmode right now.
> So, we need further work.  One is to add the BFmode support in there,
> and another one is make sure the _Float16 <-> _Decimal* and __bf16 <->
> _Decimal* conversions are compiled in also if not -msse2 by default.
> One way to do that is wrap the HF and BF mode related functions on x86
> #ifndef __SSE2__ into the pragmas like intrin headers use (but then
> perhaps we don't need to undef this stuff here), another is not provide
> the hf/bf support in that case from the TUs where they are provided now,
> but from a different one which would be compiled with -msse2.
Add CFLAGS-_hf_to_sd.c += -msse2, similar for other files in libbid, just like
we did before for HFtype softfp. Then no need to undef libgcc macros.

> >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> >       some less clueful developer tries to use floating-point anyway.  */
> > -  if (needed_sseregs && !TARGET_SSE)
> > +  if (needed_sseregs
> > +      && (!TARGET_SSE
> > +       || (VALID_SSE2_TYPE_MODE (mode)
> > +           && !TARGET_SSE2)))
>
> Formatting, no need to split this up that much.
>   if (needed_sseregs
>       && (!TARGET_SSE
>           || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> or even better
>   if (needed_sseregs
>       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> will do it.
Changed.

> Instead of this, just use
>       if (!float16_type_node)
>         {
>           float16_type_node = ix86_float16_type_node;
>           callback (float16_type_node);
>           float16_type_node = NULL_TREE;
>         }
>       if (!bfloat16_type_node)
>         {
>           bfloat16_type_node = ix86_bf16_type_node;
>           callback (bfloat16_type_node);
>           bfloat16_type_node = NULL_TREE;
>         }
Changed.


> > +static const char *
> > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > +{
> > +  if (element_mode (fromtype) != element_mode (totype))
> > +    {
> > +      /* Do no allow conversions to/from BFmode/HFmode scalar types
> > +      when TARGET_SSE2 is not available.  */
> > +      if ((TYPE_MODE (fromtype) == BFmode
> > +        || TYPE_MODE (fromtype) == HFmode)
> > +       && !TARGET_SSE2)
>
> First of all, not really sure if this should be purely about scalar
> modes, not also complex and vector modes involving those inner modes.
> Because complex or vector modes with BF/HF elements will be without
> TARGET_SSE2 for sure lowered into scalar code and that can't be handled
> either.
> So if (!TARGET_SSE2 && GET_MODE_INNER (TYPE_MODE (fromtype)) == BFmode)
> or even better
> if (!TARGET_SSE2 && element_mode (fromtype) == BFmode)
> ?
> Or even better remember the 2 modes above into machine_mode temporaries
> and just use those in the != comparison and for the checks?
>
> Also, I think it is weird to tell user %<__bf16%> or %<_Float16%> when
> we know which one it is.  Just return separate messages?
Changed.

> > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > +     when TARGET_SSE2 is not available.  */
> > +  if ((element_mode (type) == BFmode || element_mode (type) == HFmode)
> > +      && !TARGET_SSE2 && op != ADDR_EXPR)
> > +    return N_("operation not permitted on type %<__bf16%> "
> > +           "or %<_Float16%> without option %<-msse2%>");
>
> Similarly.  Also, check !TARGET_SSE2 first as inexpensive one.
Changed.


Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Successfully cross-built i686-linux-gnu.
Ok for trunk?

Enable _Float16 and __bf16 all the time but issue errors when the
types are used in conversion, unary operation, binary operation,
parameter passing or value return when TARGET_SSE2 is not available.

Also undef macros which are used by libstdc++ to check the backend
support of the _Float16/__bf16 types when TARGET_SSE2 is not
available, and build the libbid HFtype related files with -msse2.

gcc/ChangeLog:

	PR target/109504
	* config/i386/i386-builtins.cc
	(ix86_register_float16_builtin_type): Remove TARGET_SSE2.
	(ix86_register_bf16_builtin_type): Ditto.
	* config/i386/i386-c.cc (ix86_target_macros): When TARGET_SSE2
	isn't available, undef the macros which are used to check the
	backend support of the _Float16/__bf16 types when building
	libstdc++ and libgcc.
	* config/i386/i386.cc (construct_container): Issue errors for
	HFmode/BFmode when TARGET_SSE2 is not available.
	(function_value_32): Ditto.
	(ix86_scalar_mode_supported_p): Remove TARGET_SSE2 for HFmode/BFmode.
	(ix86_libgcc_floating_mode_supported_p): Ditto.
	(ix86_emit_support_tinfos): Adjust codes.
	(ix86_invalid_conversion): New function.
	(ix86_invalid_unary_op): Ditto.
	(ix86_invalid_binary_op): Ditto.
	(TARGET_INVALID_CONVERSION): Define.
	(TARGET_INVALID_UNARY_OP): Define.
	(TARGET_INVALID_BINARY_OP): Define.
	* config/i386/immintrin.h [__SSE2__]: Remove for fp16/bf16
	related intrinsics header files.
	* config/i386/i386.h (VALID_SSE2_TYPE_MODE): New macro.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr109504.c: New test.
	* gcc.target/i386/sse2-bfloat16-1.c: Adjust error info.
	* gcc.target/i386/sse2-float16-1.c: Ditto.
	* gcc.target/i386/sse2-float16-4.c: New test.
	* gcc.target/i386/sse2-float16-5.c: New test.
	* g++.target/i386/float16-1.C: Adjust error info.

libgcc/ChangeLog:

	* config/i386/t-softfp: Add -msse2 to libbid HFtype related
	files.
---
 gcc/config/i386/i386-builtins.cc              |   4 +-
 gcc/config/i386/i386-c.cc                     |  15 ++
 gcc/config/i386/i386.cc                       | 130 ++++++++++++++++--
 gcc/config/i386/i386.h                        |   4 +
 gcc/config/i386/immintrin.h                   |   4 -
 gcc/testsuite/g++.target/i386/float16-1.C     |   8 +-
 gcc/testsuite/gcc.target/i386/pr109504.c      |   6 +
 .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +-
 .../gcc.target/i386/sse2-float16-1.c          |   8 +-
 .../gcc.target/i386/sse2-float16-4.c          |  25 ++++
 .../gcc.target/i386/sse2-float16-5.c          |  24 ++++
 libgcc/config/i386/t-softfp                   |   7 +
 12 files changed, 215 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109504.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-5.c

diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fc0c82b156e..1cdabfd3a0a 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -1367,7 +1367,7 @@ ix86_register_float16_builtin_type (void)
   else
     ix86_float16_type_node = float16_type_node;
 
-  if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
+  if (!maybe_get_identifier ("_Float16"))
     lang_hooks.types.register_builtin_type (ix86_float16_type_node,
 					    "_Float16");
 }
@@ -1385,7 +1385,7 @@ ix86_register_bf16_builtin_type (void)
   else
     ix86_bf16_type_node = bfloat16_type_node;
 
-  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
+  if (!maybe_get_identifier ("__bf16"))
     lang_hooks.types.register_builtin_type (ix86_bf16_type_node, "__bf16");
 }
 
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index e7bd7cc706c..2f83c9981e1 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -817,6 +817,21 @@ ix86_target_macros (void)
   if (!TARGET_80387)
     cpp_define (parse_in, "_SOFT_FLOAT");
 
+  /* HFmode/BFmode are supported without depending on any ISA
+     in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
+     but according to psABI, they're really supported w/ SSE2 and above.
+     Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
+     for backend support of the types, undef the macros to avoid
+     build failure, see PR109504.  */
+  if (!TARGET_SSE2)
+    {
+      if (c_dialect_cxx () && cxx_dialect > cxx20)
+	{
+	  cpp_undef (parse_in, "__STDCPP_FLOAT16_T__");
+	  cpp_undef (parse_in, "__STDCPP_BFLOAT16_T__");
+	}
+    }
+
   if (TARGET_LONG_DOUBLE_64)
     cpp_define (parse_in, "__LONG_DOUBLE_64__");
 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fbd33a6bfd1..633a0f41e60 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2651,7 +2651,8 @@ construct_container (machine_mode mode, machine_mode orig_mode,
 
   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
      some less clueful developer tries to use floating-point anyway.  */
-  if (needed_sseregs && !TARGET_SSE)
+  if (needed_sseregs
+      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
     {
       /* Return early if we shouldn't raise an error for invalid
 	 calls.  */
@@ -2661,13 +2662,19 @@ construct_container (machine_mode mode, machine_mode orig_mode,
 	{
 	  if (!issued_sse_ret_error)
 	    {
-	      error ("SSE register return with SSE disabled");
+	      if (VALID_SSE2_TYPE_MODE (mode))
+		error ("SSE register return with SSE2 disabled");
+	      else
+		error ("SSE register return with SSE disabled");
 	      issued_sse_ret_error = true;
 	    }
 	}
       else if (!issued_sse_arg_error)
 	{
-	  error ("SSE register argument with SSE disabled");
+	  if (VALID_SSE2_TYPE_MODE (mode))
+	    error ("SSE register argument with SSE2 disabled");
+	  else
+	    error ("SSE register argument with SSE disabled");
 	  issued_sse_arg_error = true;
 	}
       return NULL;
@@ -4022,13 +4029,26 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
 
   /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */
   if (mode == HFmode || mode == BFmode)
-    regno = FIRST_SSE_REG;
+    {
+      if (!TARGET_SSE2)
+	{
+	  error ("SSE register return with SSE2 disabled");
+	  regno = AX_REG;
+	}
+      else
+	regno = FIRST_SSE_REG;
+    }
+
   if (mode == HCmode)
     {
+      if (!TARGET_SSE2)
+	error ("SSE register return with SSE2 disabled");
+
       rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
       XVECEXP (ret, 0, 0)
 	= gen_rtx_EXPR_LIST (VOIDmode,
-			     gen_rtx_REG (SImode, FIRST_SSE_REG),
+			     gen_rtx_REG (SImode,
+					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
 			     GEN_INT (0));
       return ret;
     }
@@ -22459,7 +22479,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
     return default_decimal_float_supported_p ();
   else if (mode == TFmode)
     return true;
-  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
+  else if (mode == HFmode || mode == BFmode)
     return true;
   else
     return default_scalar_mode_supported_p (mode);
@@ -22475,7 +22495,7 @@ ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
      be defined by the C front-end for AVX512FP16 intrinsics.  We will
      issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
      enabled.  */
-  return (((mode == HFmode || mode == BFmode) && TARGET_SSE2)
+  return ((mode == HFmode || mode == BFmode)
 	  ? true
 	  : default_libgcc_floating_mode_supported_p (mode));
 }
@@ -22805,9 +22825,10 @@ ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
 
   if (!TARGET_SSE2)
     {
-      gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
-      float16_type_node = ix86_float16_type_node;
-      bfloat16_type_node = ix86_bf16_type_node;
+      if (!float16_type_node)
+	float16_type_node = ix86_float16_type_node;
+      if (!bfloat16_type_node)
+	bfloat16_type_node = ix86_bf16_type_node;
       callback (float16_type_node);
       callback (bfloat16_type_node);
       float16_type_node = NULL_TREE;
@@ -24259,6 +24280,86 @@ ix86_init_libfuncs (void)
 #endif
 }
 
+/* Return the diagnostic message string if conversion from FROMTYPE to
+   TOTYPE is not allowed, NULL otherwise.  */
+
+static const char *
+ix86_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+  machine_mode from_mode = element_mode (fromtype);
+  machine_mode to_mode = element_mode (totype);
+
+  if (!TARGET_SSE2 && from_mode != to_mode)
+    {
+      /* Do not allow conversions to/from BFmode/HFmode scalar types
+	 when TARGET_SSE2 is not available.  */
+      if (from_mode == BFmode)
+	return N_("invalid conversion from type %<__bf16%> "
+		  "without option %<-msse2%>");
+      if (from_mode == HFmode)
+	return N_("invalid conversion from type %<_Float16%> "
+		  "without option %<-msse2%>");
+      if (to_mode == BFmode)
+	return N_("invalid conversion to type %<__bf16%> "
+		  "without option %<-msse2%>");
+      if (to_mode == HFmode)
+	return N_("invalid conversion to type %<_Float16%> "
+		  "without option %<-msse2%>");
+    }
+
+  /* Conversion allowed.  */
+  return NULL;
+}
+
+/* Return the diagnostic message string if the unary operation OP is
+   not permitted on TYPE, NULL otherwise.  */
+
+static const char *
+ix86_invalid_unary_op (int op, const_tree type)
+{
+  machine_mode mmode = element_mode (type);
+  /* Reject all single-operand operations on BFmode/HFmode except for &
+     when TARGET_SSE2 is not available.  */
+  if (!TARGET_SSE2 && op != ADDR_EXPR)
+    {
+      if (mmode == BFmode)
+	return N_("operation not permitted on type %<__bf16%> "
+		  "without option %<-msse2%>");
+      if (mmode == HFmode)
+	return N_("operation not permitted on type %<_Float16%> "
+		  "without option %<-msse2%>");
+    }
+
+  /* Operation allowed.  */
+  return NULL;
+}
+
+/* Return the diagnostic message string if the binary operation OP is
+   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
+
+static const char *
+ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
+			const_tree type2)
+{
+  machine_mode type1_mode = element_mode (type1);
+  machine_mode type2_mode = element_mode (type2);
+  /* Reject all 2-operand operations on BFmode or HFmode
+     when TARGET_SSE2 is not available.  */
+  if (!TARGET_SSE2)
+    {
+      if (type1_mode == BFmode || type2_mode == BFmode)
+	return N_("operation not permitted on type %<__bf16%> "
+		  "without option %<-msse2%>");
+
+      if (type1_mode == HFmode || type2_mode == HFmode)
+	return N_("operation not permitted on type %<_Float16%> "
+		  "without option %<-msse2%>");
+    }
+
+  /* Operation allowed.  */
+  return NULL;
+}
+
 /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
    FPU, assume that the fpcw is set to extended precision; when using
    only SSE, rounding is correct; when using both SSE and the FPU,
@@ -25248,6 +25349,15 @@ ix86_libgcc_floating_mode_supported_p
 #undef TARGET_MEMTAG_TAG_SIZE
 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
 
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
+
+#undef TARGET_INVALID_UNARY_OP
+#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
+
+#undef TARGET_INVALID_BINARY_OP
+#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
+
 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
 {
 #ifdef OPTION_GLIBC
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 1da6dce8e0b..7e839bc5c7e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1046,6 +1046,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 #define VALID_AVX512FP16_REG_MODE(MODE)					\
   ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
 
+#define VALID_SSE2_TYPE_MODE(MODE)		\
+  ((MODE) == HFmode || (MODE) == BFmode		\
+   || (MODE) == HCmode || (MODE) == BCmode)
+
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
    || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode	\
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index b220d871942..cc78df56940 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -98,11 +98,9 @@
 
 #include <avx512vp2intersectvlintrin.h>
 
-#ifdef __SSE2__
 #include <avx512fp16intrin.h>
 
 #include <avx512fp16vlintrin.h>
-#endif
 
 #include <shaintrin.h>
 
@@ -118,13 +116,11 @@
 
 #include <vpclmulqdqintrin.h>
 
-#ifdef __SSE2__
 #include <avx512bf16vlintrin.h>
 
 #include <avx512bf16intrin.h>
 
 #include <avxneconvertintrin.h>
-#endif
 
 #include <amxtileintrin.h>
 
diff --git a/gcc/testsuite/g++.target/i386/float16-1.C b/gcc/testsuite/g++.target/i386/float16-1.C
index f96b932b698..938852ee9ad 100644
--- a/gcc/testsuite/g++.target/i386/float16-1.C
+++ b/gcc/testsuite/g++.target/i386/float16-1.C
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mno-sse2" } */
 
-_Float16	/* { dg-error "expected unqualified-id before '_Float16'" } */
-foo (_Float16 x) 
+_Float16
+foo (_Float16 x)/* { dg-error "SSE register return with SSE2 disabled" } */ 
 {
-  return x;
-}		/* { dg-error "'_Float16' is not supported on this target" } */
+  return x;/* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr109504.c b/gcc/testsuite/gcc.target/i386/pr109504.c
new file mode 100644
index 00000000000..fe5bcda10ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109504.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-sse" } */
+
+#pragma GCC target("sse4.1")
+#include <immintrin.h>
+int main(){return 0;}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
index 612d55be826..717055bc9ad 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mno-sse2" } */
 
-__bf16/* { dg-error "unknown type name '__bf16'" } */
-foo (__bf16 x) /* { dg-error "unknown type name '__bf16'" } */
-{
-  return x;
+__bf16
+foo (__bf16 x)  /* { dg-error "SSE register return with SSE2 disabled" } */
+{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
+  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
index 1b645eb499d..faf818df75f 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mno-sse2" } */
 
-_Float16/* { dg-error "is not supported on this target" } */
-foo (_Float16 x) /* { dg-error "is not supported on this target" } */
-{
-  return x;
+_Float16
+foo (_Float16 x) /* { dg-error "SSE register return with SSE2 disabled" } */
+{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
+  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-4.c b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
new file mode 100644
index 00000000000..64baf92ff56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-sse2" } */
+
+_Float16 a;
+__bf16 c;
+_Complex _Float16 ac;
+
+void
+foo (_Float16* p)
+{
+  a = *p;
+}
+
+void
+foo1 (__bf16 *p)
+{
+  c = *p;
+}
+
+
+void
+foo2 (_Complex _Float16* p)
+{
+  ac = *p;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-5.c b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
new file mode 100644
index 00000000000..c3ed23b8ab3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target ia32} } */
+/* { dg-options "-O2 -mno-sse2" } */
+
+_Float16 a;
+__bf16 c;
+_Complex ac;
+void
+foo (_Float16 p)
+{
+  a = p;
+}
+
+void
+foo1 (__bf16 p)
+{
+  c = p;
+}
+
+
+void
+foo2 (_Complex p)
+{
+  ac = p;
+}
diff --git a/libgcc/config/i386/t-softfp b/libgcc/config/i386/t-softfp
index 69d0f819822..80d1fac121b 100644
--- a/libgcc/config/i386/t-softfp
+++ b/libgcc/config/i386/t-softfp
@@ -31,3 +31,10 @@ CFLAGS-trunchfbf2.c += -msse2
 CFLAGS-eqhf2.c += -msse2
 CFLAGS-_divhc3.c += -msse2
 CFLAGS-_mulhc3.c += -msse2
+
+CFLAGS-_hf_to_sd.c += -msse2
+CFLAGS-_hf_to_dd.c += -msse2
+CFLAGS-_hf_to_td.c += -msse2
+CFLAGS-_sd_to_hf.c += -msse2
+CFLAGS-_dd_to_hf.c += -msse2
+CFLAGS-_td_to_hf.c += -msse2
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for target attribute/pragmas.
  2023-04-21 13:53   ` [PATCH 1/2] " liuhongt
@ 2023-04-21 13:53     ` liuhongt
  2023-05-15  1:21       ` Hongtao Liu
  2023-05-15  1:20     ` [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2 Hongtao Liu
  1 sibling, 1 reply; 12+ messages in thread
From: liuhongt @ 2023-04-21 13:53 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools, jakub

> But for the C++23 macros, more importantly I think we really should
> also in ix86_target_macros_internal add
>   if (c_dialect_cxx ()
>       && cxx_dialect > cxx20
>       && (isa_flag & OPTION_MASK_ISA_SSE2))
>     {
>       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
>       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
>     }
> plus associated libstdc++ changes.  It can be done incrementally though.
Changed, except for one place in libsupc++/compare: it is inside a function,
where the pragma cannot be added. I am not sure whether this inconsistency
will cause any issues.

#ifdef __STDCPP_BFLOAT16_T__
          if constexpr (__is_same(_Tp, decltype(0.0bf16)))
            return _Bfloat16;
#endif

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Successfully cross-build i686-linux-gnu.
Ok for trunk?

Make def_or_undef of target macros, which is based on the currently active
ISA in pragmas, also cover __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
for C++, and change libstdc++ so that for x86 it wraps the
std::float16_t/std::bfloat16_t code in #pragma GCC target("sse2"),
similarly to the x86 intrinsic headers.

gcc/ChangeLog:

	PR target/109504
	* config/i386/i386-c.cc (ix86_target_macros_internal):
	def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__.

libstdc++-v3/ChangeLog:

	* include/bits/c++config: Add #pragma GCC target("sse2") for
	_Float16 and bfloat16_t when __SSE2__ is not available.
	* include/bits/cpp_type_traits.h: Ditto.
	* include/bits/std_abs.h: Ditto.
	* include/c_global/cmath: Ditto.
	* include/ext/type_traits.h: Ditto.
	* include/std/atomic: Ditto.
	* include/std/charconv: Ditto.
	* include/std/complex: Ditto.
	* include/std/istream: Ditto.
	* include/std/limits: Ditto.
	* include/std/numbers: Ditto.
	* include/std/ostream: Ditto.
	* include/std/stdfloat: Ditto.
	* include/std/type_traits: Ditto.
---
 gcc/config/i386/i386-c.cc                   |   9 +-
 libstdc++-v3/include/bits/c++config         |  11 +
 libstdc++-v3/include/bits/cpp_type_traits.h |  27 +-
 libstdc++-v3/include/bits/std_abs.h         |  23 +-
 libstdc++-v3/include/c_global/cmath         | 733 +++++++++++---------
 libstdc++-v3/include/ext/type_traits.h      |  23 +-
 libstdc++-v3/include/std/atomic             |  43 +-
 libstdc++-v3/include/std/charconv           |  90 ++-
 libstdc++-v3/include/std/complex            | 227 +++---
 libstdc++-v3/include/std/istream            |  61 +-
 libstdc++-v3/include/std/limits             |  37 +-
 libstdc++-v3/include/std/numbers            |  11 +
 libstdc++-v3/include/std/ostream            |  29 +-
 libstdc++-v3/include/std/stdfloat           |  19 +-
 libstdc++-v3/include/std/type_traits        |  23 +-
 15 files changed, 809 insertions(+), 557 deletions(-)

diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 2f83c9981e1..bcc17263e28 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -492,7 +492,14 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
   if (isa_flag & OPTION_MASK_ISA_SSE)
     def_or_undef (parse_in, "__SSE__");
   if (isa_flag & OPTION_MASK_ISA_SSE2)
-    def_or_undef (parse_in, "__SSE2__");
+    {
+      def_or_undef (parse_in, "__SSE2__");
+      if (c_dialect_cxx () && cxx_dialect > cxx20)
+	{
+	  def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
+	  def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
+	}
+    }
   if (isa_flag & OPTION_MASK_ISA_SSE3)
     def_or_undef (parse_in, "__SSE3__");
   if (isa_flag & OPTION_MASK_ISA_SSSE3)
diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config
index 13892787e09..c858497fc6e 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -820,6 +820,12 @@ namespace std
 # define _GLIBCXX_LDOUBLE_IS_IEEE_BINARY128 1
 #endif
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_BFLOAT16_T__
 namespace __gnu_cxx
 {
@@ -827,6 +833,11 @@ namespace __gnu_cxx
 }
 #endif
 
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __has_builtin
 # ifdef __is_identifier
 // Intel and older Clang require !__is_identifier for some built-ins:
diff --git a/libstdc++-v3/include/bits/cpp_type_traits.h b/libstdc++-v3/include/bits/cpp_type_traits.h
index 4312f32a4e0..cadd5ca4fde 100644
--- a/libstdc++-v3/include/bits/cpp_type_traits.h
+++ b/libstdc++-v3/include/bits/cpp_type_traits.h
@@ -315,6 +315,12 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
       typedef __true_type __type;
     };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __is_floating<_Float16>
@@ -324,36 +330,41 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
     };
 #endif
 
-#ifdef __STDCPP_FLOAT32_T__
+#ifdef __STDCPP_BFLOAT16_T__
   template<>
-    struct __is_floating<_Float32>
+    struct __is_floating<__gnu_cxx::__bfloat16_t>
     {
       enum { __value = 1 };
       typedef __true_type __type;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT64_T__
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
+#ifdef __STDCPP_FLOAT32_T__
   template<>
-    struct __is_floating<_Float64>
+    struct __is_floating<_Float32>
     {
       enum { __value = 1 };
       typedef __true_type __type;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT128_T__
+#ifdef __STDCPP_FLOAT64_T__
   template<>
-    struct __is_floating<_Float128>
+    struct __is_floating<_Float64>
     {
       enum { __value = 1 };
       typedef __true_type __type;
     };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
+#ifdef __STDCPP_FLOAT128_T__
   template<>
-    struct __is_floating<__gnu_cxx::__bfloat16_t>
+    struct __is_floating<_Float128>
     {
       enum { __value = 1 };
       typedef __true_type __type;
diff --git a/libstdc++-v3/include/bits/std_abs.h b/libstdc++-v3/include/bits/std_abs.h
index 1bb7ffbc2da..0423909e8c8 100644
--- a/libstdc++-v3/include/bits/std_abs.h
+++ b/libstdc++-v3/include/bits/std_abs.h
@@ -97,12 +97,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   abs(__GLIBCXX_TYPE_INT_N_3 __x) { return __x >= 0 ? __x : -__x; }
 #endif
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float16
   abs(_Float16 __x)
   { return _Float16(__builtin_fabsf(__x)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  constexpr __gnu_cxx::__bfloat16_t
+  abs(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float32
   abs(_Float32 __x)
@@ -125,12 +142,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_fabsf128(__x); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  constexpr __gnu_cxx::__bfloat16_t
-  abs(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
-#endif
-
 #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
   __extension__ inline _GLIBCXX_CONSTEXPR
   __float128
diff --git a/libstdc++-v3/include/c_global/cmath b/libstdc++-v3/include/c_global/cmath
index 568eb354c2d..6bf3a5eade2 100644
--- a/libstdc++-v3/include/c_global/cmath
+++ b/libstdc++-v3/include/c_global/cmath
@@ -515,6 +515,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     tanh(_Tp __x)
     { return __builtin_tanh(__x); }
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float16
   acos(_Float16 __x)
@@ -609,6 +615,105 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return _Float16(__builtin_tanhf(__x)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  constexpr __gnu_cxx::__bfloat16_t
+  acos(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  asin(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  atan(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  ceil(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  cos(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  cosh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  exp(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fabs(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  floor(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
+  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
+  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  log(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  log10(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
+  {
+    float __i, __ret = __builtin_modff(__x, &__i);
+    *__iptr = __gnu_cxx::__bfloat16_t(__i);
+    return __gnu_cxx::__bfloat16_t(__ret);
+  }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  sin(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  sinh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  sqrt(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  tan(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  tanh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float32
   acos(_Float32 __x)
@@ -979,100 +1084,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_tanhf128(__x); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  constexpr __gnu_cxx::__bfloat16_t
-  acos(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  asin(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  atan(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  ceil(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  cos(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  cosh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  exp(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fabs(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  floor(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
-  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
-  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  log(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  log10(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
-  {
-    float __i, __ret = __builtin_modff(__x, &__i);
-    *__iptr = __gnu_cxx::__bfloat16_t(__i);
-    return __gnu_cxx::__bfloat16_t(__ret);
-  }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  sin(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  sinh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  sqrt(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  tan(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  tanh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
-#endif
-
 #if _GLIBCXX_USE_C99_MATH
 #if !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC
 
@@ -1507,6 +1518,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #endif // C++11
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   constexpr int
   fpclassify(_Float16 __x)
@@ -1558,6 +1575,62 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_isunordered(__x, __y); }
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  constexpr int
+  fpclassify(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
+				FP_SUBNORMAL, FP_ZERO, __x); }
+
+  constexpr bool
+  isfinite(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isfinite(__x); }
+
+  constexpr bool
+  isinf(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isinf(__x); }
+
+  constexpr bool
+  isnan(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isnan(__x); }
+
+  constexpr bool
+  isnormal(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_isnormal(__x); }
+
+  constexpr bool
+  signbit(__gnu_cxx::__bfloat16_t __x)
+  { return __builtin_signbit(__x); }
+
+  constexpr bool
+  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isgreater(__x, __y); }
+
+  constexpr bool
+  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isgreaterequal(__x, __y); }
+
+  constexpr bool
+  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isless(__x, __y); }
+
+  constexpr bool
+  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_islessequal(__x, __y); }
+
+  constexpr bool
+  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_islessgreater(__x, __y); }
+
+  constexpr bool
+  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __builtin_isunordered(__x, __y); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   constexpr int
   fpclassify(_Float32 __x)
@@ -1711,59 +1784,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_isunordered(__x, __y); }
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  constexpr int
-  fpclassify(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
-				FP_SUBNORMAL, FP_ZERO, __x); }
-
-  constexpr bool
-  isfinite(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isfinite(__x); }
-
-  constexpr bool
-  isinf(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isinf(__x); }
-
-  constexpr bool
-  isnan(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isnan(__x); }
-
-  constexpr bool
-  isnormal(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_isnormal(__x); }
-
-  constexpr bool
-  signbit(__gnu_cxx::__bfloat16_t __x)
-  { return __builtin_signbit(__x); }
-
-  constexpr bool
-  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isgreater(__x, __y); }
-
-  constexpr bool
-  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isgreaterequal(__x, __y); }
-
-  constexpr bool
-  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isless(__x, __y); }
-
-  constexpr bool
-  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_islessequal(__x, __y); }
-
-  constexpr bool
-  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_islessgreater(__x, __y); }
-
-  constexpr bool
-  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __builtin_isunordered(__x, __y); }
-#endif
-
-#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
-#endif /* _GLIBCXX_USE_C99_MATH */
+#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
+#endif /* _GLIBCXX_USE_C99_MATH */
 
 #if __cplusplus >= 201103L
 
@@ -2657,6 +2679,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { return __builtin_trunc(__x); }
 #endif
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float16
   acosh(_Float16 __x)
@@ -2837,6 +2865,191 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return _Float16(__builtin_truncf(__x)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  constexpr __gnu_cxx::__bfloat16_t
+  acosh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  asinh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  atanh(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  cbrt(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  erf(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  erfc(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  exp2(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  expm1(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
+
+  constexpr int
+  ilogb(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  lgamma(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
+
+  constexpr long long
+  llrint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
+
+  constexpr long long
+  llround(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  log1p(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
+
+  // DR 568.
+  constexpr __gnu_cxx::__bfloat16_t
+  log2(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  logb(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
+
+  constexpr long
+  lrint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
+
+  constexpr long
+  lround(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  nearbyint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  {
+    if (std::__is_constant_evaluated())
+      return __builtin_nextafterf16b(__x, __y);
+#ifdef __INT16_TYPE__
+    using __bfloat16_int_type = __INT16_TYPE__;
+#else
+    using __bfloat16_int_type = short int;
+#endif
+    __bfloat16_int_type __hx, __hy, __ix, __iy;
+    __builtin_memcpy(&__hx, &__x, sizeof(__x));
+    __builtin_memcpy(&__hy, &__y, sizeof(__x));
+    __ix = __hx & 0x7fff;	// |x|
+    __iy = __hy & 0x7fff;	// |y|
+    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
+      return __x + __y;
+    if (__x == __y)
+      return __y;		// x == y, return y
+    if (__ix == 0)		// x == 0
+      {
+	__hy = (__hy & 0x8000) | 1;	// return +-__BFLT16_DENORM_MIN__
+	__builtin_memcpy(&__x, &__hy, sizeof(__x));
+	__builtin_nextafterf(0.0f, 1.0f);	// raise underflow
+	return __x;
+      }
+    if (__hx >= 0)		// x > 0
+      {
+	if (__hx > __hy)	// x > y, x -= ulp
+	  --__hx;
+	else			// x < y, x += ulp
+	  ++__hx;
+      }
+    else			// x < 0
+      {
+	if (__hy >= 0 || __hx > __hy)	// x < y, x -= ulp
+	  --__hx;
+	else			// x > y, x += ulp
+	  ++__hx;
+      }
+    __hy = __hx & 0x7f80;
+    if (__hy >= 0x7f80)
+      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());	// overflow
+    else if (__hy < 0x0080)
+      __builtin_nextafterf(__FLT_MIN__, 0.0f);	// underflow
+    __builtin_memcpy(&__x, &__hx, sizeof(__x));
+    return __x;
+  }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
+  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* __pquo)
+  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  rint(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  round(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
+  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
+  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  tgamma(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
+
+  constexpr __gnu_cxx::__bfloat16_t
+  trunc(__gnu_cxx::__bfloat16_t __x)
+  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   constexpr _Float32
   acosh(_Float32 __x)
@@ -3375,186 +3588,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_truncf128(__x); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  constexpr __gnu_cxx::__bfloat16_t
-  acosh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  asinh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  atanh(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  cbrt(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  erf(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  erfc(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  exp2(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  expm1(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
-
-  constexpr int
-  ilogb(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  lgamma(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
-
-  constexpr long long
-  llrint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
-
-  constexpr long long
-  llround(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  log1p(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
-
-  // DR 568.
-  constexpr __gnu_cxx::__bfloat16_t
-  log2(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  logb(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
-
-  constexpr long
-  lrint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
-
-  constexpr long
-  lround(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  nearbyint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  {
-    if (std::__is_constant_evaluated())
-      return __builtin_nextafterf16b(__x, __y);
-#ifdef __INT16_TYPE__
-    using __bfloat16_int_type = __INT16_TYPE__;
-#else
-    using __bfloat16_int_type = short int;
-#endif
-    __bfloat16_int_type __hx, __hy, __ix, __iy;
-    __builtin_memcpy(&__hx, &__x, sizeof(__x));
-    __builtin_memcpy(&__hy, &__y, sizeof(__x));
-    __ix = __hx & 0x7fff;	// |x|
-    __iy = __hy & 0x7fff;	// |y|
-    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
-      return __x + __y;
-    if (__x == __y)
-      return __y;		// x == y, return y
-    if (__ix == 0)		// x == 0
-      {
-	__hy = (__hy & 0x8000) | 1;	// return +-__BFLT16_DENORM_MIN__
-	__builtin_memcpy(&__x, &__hy, sizeof(__x));
-	__builtin_nextafterf(0.0f, 1.0f);	// raise underflow
-	return __x;
-      }
-    if (__hx >= 0)		// x > 0
-      {
-	if (__hx > __hy)	// x > y, x -= ulp
-	  --__hx;
-	else			// x < y, x += ulp
-	  ++__hx;
-      }
-    else			// x < 0
-      {
-	if (__hy >= 0 || __hx > __hy)	// x < y, x -= ulp
-	  --__hx;
-	else			// x > y, x += ulp
-	  ++__hx;
-      }
-    __hy = __hx & 0x7f80;
-    if (__hy >= 0x7f80)
-      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());	// overflow
-    else if (__hy < 0x0080)
-      __builtin_nextafterf(__FLT_MIN__, 0.0f);	// underflow
-    __builtin_memcpy(&__x, &__hx, sizeof(__x));
-    return __x;
-  }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
-  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* __pquo)
-  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  rint(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  round(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
-  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
-  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  tgamma(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
-
-  constexpr __gnu_cxx::__bfloat16_t
-  trunc(__gnu_cxx::__bfloat16_t __x)
-  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
-#endif
-
 
 #endif // _GLIBCXX_USE_C99_MATH_TR1
 #endif // C++11
@@ -3599,12 +3632,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return std::__hypot3<__type>(__x, __y, __z);
     }
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float16
   hypot(_Float16 __x, _Float16 __y, _Float16 __z)
   { return std::__hypot3<_Float16>(__x, __y, __z); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __gnu_cxx::__bfloat16_t
+  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
+  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float32
   hypot(_Float32 __x, _Float32 __y, _Float32 __z)
@@ -3625,12 +3675,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return std::__hypot3<_Float128>(__x, __y, __z); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __gnu_cxx::__bfloat16_t
-  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
-  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
-#endif
-
 #endif // C++17
 
 #if __cplusplus >= 202002L
@@ -3675,12 +3719,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return std::__lerp<__type>(__x, __y, __z);
     }
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float16
   lerp(_Float16 __x, _Float16 __y, _Float16 __z) noexcept
   { return std::__lerp<_Float16>(__x, __y, __z); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __gnu_cxx::__bfloat16_t
+  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z) noexcept
+  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float32
   lerp(_Float32 __x, _Float32 __y, _Float32 __z) noexcept
@@ -3701,12 +3762,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return std::__lerp<_Float128>(__x, __y, __z); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __gnu_cxx::__bfloat16_t
-  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z) noexcept
-  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
-#endif
-
 #endif // C++20
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/ext/type_traits.h b/libstdc++-v3/include/ext/type_traits.h
index 4466c6712c3..823b9710e0c 100644
--- a/libstdc++-v3/include/ext/type_traits.h
+++ b/libstdc++-v3/include/ext/type_traits.h
@@ -190,12 +190,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     struct __promote<float>
     { typedef float __type; };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __promote<_Float16>
     { typedef _Float16 __type; };
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  template<>
+    struct __promote<__gnu_cxx::__bfloat16_t>
+    { typedef __gnu_cxx::__bfloat16_t __type; };
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   template<>
     struct __promote<_Float32>
@@ -214,12 +231,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { typedef _Float128 __type; };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  template<>
-    struct __promote<__gnu_cxx::__bfloat16_t>
-    { typedef __gnu_cxx::__bfloat16_t __type; };
-#endif
-
 #if __cpp_fold_expressions
 
   template<typename... _Tp>
diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/atomic
index 96e87ded864..5e9e9959270 100644
--- a/libstdc++-v3/include/std/atomic
+++ b/libstdc++-v3/include/std/atomic
@@ -1664,6 +1664,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       using __atomic_float<long double>::operator=;
     };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct atomic<_Float16> : __atomic_float<_Float16>
@@ -1681,71 +1687,76 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     };
 #endif
 
-#ifdef __STDCPP_FLOAT32_T__
+#ifdef __STDCPP_BFLOAT16_T__
   template<>
-    struct atomic<_Float32> : __atomic_float<_Float32>
+    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::__bfloat16_t>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
+      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__gnu_cxx::__bfloat16_t>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<_Float32>::operator=;
+      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT64_T__
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
+#ifdef __STDCPP_FLOAT32_T__
   template<>
-    struct atomic<_Float64> : __atomic_float<_Float64>
+    struct atomic<_Float32> : __atomic_float<_Float32>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
+      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<_Float64>::operator=;
+      using __atomic_float<_Float32>::operator=;
     };
 #endif
 
-#ifdef __STDCPP_FLOAT128_T__
+#ifdef __STDCPP_FLOAT64_T__
   template<>
-    struct atomic<_Float128> : __atomic_float<_Float128>
+    struct atomic<_Float64> : __atomic_float<_Float64>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
+      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<_Float128>::operator=;
+      using __atomic_float<_Float64>::operator=;
     };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
+#ifdef __STDCPP_FLOAT128_T__
   template<>
-    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::__bfloat16_t>
+    struct atomic<_Float128> : __atomic_float<_Float128>
     {
       atomic() noexcept = default;
 
       constexpr
-      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__gnu_cxx::__bfloat16_t>(__fp)
+      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
       { }
 
       atomic& operator=(const atomic&) volatile = delete;
       atomic& operator=(const atomic&) = delete;
 
-      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=;
+      using __atomic_float<_Float128>::operator=;
     };
 #endif
 
diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std/charconv
index b34d672f5bd..451fb4cba47 100644
--- a/libstdc++-v3/include/std/charconv
+++ b/libstdc++-v3/include/std/charconv
@@ -689,6 +689,12 @@ namespace __detail
 			  float& __value,
 			  chars_format __fmt = chars_format::general) noexcept;
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
     && defined(__cpp_lib_to_chars)
   inline from_chars_result
@@ -704,6 +710,27 @@ namespace __detail
   }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
+    && defined(__cpp_lib_to_chars)
+  inline from_chars_result
+  from_chars(const char* __first, const char* __last,
+	     __gnu_cxx::__bfloat16_t & __value,
+	     chars_format __fmt = chars_format::general) noexcept
+  {
+    float __val;
+    from_chars_result __res
+      = __from_chars_bfloat16_t(__first, __last, __val, __fmt);
+    if (__res.ec == errc{})
+      __value = __val;
+    return __res;
+  }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline from_chars_result
   from_chars(const char* __first, const char* __last, _Float32& __value,
@@ -763,22 +790,6 @@ namespace __detail
 	     chars_format __fmt = chars_format::general) noexcept;
 #endif
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
-    && defined(__cpp_lib_to_chars)
-  inline from_chars_result
-  from_chars(const char* __first, const char* __last,
-	     __gnu_cxx::__bfloat16_t & __value,
-	     chars_format __fmt = chars_format::general) noexcept
-  {
-    float __val;
-    from_chars_result __res
-      = __from_chars_bfloat16_t(__first, __last, __val, __fmt);
-    if (__res.ec == errc{})
-      __value = __val;
-    return __res;
-  }
-#endif
 #endif
 
 #if defined __cpp_lib_to_chars
@@ -815,6 +826,12 @@ namespace __detail
 					float __value,
 					chars_format __fmt) noexcept;
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline to_chars_result
   to_chars(char* __first, char* __last, _Float16 __value) noexcept
@@ -832,6 +849,29 @@ namespace __detail
   { return to_chars(__first, __last, float(__value), __fmt, __precision); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline to_chars_result
+  to_chars(char* __first, char* __last,
+	   __gnu_cxx::__bfloat16_t __value) noexcept
+  {
+    return __to_chars_bfloat16_t(__first, __last, float(__value),
+				 chars_format{});
+  }
+  inline to_chars_result
+  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
+	   chars_format __fmt) noexcept
+  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt); }
+  inline to_chars_result
+  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
+	   chars_format __fmt, int __precision) noexcept
+  { return to_chars(__first, __last, float(__value), __fmt, __precision); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline to_chars_result
   to_chars(char* __first, char* __last, _Float32 __value) noexcept
@@ -920,24 +960,6 @@ namespace __detail
 			   chars_format __fmt, int __precision) noexcept;
 #endif
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline to_chars_result
-  to_chars(char* __first, char* __last,
-	   __gnu_cxx::__bfloat16_t __value) noexcept
-  {
-    return __to_chars_bfloat16_t(__first, __last, float(__value),
-				 chars_format{});
-  }
-  inline to_chars_result
-  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
-	   chars_format __fmt) noexcept
-  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt); }
-  inline to_chars_result
-  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
-	   chars_format __fmt, int __precision) noexcept
-  { return to_chars(__first, __last, float(__value), __fmt, __precision); }
-#endif
 #endif
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/std/complex b/libstdc++-v3/include/std/complex
index 0f5f14c3ddb..2f47036e472 100644
--- a/libstdc++-v3/include/std/complex
+++ b/libstdc++-v3/include/std/complex
@@ -599,6 +599,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 
 #if _GLIBCXX_USE_C99_COMPLEX
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float16
   __complex_abs(__complex__ _Float16 __z)
@@ -649,6 +656,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return static_cast<__complex__ _Float16>(__builtin_cpowf(__x, __y)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __gnu_cxx::__bfloat16_t
+  __complex_abs(__complex__ decltype(0.0bf16) __z)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
+
+  inline __gnu_cxx::__bfloat16_t
+  __complex_arg(__complex__ decltype(0.0bf16) __z)
+  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_cos(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_cosh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_exp(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_log(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_sin(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_sinh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_tan(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_tanh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_pow(__complex__ decltype(0.0bf16) __x,
+		__complex__ decltype(0.0bf16) __y)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__x,
+								      __y)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline _Float32
   __complex_abs(__complex__ _Float32 __z) { return __builtin_cabsf(__z); }
@@ -802,58 +866,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __complex_pow(__complex__ _Float128 __x, __complex__ _Float128 __y)
   { return __builtin_cpowf128(__x, __y); }
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __gnu_cxx::__bfloat16_t
-  __complex_abs(__complex__ decltype(0.0bf16) __z)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
-
-  inline __gnu_cxx::__bfloat16_t
-  __complex_arg(__complex__ decltype(0.0bf16) __z)
-  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_cos(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_cosh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_exp(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_log(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_sin(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_sinh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_tan(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_tanh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_pow(__complex__ decltype(0.0bf16) __x,
-		__complex__ decltype(0.0bf16) __y)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__x,
-								      __y)); }
-#endif
 #endif
 
   // 26.2.7/3 abs(__z):  Returns the magnitude of __z.
@@ -1804,12 +1816,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     struct __complex_type
     { };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __complex_type<_Float16>
     { typedef __complex__ _Float16 type; };
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  template<>
+    struct __complex_type<__gnu_cxx::__bfloat16_t>
+    { typedef __complex__ decltype(0.0bf16) type; };
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   template<>
     struct __complex_type<_Float32>
@@ -1828,12 +1857,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { typedef __complex__ _Float128 type; };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  template<>
-    struct __complex_type<__gnu_cxx::__bfloat16_t>
-    { typedef __complex__ decltype(0.0bf16) type; };
-#endif
-
   template<typename _Tp>
     requires requires { typename __complex_type<_Tp>::type; }
     class complex<_Tp>
@@ -2022,6 +2045,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
 #if _GLIBCXX_USE_C99_COMPLEX_TR1
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float16
   __complex_acos(__complex__ _Float16 __z)
@@ -2048,6 +2078,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return static_cast<__complex__ _Float16>(__builtin_catanhf(__z)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __complex__ decltype(0.0bf16)
+  __complex_acos(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_asin(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_atan(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_acosh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_asinh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(__z)); }
+
+  inline __complex__ decltype(0.0bf16)
+  __complex_atanh(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(__z)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float32
   __complex_acos(__complex__ _Float32 __z)
@@ -2149,32 +2210,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __complex_atanh(__complex__ _Float128 __z)
   { return __builtin_catanhf128(__z); }
 #endif
-
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __complex__ decltype(0.0bf16)
-  __complex_acos(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_asin(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_atan(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_acosh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_asinh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(__z)); }
-
-  inline __complex__ decltype(0.0bf16)
-  __complex_atanh(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(__z)); }
-#endif
 #endif
 
 #if _GLIBCXX_USE_C99_COMPLEX_TR1
@@ -2493,12 +2528,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_cprojl(__z.__rep()); }
 
 #if __cplusplus > 202002L
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float16
   __complex_proj(__complex__ _Float16 __z)
   { return static_cast<__complex__ _Float16>(__builtin_cprojf(__z)); }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+  inline __complex__ decltype(0.0bf16)
+  __complex_proj(__complex__ decltype(0.0bf16) __z)
+  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(__z)); }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
   inline __complex__ _Float32
   __complex_proj(__complex__ _Float32 __z)
@@ -2521,12 +2574,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return __builtin_cprojf128(__z); }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-  inline __complex__ decltype(0.0bf16)
-  __complex_proj(__complex__ decltype(0.0bf16) __z)
-  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(__z)); }
-#endif
-
   template<typename _Tp>
     requires requires { typename __complex_type<_Tp>::type; }
     inline complex<_Tp>
diff --git a/libstdc++-v3/include/std/istream b/libstdc++-v3/include/std/istream
index 25d36973f4b..27893a505dd 100644
--- a/libstdc++-v3/include/std/istream
+++ b/libstdc++-v3/include/std/istream
@@ -225,6 +225,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { return _M_extract(__f); }
       ///@}
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
       __attribute__((__always_inline__))
       __istream_type&
@@ -251,6 +256,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
+      __attribute__((__always_inline__))
+      __istream_type&
+      operator>>(__gnu_cxx::__bfloat16_t & __f)
+      {
+	float __flt;
+	__istream_type& __ret = _M_extract(__flt);
+	ios_base::iostate __err = ios_base::goodbit;
+	if (__flt < -__BFLT16_MAX__)
+	  {
+	    __f = -__BFLT16_MAX__;
+	    __err = ios_base::failbit;
+	  }
+	else if (__flt > __BFLT16_MAX__)
+	  {
+	    __f = __BFLT16_MAX__;
+	    __err = ios_base::failbit;
+	  }
+	else
+	  __f = static_cast<__gnu_cxx::__bfloat16_t>(__flt);
+	if (__err)
+	  this->setstate(__err);
+	return __ret;
+      }
+#endif
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
       __attribute__((__always_inline__))
       __istream_type&
@@ -287,32 +322,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
-      __attribute__((__always_inline__))
-      __istream_type&
-      operator>>(__gnu_cxx::__bfloat16_t & __f)
-      {
-	float __flt;
-	__istream_type& __ret = _M_extract(__flt);
-	ios_base::iostate __err = ios_base::goodbit;
-	if (__flt < -__BFLT16_MAX__)
-	  {
-	    __f = -__BFLT16_MAX__;
-	    __err = ios_base::failbit;
-	  }
-	else if (__flt > __BFLT16_MAX__)
-	  {
-	    __f = __BFLT16_MAX__;
-	    __err = ios_base::failbit;
-	  }
-	else
-	  __f = static_cast<__gnu_cxx::__bfloat16_t>(__flt);
-	if (__err)
-	  this->setstate(__err);
-	return __ret;
-      }
-#endif
-
       /**
        *  @brief  Basic arithmetic extractors
        *  @param  __p A variable of pointer type.
diff --git a/libstdc++-v3/include/std/limits b/libstdc++-v3/include/std/limits
index 8bafd6fb972..e715cec7dd9 100644
--- a/libstdc++-v3/include/std/limits
+++ b/libstdc++-v3/include/std/limits
@@ -1980,21 +1980,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	= round_to_nearest; 						\
     }; 									\
 
+
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
 __glibcxx_float_n(16)
 #endif
-#ifdef __STDCPP_FLOAT32_T__
-__glibcxx_float_n(32)
-#endif
-#ifdef __STDCPP_FLOAT64_T__
-__glibcxx_float_n(64)
-#endif
-#ifdef __STDCPP_FLOAT128_T__
-__glibcxx_float_n(128)
-#endif
-#undef __glibcxx_float_n
-#undef __glibcxx_concat3
-#undef __glibcxx_concat3_
 
 #ifdef __STDCPP_BFLOAT16_T__
   __extension__
@@ -2071,6 +2066,24 @@ __glibcxx_float_n(128)
     };
 #endif
 
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
+#ifdef __STDCPP_FLOAT32_T__
+__glibcxx_float_n(32)
+#endif
+#ifdef __STDCPP_FLOAT64_T__
+__glibcxx_float_n(64)
+#endif
+#ifdef __STDCPP_FLOAT128_T__
+__glibcxx_float_n(128)
+#endif
+#undef __glibcxx_float_n
+#undef __glibcxx_concat3
+#undef __glibcxx_concat3_
+
 #endif
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/std/numbers b/libstdc++-v3/include/std/numbers
index d9d202f5392..39de869dd0e 100644
--- a/libstdc++-v3/include/std/numbers
+++ b/libstdc++-v3/include/std/numbers
@@ -199,10 +199,21 @@ namespace numbers
     inline constexpr TYPE phi_v<TYPE>			\
       = 1.618033988749894848204586834365638118##SUFFIX
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
 __glibcxx_numbers (_Float16, F16);
 #endif
 
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
 __glibcxx_numbers (_Float32, F32);
 #endif
diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/ostream
index 4711b8a3d96..6365fe7649b 100644
--- a/libstdc++-v3/include/std/ostream
+++ b/libstdc++-v3/include/std/ostream
@@ -235,6 +235,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { return _M_insert(__f); }
       ///@}
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
       __attribute__((__always_inline__))
       __ostream_type&
@@ -244,6 +250,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
+#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
+      __attribute__((__always_inline__))
+      __ostream_type&
+      operator<<(__gnu_cxx::__bfloat16_t __f)
+      {
+	return _M_insert(static_cast<double>(__f));
+      }
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
       __attribute__((__always_inline__))
       __ostream_type&
@@ -271,15 +291,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 #endif
 
-#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
-      __attribute__((__always_inline__))
-      __ostream_type&
-      operator<<(__gnu_cxx::__bfloat16_t __f)
-      {
-	return _M_insert(static_cast<double>(__f));
-      }
-#endif
-
       /**
        *  @brief  Pointer arithmetic inserters
        *  @param  __p A variable of pointer type.
diff --git a/libstdc++-v3/include/std/stdfloat b/libstdc++-v3/include/std/stdfloat
index c39dbb64904..3ea582e1f5d 100644
--- a/libstdc++-v3/include/std/stdfloat
+++ b/libstdc++-v3/include/std/stdfloat
@@ -36,10 +36,25 @@ namespace std
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
   #ifdef __STDCPP_FLOAT16_T__
   using float16_t = _Float16;
   #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  using bfloat16_t = __gnu_cxx::__bfloat16_t;
+  #endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
   #ifdef __STDCPP_FLOAT32_T__
   using float32_t = _Float32;
   #endif
@@ -52,10 +67,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   using float128_t = _Float128;
   #endif
 
-  #ifdef __STDCPP_BFLOAT16_T__
-  using bfloat16_t = __gnu_cxx::__bfloat16_t;
-  #endif
-
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
 #endif // C++23
diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/std/type_traits
index 2bd607a8b8f..549d6485708 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -459,12 +459,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     struct __is_floating_point_helper<long double>
     : public true_type { };
 
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_STDCPP_SSE2__
+#endif
+
 #ifdef __STDCPP_FLOAT16_T__
   template<>
     struct __is_floating_point_helper<_Float16>
     : public true_type { };
 #endif
 
+#ifdef __STDCPP_BFLOAT16_T__
+  template<>
+    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
+    : public true_type { };
+#endif
+
+#ifdef __DISABLE_STDCPP_SSE2__
+#undef __DISABLE_STDCPP_SSE2__
+#pragma GCC pop_options
+#endif
+
 #ifdef __STDCPP_FLOAT32_T__
   template<>
     struct __is_floating_point_helper<_Float32>
@@ -483,12 +500,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     : public true_type { };
 #endif
 
-#ifdef __STDCPP_BFLOAT16_T__
-  template<>
-    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
-    : public true_type { };
-#endif
-
 #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
   template<>
     struct __is_floating_point_helper<__float128>
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-04-21 13:53   ` [PATCH 1/2] " liuhongt
  2023-04-21 13:53     ` [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for target attribute/pragmas liuhongt
@ 2023-05-15  1:20     ` Hongtao Liu
  2023-07-17  8:35       ` Hongtao Liu
  1 sibling, 1 reply; 12+ messages in thread
From: Hongtao Liu @ 2023-05-15  1:20 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, hjl.tools, jakub

ping.

On Fri, Apr 21, 2023 at 9:55 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> > > +  if (!TARGET_SSE2)
> > > +    {
> > > +      if (c_dialect_cxx ()
> > > +       && cxx_dialect > cxx20)
> >
> > Formatting, both conditions are short, so just put them on one line.
> Changed.
>
> > But for the C++23 macros, more importantly I think we really should
> > also in ix86_target_macros_internal add
> >   if (c_dialect_cxx ()
> >       && cxx_dialect > cxx20
> >       && (isa_flag & OPTION_MASK_ISA_SSE2))
> >     {
> >       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> >       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> >     }
> > plus associated libstdc++ changes.  It can be done incrementally though.
> Added in PATCH 2/2
>
> > > +      if (flag_building_libgcc)
> > > +     {
> > > +       /* libbid uses __LIBGCC_HAS_HF_MODE__ and __LIBGCC_HAS_BF_MODE__
> > > +          to check backend support of _Float16 and __bf16 type.  */
> >
> > That is actually the case only for HFmode, but not for BFmode right now.
> > So, we need further work.  One is to add the BFmode support in there,
> > and another one is make sure the _Float16 <-> _Decimal* and __bf16 <->
> > _Decimal* conversions are compiled in also if not -msse2 by default.
> > One way to do that is wrap the HF and BF mode related functions on x86
> > #ifndef __SSE2__ into the pragmas like intrin headers use (but then
> > perhaps we don't need to undef this stuff here), another is not provide
> > the hf/bf support in that case from the TUs where they are provided now,
> > but from a different one which would be compiled with -msse2.
> Added CFLAGS-_hf_to_sd.c += -msse2, and similarly for the other files in
> libbid, just as we did before for the HFtype softfp files.  With that there
> is no need to undef the libgcc macros.
>
> > >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> > >       some less clueful developer tries to use floating-point anyway.  */
> > > -  if (needed_sseregs && !TARGET_SSE)
> > > +  if (needed_sseregs
> > > +      && (!TARGET_SSE
> > > +       || (VALID_SSE2_TYPE_MODE (mode)
> > > +           && !TARGET_SSE2)))
> >
> > Formatting, no need to split this up that much.
> >   if (needed_sseregs
> >       && (!TARGET_SSE
> >           || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > or even better
> >   if (needed_sseregs
> >       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > will do it.
> Changed.
>
> > Instead of this, just use
> >       if (!float16_type_node)
> >         {
> >           float16_type_node = ix86_float16_type_node;
> >           callback (float16_type_node);
> >           float16_type_node = NULL_TREE;
> >         }
> >       if (!bfloat16_type_node)
> >         {
> >           bfloat16_type_node = ix86_bf16_type_node;
> >           callback (bfloat16_type_node);
> >           bfloat16_type_node = NULL_TREE;
> >         }
> Changed.
>
>
> > > +static const char *
> > > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > > +{
> > > +  if (element_mode (fromtype) != element_mode (totype))
> > > +    {
> > > +      /* Do no allow conversions to/from BFmode/HFmode scalar types
> > > +      when TARGET_SSE2 is not available.  */
> > > +      if ((TYPE_MODE (fromtype) == BFmode
> > > +        || TYPE_MODE (fromtype) == HFmode)
> > > +       && !TARGET_SSE2)
> >
> > First of all, not really sure if this should be purely about scalar
> > modes, not also complex and vector modes involving those inner modes.
> > Because complex or vector modes with BF/HF elements will be without
> > TARGET_SSE2 for sure lowered into scalar code and that can't be handled
> > either.
> > So if (!TARGET_SSE2 && GET_MODE_INNER (TYPE_MODE (fromtype)) == BFmode)
> > or even better
> > if (!TARGET_SSE2 && element_mode (fromtype) == BFmode)
> > ?
> > Or even better remember the 2 modes above into machine_mode temporaries
> > and just use those in the != comparison and for the checks?
> >
> > Also, I think it is weird to tell user %<__bf16%> or %<_Float16%> when
> > we know which one it is.  Just return separate messages?
> Changed.
>
> > > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > > +     when TARGET_SSE2 is not available.  */
> > > +  if ((element_mode (type) == BFmode || element_mode (type) == HFmode)
> > > +      && !TARGET_SSE2 && op != ADDR_EXPR)
> > > +    return N_("operation not permitted on type %<__bf16%> "
> > > +           "or %<_Float16%> without option %<-msse2%>");
> >
> > Similarly.  Also, check !TARGET_SSE2 first as inexpensive one.
> Changed.
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Successfully cross-build i686-linux-gnu.
> Ok for trunk?
>
> Enable _Float16 and __bf16 all the time but issue errors when the
> types are used in conversion, unary operation, binary operation,
> parameter passing or value return when TARGET_SSE2 is not available.
>
> Also undef macros which are used by libstdc++ to check the
> backend support of the _Float16/__bf16 types when TARGET_SSE2 is not
> available.
>
> gcc/ChangeLog:
>
>         PR target/109504
>         * config/i386/i386-builtins.cc
>         (ix86_register_float16_builtin_type): Remove TARGET_SSE2.
>         (ix86_register_bf16_builtin_type): Ditto.
>         * config/i386/i386-c.cc (ix86_target_macros): When TARGET_SSE2
>         isn't available, undef the macros which are used to check the
>         backend support of the _Float16/__bf16 types when building
>         libstdc++.
>         * config/i386/i386.cc (construct_container): Issue errors for
>         HFmode/BFmode when TARGET_SSE2 is not available.
>         (function_value_32): Ditto.
>         (ix86_scalar_mode_supported_p): Remove TARGET_SSE2 for HFmode/BFmode.
>         (ix86_libgcc_floating_mode_supported_p): Ditto.
>         (ix86_emit_support_tinfos): Adjust codes.
>         (ix86_invalid_conversion): New function.
>         (ix86_invalid_unary_op): Ditto.
>         (ix86_invalid_binary_op): Ditto.
>         (TARGET_INVALID_CONVERSION): Define.
>         (TARGET_INVALID_UNARY_OP): Define.
>         (TARGET_INVALID_BINARY_OP): Define.
>         * config/i386/immintrin.h [__SSE2__]: Remove for fp16/bf16
>         related intrinsics header files.
>         * config/i386/i386.h (VALID_SSE2_TYPE_MODE): New macro.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr109504.c: New test.
>         * gcc.target/i386/sse2-bfloat16-1.c: Adjust error info.
>         * gcc.target/i386/sse2-float16-1.c: Ditto.
>         * gcc.target/i386/sse2-float16-4.c: New test.
>         * gcc.target/i386/sse2-float16-5.c: New test.
>         * g++.target/i386/float16-1.C: Adjust error info.
>
> libgcc/ChangeLog:
>
>         * config/i386/t-softfp: Add -msse2 to libbid HFtype related
>         files.
> ---
>  gcc/config/i386/i386-builtins.cc              |   4 +-
>  gcc/config/i386/i386-c.cc                     |  15 ++
>  gcc/config/i386/i386.cc                       | 130 ++++++++++++++++--
>  gcc/config/i386/i386.h                        |   4 +
>  gcc/config/i386/immintrin.h                   |   4 -
>  gcc/testsuite/g++.target/i386/float16-1.C     |   8 +-
>  gcc/testsuite/gcc.target/i386/pr109504.c      |   6 +
>  .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +-
>  .../gcc.target/i386/sse2-float16-1.c          |   8 +-
>  .../gcc.target/i386/sse2-float16-4.c          |  25 ++++
>  .../gcc.target/i386/sse2-float16-5.c          |  24 ++++
>  libgcc/config/i386/t-softfp                   |   7 +
>  12 files changed, 215 insertions(+), 28 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109504.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-5.c
>
> diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> index fc0c82b156e..1cdabfd3a0a 100644
> --- a/gcc/config/i386/i386-builtins.cc
> +++ b/gcc/config/i386/i386-builtins.cc
> @@ -1367,7 +1367,7 @@ ix86_register_float16_builtin_type (void)
>    else
>      ix86_float16_type_node = float16_type_node;
>
> -  if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
> +  if (!maybe_get_identifier ("_Float16"))
>      lang_hooks.types.register_builtin_type (ix86_float16_type_node,
>                                             "_Float16");
>  }
> @@ -1385,7 +1385,7 @@ ix86_register_bf16_builtin_type (void)
>    else
>      ix86_bf16_type_node = bfloat16_type_node;
>
> -  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
> +  if (!maybe_get_identifier ("__bf16"))
>      lang_hooks.types.register_builtin_type (ix86_bf16_type_node, "__bf16");
>  }
>
> diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> index e7bd7cc706c..2f83c9981e1 100644
> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -817,6 +817,21 @@ ix86_target_macros (void)
>    if (!TARGET_80387)
>      cpp_define (parse_in, "_SOFT_FLOAT");
>
> +  /* HFmode/BFmode are supported without depending on any ISA
> +     in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
> +     but according to psABI, they're really supported w/ SSE2 and above.
> +     Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
> +     for backend support of the types, undef the macros to avoid
> +     build failure, see PR109504.  */
> +  if (!TARGET_SSE2)
> +    {
> +      if (c_dialect_cxx () && cxx_dialect > cxx20)
> +       {
> +         cpp_undef (parse_in, "__STDCPP_FLOAT16_T__");
> +         cpp_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> +       }
> +    }
> +
>    if (TARGET_LONG_DOUBLE_64)
>      cpp_define (parse_in, "__LONG_DOUBLE_64__");
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index fbd33a6bfd1..633a0f41e60 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -2651,7 +2651,8 @@ construct_container (machine_mode mode, machine_mode orig_mode,
>
>    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
>       some less clueful developer tries to use floating-point anyway.  */
> -  if (needed_sseregs && !TARGET_SSE)
> +  if (needed_sseregs
> +      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
>      {
>        /* Return early if we shouldn't raise an error for invalid
>          calls.  */
> @@ -2661,13 +2662,19 @@ construct_container (machine_mode mode, machine_mode orig_mode,
>         {
>           if (!issued_sse_ret_error)
>             {
> -             error ("SSE register return with SSE disabled");
> +             if (VALID_SSE2_TYPE_MODE (mode))
> +               error ("SSE register return with SSE2 disabled");
> +             else
> +               error ("SSE register return with SSE disabled");
>               issued_sse_ret_error = true;
>             }
>         }
>        else if (!issued_sse_arg_error)
>         {
> -         error ("SSE register argument with SSE disabled");
> +         if (VALID_SSE2_TYPE_MODE (mode))
> +           error ("SSE register argument with SSE2 disabled");
> +         else
> +           error ("SSE register argument with SSE disabled");
>           issued_sse_arg_error = true;
>         }
>        return NULL;
> @@ -4022,13 +4029,26 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
>
>    /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */
>    if (mode == HFmode || mode == BFmode)
> -    regno = FIRST_SSE_REG;
> +    {
> +      if (!TARGET_SSE2)
> +       {
> +         error ("SSE register return with SSE2 disabled");
> +         regno = AX_REG;
> +       }
> +      else
> +       regno = FIRST_SSE_REG;
> +    }
> +
>    if (mode == HCmode)
>      {
> +      if (!TARGET_SSE2)
> +       error ("SSE register return with SSE2 disabled");
> +
>        rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
>        XVECEXP (ret, 0, 0)
>         = gen_rtx_EXPR_LIST (VOIDmode,
> -                            gen_rtx_REG (SImode, FIRST_SSE_REG),
> +                            gen_rtx_REG (SImode,
> +                                         TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
>                              GEN_INT (0));
>        return ret;
>      }
> @@ -22459,7 +22479,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
>      return default_decimal_float_supported_p ();
>    else if (mode == TFmode)
>      return true;
> -  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> +  else if (mode == HFmode || mode == BFmode)
>      return true;
>    else
>      return default_scalar_mode_supported_p (mode);
> @@ -22475,7 +22495,7 @@ ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
>       be defined by the C front-end for AVX512FP16 intrinsics.  We will
>       issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
>       enabled.  */
> -  return (((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> +  return ((mode == HFmode || mode == BFmode)
>           ? true
>           : default_libgcc_floating_mode_supported_p (mode));
>  }
> @@ -22805,9 +22825,10 @@ ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
>
>    if (!TARGET_SSE2)
>      {
> -      gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
> -      float16_type_node = ix86_float16_type_node;
> -      bfloat16_type_node = ix86_bf16_type_node;
> +      if (!float16_type_node)
> +       float16_type_node = ix86_float16_type_node;
> +      if (!bfloat16_type_node)
> +       bfloat16_type_node = ix86_bf16_type_node;
>        callback (float16_type_node);
>        callback (bfloat16_type_node);
>        float16_type_node = NULL_TREE;
> @@ -24259,6 +24280,86 @@ ix86_init_libfuncs (void)
>  #endif
>  }
>
> +/* Return the diagnostic message string if conversion from FROMTYPE to
> +   TOTYPE is not allowed, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> +{
> +  machine_mode from_mode = element_mode (fromtype);
> +  machine_mode to_mode = element_mode (totype);
> +
> +  if (!TARGET_SSE2 && from_mode != to_mode)
> +    {
> +      /* Do not allow conversions to/from BFmode/HFmode scalar types
> +        when TARGET_SSE2 is not available.  */
> +      if (from_mode == BFmode)
> +       return N_("invalid conversion from type %<__bf16%> "
> +                 "without option %<-msse2%>");
> +      if (from_mode == HFmode)
> +       return N_("invalid conversion from type %<_Float16%> "
> +                 "without option %<-msse2%>");
> +      if (to_mode == BFmode)
> +       return N_("invalid conversion to type %<__bf16%> "
> +                 "without option %<-msse2%>");
> +      if (to_mode == HFmode)
> +       return N_("invalid conversion to type %<_Float16%> "
> +                 "without option %<-msse2%>");
> +    }
> +
> +  /* Conversion allowed.  */
> +  return NULL;
> +}
> +
> +/* Return the diagnostic message string if the unary operation OP is
> +   not permitted on TYPE, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_unary_op (int op, const_tree type)
> +{
> +  machine_mode mmode = element_mode (type);
> +  /* Reject all single-operand operations on BFmode/HFmode except for &
> +     when TARGET_SSE2 is not available.  */
> +  if (!TARGET_SSE2 && op != ADDR_EXPR)
> +    {
> +      if (mmode == BFmode)
> +       return N_("operation not permitted on type %<__bf16%> "
> +                 "without option %<-msse2%>");
> +      if (mmode == HFmode)
> +       return N_("operation not permitted on type %<_Float16%> "
> +                 "without option %<-msse2%>");
> +    }
> +
> +  /* Operation allowed.  */
> +  return NULL;
> +}
> +
> +/* Return the diagnostic message string if the binary operation OP is
> +   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
> +
> +static const char *
> +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
> +                       const_tree type2)
> +{
> +  machine_mode type1_mode = element_mode (type1);
> +  machine_mode type2_mode = element_mode (type2);
> +  /* Reject all 2-operand operations on BFmode or HFmode
> +     when TARGET_SSE2 is not available.  */
> +  if (!TARGET_SSE2)
> +    {
> +      if (type1_mode == BFmode || type2_mode == BFmode)
> +       return N_("operation not permitted on type %<__bf16%> "
> +                 "without option %<-msse2%>");
> +
> +      if (type1_mode == HFmode || type2_mode == HFmode)
> +       return N_("operation not permitted on type %<_Float16%> "
> +                 "without option %<-msse2%>");
> +    }
> +
> +  /* Operation allowed.  */
> +  return NULL;
> +}
> +
>  /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
>     FPU, assume that the fpcw is set to extended precision; when using
>     only SSE, rounding is correct; when using both SSE and the FPU,
> @@ -25248,6 +25349,15 @@ ix86_libgcc_floating_mode_supported_p
>  #undef TARGET_MEMTAG_TAG_SIZE
>  #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
>
> +#undef TARGET_INVALID_CONVERSION
> +#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
> +
> +#undef TARGET_INVALID_UNARY_OP
> +#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
> +
> +#undef TARGET_INVALID_BINARY_OP
> +#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
> +
>  static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
>  {
>  #ifdef OPTION_GLIBC
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 1da6dce8e0b..7e839bc5c7e 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -1046,6 +1046,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
>  #define VALID_AVX512FP16_REG_MODE(MODE)                                        \
>    ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
>
> +#define VALID_SSE2_TYPE_MODE(MODE)             \
> +  ((MODE) == HFmode || (MODE) == BFmode                \
> +   || (MODE) == HCmode || (MODE) == BCmode)
> +
>  #define VALID_SSE2_REG_MODE(MODE)                                      \
>    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
>     || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode   \
> diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> index b220d871942..cc78df56940 100644
> --- a/gcc/config/i386/immintrin.h
> +++ b/gcc/config/i386/immintrin.h
> @@ -98,11 +98,9 @@
>
>  #include <avx512vp2intersectvlintrin.h>
>
> -#ifdef __SSE2__
>  #include <avx512fp16intrin.h>
>
>  #include <avx512fp16vlintrin.h>
> -#endif
>
>  #include <shaintrin.h>
>
> @@ -118,13 +116,11 @@
>
>  #include <vpclmulqdqintrin.h>
>
> -#ifdef __SSE2__
>  #include <avx512bf16vlintrin.h>
>
>  #include <avx512bf16intrin.h>
>
>  #include <avxneconvertintrin.h>
> -#endif
>
>  #include <amxtileintrin.h>
>
> diff --git a/gcc/testsuite/g++.target/i386/float16-1.C b/gcc/testsuite/g++.target/i386/float16-1.C
> index f96b932b698..938852ee9ad 100644
> --- a/gcc/testsuite/g++.target/i386/float16-1.C
> +++ b/gcc/testsuite/g++.target/i386/float16-1.C
> @@ -1,8 +1,8 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mno-sse2" } */
>
> -_Float16       /* { dg-error "expected unqualified-id before '_Float16'" } */
> -foo (_Float16 x)
> +_Float16
> +foo (_Float16 x)/* { dg-error "SSE register return with SSE2 disabled" } */
>  {
> -  return x;
> -}              /* { dg-error "'_Float16' is not supported on this target" } */
> +  return x;/* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr109504.c b/gcc/testsuite/gcc.target/i386/pr109504.c
> new file mode 100644
> index 00000000000..fe5bcda10ad
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109504.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-sse" } */
> +
> +#pragma GCC target("sse4.1")
> +#include <immintrin.h>
> +int main(){return 0;}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> index 612d55be826..717055bc9ad 100644
> --- a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> @@ -1,8 +1,8 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mno-sse2" } */
>
> -__bf16/* { dg-error "unknown type name '__bf16'" } */
> -foo (__bf16 x) /* { dg-error "unknown type name '__bf16'" } */
> -{
> -  return x;
> +__bf16
> +foo (__bf16 x)  /* { dg-error "SSE register return with SSE2 disabled" } */
> +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
>  }
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> index 1b645eb499d..faf818df75f 100644
> --- a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> @@ -1,8 +1,8 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mno-sse2" } */
>
> -_Float16/* { dg-error "is not supported on this target" } */
> -foo (_Float16 x) /* { dg-error "is not supported on this target" } */
> -{
> -  return x;
> +_Float16
> +foo (_Float16 x) /* { dg-error "SSE register return with SSE2 disabled" } */
> +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
>  }
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-4.c b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> new file mode 100644
> index 00000000000..64baf92ff56
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-sse2" } */
> +
> +_Float16 a;
> +__bf16 c;
> +_Complex _Float16 ac;
> +
> +void
> +foo (_Float16* p)
> +{
> +  a = *p;
> +}
> +
> +void
> +foo1 (__bf16 *p)
> +{
> +  c = *p;
> +}
> +
> +
> +void
> +foo2 (_Complex _Float16* p)
> +{
> +  ac = *p;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-5.c b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> new file mode 100644
> index 00000000000..c3ed23b8ab3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile { target ia32} } */
> +/* { dg-options "-O2 -mno-sse2" } */
> +
> +_Float16 a;
> +__bf16 c;
> +_Complex ac;
> +void
> +foo (_Float16 p)
> +{
> +  a = p;
> +}
> +
> +void
> +foo1 (__bf16 p)
> +{
> +  c = p;
> +}
> +
> +
> +void
> +foo2 (_Complex p)
> +{
> +  ac = p;
> +}
> diff --git a/libgcc/config/i386/t-softfp b/libgcc/config/i386/t-softfp
> index 69d0f819822..80d1fac121b 100644
> --- a/libgcc/config/i386/t-softfp
> +++ b/libgcc/config/i386/t-softfp
> @@ -31,3 +31,10 @@ CFLAGS-trunchfbf2.c += -msse2
>  CFLAGS-eqhf2.c += -msse2
>  CFLAGS-_divhc3.c += -msse2
>  CFLAGS-_mulhc3.c += -msse2
> +
> +CFLAGS-_hf_to_sd.c += -msse2
> +CFLAGS-_hf_to_dd.c += -msse2
> +CFLAGS-_hf_to_td.c += -msse2
> +CFLAGS-_sd_to_hf.c += -msse2
> +CFLAGS-_dd_to_hf.c += -msse2
> +CFLAGS-_td_to_hf.c += -msse2
> --
> 2.39.1.388.g2fc9e9ca3c
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for target attribute/pragmas.
  2023-04-21 13:53     ` [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for target attribute/pragmas liuhongt
@ 2023-05-15  1:21       ` Hongtao Liu
  0 siblings, 0 replies; 12+ messages in thread
From: Hongtao Liu @ 2023-05-15  1:21 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, hjl.tools

ping

On Fri, Apr 21, 2023 at 9:55 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> > But for the C++23 macros, more importantly I think we really should
> > also in ix86_target_macros_internal add
> >   if (c_dialect_cxx ()
> >       && cxx_dialect > cxx20
> >       && (isa_flag & OPTION_MASK_ISA_SSE2))
> >     {
> >       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> >       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> >     }
> > plus associated libstdc++ changes.  It can be done incrementally though.
> Changed, except for one place in libsupc++/compare: it's inside a function,
> where the pragma can't be added. Not sure if this inconsistency will cause
> any issue.
>
> #ifdef __STDCPP_BFLOAT16_T__
>           if constexpr (__is_same(_Tp, decltype(0.0bf16)))
>             return _Bfloat16;
> #endif
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Successfully cross-build i686-linux-gnu.
> Ok for trunk?
>
> def_or_undef the target macros based on the ISA that is currently active
> under target pragmas, and do the same for __STDCPP_FLOAT16_T__ and
> __STDCPP_BFLOAT16_T__ for C++. Also change libstdc++ so that, on x86, the
> std::float16_t/std::bfloat16_t code is wrapped in #pragma GCC target("sse2")
> when __SSE2__ is not defined, similarly to the x86 intrinsic headers.
>
> gcc/ChangeLog:
>
>         PR target/109504
>         * config/i386/i386-c.cc (ix86_target_macros_internal):
>         def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__.
>
> libstdc++-v3/ChangeLog:
>
>         * include/bits/c++config: Add #pragma GCC target("sse2") for
>         _Float16 and bfloat16_t when __SSE2__ is not available.
>         * include/bits/cpp_type_traits.h: Ditto.
>         * include/bits/std_abs.h: Ditto.
>         * include/c_global/cmath: Ditto.
>         * include/ext/type_traits.h: Ditto.
>         * include/std/atomic: Ditto.
>         * include/std/charconv: Ditto.
>         * include/std/complex: Ditto.
>         * include/std/istream: Ditto.
>         * include/std/limits: Ditto.
>         * include/std/numbers: Ditto.
>         * include/std/ostream: Ditto.
>         * include/std/stdfloat: Ditto.
>         * include/std/type_traits: Ditto.
> ---
>  gcc/config/i386/i386-c.cc                   |   9 +-
>  libstdc++-v3/include/bits/c++config         |  11 +
>  libstdc++-v3/include/bits/cpp_type_traits.h |  27 +-
>  libstdc++-v3/include/bits/std_abs.h         |  23 +-
>  libstdc++-v3/include/c_global/cmath         | 733 +++++++++++---------
>  libstdc++-v3/include/ext/type_traits.h      |  23 +-
>  libstdc++-v3/include/std/atomic             |  43 +-
>  libstdc++-v3/include/std/charconv           |  90 ++-
>  libstdc++-v3/include/std/complex            | 227 +++---
>  libstdc++-v3/include/std/istream            |  61 +-
>  libstdc++-v3/include/std/limits             |  37 +-
>  libstdc++-v3/include/std/numbers            |  11 +
>  libstdc++-v3/include/std/ostream            |  29 +-
>  libstdc++-v3/include/std/stdfloat           |  19 +-
>  libstdc++-v3/include/std/type_traits        |  23 +-
>  15 files changed, 809 insertions(+), 557 deletions(-)
>
> diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> index 2f83c9981e1..bcc17263e28 100644
> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -492,7 +492,14 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
>    if (isa_flag & OPTION_MASK_ISA_SSE)
>      def_or_undef (parse_in, "__SSE__");
>    if (isa_flag & OPTION_MASK_ISA_SSE2)
> -    def_or_undef (parse_in, "__SSE2__");
> +    {
> +      def_or_undef (parse_in, "__SSE2__");
> +      if (c_dialect_cxx () && cxx_dialect > cxx20)
> +       {
> +         def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> +         def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> +       }
> +    }
>    if (isa_flag & OPTION_MASK_ISA_SSE3)
>      def_or_undef (parse_in, "__SSE3__");
>    if (isa_flag & OPTION_MASK_ISA_SSSE3)
> diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config
> index 13892787e09..c858497fc6e 100644
> --- a/libstdc++-v3/include/bits/c++config
> +++ b/libstdc++-v3/include/bits/c++config
> @@ -820,6 +820,12 @@ namespace std
>  # define _GLIBCXX_LDOUBLE_IS_IEEE_BINARY128 1
>  #endif
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_BFLOAT16_T__
>  namespace __gnu_cxx
>  {
> @@ -827,6 +833,11 @@ namespace __gnu_cxx
>  }
>  #endif
>
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __has_builtin
>  # ifdef __is_identifier
>  // Intel and older Clang require !__is_identifier for some built-ins:
> diff --git a/libstdc++-v3/include/bits/cpp_type_traits.h b/libstdc++-v3/include/bits/cpp_type_traits.h
> index 4312f32a4e0..cadd5ca4fde 100644
> --- a/libstdc++-v3/include/bits/cpp_type_traits.h
> +++ b/libstdc++-v3/include/bits/cpp_type_traits.h
> @@ -315,6 +315,12 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
>        typedef __true_type __type;
>      };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __is_floating<_Float16>
> @@ -324,36 +330,41 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT32_T__
> +#ifdef __STDCPP_BFLOAT16_T__
>    template<>
> -    struct __is_floating<_Float32>
> +    struct __is_floating<__gnu_cxx::__bfloat16_t>
>      {
>        enum { __value = 1 };
>        typedef __true_type __type;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT64_T__
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
> +#ifdef __STDCPP_FLOAT32_T__
>    template<>
> -    struct __is_floating<_Float64>
> +    struct __is_floating<_Float32>
>      {
>        enum { __value = 1 };
>        typedef __true_type __type;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT128_T__
> +#ifdef __STDCPP_FLOAT64_T__
>    template<>
> -    struct __is_floating<_Float128>
> +    struct __is_floating<_Float64>
>      {
>        enum { __value = 1 };
>        typedef __true_type __type;
>      };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> +#ifdef __STDCPP_FLOAT128_T__
>    template<>
> -    struct __is_floating<__gnu_cxx::__bfloat16_t>
> +    struct __is_floating<_Float128>
>      {
>        enum { __value = 1 };
>        typedef __true_type __type;
> diff --git a/libstdc++-v3/include/bits/std_abs.h b/libstdc++-v3/include/bits/std_abs.h
> index 1bb7ffbc2da..0423909e8c8 100644
> --- a/libstdc++-v3/include/bits/std_abs.h
> +++ b/libstdc++-v3/include/bits/std_abs.h
> @@ -97,12 +97,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    abs(__GLIBCXX_TYPE_INT_N_3 __x) { return __x >= 0 ? __x : -__x; }
>  #endif
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    constexpr _Float16
>    abs(_Float16 __x)
>    { return _Float16(__builtin_fabsf(__x)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  constexpr __gnu_cxx::__bfloat16_t
> +  abs(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    constexpr _Float32
>    abs(_Float32 __x)
> @@ -125,12 +142,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_fabsf128(__x); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  constexpr __gnu_cxx::__bfloat16_t
> -  abs(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> -#endif
> -
>  #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
>    __extension__ inline _GLIBCXX_CONSTEXPR
>    __float128
> diff --git a/libstdc++-v3/include/c_global/cmath b/libstdc++-v3/include/c_global/cmath
> index 568eb354c2d..6bf3a5eade2 100644
> --- a/libstdc++-v3/include/c_global/cmath
> +++ b/libstdc++-v3/include/c_global/cmath
> @@ -515,6 +515,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      tanh(_Tp __x)
>      { return __builtin_tanh(__x); }
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    constexpr _Float16
>    acos(_Float16 __x)
> @@ -609,6 +615,105 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return _Float16(__builtin_tanhf(__x)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  constexpr __gnu_cxx::__bfloat16_t
> +  acos(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  asin(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  atan(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  ceil(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  cos(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  cosh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  exp(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fabs(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  floor(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log10(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
> +  {
> +    float __i, __ret = __builtin_modff(__x, &__i);
> +    *__iptr = __gnu_cxx::__bfloat16_t(__i);
> +    return __gnu_cxx::__bfloat16_t(__ret);
> +  }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  sin(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  sinh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  sqrt(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  tan(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  tanh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    constexpr _Float32
>    acos(_Float32 __x)
> @@ -979,100 +1084,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_tanhf128(__x); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  constexpr __gnu_cxx::__bfloat16_t
> -  acos(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_acosf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  asin(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_asinf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  atan(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_atanf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  atan2(__gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_atan2f(__y, __x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  ceil(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_ceilf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  cos(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cosf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  cosh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_coshf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  exp(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_expf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fabs(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fabsf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  floor(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_floorf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fmod(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fmodf(__x, __y)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  frexp(__gnu_cxx::__bfloat16_t __x, int* __exp)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_frexpf(__x, __exp)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  ldexp(__gnu_cxx::__bfloat16_t __x, int __exp)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_ldexpf(__x, __exp)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_logf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log10(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_log10f(__x)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  modf(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t* __iptr)
> -  {
> -    float __i, __ret = __builtin_modff(__x, &__i);
> -    *__iptr = __gnu_cxx::__bfloat16_t(__i);
> -    return __gnu_cxx::__bfloat16_t(__ret);
> -  }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  pow(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_powf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  sin(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_sinf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  sinh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_sinhf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  sqrt(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_sqrtf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  tan(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_tanf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  tanh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_tanhf(__x)); }
> -#endif
> -
>  #if _GLIBCXX_USE_C99_MATH
>  #if !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC
>
> @@ -1507,6 +1518,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>  #endif // C++11
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    constexpr int
>    fpclassify(_Float16 __x)
> @@ -1558,6 +1575,62 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_isunordered(__x, __y); }
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  constexpr int
> +  fpclassify(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
> +                               FP_SUBNORMAL, FP_ZERO, __x); }
> +
> +  constexpr bool
> +  isfinite(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isfinite(__x); }
> +
> +  constexpr bool
> +  isinf(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isinf(__x); }
> +
> +  constexpr bool
> +  isnan(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isnan(__x); }
> +
> +  constexpr bool
> +  isnormal(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_isnormal(__x); }
> +
> +  constexpr bool
> +  signbit(__gnu_cxx::__bfloat16_t __x)
> +  { return __builtin_signbit(__x); }
> +
> +  constexpr bool
> +  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_isgreater(__x, __y); }
> +
> +  constexpr bool
> +  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_isgreaterequal(__x, __y); }
> +
> +  constexpr bool
> +  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_isless(__x, __y); }
> +
> +  constexpr bool
> +  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_islessequal(__x, __y); }
> +
> +  constexpr bool
> +  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_islessgreater(__x, __y); }
> +
> +  constexpr bool
> +  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __builtin_isunordered(__x, __y); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    constexpr int
>    fpclassify(_Float32 __x)
> @@ -1711,59 +1784,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_isunordered(__x, __y); }
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  constexpr int
> -  fpclassify(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
> -                               FP_SUBNORMAL, FP_ZERO, __x); }
> -
> -  constexpr bool
> -  isfinite(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isfinite(__x); }
> -
> -  constexpr bool
> -  isinf(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isinf(__x); }
> -
> -  constexpr bool
> -  isnan(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isnan(__x); }
> -
> -  constexpr bool
> -  isnormal(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_isnormal(__x); }
> -
> -  constexpr bool
> -  signbit(__gnu_cxx::__bfloat16_t __x)
> -  { return __builtin_signbit(__x); }
> -
> -  constexpr bool
> -  isgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_isgreater(__x, __y); }
> -
> -  constexpr bool
> -  isgreaterequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_isgreaterequal(__x, __y); }
> -
> -  constexpr bool
> -  isless(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_isless(__x, __y); }
> -
> -  constexpr bool
> -  islessequal(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_islessequal(__x, __y); }
> -
> -  constexpr bool
> -  islessgreater(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_islessgreater(__x, __y); }
> -
> -  constexpr bool
> -  isunordered(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __builtin_isunordered(__x, __y); }
> -#endif
> -
> -#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
> -#endif /* _GLIBCXX_USE_C99_MATH */
> +#endif /* _GLIBCXX_USE_C99_FP_MACROS_DYNAMIC */
> +#endif /* _GLIBCXX_USE_C99_MATH */
>
>  #if __cplusplus >= 201103L
>
> @@ -2657,6 +2679,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      { return __builtin_trunc(__x); }
>  #endif
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    constexpr _Float16
>    acosh(_Float16 __x)
> @@ -2837,6 +2865,191 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return _Float16(__builtin_truncf(__x)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  constexpr __gnu_cxx::__bfloat16_t
> +  acosh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  asinh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  atanh(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  cbrt(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  erf(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  erfc(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  exp2(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  expm1(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
> +
> +  constexpr int
> +  ilogb(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  lgamma(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
> +
> +  constexpr long long
> +  llrint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
> +
> +  constexpr long long
> +  llround(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log1p(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
> +
> +  // DR 568.
> +  constexpr __gnu_cxx::__bfloat16_t
> +  log2(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  logb(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
> +
> +  constexpr long
> +  lrint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
> +
> +  constexpr long
> +  lround(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  nearbyint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  {
> +    if (std::__is_constant_evaluated())
> +      return __builtin_nextafterf16b(__x, __y);
> +#ifdef __INT16_TYPE__
> +    using __bfloat16_int_type = __INT16_TYPE__;
> +#else
> +    using __bfloat16_int_type = short int;
> +#endif
> +    __bfloat16_int_type __hx, __hy, __ix, __iy;
> +    __builtin_memcpy(&__hx, &__x, sizeof(__x));
> +    __builtin_memcpy(&__hy, &__y, sizeof(__x));
> +    __ix = __hx & 0x7fff;      // |x|
> +    __iy = __hy & 0x7fff;      // |y|
> +    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
> +      return __x + __y;
> +    if (__x == __y)
> +      return __y;              // x == y, return y
> +    if (__ix == 0)             // x == 0
> +      {
> +       __hy = (__hy & 0x8000) | 1;     // return +-__BFLT16_DENORM_MIN__
> +       __builtin_memcpy(&__x, &__hy, sizeof(__x));
> +       __builtin_nextafterf(0.0f, 1.0f);       // raise underflow
> +       return __x;
> +      }
> +    if (__hx >= 0)             // x > 0
> +      {
> +       if (__hx > __hy)        // x > y, x -= ulp
> +         --__hx;
> +       else                    // x < y, x += ulp
> +         ++__hx;
> +      }
> +    else                       // x < 0
> +      {
> +       if (__hy >= 0 || __hx > __hy)   // x < y, x -= ulp
> +         --__hx;
> +       else                    // x > y, x += ulp
> +         ++__hx;
> +      }
> +    __hy = __hx & 0x7f80;
> +    if (__hy >= 0x7f80)
> +      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());     // overflow
> +    else if (__hy < 0x0080)
> +      __builtin_nextafterf(__FLT_MIN__, 0.0f); // underflow
> +    __builtin_memcpy(&__x, &__hx, sizeof(__x));
> +    return __x;
> +  }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* __pquo)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  rint(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  round(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  tgamma(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
> +
> +  constexpr __gnu_cxx::__bfloat16_t
> +  trunc(__gnu_cxx::__bfloat16_t __x)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    constexpr _Float32
>    acosh(_Float32 __x)
> @@ -3375,186 +3588,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_truncf128(__x); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  constexpr __gnu_cxx::__bfloat16_t
> -  acosh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_acoshf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  asinh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_asinhf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  atanh(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_atanhf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  cbrt(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cbrtf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  copysign(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_copysignf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  erf(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_erff(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  erfc(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_erfcf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  exp2(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_exp2f(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  expm1(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_expm1f(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fdim(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fdimf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fma(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fmaf(__x, __y, __z)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fmax(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fmaxf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  fmin(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_fminf(__x, __y)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_hypotf(__x, __y)); }
> -
> -  constexpr int
> -  ilogb(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_ilogbf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  lgamma(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_lgammaf(__x)); }
> -
> -  constexpr long long
> -  llrint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_llrintf(__x)); }
> -
> -  constexpr long long
> -  llround(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_llroundf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log1p(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_log1pf(__x)); }
> -
> -  // DR 568.
> -  constexpr __gnu_cxx::__bfloat16_t
> -  log2(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_log2f(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  logb(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_logbf(__x)); }
> -
> -  constexpr long
> -  lrint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_lrintf(__x)); }
> -
> -  constexpr long
> -  lround(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_lroundf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  nearbyint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_nearbyintf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  nextafter(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  {
> -    if (std::__is_constant_evaluated())
> -      return __builtin_nextafterf16b(__x, __y);
> -#ifdef __INT16_TYPE__
> -    using __bfloat16_int_type = __INT16_TYPE__;
> -#else
> -    using __bfloat16_int_type = short int;
> -#endif
> -    __bfloat16_int_type __hx, __hy, __ix, __iy;
> -    __builtin_memcpy(&__hx, &__x, sizeof(__x));
> -    __builtin_memcpy(&__hy, &__y, sizeof(__x));
> -    __ix = __hx & 0x7fff;      // |x|
> -    __iy = __hy & 0x7fff;      // |y|
> -    if (__ix > 0x7f80 || __iy > 0x7f80) // x or y is NaN
> -      return __x + __y;
> -    if (__x == __y)
> -      return __y;              // x == y, return y
> -    if (__ix == 0)             // x == 0
> -      {
> -       __hy = (__hy & 0x8000) | 1;     // return +-__BFLT16_DENORM_MIN__
> -       __builtin_memcpy(&__x, &__hy, sizeof(__x));
> -       __builtin_nextafterf(0.0f, 1.0f);       // raise underflow
> -       return __x;
> -      }
> -    if (__hx >= 0)             // x > 0
> -      {
> -       if (__hx > __hy)        // x > y, x -= ulp
> -         --__hx;
> -       else                    // x < y, x += ulp
> -         ++__hx;
> -      }
> -    else                       // x < 0
> -      {
> -       if (__hy >= 0 || __hx > __hy)   // x < y, x -= ulp
> -         --__hx;
> -       else                    // x > y, x += ulp
> -         ++__hx;
> -      }
> -    __hy = __hx & 0x7f80;
> -    if (__hy >= 0x7f80)
> -      __builtin_nextafterf(__FLT_MAX__, __builtin_inff());     // overflow
> -    else if (__hy < 0x0080)
> -      __builtin_nextafterf(__FLT_MIN__, 0.0f); // underflow
> -    __builtin_memcpy(&__x, &__hx, sizeof(__x));
> -    return __x;
> -  }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  remainder(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_remainderf(__x, __y)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  remquo(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, int* __pquo)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_remquof(__x, __y, __pquo)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  rint(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_rintf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  round(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_roundf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  scalbln(__gnu_cxx::__bfloat16_t __x, long __ex)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_scalblnf(__x, __ex)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  scalbn(__gnu_cxx::__bfloat16_t __x, int __ex)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_scalbnf(__x, __ex)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  tgamma(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_tgammaf(__x)); }
> -
> -  constexpr __gnu_cxx::__bfloat16_t
> -  trunc(__gnu_cxx::__bfloat16_t __x)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_truncf(__x)); }
> -#endif
> -
>
>  #endif // _GLIBCXX_USE_C99_MATH_TR1
>  #endif // C++11
> @@ -3599,12 +3632,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        return std::__hypot3<__type>(__x, __y, __z);
>      }
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline _Float16
>    hypot(_Float16 __x, _Float16 __y, _Float16 __z)
>    { return std::__hypot3<_Float16>(__x, __y, __z); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  inline __gnu_cxx::__bfloat16_t
> +  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
> +  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline _Float32
>    hypot(_Float32 __x, _Float32 __y, _Float32 __z)
> @@ -3625,12 +3675,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return std::__hypot3<_Float128>(__x, __y, __z); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  inline __gnu_cxx::__bfloat16_t
> -  hypot(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z)
> -  { return std::__hypot3<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> -#endif
> -
>  #endif // C++17
>
>  #if __cplusplus >= 202002L
> @@ -3675,12 +3719,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        return std::__lerp<__type>(__x, __y, __z);
>      }
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline _Float16
>    lerp(_Float16 __x, _Float16 __y, _Float16 __z) noexcept
>    { return std::__lerp<_Float16>(__x, __y, __z); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  inline __gnu_cxx::__bfloat16_t
> +  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z) noexcept
> +  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline _Float32
>    lerp(_Float32 __x, _Float32 __y, _Float32 __z) noexcept
> @@ -3701,12 +3762,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return std::__lerp<_Float128>(__x, __y, __z); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  inline __gnu_cxx::__bfloat16_t
> -  lerp(__gnu_cxx::__bfloat16_t __x, __gnu_cxx::__bfloat16_t __y, __gnu_cxx::__bfloat16_t __z) noexcept
> -  { return std::__lerp<__gnu_cxx::__bfloat16_t>(__x, __y, __z); }
> -#endif
> -
>  #endif // C++20
>
>  _GLIBCXX_END_NAMESPACE_VERSION
> diff --git a/libstdc++-v3/include/ext/type_traits.h b/libstdc++-v3/include/ext/type_traits.h
> index 4466c6712c3..823b9710e0c 100644
> --- a/libstdc++-v3/include/ext/type_traits.h
> +++ b/libstdc++-v3/include/ext/type_traits.h
> @@ -190,12 +190,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      struct __promote<float>
>      { typedef float __type; };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __promote<_Float16>
>      { typedef _Float16 __type; };
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  template<>
> +    struct __promote<__gnu_cxx::__bfloat16_t>
> +    { typedef __gnu_cxx::__bfloat16_t __type; };
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    template<>
>      struct __promote<_Float32>
> @@ -214,12 +231,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      { typedef _Float128 __type; };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  template<>
> -    struct __promote<__gnu_cxx::__bfloat16_t>
> -    { typedef __gnu_cxx::__bfloat16_t __type; };
> -#endif
> -
>  #if __cpp_fold_expressions
>
>    template<typename... _Tp>
> diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/atomic
> index 96e87ded864..5e9e9959270 100644
> --- a/libstdc++-v3/include/std/atomic
> +++ b/libstdc++-v3/include/std/atomic
> @@ -1664,6 +1664,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        using __atomic_float<long double>::operator=;
>      };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct atomic<_Float16> : __atomic_float<_Float16>
> @@ -1681,71 +1687,76 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT32_T__
> +#ifdef __STDCPP_BFLOAT16_T__
>    template<>
> -    struct atomic<_Float32> : __atomic_float<_Float32>
> +    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::__bfloat16_t>
>      {
>        atomic() noexcept = default;
>
>        constexpr
> -      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
> +      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__gnu_cxx::__bfloat16_t>(__fp)
>        { }
>
>        atomic& operator=(const atomic&) volatile = delete;
>        atomic& operator=(const atomic&) = delete;
>
> -      using __atomic_float<_Float32>::operator=;
> +      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT64_T__
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
> +#ifdef __STDCPP_FLOAT32_T__
>    template<>
> -    struct atomic<_Float64> : __atomic_float<_Float64>
> +    struct atomic<_Float32> : __atomic_float<_Float32>
>      {
>        atomic() noexcept = default;
>
>        constexpr
> -      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
> +      atomic(_Float32 __fp) noexcept : __atomic_float<_Float32>(__fp)
>        { }
>
>        atomic& operator=(const atomic&) volatile = delete;
>        atomic& operator=(const atomic&) = delete;
>
> -      using __atomic_float<_Float64>::operator=;
> +      using __atomic_float<_Float32>::operator=;
>      };
>  #endif
>
> -#ifdef __STDCPP_FLOAT128_T__
> +#ifdef __STDCPP_FLOAT64_T__
>    template<>
> -    struct atomic<_Float128> : __atomic_float<_Float128>
> +    struct atomic<_Float64> : __atomic_float<_Float64>
>      {
>        atomic() noexcept = default;
>
>        constexpr
> -      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
> +      atomic(_Float64 __fp) noexcept : __atomic_float<_Float64>(__fp)
>        { }
>
>        atomic& operator=(const atomic&) volatile = delete;
>        atomic& operator=(const atomic&) = delete;
>
> -      using __atomic_float<_Float128>::operator=;
> +      using __atomic_float<_Float64>::operator=;
>      };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> +#ifdef __STDCPP_FLOAT128_T__
>    template<>
> -    struct atomic<__gnu_cxx::__bfloat16_t> : __atomic_float<__gnu_cxx::__bfloat16_t>
> +    struct atomic<_Float128> : __atomic_float<_Float128>
>      {
>        atomic() noexcept = default;
>
>        constexpr
> -      atomic(__gnu_cxx::__bfloat16_t __fp) noexcept : __atomic_float<__gnu_cxx::__bfloat16_t>(__fp)
> +      atomic(_Float128 __fp) noexcept : __atomic_float<_Float128>(__fp)
>        { }
>
>        atomic& operator=(const atomic&) volatile = delete;
>        atomic& operator=(const atomic&) = delete;
>
> -      using __atomic_float<__gnu_cxx::__bfloat16_t>::operator=;
> +      using __atomic_float<_Float128>::operator=;
>      };
>  #endif
>
> diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std/charconv
> index b34d672f5bd..451fb4cba47 100644
> --- a/libstdc++-v3/include/std/charconv
> +++ b/libstdc++-v3/include/std/charconv
> @@ -689,6 +689,12 @@ namespace __detail
>                           float& __value,
>                           chars_format __fmt = chars_format::general) noexcept;
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
>      && defined(__cpp_lib_to_chars)
>    inline from_chars_result
> @@ -704,6 +710,27 @@ namespace __detail
>    }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
> +    && defined(__cpp_lib_to_chars)
> +  inline from_chars_result
> +  from_chars(const char* __first, const char* __last,
> +            __gnu_cxx::__bfloat16_t & __value,
> +            chars_format __fmt = chars_format::general) noexcept
> +  {
> +    float __val;
> +    from_chars_result __res
> +      = __from_chars_bfloat16_t(__first, __last, __val, __fmt);
> +    if (__res.ec == errc{})
> +      __value = __val;
> +    return __res;
> +  }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline from_chars_result
>    from_chars(const char* __first, const char* __last, _Float32& __value,
> @@ -763,22 +790,6 @@ namespace __detail
>              chars_format __fmt = chars_format::general) noexcept;
>  #endif
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32) \
> -    && defined(__cpp_lib_to_chars)
> -  inline from_chars_result
> -  from_chars(const char* __first, const char* __last,
> -            __gnu_cxx::__bfloat16_t & __value,
> -            chars_format __fmt = chars_format::general) noexcept
> -  {
> -    float __val;
> -    from_chars_result __res
> -      = __from_chars_bfloat16_t(__first, __last, __val, __fmt);
> -    if (__res.ec == errc{})
> -      __value = __val;
> -    return __res;
> -  }
> -#endif
>  #endif
>
>  #if defined __cpp_lib_to_chars
> @@ -815,6 +826,12 @@ namespace __detail
>                                         float __value,
>                                         chars_format __fmt) noexcept;
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline to_chars_result
>    to_chars(char* __first, char* __last, _Float16 __value) noexcept
> @@ -832,6 +849,29 @@ namespace __detail
>    { return to_chars(__first, __last, float(__value), __fmt, __precision); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  inline to_chars_result
> +  to_chars(char* __first, char* __last,
> +          __gnu_cxx::__bfloat16_t __value) noexcept
> +  {
> +    return __to_chars_bfloat16_t(__first, __last, float(__value),
> +                                chars_format{});
> +  }
> +  inline to_chars_result
> +  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> +          chars_format __fmt) noexcept
> +  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt); }
> +  inline to_chars_result
> +  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> +          chars_format __fmt, int __precision) noexcept
> +  { return to_chars(__first, __last, float(__value), __fmt, __precision); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline to_chars_result
>    to_chars(char* __first, char* __last, _Float32 __value) noexcept
> @@ -920,24 +960,6 @@ namespace __detail
>                            chars_format __fmt, int __precision) noexcept;
>  #endif
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  inline to_chars_result
> -  to_chars(char* __first, char* __last,
> -          __gnu_cxx::__bfloat16_t __value) noexcept
> -  {
> -    return __to_chars_bfloat16_t(__first, __last, float(__value),
> -                                chars_format{});
> -  }
> -  inline to_chars_result
> -  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> -          chars_format __fmt) noexcept
> -  { return __to_chars_bfloat16_t(__first, __last, float(__value), __fmt); }
> -  inline to_chars_result
> -  to_chars(char* __first, char* __last, __gnu_cxx::__bfloat16_t __value,
> -          chars_format __fmt, int __precision) noexcept
> -  { return to_chars(__first, __last, float(__value), __fmt, __precision); }
> -#endif
>  #endif
>
>  _GLIBCXX_END_NAMESPACE_VERSION
> diff --git a/libstdc++-v3/include/std/complex b/libstdc++-v3/include/std/complex
> index 0f5f14c3ddb..2f47036e472 100644
> --- a/libstdc++-v3/include/std/complex
> +++ b/libstdc++-v3/include/std/complex
> @@ -599,6 +599,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  #endif
>
>  #if _GLIBCXX_USE_C99_COMPLEX
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline _Float16
>    __complex_abs(__complex__ _Float16 __z)
> @@ -649,6 +656,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return static_cast<__complex__ _Float16>(__builtin_cpowf(__x, __y)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  inline __gnu_cxx::__bfloat16_t
> +  __complex_abs(__complex__ decltype(0.0bf16) __z)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
> +
> +  inline __gnu_cxx::__bfloat16_t
> +  __complex_arg(__complex__ decltype(0.0bf16) __z)
> +  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_cos(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_cosh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_exp(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_log(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_sin(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_sinh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_tan(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_tanh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_pow(__complex__ decltype(0.0bf16) __x,
> +               __complex__ decltype(0.0bf16) __y)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__x,
> +                                                                     __y)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline _Float32
>    __complex_abs(__complex__ _Float32 __z) { return __builtin_cabsf(__z); }
> @@ -802,58 +866,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    __complex_pow(__complex__ _Float128 __x, __complex__ _Float128 __y)
>    { return __builtin_cpowf128(__x, __y); }
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  inline __gnu_cxx::__bfloat16_t
> -  __complex_abs(__complex__ decltype(0.0bf16) __z)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cabsf(__z)); }
> -
> -  inline __gnu_cxx::__bfloat16_t
> -  __complex_arg(__complex__ decltype(0.0bf16) __z)
> -  { return __gnu_cxx::__bfloat16_t(__builtin_cargf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_cos(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccosf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_cosh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ccoshf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_exp(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cexpf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_log(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_clogf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_sin(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_sinh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csinhf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_sqrt(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_csqrtf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_tan(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_tanh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_ctanhf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_pow(__complex__ decltype(0.0bf16) __x,
> -               __complex__ decltype(0.0bf16) __y)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cpowf(__x,
> -                                                                     __y)); }
> -#endif
>  #endif
>
>    // 26.2.7/3 abs(__z):  Returns the magnitude of __z.
> @@ -1804,12 +1816,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      struct __complex_type
>      { };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __complex_type<_Float16>
>      { typedef __complex__ _Float16 type; };
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  template<>
> +    struct __complex_type<__gnu_cxx::__bfloat16_t>
> +    { typedef __complex__ decltype(0.0bf16) type; };
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    template<>
>      struct __complex_type<_Float32>
> @@ -1828,12 +1857,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      { typedef __complex__ _Float128 type; };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  template<>
> -    struct __complex_type<__gnu_cxx::__bfloat16_t>
> -    { typedef __complex__ decltype(0.0bf16) type; };
> -#endif
> -
>    template<typename _Tp>
>      requires requires { typename __complex_type<_Tp>::type; }
>      class complex<_Tp>
> @@ -2022,6 +2045,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      }
>
>  #if _GLIBCXX_USE_C99_COMPLEX_TR1
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline __complex__ _Float16
>    __complex_acos(__complex__ _Float16 __z)
> @@ -2048,6 +2078,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return static_cast<__complex__ _Float16>(__builtin_catanhf(__z)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_acos(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_asin(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_atan(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_acosh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_asinh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(__z)); }
> +
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_atanh(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(__z)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline __complex__ _Float32
>    __complex_acos(__complex__ _Float32 __z)
> @@ -2149,32 +2210,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    __complex_atanh(__complex__ _Float128 __z)
>    { return __builtin_catanhf128(__z); }
>  #endif
> -
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_acos(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacosf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_asin(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_atan(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_acosh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cacoshf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_asinh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_casinhf(__z)); }
> -
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_atanh(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_catanhf(__z)); }
> -#endif
>  #endif
>
>  #if _GLIBCXX_USE_C99_COMPLEX_TR1
> @@ -2493,12 +2528,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_cprojl(__z.__rep()); }
>
>  #if __cplusplus > 202002L
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline __complex__ _Float16
>    __complex_proj(__complex__ _Float16 __z)
>    { return static_cast<__complex__ _Float16>(__builtin_cprojf(__z)); }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +  inline __complex__ decltype(0.0bf16)
> +  __complex_proj(__complex__ decltype(0.0bf16) __z)
> +  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(__z)); }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>    inline __complex__ _Float32
>    __complex_proj(__complex__ _Float32 __z)
> @@ -2521,12 +2574,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    { return __builtin_cprojf128(__z); }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -  inline __complex__ decltype(0.0bf16)
> -  __complex_proj(__complex__ decltype(0.0bf16) __z)
> -  { return static_cast<__complex__ decltype(0.0bf16)>(__builtin_cprojf(__z)); }
> -#endif
> -
>    template<typename _Tp>
>      requires requires { typename __complex_type<_Tp>::type; }
>      inline complex<_Tp>
> diff --git a/libstdc++-v3/include/std/istream b/libstdc++-v3/include/std/istream
> index 25d36973f4b..27893a505dd 100644
> --- a/libstdc++-v3/include/std/istream
> +++ b/libstdc++-v3/include/std/istream
> @@ -225,6 +225,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        { return _M_extract(__f); }
>        ///@}
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>        __attribute__((__always_inline__))
>        __istream_type&
> @@ -251,6 +256,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> +      __attribute__((__always_inline__))
> +      __istream_type&
> +      operator>>(__gnu_cxx::__bfloat16_t & __f)
> +      {
> +       float __flt;
> +       __istream_type& __ret = _M_extract(__flt);
> +       ios_base::iostate __err = ios_base::goodbit;
> +       if (__flt < -__BFLT16_MAX__)
> +         {
> +           __f = -__BFLT16_MAX__;
> +           __err = ios_base::failbit;
> +         }
> +       else if (__flt > __BFLT16_MAX__)
> +         {
> +           __f = __BFLT16_MAX__;
> +           __err = ios_base::failbit;
> +         }
> +       else
> +         __f = static_cast<__gnu_cxx::__bfloat16_t>(__flt);
> +       if (__err)
> +         this->setstate(__err);
> +       return __ret;
> +      }
> +#endif
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
>        __attribute__((__always_inline__))
>        __istream_type&
> @@ -287,32 +322,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_FLOAT_IS_IEEE_BINARY32)
> -      __attribute__((__always_inline__))
> -      __istream_type&
> -      operator>>(__gnu_cxx::__bfloat16_t & __f)
> -      {
> -       float __flt;
> -       __istream_type& __ret = _M_extract(__flt);
> -       ios_base::iostate __err = ios_base::goodbit;
> -       if (__flt < -__BFLT16_MAX__)
> -         {
> -           __f = -__BFLT16_MAX__;
> -           __err = ios_base::failbit;
> -         }
> -       else if (__flt > __BFLT16_MAX__)
> -         {
> -           __f = __BFLT16_MAX__;
> -           __err = ios_base::failbit;
> -         }
> -       else
> -         __f = static_cast<__gnu_cxx::__bfloat16_t>(__flt);
> -       if (__err)
> -         this->setstate(__err);
> -       return __ret;
> -      }
> -#endif
> -
>        /**
>         *  @brief  Basic arithmetic extractors
>         *  @param  __p A variable of pointer type.
> diff --git a/libstdc++-v3/include/std/limits b/libstdc++-v3/include/std/limits
> index 8bafd6fb972..e715cec7dd9 100644
> --- a/libstdc++-v3/include/std/limits
> +++ b/libstdc++-v3/include/std/limits
> @@ -1980,21 +1980,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>         = round_to_nearest;                                             \
>      };                                                                         \
>
> +
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>  __glibcxx_float_n(16)
>  #endif
> -#ifdef __STDCPP_FLOAT32_T__
> -__glibcxx_float_n(32)
> -#endif
> -#ifdef __STDCPP_FLOAT64_T__
> -__glibcxx_float_n(64)
> -#endif
> -#ifdef __STDCPP_FLOAT128_T__
> -__glibcxx_float_n(128)
> -#endif
> -#undef __glibcxx_float_n
> -#undef __glibcxx_concat3
> -#undef __glibcxx_concat3_
>
>  #ifdef __STDCPP_BFLOAT16_T__
>    __extension__
> @@ -2071,6 +2066,24 @@ __glibcxx_float_n(128)
>      };
>  #endif
>
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
> +#ifdef __STDCPP_FLOAT32_T__
> +__glibcxx_float_n(32)
> +#endif
> +#ifdef __STDCPP_FLOAT64_T__
> +__glibcxx_float_n(64)
> +#endif
> +#ifdef __STDCPP_FLOAT128_T__
> +__glibcxx_float_n(128)
> +#endif
> +#undef __glibcxx_float_n
> +#undef __glibcxx_concat3
> +#undef __glibcxx_concat3_
> +
>  #endif
>
>  _GLIBCXX_END_NAMESPACE_VERSION
> diff --git a/libstdc++-v3/include/std/numbers b/libstdc++-v3/include/std/numbers
> index d9d202f5392..39de869dd0e 100644
> --- a/libstdc++-v3/include/std/numbers
> +++ b/libstdc++-v3/include/std/numbers
> @@ -199,10 +199,21 @@ namespace numbers
>      inline constexpr TYPE phi_v<TYPE>                  \
>        = 1.618033988749894848204586834365638118##SUFFIX
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>  __glibcxx_numbers (_Float16, F16);
>  #endif
>
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>  __glibcxx_numbers (_Float32, F32);
>  #endif
> diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/ostream
> index 4711b8a3d96..6365fe7649b 100644
> --- a/libstdc++-v3/include/std/ostream
> +++ b/libstdc++-v3/include/std/ostream
> @@ -235,6 +235,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        { return _M_insert(__f); }
>        ///@}
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #if defined(__STDCPP_FLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
>        __attribute__((__always_inline__))
>        __ostream_type&
> @@ -244,6 +250,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> +#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
> +      __attribute__((__always_inline__))
> +      __ostream_type&
> +      operator<<(__gnu_cxx::__bfloat16_t __f)
> +      {
> +       return _M_insert(static_cast<double>(__f));
> +      }
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #if defined(__STDCPP_FLOAT32_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
>        __attribute__((__always_inline__))
>        __ostream_type&
> @@ -271,15 +291,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        }
>  #endif
>
> -#if defined(__STDCPP_BFLOAT16_T__) && defined(_GLIBCXX_DOUBLE_IS_IEEE_BINARY64)
> -      __attribute__((__always_inline__))
> -      __ostream_type&
> -      operator<<(__gnu_cxx::__bfloat16_t __f)
> -      {
> -       return _M_insert(static_cast<double>(__f));
> -      }
> -#endif
> -
>        /**
>         *  @brief  Pointer arithmetic inserters
>         *  @param  __p A variable of pointer type.
> diff --git a/libstdc++-v3/include/std/stdfloat b/libstdc++-v3/include/std/stdfloat
> index c39dbb64904..3ea582e1f5d 100644
> --- a/libstdc++-v3/include/std/stdfloat
> +++ b/libstdc++-v3/include/std/stdfloat
> @@ -36,10 +36,25 @@ namespace std
>  {
>  _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>    #ifdef __STDCPP_FLOAT16_T__
>    using float16_t = _Float16;
>    #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  using bfloat16_t = __gnu_cxx::__bfloat16_t;
> +  #endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>    #ifdef __STDCPP_FLOAT32_T__
>    using float32_t = _Float32;
>    #endif
> @@ -52,10 +67,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>    using float128_t = _Float128;
>    #endif
>
> -  #ifdef __STDCPP_BFLOAT16_T__
> -  using bfloat16_t = __gnu_cxx::__bfloat16_t;
> -  #endif
> -
>  _GLIBCXX_END_NAMESPACE_VERSION
>  } // namespace std
>  #endif // C++23
> diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/std/type_traits
> index 2bd607a8b8f..549d6485708 100644
> --- a/libstdc++-v3/include/std/type_traits
> +++ b/libstdc++-v3/include/std/type_traits
> @@ -459,12 +459,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      struct __is_floating_point_helper<long double>
>      : public true_type { };
>
> +#ifndef __SSE2__
> +#pragma GCC push_options
> +#pragma GCC target("sse2")
> +#define __DISABLE_STDCPP_SSE2__
> +#endif
> +
>  #ifdef __STDCPP_FLOAT16_T__
>    template<>
>      struct __is_floating_point_helper<_Float16>
>      : public true_type { };
>  #endif
>
> +#ifdef __STDCPP_BFLOAT16_T__
> +  template<>
> +    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
> +    : public true_type { };
> +#endif
> +
> +#ifdef __DISABLE_STDCPP_SSE2__
> +#undef __DISABLE_STDCPP_SSE2__
> +#pragma GCC pop_options
> +#endif
> +
>  #ifdef __STDCPP_FLOAT32_T__
>    template<>
>      struct __is_floating_point_helper<_Float32>
> @@ -483,12 +500,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      : public true_type { };
>  #endif
>
> -#ifdef __STDCPP_BFLOAT16_T__
> -  template<>
> -    struct __is_floating_point_helper<__gnu_cxx::__bfloat16_t>
> -    : public true_type { };
> -#endif
> -
>  #if !defined(__STRICT_ANSI__) && defined(_GLIBCXX_USE_FLOAT128)
>    template<>
>      struct __is_floating_point_helper<__float128>
> --
> 2.39.1.388.g2fc9e9ca3c
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-05-15  1:20     ` [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2 Hongtao Liu
@ 2023-07-17  8:35       ` Hongtao Liu
  2023-07-17 11:38         ` Uros Bizjak
  0 siblings, 1 reply; 12+ messages in thread
From: Hongtao Liu @ 2023-07-17  8:35 UTC (permalink / raw)
  To: liuhongt, Uros Bizjak; +Cc: gcc-patches, hjl.tools, jakub

I'd like to ping this patch (only patch 1/2; I think patch 2/2
may not be necessary).

On Mon, May 15, 2023 at 9:20 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> ping.
>
> On Fri, Apr 21, 2023 at 9:55 PM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > > > +  if (!TARGET_SSE2)
> > > > +    {
> > > > +      if (c_dialect_cxx ()
> > > > +       && cxx_dialect > cxx20)
> > >
> > > Formatting, both conditions are short, so just put them on one line.
> > Changed.
> >
> > > But for the C++23 macros, more importantly I think we really should
> > > also in ix86_target_macros_internal add
> > >   if (c_dialect_cxx ()
> > >       && cxx_dialect > cxx20
> > >       && (isa_flag & OPTION_MASK_ISA_SSE2))
> > >     {
> > >       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> > >       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> > >     }
> > > plus associated libstdc++ changes.  It can be done incrementally though.
> > Added in PATCH 2/2
> >
> > > > +      if (flag_building_libgcc)
> > > > +     {
> > > > +       /* libbid uses __LIBGCC_HAS_HF_MODE__ and __LIBGCC_HAS_BF_MODE__
> > > > +          to check backend support of _Float16 and __bf16 type.  */
> > >
> > > That is actually the case only for HFmode, but not for BFmode right now.
> > > So, we need further work.  One is to add the BFmode support in there,
> > > and another one is make sure the _Float16 <-> _Decimal* and __bf16 <->
> > > _Decimal* conversions are compiled in also if not -msse2 by default.
> > > One way to do that is wrap the HF and BF mode related functions on x86
> > > #ifndef __SSE2__ into the pragmas like intrin headers use (but then
> > > perhaps we don't need to undef this stuff here), another is not provide
> > > the hf/bf support in that case from the TUs where they are provided now,
> > > but from a different one which would be compiled with -msse2.
> > > Added CFLAGS-_hf_to_sd.c += -msse2, and similarly for the other HFtype related
> > > files in libbid, just like we did before for HFtype softfp.  With that, there
> > > is no need to undef the libgcc macros.
> >
> > > >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> > > >       some less clueful developer tries to use floating-point anyway.  */
> > > > -  if (needed_sseregs && !TARGET_SSE)
> > > > +  if (needed_sseregs
> > > > +      && (!TARGET_SSE
> > > > +       || (VALID_SSE2_TYPE_MODE (mode)
> > > > +           && !TARGET_SSE2)))
> > >
> > > Formatting, no need to split this up that much.
> > >   if (needed_sseregs
> > >       && (!TARGET_SSE
> > >           || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > > or even better
> > >   if (needed_sseregs
> > >       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > > will do it.
> > Changed.
> >
> > > Instead of this, just use
> > >       if (!float16_type_node)
> > >         {
> > >           float16_type_node = ix86_float16_type_node;
> > >           callback (float16_type_node);
> > >           float16_type_node = NULL_TREE;
> > >         }
> > >       if (!bfloat16_type_node)
> > >         {
> > >           bfloat16_type_node = ix86_bf16_type_node;
> > >           callback (bfloat16_type_node);
> > >           bfloat16_type_node = NULL_TREE;
> > >         }
> > Changed.
> >
> >
> > > > +static const char *
> > > > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > > > +{
> > > > +  if (element_mode (fromtype) != element_mode (totype))
> > > > +    {
> > > > +      /* Do no allow conversions to/from BFmode/HFmode scalar types
> > > > +      when TARGET_SSE2 is not available.  */
> > > > +      if ((TYPE_MODE (fromtype) == BFmode
> > > > +        || TYPE_MODE (fromtype) == HFmode)
> > > > +       && !TARGET_SSE2)
> > >
> > > First of all, not really sure if this should be purely about scalar
> > > modes, not also complex and vector modes involving those inner modes.
> > > Because complex or vector modes with BF/HF elements will be without
> > > TARGET_SSE2 for sure lowered into scalar code and that can't be handled
> > > either.
> > > So if (!TARGET_SSE2 && GET_MODE_INNER (TYPE_MODE (fromtype)) == BFmode)
> > > or even better
> > > if (!TARGET_SSE2 && element_mode (fromtype) == BFmode)
> > > ?
> > > Or even better remember the 2 modes above into machine_mode temporaries
> > > and just use those in the != comparison and for the checks?
> > >
> > > Also, I think it is weird to tell user %<__bf16%> or %<_Float16%> when
> > > we know which one it is.  Just return separate messages?
> > Changed.
> >
> > > > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > > > +     when TARGET_SSE2 is not available.  */
> > > > +  if ((element_mode (type) == BFmode || element_mode (type) == HFmode)
> > > > +      && !TARGET_SSE2 && op != ADDR_EXPR)
> > > > +    return N_("operation not permitted on type %<__bf16%> "
> > > > +           "or %<_Float16%> without option %<-msse2%>");
> > >
> > > Similarly.  Also, check !TARGET_SSE2 first as inexpensive one.
> > Changed.
> >
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Successfully cross-build i686-linux-gnu.
> > Ok for trunk?
> >
> > Enable _Float16 and __bf16 all the time but issue errors when the
> > types are used in conversion, unary operation, binary operation,
> > parameter passing or value return when TARGET_SSE2 is not available.
> >
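> > > Also undef the macros which are used by libstdc++ to check the
> > > backend support of the _Float16/__bf16 types when TARGET_SSE2 is not
> > > available.  The libgcc macros are left defined; the libbid HFtype
> > > related files are built with -msse2 instead.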
> >
> > gcc/ChangeLog:
> >
> >         PR target/109504
> >         * config/i386/i386-builtins.cc
> >         (ix86_register_float16_builtin_type): Remove TARGET_SSE2.
> >         (ix86_register_bf16_builtin_type): Ditto.
> > >         * config/i386/i386-c.cc (ix86_target_macros): When TARGET_SSE2
> > >         isn't available, undef the macros which are used to check the
> > >         backend support of the _Float16/__bf16 types when building
> > >         libstdc++.
> >         * config/i386/i386.cc (construct_container): Issue errors for
> >         HFmode/BFmode when TARGET_SSE2 is not available.
> >         (function_value_32): Ditto.
> >         (ix86_scalar_mode_supported_p): Remove TARGET_SSE2 for HFmode/BFmode.
> >         (ix86_libgcc_floating_mode_supported_p): Ditto.
> >         (ix86_emit_support_tinfos): Adjust codes.
> >         (ix86_invalid_conversion): New function.
> >         (ix86_invalid_unary_op): Ditto.
> >         (ix86_invalid_binary_op): Ditto.
> >         (TARGET_INVALID_CONVERSION): Define.
> >         (TARGET_INVALID_UNARY_OP): Define.
> >         (TARGET_INVALID_BINARY_OP): Define.
> > >         * config/i386/immintrin.h [__SSE2__]: Remove for fp16/bf16
> > >         related intrinsics header files.
> >         * config/i386/i386.h (VALID_SSE2_TYPE_MODE): New macro.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr109504.c: New test.
> >         * gcc.target/i386/sse2-bfloat16-1.c: Adjust error info.
> >         * gcc.target/i386/sse2-float16-1.c: Ditto.
> >         * gcc.target/i386/sse2-float16-4.c: New test.
> >         * gcc.target/i386/sse2-float16-5.c: New test.
> >         * g++.target/i386/float16-1.C: Adjust error info.
> >
> > libgcc/ChangeLog:
> >
> >         * config/i386/t-softfp: Add -msse2 to libbid HFtype related
> >         files.
> > ---
> >  gcc/config/i386/i386-builtins.cc              |   4 +-
> >  gcc/config/i386/i386-c.cc                     |  15 ++
> >  gcc/config/i386/i386.cc                       | 130 ++++++++++++++++--
> >  gcc/config/i386/i386.h                        |   4 +
> >  gcc/config/i386/immintrin.h                   |   4 -
> >  gcc/testsuite/g++.target/i386/float16-1.C     |   8 +-
> >  gcc/testsuite/gcc.target/i386/pr109504.c      |   6 +
> >  .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +-
> >  .../gcc.target/i386/sse2-float16-1.c          |   8 +-
> >  .../gcc.target/i386/sse2-float16-4.c          |  25 ++++
> >  .../gcc.target/i386/sse2-float16-5.c          |  24 ++++
> >  libgcc/config/i386/t-softfp                   |   7 +
> >  12 files changed, 215 insertions(+), 28 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr109504.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> >
> > diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> > index fc0c82b156e..1cdabfd3a0a 100644
> > --- a/gcc/config/i386/i386-builtins.cc
> > +++ b/gcc/config/i386/i386-builtins.cc
> > @@ -1367,7 +1367,7 @@ ix86_register_float16_builtin_type (void)
> >    else
> >      ix86_float16_type_node = float16_type_node;
> >
> > -  if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
> > +  if (!maybe_get_identifier ("_Float16"))
> >      lang_hooks.types.register_builtin_type (ix86_float16_type_node,
> >                                             "_Float16");
> >  }
> > @@ -1385,7 +1385,7 @@ ix86_register_bf16_builtin_type (void)
> >    else
> >      ix86_bf16_type_node = bfloat16_type_node;
> >
> > -  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
> > +  if (!maybe_get_identifier ("__bf16"))
> >      lang_hooks.types.register_builtin_type (ix86_bf16_type_node, "__bf16");
> >  }
> >
> > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> > index e7bd7cc706c..2f83c9981e1 100644
> > --- a/gcc/config/i386/i386-c.cc
> > +++ b/gcc/config/i386/i386-c.cc
> > @@ -817,6 +817,21 @@ ix86_target_macros (void)
> >    if (!TARGET_80387)
> >      cpp_define (parse_in, "_SOFT_FLOAT");
> >
> > > +  /* HFmode/BFmode are supported without depending on any ISA
> > +     in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
> > +     but according to psABI, they're really supported w/ SSE2 and above.
> > +     Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
> > +     for backend support of the types, undef the macros to avoid
> > +     build failure, see PR109504.  */
> > +  if (!TARGET_SSE2)
> > +    {
> > +      if (c_dialect_cxx () && cxx_dialect > cxx20)
> > +       {
> > +         cpp_undef (parse_in, "__STDCPP_FLOAT16_T__");
> > +         cpp_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> > +       }
> > +    }
> > +
> >    if (TARGET_LONG_DOUBLE_64)
> >      cpp_define (parse_in, "__LONG_DOUBLE_64__");
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index fbd33a6bfd1..633a0f41e60 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -2651,7 +2651,8 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> >
> >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> >       some less clueful developer tries to use floating-point anyway.  */
> > -  if (needed_sseregs && !TARGET_SSE)
> > +  if (needed_sseregs
> > +      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> >      {
> >        /* Return early if we shouldn't raise an error for invalid
> >          calls.  */
> > @@ -2661,13 +2662,19 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> >         {
> >           if (!issued_sse_ret_error)
> >             {
> > -             error ("SSE register return with SSE disabled");
> > +             if (VALID_SSE2_TYPE_MODE (mode))
> > +               error ("SSE register return with SSE2 disabled");
> > +             else
> > +               error ("SSE register return with SSE disabled");
> >               issued_sse_ret_error = true;
> >             }
> >         }
> >        else if (!issued_sse_arg_error)
> >         {
> > -         error ("SSE register argument with SSE disabled");
> > +         if (VALID_SSE2_TYPE_MODE (mode))
> > +           error ("SSE register argument with SSE2 disabled");
> > +         else
> > +           error ("SSE register argument with SSE disabled");
> >           issued_sse_arg_error = true;
> >         }
> >        return NULL;
> > @@ -4022,13 +4029,26 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
> >
> >    /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */
> >    if (mode == HFmode || mode == BFmode)
> > -    regno = FIRST_SSE_REG;
> > +    {
> > +      if (!TARGET_SSE2)
> > +       {
> > +         error ("SSE register return with SSE2 disabled");
> > +         regno = AX_REG;
> > +       }
> > +      else
> > +       regno = FIRST_SSE_REG;
> > +    }
> > +
> >    if (mode == HCmode)
> >      {
> > +      if (!TARGET_SSE2)
> > +       error ("SSE register return with SSE2 disabled");
> > +
> >        rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
> >        XVECEXP (ret, 0, 0)
> >         = gen_rtx_EXPR_LIST (VOIDmode,
> > -                            gen_rtx_REG (SImode, FIRST_SSE_REG),
> > +                            gen_rtx_REG (SImode,
> > +                                         TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
> >                              GEN_INT (0));
> >        return ret;
> >      }
> > @@ -22459,7 +22479,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
> >      return default_decimal_float_supported_p ();
> >    else if (mode == TFmode)
> >      return true;
> > -  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> > +  else if (mode == HFmode || mode == BFmode)
> >      return true;
> >    else
> >      return default_scalar_mode_supported_p (mode);
> > @@ -22475,7 +22495,7 @@ ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
> >       be defined by the C front-end for AVX512FP16 intrinsics.  We will
> >       issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
> >       enabled.  */
> > -  return (((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> > +  return ((mode == HFmode || mode == BFmode)
> >           ? true
> >           : default_libgcc_floating_mode_supported_p (mode));
> >  }
> > @@ -22805,9 +22825,10 @@ ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
> >
> >    if (!TARGET_SSE2)
> >      {
> > -      gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
> > -      float16_type_node = ix86_float16_type_node;
> > -      bfloat16_type_node = ix86_bf16_type_node;
> > +      if (!float16_type_node)
> > +       float16_type_node = ix86_float16_type_node;
> > +      if (!bfloat16_type_node)
> > +       bfloat16_type_node = ix86_bf16_type_node;
> >        callback (float16_type_node);
> >        callback (bfloat16_type_node);
> >        float16_type_node = NULL_TREE;
> > @@ -24259,6 +24280,86 @@ ix86_init_libfuncs (void)
> >  #endif
> >  }
> >
> > +/* Return the diagnostic message string if conversion from FROMTYPE to
> > +   TOTYPE is not allowed, NULL otherwise.  */
> > +
> > +static const char *
> > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > +{
> > +  machine_mode from_mode = element_mode (fromtype);
> > +  machine_mode to_mode = element_mode (totype);
> > +
> > +  if (!TARGET_SSE2 && from_mode != to_mode)
> > +    {
> > > +      /* Do not allow conversions to/from BFmode/HFmode scalar types
> > +        when TARGET_SSE2 is not available.  */
> > +      if (from_mode == BFmode)
> > +       return N_("invalid conversion from type %<__bf16%> "
> > +                 "without option %<-msse2%>");
> > +      if (from_mode == HFmode)
> > +       return N_("invalid conversion from type %<_Float16%> "
> > +                 "without option %<-msse2%>");
> > +      if (to_mode == BFmode)
> > +       return N_("invalid conversion to type %<__bf16%> "
> > +                 "without option %<-msse2%>");
> > +      if (to_mode == HFmode)
> > +       return N_("invalid conversion to type %<_Float16%> "
> > +                 "without option %<-msse2%>");
> > +    }
> > +
> > +  /* Conversion allowed.  */
> > +  return NULL;
> > +}
> > +
> > +/* Return the diagnostic message string if the unary operation OP is
> > +   not permitted on TYPE, NULL otherwise.  */
> > +
> > +static const char *
> > +ix86_invalid_unary_op (int op, const_tree type)
> > +{
> > +  machine_mode mmode = element_mode (type);
> > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > +     when TARGET_SSE2 is not available.  */
> > +  if (!TARGET_SSE2 && op != ADDR_EXPR)
> > +    {
> > +      if (mmode == BFmode)
> > +       return N_("operation not permitted on type %<__bf16%> "
> > +                 "without option %<-msse2%>");
> > +      if (mmode == HFmode)
> > +       return N_("operation not permitted on type %<_Float16%> "
> > +                 "without option %<-msse2%>");
> > +    }
> > +
> > +  /* Operation allowed.  */
> > +  return NULL;
> > +}
> > +
> > +/* Return the diagnostic message string if the binary operation OP is
> > +   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
> > +
> > +static const char *
> > +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
> > +                       const_tree type2)
> > +{
> > +  machine_mode type1_mode = element_mode (type1);
> > +  machine_mode type2_mode = element_mode (type2);
> > +  /* Reject all 2-operand operations on BFmode or HFmode
> > +     when TARGET_SSE2 is not available.  */
> > +  if (!TARGET_SSE2)
> > +    {
> > +      if (type1_mode == BFmode || type2_mode == BFmode)
> > +       return N_("operation not permitted on type %<__bf16%> "
> > +                 "without option %<-msse2%>");
> > +
> > +      if (type1_mode == HFmode || type2_mode == HFmode)
> > +       return N_("operation not permitted on type %<_Float16%> "
> > +                 "without option %<-msse2%>");
> > +    }
> > +
> > +  /* Operation allowed.  */
> > +  return NULL;
> > +}
> > +
> >  /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
> >     FPU, assume that the fpcw is set to extended precision; when using
> >     only SSE, rounding is correct; when using both SSE and the FPU,
> > @@ -25248,6 +25349,15 @@ ix86_libgcc_floating_mode_supported_p
> >  #undef TARGET_MEMTAG_TAG_SIZE
> >  #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
> >
> > +#undef TARGET_INVALID_CONVERSION
> > +#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
> > +
> > +#undef TARGET_INVALID_UNARY_OP
> > +#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
> > +
> > +#undef TARGET_INVALID_BINARY_OP
> > +#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
> > +
> >  static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
> >  {
> >  #ifdef OPTION_GLIBC
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 1da6dce8e0b..7e839bc5c7e 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -1046,6 +1046,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
> >  #define VALID_AVX512FP16_REG_MODE(MODE)                                        \
> >    ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
> >
> > +#define VALID_SSE2_TYPE_MODE(MODE)             \
> > +  ((MODE) == HFmode || (MODE) == BFmode                \
> > +   || (MODE) == HCmode || (MODE) == BCmode)
> > +
> >  #define VALID_SSE2_REG_MODE(MODE)                                      \
> >    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
> >     || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode   \
> > diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> > index b220d871942..cc78df56940 100644
> > --- a/gcc/config/i386/immintrin.h
> > +++ b/gcc/config/i386/immintrin.h
> > @@ -98,11 +98,9 @@
> >
> >  #include <avx512vp2intersectvlintrin.h>
> >
> > -#ifdef __SSE2__
> >  #include <avx512fp16intrin.h>
> >
> >  #include <avx512fp16vlintrin.h>
> > -#endif
> >
> >  #include <shaintrin.h>
> >
> > @@ -118,13 +116,11 @@
> >
> >  #include <vpclmulqdqintrin.h>
> >
> > -#ifdef __SSE2__
> >  #include <avx512bf16vlintrin.h>
> >
> >  #include <avx512bf16intrin.h>
> >
> >  #include <avxneconvertintrin.h>
> > -#endif
> >
> >  #include <amxtileintrin.h>
> >
> > diff --git a/gcc/testsuite/g++.target/i386/float16-1.C b/gcc/testsuite/g++.target/i386/float16-1.C
> > index f96b932b698..938852ee9ad 100644
> > --- a/gcc/testsuite/g++.target/i386/float16-1.C
> > +++ b/gcc/testsuite/g++.target/i386/float16-1.C
> > @@ -1,8 +1,8 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-O2 -mno-sse2" } */
> >
> > -_Float16       /* { dg-error "expected unqualified-id before '_Float16'" } */
> > -foo (_Float16 x)
> > +_Float16
> > +foo (_Float16 x)/* { dg-error "SSE register return with SSE2 disabled" } */
> >  {
> > -  return x;
> > -}              /* { dg-error "'_Float16' is not supported on this target" } */
> > +  return x;/* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr109504.c b/gcc/testsuite/gcc.target/i386/pr109504.c
> > new file mode 100644
> > index 00000000000..fe5bcda10ad
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr109504.c
> > @@ -0,0 +1,6 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mno-sse" } */
> > +
> > +#pragma GCC target("sse4.1")
> > +#include <immintrin.h>
> > +int main(){return 0;}
> > diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > index 612d55be826..717055bc9ad 100644
> > --- a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > @@ -1,8 +1,8 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-O2 -mno-sse2" } */
> >
> > -__bf16/* { dg-error "unknown type name '__bf16'" } */
> > -foo (__bf16 x) /* { dg-error "unknown type name '__bf16'" } */
> > -{
> > -  return x;
> > +__bf16
> > +foo (__bf16 x)  /* { dg-error "SSE register return with SSE2 disabled" } */
> > +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
> >  }
> > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > index 1b645eb499d..faf818df75f 100644
> > --- a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > @@ -1,8 +1,8 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-O2 -mno-sse2" } */
> >
> > -_Float16/* { dg-error "is not supported on this target" } */
> > -foo (_Float16 x) /* { dg-error "is not supported on this target" } */
> > -{
> > -  return x;
> > +_Float16
> > +foo (_Float16 x) /* { dg-error "SSE register return with SSE2 disabled" } */
> > +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
> >  }
> > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-4.c b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > new file mode 100644
> > index 00000000000..64baf92ff56
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mno-sse2" } */
> > +
> > +_Float16 a;
> > +__bf16 c;
> > +_Complex _Float16 ac;
> > +
> > +void
> > +foo (_Float16* p)
> > +{
> > +  a = *p;
> > +}
> > +
> > +void
> > +foo1 (__bf16 *p)
> > +{
> > +  c = *p;
> > +}
> > +
> > +
> > +void
> > +foo2 (_Complex _Float16* p)
> > +{
> > +  ac = *p;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-5.c b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > new file mode 100644
> > index 00000000000..c3ed23b8ab3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile { target ia32} } */
> > +/* { dg-options "-O2 -mno-sse2" } */
> > +
> > +_Float16 a;
> > +__bf16 c;
> > +_Complex ac;
> > +void
> > +foo (_Float16 p)
> > +{
> > +  a = p;
> > +}
> > +
> > +void
> > +foo1 (__bf16 p)
> > +{
> > +  c = p;
> > +}
> > +
> > +
> > +void
> > +foo2 (_Complex p)
> > +{
> > +  ac = p;
> > +}
> > diff --git a/libgcc/config/i386/t-softfp b/libgcc/config/i386/t-softfp
> > index 69d0f819822..80d1fac121b 100644
> > --- a/libgcc/config/i386/t-softfp
> > +++ b/libgcc/config/i386/t-softfp
> > @@ -31,3 +31,10 @@ CFLAGS-trunchfbf2.c += -msse2
> >  CFLAGS-eqhf2.c += -msse2
> >  CFLAGS-_divhc3.c += -msse2
> >  CFLAGS-_mulhc3.c += -msse2
> > +
> > +CFLAGS-_hf_to_sd.c += -msse2
> > +CFLAGS-_hf_to_dd.c += -msse2
> > +CFLAGS-_hf_to_td.c += -msse2
> > +CFLAGS-_sd_to_hf.c += -msse2
> > +CFLAGS-_dd_to_hf.c += -msse2
> > +CFLAGS-_td_to_hf.c += -msse2
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-07-17  8:35       ` Hongtao Liu
@ 2023-07-17 11:38         ` Uros Bizjak
  2023-07-19  5:58           ` Hongtao Liu
  0 siblings, 1 reply; 12+ messages in thread
From: Uros Bizjak @ 2023-07-17 11:38 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: liuhongt, gcc-patches, hjl.tools, jakub

On Mon, Jul 17, 2023 at 10:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> I'd like to ping for this patch (only patch 1/2, for patch 2/2, I
> think that may not be necessary).
>
> On Mon, May 15, 2023 at 9:20 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > ping.
> >
> > On Fri, Apr 21, 2023 at 9:55 PM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > > > +  if (!TARGET_SSE2)
> > > > > +    {
> > > > > +      if (c_dialect_cxx ()
> > > > > +       && cxx_dialect > cxx20)
> > > >
> > > > Formatting, both conditions are short, so just put them on one line.
> > > Changed.
> > >
> > > > But for the C++23 macros, more importantly I think we really should
> > > > also in ix86_target_macros_internal add
> > > >   if (c_dialect_cxx ()
> > > >       && cxx_dialect > cxx20
> > > >       && (isa_flag & OPTION_MASK_ISA_SSE2))
> > > >     {
> > > >       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> > > >       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> > > >     }
> > > > plus associated libstdc++ changes.  It can be done incrementally though.
> > > Added in PATCH 2/2
> > >
> > > > > +      if (flag_building_libgcc)
> > > > > +     {
> > > > > +       /* libbid uses __LIBGCC_HAS_HF_MODE__ and __LIBGCC_HAS_BF_MODE__
> > > > > +          to check backend support of _Float16 and __bf16 type.  */
> > > >
> > > > That is actually the case only for HFmode, but not for BFmode right now.
> > > > So, we need further work.  One is to add the BFmode support in there,
> > > > and another one is make sure the _Float16 <-> _Decimal* and __bf16 <->
> > > > _Decimal* conversions are compiled in also if not -msse2 by default.
> > > > One way to do that is wrap the HF and BF mode related functions on x86
> > > > #ifndef __SSE2__ into the pragmas like intrin headers use (but then
> > > > perhaps we don't need to undef this stuff here), another is not provide
> > > > the hf/bf support in that case from the TUs where they are provided now,
> > > > but from a different one which would be compiled with -msse2.
> > > Add CFLAGS-_hf_to_sd.c += -msse2, similar for other files in libbid, just like
> > > we did before for HFtype softfp. Then no need to undef libgcc macros.
> > >
> > > > >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> > > > >       some less clueful developer tries to use floating-point anyway.  */
> > > > > -  if (needed_sseregs && !TARGET_SSE)
> > > > > +  if (needed_sseregs
> > > > > +      && (!TARGET_SSE
> > > > > +       || (VALID_SSE2_TYPE_MODE (mode)
> > > > > +           && !TARGET_SSE2)))
> > > >
> > > > Formatting, no need to split this up that much.
> > > >   if (needed_sseregs
> > > >       && (!TARGET_SSE
> > > >           || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > > > or even better
> > > >   if (needed_sseregs
> > > >       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > > > will do it.
> > > Changed.
> > >
> > > > Instead of this, just use
> > > >       if (!float16_type_node)
> > > >         {
> > > >           float16_type_node = ix86_float16_type_node;
> > > >           callback (float16_type_node);
> > > >           float16_type_node = NULL_TREE;
> > > >         }
> > > >       if (!bfloat16_type_node)
> > > >         {
> > > >           bfloat16_type_node = ix86_bf16_type_node;
> > > >           callback (bfloat16_type_node);
> > > >           bfloat16_type_node = NULL_TREE;
> > > >         }
> > > Changed.
> > >
> > >
> > > > > +static const char *
> > > > > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > > > > +{
> > > > > +  if (element_mode (fromtype) != element_mode (totype))
> > > > > +    {
> > > > > +      /* Do no allow conversions to/from BFmode/HFmode scalar types
> > > > > +      when TARGET_SSE2 is not available.  */
> > > > > +      if ((TYPE_MODE (fromtype) == BFmode
> > > > > +        || TYPE_MODE (fromtype) == HFmode)
> > > > > +       && !TARGET_SSE2)
> > > >
> > > > First of all, not really sure if this should be purely about scalar
> > > > modes, not also complex and vector modes involving those inner modes.
> > > > Because complex or vector modes with BF/HF elements will be without
> > > > TARGET_SSE2 for sure lowered into scalar code and that can't be handled
> > > > either.
> > > > So if (!TARGET_SSE2 && GET_MODE_INNER (TYPE_MODE (fromtype)) == BFmode)
> > > > or even better
> > > > if (!TARGET_SSE2 && element_mode (fromtype) == BFmode)
> > > > ?
> > > > Or even better remember the 2 modes above into machine_mode temporaries
> > > > and just use those in the != comparison and for the checks?
> > > >
> > > > Also, I think it is weird to tell user %<__bf16%> or %<_Float16%> when
> > > > we know which one it is.  Just return separate messages?
> > > Changed.
> > >
> > > > > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > > > > +     when TARGET_SSE2 is not available.  */
> > > > > +  if ((element_mode (type) == BFmode || element_mode (type) == HFmode)
> > > > > +      && !TARGET_SSE2 && op != ADDR_EXPR)
> > > > > +    return N_("operation not permitted on type %<__bf16%> "
> > > > > +           "or %<_Float16%> without option %<-msse2%>");
> > > >
> > > > Similarly.  Also, check !TARGET_SSE2 first as inexpensive one.
> > > Changed.
> > >
> > >
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > Successfully cross-build i686-linux-gnu.
> > > Ok for trunk?
> > >
> > > Enable _Float16 and __bf16 all the time but issue errors when the
> > > types are used in conversion, unary operation, binary operation,
> > > parameter passing or value return when TARGET_SSE2 is not available.
> > >
> > > Also undef macros which are used by libgcc/libstdc++ to check the
> > > backend support of the _Float16/__bf16 types when TARGET_SSE2 is not
> > > available.
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR target/109504
> > >         * config/i386/i386-builtins.cc
> > >         (ix86_register_float16_builtin_type): Remove TARGET_SSE2.
> > >         (ix86_register_bf16_builtin_type): Ditto.
> > >         * config/i386/i386-c.cc (ix86_target_macros): When TARGET_SSE2
> > >         isn't available, undef the macros which are used to check the
> > >         backend support of the _Float16/__bf16 types when building
> > >         libstdc++ and libgcc.
> > >         * config/i386/i386.cc (construct_container): Issue errors for
> > >         HFmode/BFmode when TARGET_SSE2 is not available.
> > >         (function_value_32): Ditto.
> > >         (ix86_scalar_mode_supported_p): Remove TARGET_SSE2 for HFmode/BFmode.
> > >         (ix86_libgcc_floating_mode_supported_p): Ditto.
> > >         (ix86_emit_support_tinfos): Adjust codes.
> > >         (ix86_invalid_conversion): New function.
> > >         (ix86_invalid_unary_op): Ditto.
> > >         (ix86_invalid_binary_op): Ditto.
> > >         (TARGET_INVALID_CONVERSION): Define.
> > >         (TARGET_INVALID_UNARY_OP): Define.
> > >         (TARGET_INVALID_BINARY_OP): Define.
> > >         * config/i386/immintrin.h [__SSE2__]: Remove for fp16/bf16
> > > >         related intrinsics header files.
> > >         * config/i386/i386.h (VALID_SSE2_TYPE_MODE): New macro.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * gcc.target/i386/pr109504.c: New test.
> > >         * gcc.target/i386/sse2-bfloat16-1.c: Adjust error info.
> > >         * gcc.target/i386/sse2-float16-1.c: Ditto.
> > >         * gcc.target/i386/sse2-float16-4.c: New test.
> > >         * gcc.target/i386/sse2-float16-5.c: New test.
> > >         * g++.target/i386/float16-1.C: Adjust error info.
> > >
> > > libgcc/ChangeLog:
> > >
> > >         * config/i386/t-softfp: Add -msse2 to libbid HFtype related
> > >         files.

LGTM, if you need someone to rubber-stamp the patch. I'm not really
versed in this part of the compiler, so please wait a day in case someone
else has anything to say about the patch.

Thanks,
Uros.

> > > ---
> > >  gcc/config/i386/i386-builtins.cc              |   4 +-
> > >  gcc/config/i386/i386-c.cc                     |  15 ++
> > >  gcc/config/i386/i386.cc                       | 130 ++++++++++++++++--
> > >  gcc/config/i386/i386.h                        |   4 +
> > >  gcc/config/i386/immintrin.h                   |   4 -
> > >  gcc/testsuite/g++.target/i386/float16-1.C     |   8 +-
> > >  gcc/testsuite/gcc.target/i386/pr109504.c      |   6 +
> > >  .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +-
> > >  .../gcc.target/i386/sse2-float16-1.c          |   8 +-
> > >  .../gcc.target/i386/sse2-float16-4.c          |  25 ++++
> > >  .../gcc.target/i386/sse2-float16-5.c          |  24 ++++
> > >  libgcc/config/i386/t-softfp                   |   7 +
> > >  12 files changed, 215 insertions(+), 28 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr109504.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > >
> > > diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> > > index fc0c82b156e..1cdabfd3a0a 100644
> > > --- a/gcc/config/i386/i386-builtins.cc
> > > +++ b/gcc/config/i386/i386-builtins.cc
> > > @@ -1367,7 +1367,7 @@ ix86_register_float16_builtin_type (void)
> > >    else
> > >      ix86_float16_type_node = float16_type_node;
> > >
> > > -  if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
> > > +  if (!maybe_get_identifier ("_Float16"))
> > >      lang_hooks.types.register_builtin_type (ix86_float16_type_node,
> > >                                             "_Float16");
> > >  }
> > > @@ -1385,7 +1385,7 @@ ix86_register_bf16_builtin_type (void)
> > >    else
> > >      ix86_bf16_type_node = bfloat16_type_node;
> > >
> > > -  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
> > > +  if (!maybe_get_identifier ("__bf16"))
> > >      lang_hooks.types.register_builtin_type (ix86_bf16_type_node, "__bf16");
> > >  }
> > >
> > > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> > > index e7bd7cc706c..2f83c9981e1 100644
> > > --- a/gcc/config/i386/i386-c.cc
> > > +++ b/gcc/config/i386/i386-c.cc
> > > @@ -817,6 +817,21 @@ ix86_target_macros (void)
> > >    if (!TARGET_80387)
> > >      cpp_define (parse_in, "_SOFT_FLOAT");
> > >
> > > > +  /* HFmode/BFmode is supported without depending on any ISA
> > > +     in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
> > > +     but according to psABI, they're really supported w/ SSE2 and above.
> > > +     Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
> > > +     for backend support of the types, undef the macros to avoid
> > > +     build failure, see PR109504.  */
> > > +  if (!TARGET_SSE2)
> > > +    {
> > > +      if (c_dialect_cxx () && cxx_dialect > cxx20)
> > > +       {
> > > +         cpp_undef (parse_in, "__STDCPP_FLOAT16_T__");
> > > +         cpp_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> > > +       }
> > > +    }
> > > +
> > >    if (TARGET_LONG_DOUBLE_64)
> > >      cpp_define (parse_in, "__LONG_DOUBLE_64__");
> > >
> > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > index fbd33a6bfd1..633a0f41e60 100644
> > > --- a/gcc/config/i386/i386.cc
> > > +++ b/gcc/config/i386/i386.cc
> > > @@ -2651,7 +2651,8 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> > >
> > >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> > >       some less clueful developer tries to use floating-point anyway.  */
> > > -  if (needed_sseregs && !TARGET_SSE)
> > > +  if (needed_sseregs
> > > +      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > >      {
> > >        /* Return early if we shouldn't raise an error for invalid
> > >          calls.  */
> > > @@ -2661,13 +2662,19 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> > >         {
> > >           if (!issued_sse_ret_error)
> > >             {
> > > -             error ("SSE register return with SSE disabled");
> > > +             if (VALID_SSE2_TYPE_MODE (mode))
> > > +               error ("SSE register return with SSE2 disabled");
> > > +             else
> > > +               error ("SSE register return with SSE disabled");
> > >               issued_sse_ret_error = true;
> > >             }
> > >         }
> > >        else if (!issued_sse_arg_error)
> > >         {
> > > -         error ("SSE register argument with SSE disabled");
> > > +         if (VALID_SSE2_TYPE_MODE (mode))
> > > +           error ("SSE register argument with SSE2 disabled");
> > > +         else
> > > +           error ("SSE register argument with SSE disabled");
> > >           issued_sse_arg_error = true;
> > >         }
> > >        return NULL;
> > > @@ -4022,13 +4029,26 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
> > >
> > >    /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */
> > >    if (mode == HFmode || mode == BFmode)
> > > -    regno = FIRST_SSE_REG;
> > > +    {
> > > +      if (!TARGET_SSE2)
> > > +       {
> > > +         error ("SSE register return with SSE2 disabled");
> > > +         regno = AX_REG;
> > > +       }
> > > +      else
> > > +       regno = FIRST_SSE_REG;
> > > +    }
> > > +
> > >    if (mode == HCmode)
> > >      {
> > > +      if (!TARGET_SSE2)
> > > +       error ("SSE register return with SSE2 disabled");
> > > +
> > >        rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
> > >        XVECEXP (ret, 0, 0)
> > >         = gen_rtx_EXPR_LIST (VOIDmode,
> > > -                            gen_rtx_REG (SImode, FIRST_SSE_REG),
> > > +                            gen_rtx_REG (SImode,
> > > +                                         TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
> > >                              GEN_INT (0));
> > >        return ret;
> > >      }
> > > @@ -22459,7 +22479,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
> > >      return default_decimal_float_supported_p ();
> > >    else if (mode == TFmode)
> > >      return true;
> > > -  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> > > +  else if (mode == HFmode || mode == BFmode)
> > >      return true;
> > >    else
> > >      return default_scalar_mode_supported_p (mode);
> > > @@ -22475,7 +22495,7 @@ ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
> > >       be defined by the C front-end for AVX512FP16 intrinsics.  We will
> > >       issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
> > >       enabled.  */
> > > -  return (((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> > > +  return ((mode == HFmode || mode == BFmode)
> > >           ? true
> > >           : default_libgcc_floating_mode_supported_p (mode));
> > >  }
> > > @@ -22805,9 +22825,10 @@ ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
> > >
> > >    if (!TARGET_SSE2)
> > >      {
> > > -      gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
> > > -      float16_type_node = ix86_float16_type_node;
> > > -      bfloat16_type_node = ix86_bf16_type_node;
> > > +      if (!float16_type_node)
> > > +       float16_type_node = ix86_float16_type_node;
> > > +      if (!bfloat16_type_node)
> > > +       bfloat16_type_node = ix86_bf16_type_node;
> > >        callback (float16_type_node);
> > >        callback (bfloat16_type_node);
> > >        float16_type_node = NULL_TREE;
> > > @@ -24259,6 +24280,86 @@ ix86_init_libfuncs (void)
> > >  #endif
> > >  }
> > >
> > > +/* Return the diagnostic message string if conversion from FROMTYPE to
> > > +   TOTYPE is not allowed, NULL otherwise.  */
> > > +
> > > +static const char *
> > > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > > +{
> > > +  machine_mode from_mode = element_mode (fromtype);
> > > +  machine_mode to_mode = element_mode (totype);
> > > +
> > > +  if (!TARGET_SSE2 && from_mode != to_mode)
> > > +    {
> > > > +      /* Do not allow conversions to/from BFmode/HFmode scalar types
> > > +        when TARGET_SSE2 is not available.  */
> > > +      if (from_mode == BFmode)
> > > +       return N_("invalid conversion from type %<__bf16%> "
> > > +                 "without option %<-msse2%>");
> > > +      if (from_mode == HFmode)
> > > +       return N_("invalid conversion from type %<_Float16%> "
> > > +                 "without option %<-msse2%>");
> > > +      if (to_mode == BFmode)
> > > +       return N_("invalid conversion to type %<__bf16%> "
> > > +                 "without option %<-msse2%>");
> > > +      if (to_mode == HFmode)
> > > +       return N_("invalid conversion to type %<_Float16%> "
> > > +                 "without option %<-msse2%>");
> > > +    }
> > > +
> > > +  /* Conversion allowed.  */
> > > +  return NULL;
> > > +}
> > > +
> > > +/* Return the diagnostic message string if the unary operation OP is
> > > +   not permitted on TYPE, NULL otherwise.  */
> > > +
> > > +static const char *
> > > +ix86_invalid_unary_op (int op, const_tree type)
> > > +{
> > > +  machine_mode mmode = element_mode (type);
> > > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > > +     when TARGET_SSE2 is not available.  */
> > > +  if (!TARGET_SSE2 && op != ADDR_EXPR)
> > > +    {
> > > +      if (mmode == BFmode)
> > > +       return N_("operation not permitted on type %<__bf16%> "
> > > +                 "without option %<-msse2%>");
> > > +      if (mmode == HFmode)
> > > +       return N_("operation not permitted on type %<_Float16%> "
> > > +                 "without option %<-msse2%>");
> > > +    }
> > > +
> > > +  /* Operation allowed.  */
> > > +  return NULL;
> > > +}
> > > +
> > > +/* Return the diagnostic message string if the binary operation OP is
> > > +   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
> > > +
> > > +static const char *
> > > +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
> > > +                       const_tree type2)
> > > +{
> > > +  machine_mode type1_mode = element_mode (type1);
> > > +  machine_mode type2_mode = element_mode (type2);
> > > +  /* Reject all 2-operand operations on BFmode or HFmode
> > > +     when TARGET_SSE2 is not available.  */
> > > +  if (!TARGET_SSE2)
> > > +    {
> > > +      if (type1_mode == BFmode || type2_mode == BFmode)
> > > +       return N_("operation not permitted on type %<__bf16%> "
> > > +                 "without option %<-msse2%>");
> > > +
> > > +      if (type1_mode == HFmode || type2_mode == HFmode)
> > > +       return N_("operation not permitted on type %<_Float16%> "
> > > +                 "without option %<-msse2%>");
> > > +    }
> > > +
> > > +  /* Operation allowed.  */
> > > +  return NULL;
> > > +}
> > > +
> > >  /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
> > >     FPU, assume that the fpcw is set to extended precision; when using
> > >     only SSE, rounding is correct; when using both SSE and the FPU,
> > > @@ -25248,6 +25349,15 @@ ix86_libgcc_floating_mode_supported_p
> > >  #undef TARGET_MEMTAG_TAG_SIZE
> > >  #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
> > >
> > > +#undef TARGET_INVALID_CONVERSION
> > > +#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
> > > +
> > > +#undef TARGET_INVALID_UNARY_OP
> > > +#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
> > > +
> > > +#undef TARGET_INVALID_BINARY_OP
> > > +#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
> > > +
> > >  static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
> > >  {
> > >  #ifdef OPTION_GLIBC
> > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > > index 1da6dce8e0b..7e839bc5c7e 100644
> > > --- a/gcc/config/i386/i386.h
> > > +++ b/gcc/config/i386/i386.h
> > > @@ -1046,6 +1046,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
> > >  #define VALID_AVX512FP16_REG_MODE(MODE)                                        \
> > >    ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
> > >
> > > +#define VALID_SSE2_TYPE_MODE(MODE)             \
> > > +  ((MODE) == HFmode || (MODE) == BFmode                \
> > > +   || (MODE) == HCmode || (MODE) == BCmode)
> > > +
> > >  #define VALID_SSE2_REG_MODE(MODE)                                      \
> > >    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
> > >     || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode   \
> > > diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> > > index b220d871942..cc78df56940 100644
> > > --- a/gcc/config/i386/immintrin.h
> > > +++ b/gcc/config/i386/immintrin.h
> > > @@ -98,11 +98,9 @@
> > >
> > >  #include <avx512vp2intersectvlintrin.h>
> > >
> > > -#ifdef __SSE2__
> > >  #include <avx512fp16intrin.h>
> > >
> > >  #include <avx512fp16vlintrin.h>
> > > -#endif
> > >
> > >  #include <shaintrin.h>
> > >
> > > @@ -118,13 +116,11 @@
> > >
> > >  #include <vpclmulqdqintrin.h>
> > >
> > > -#ifdef __SSE2__
> > >  #include <avx512bf16vlintrin.h>
> > >
> > >  #include <avx512bf16intrin.h>
> > >
> > >  #include <avxneconvertintrin.h>
> > > -#endif
> > >
> > >  #include <amxtileintrin.h>
> > >
> > > diff --git a/gcc/testsuite/g++.target/i386/float16-1.C b/gcc/testsuite/g++.target/i386/float16-1.C
> > > index f96b932b698..938852ee9ad 100644
> > > --- a/gcc/testsuite/g++.target/i386/float16-1.C
> > > +++ b/gcc/testsuite/g++.target/i386/float16-1.C
> > > @@ -1,8 +1,8 @@
> > >  /* { dg-do compile } */
> > >  /* { dg-options "-O2 -mno-sse2" } */
> > >
> > > -_Float16       /* { dg-error "expected unqualified-id before '_Float16'" } */
> > > -foo (_Float16 x)
> > > +_Float16
> > > +foo (_Float16 x)/* { dg-error "SSE register return with SSE2 disabled" } */
> > >  {
> > > -  return x;
> > > -}              /* { dg-error "'_Float16' is not supported on this target" } */
> > > +  return x;/* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr109504.c b/gcc/testsuite/gcc.target/i386/pr109504.c
> > > new file mode 100644
> > > index 00000000000..fe5bcda10ad
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr109504.c
> > > @@ -0,0 +1,6 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -mno-sse" } */
> > > +
> > > +#pragma GCC target("sse4.1")
> > > +#include <immintrin.h>
> > > +int main(){return 0;}
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > > index 612d55be826..717055bc9ad 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > > @@ -1,8 +1,8 @@
> > >  /* { dg-do compile } */
> > >  /* { dg-options "-O2 -mno-sse2" } */
> > >
> > > -__bf16/* { dg-error "unknown type name '__bf16'" } */
> > > -foo (__bf16 x) /* { dg-error "unknown type name '__bf16'" } */
> > > -{
> > > -  return x;
> > > +__bf16
> > > +foo (__bf16 x)  /* { dg-error "SSE register return with SSE2 disabled" } */
> > > +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > > +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
> > >  }
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > > index 1b645eb499d..faf818df75f 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > > @@ -1,8 +1,8 @@
> > >  /* { dg-do compile } */
> > >  /* { dg-options "-O2 -mno-sse2" } */
> > >
> > > -_Float16/* { dg-error "is not supported on this target" } */
> > > -foo (_Float16 x) /* { dg-error "is not supported on this target" } */
> > > -{
> > > -  return x;
> > > +_Float16
> > > +foo (_Float16 x) /* { dg-error "SSE register return with SSE2 disabled" } */
> > > +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > > +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
> > >  }
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-4.c b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > > new file mode 100644
> > > index 00000000000..64baf92ff56
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > > @@ -0,0 +1,25 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -mno-sse2" } */
> > > +
> > > +_Float16 a;
> > > +__bf16 c;
> > > +_Complex _Float16 ac;
> > > +
> > > +void
> > > +foo (_Float16* p)
> > > +{
> > > +  a = *p;
> > > +}
> > > +
> > > +void
> > > +foo1 (__bf16 *p)
> > > +{
> > > +  c = *p;
> > > +}
> > > +
> > > +
> > > +void
> > > +foo2 (_Complex _Float16* p)
> > > +{
> > > +  ac = *p;
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-5.c b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > > new file mode 100644
> > > index 00000000000..c3ed23b8ab3
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > > @@ -0,0 +1,24 @@
> > > +/* { dg-do compile { target ia32} } */
> > > +/* { dg-options "-O2 -mno-sse2" } */
> > > +
> > > +_Float16 a;
> > > +__bf16 c;
> > > +_Complex ac;
> > > +void
> > > +foo (_Float16 p)
> > > +{
> > > +  a = p;
> > > +}
> > > +
> > > +void
> > > +foo1 (__bf16 p)
> > > +{
> > > +  c = p;
> > > +}
> > > +
> > > +
> > > +void
> > > +foo2 (_Complex p)
> > > +{
> > > +  ac = p;
> > > +}
> > > diff --git a/libgcc/config/i386/t-softfp b/libgcc/config/i386/t-softfp
> > > index 69d0f819822..80d1fac121b 100644
> > > --- a/libgcc/config/i386/t-softfp
> > > +++ b/libgcc/config/i386/t-softfp
> > > @@ -31,3 +31,10 @@ CFLAGS-trunchfbf2.c += -msse2
> > >  CFLAGS-eqhf2.c += -msse2
> > >  CFLAGS-_divhc3.c += -msse2
> > >  CFLAGS-_mulhc3.c += -msse2
> > > +
> > > +CFLAGS-_hf_to_sd.c += -msse2
> > > +CFLAGS-_hf_to_dd.c += -msse2
> > > +CFLAGS-_hf_to_td.c += -msse2
> > > +CFLAGS-_sd_to_hf.c += -msse2
> > > +CFLAGS-_dd_to_hf.c += -msse2
> > > +CFLAGS-_td_to_hf.c += -msse2
> > > --
> > > 2.39.1.388.g2fc9e9ca3c
> > >
> >
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-07-17 11:38         ` Uros Bizjak
@ 2023-07-19  5:58           ` Hongtao Liu
  2023-07-19 10:51             ` Jakub Jelinek
  0 siblings, 1 reply; 12+ messages in thread
From: Hongtao Liu @ 2023-07-19  5:58 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: liuhongt, gcc-patches, hjl.tools, jakub

On Mon, Jul 17, 2023 at 7:38 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Mon, Jul 17, 2023 at 10:28 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > I'd like to ping for this patch (only patch 1/2, for patch 2/2, I
> > think that may not be necessary).
> >
> > On Mon, May 15, 2023 at 9:20 AM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > ping.
> > >
> > > On Fri, Apr 21, 2023 at 9:55 PM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > > > > +  if (!TARGET_SSE2)
> > > > > > +    {
> > > > > > +      if (c_dialect_cxx ()
> > > > > > +       && cxx_dialect > cxx20)
> > > > >
> > > > > Formatting, both conditions are short, so just put them on one line.
> > > > Changed.
> > > >
> > > > > But for the C++23 macros, more importantly I think we really should
> > > > > also in ix86_target_macros_internal add
> > > > >   if (c_dialect_cxx ()
> > > > >       && cxx_dialect > cxx20
> > > > >       && (isa_flag & OPTION_MASK_ISA_SSE2))
> > > > >     {
> > > > >       def_or_undef (parse_in, "__STDCPP_FLOAT16_T__");
> > > > >       def_or_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> > > > >     }
> > > > > plus associated libstdc++ changes.  It can be done incrementally though.
> > > > Added in PATCH 2/2
> > > >
> > > > > > +      if (flag_building_libgcc)
> > > > > > +     {
> > > > > > +       /* libbid uses __LIBGCC_HAS_HF_MODE__ and __LIBGCC_HAS_BF_MODE__
> > > > > > +          to check backend support of _Float16 and __bf16 type.  */
> > > > >
> > > > > That is actually the case only for HFmode, but not for BFmode right now.
> > > > > So, we need further work.  One is to add the BFmode support in there,
> > > > > and another one is make sure the _Float16 <-> _Decimal* and __bf16 <->
> > > > > _Decimal* conversions are compiled in also if not -msse2 by default.
> > > > > One way to do that is wrap the HF and BF mode related functions on x86
> > > > > #ifndef __SSE2__ into the pragmas like intrin headers use (but then
> > > > > perhaps we don't need to undef this stuff here), another is not provide
> > > > > the hf/bf support in that case from the TUs where they are provided now,
> > > > > but from a different one which would be compiled with -msse2.
> > > > > Added CFLAGS-_hf_to_sd.c += -msse2, and similarly for the other HF-related
> > > > > files in libbid, just like we did before for the HFmode softfp files, so
> > > > > there is no need to undef the libgcc macros.
> > > >
> > > > > >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> > > > > >       some less clueful developer tries to use floating-point anyway.  */
> > > > > > -  if (needed_sseregs && !TARGET_SSE)
> > > > > > +  if (needed_sseregs
> > > > > > +      && (!TARGET_SSE
> > > > > > +       || (VALID_SSE2_TYPE_MODE (mode)
> > > > > > +           && !TARGET_SSE2)))
> > > > >
> > > > > Formatting, no need to split this up that much.
> > > > >   if (needed_sseregs
> > > > >       && (!TARGET_SSE
> > > > >           || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > > > > or even better
> > > > >   if (needed_sseregs
> > > > >       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > > > > will do it.
> > > > Changed.
> > > >
> > > > > Instead of this, just use
> > > > >       if (!float16_type_node)
> > > > >         {
> > > > >           float16_type_node = ix86_float16_type_node;
> > > > >           callback (float16_type_node);
> > > > >           float16_type_node = NULL_TREE;
> > > > >         }
> > > > >       if (!bfloat16_type_node)
> > > > >         {
> > > > >           bfloat16_type_node = ix86_bf16_type_node;
> > > > >           callback (bfloat16_type_node);
> > > > >           bfloat16_type_node = NULL_TREE;
> > > > >         }
> > > > Changed.
> > > >
> > > >
> > > > > > +static const char *
> > > > > > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > > > > > +{
> > > > > > +  if (element_mode (fromtype) != element_mode (totype))
> > > > > > +    {
> > > > > > > +      /* Do not allow conversions to/from BFmode/HFmode scalar types
> > > > > > +      when TARGET_SSE2 is not available.  */
> > > > > > +      if ((TYPE_MODE (fromtype) == BFmode
> > > > > > +        || TYPE_MODE (fromtype) == HFmode)
> > > > > > +       && !TARGET_SSE2)
> > > > >
> > > > > First of all, not really sure if this should be purely about scalar
> > > > > modes, not also complex and vector modes involving those inner modes.
> > > > > Because complex or vector modes with BF/HF elements will be without
> > > > > TARGET_SSE2 for sure lowered into scalar code and that can't be handled
> > > > > either.
> > > > > So if (!TARGET_SSE2 && GET_MODE_INNER (TYPE_MODE (fromtype)) == BFmode)
> > > > > or even better
> > > > > if (!TARGET_SSE2 && element_mode (fromtype) == BFmode)
> > > > > ?
> > > > > Or even better remember the 2 modes above into machine_mode temporaries
> > > > > and just use those in the != comparison and for the checks?
> > > > >
> > > > > Also, I think it is weird to tell user %<__bf16%> or %<_Float16%> when
> > > > > we know which one it is.  Just return separate messages?
> > > > Changed.
> > > >
> > > > > > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > > > > > +     when TARGET_SSE2 is not available.  */
> > > > > > +  if ((element_mode (type) == BFmode || element_mode (type) == HFmode)
> > > > > > +      && !TARGET_SSE2 && op != ADDR_EXPR)
> > > > > > +    return N_("operation not permitted on type %<__bf16%> "
> > > > > > +           "or %<_Float16%> without option %<-msse2%>");
> > > > >
> > > > > Similarly.  Also, check !TARGET_SSE2 first as inexpensive one.
> > > > Changed.
> > > >
> > > >
> > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > > Successfully cross-build i686-linux-gnu.
> > > > Ok for trunk?
> > > >
> > > > Enable _Float16 and __bf16 all the time but issue errors when the
> > > > types are used in conversion, unary operation, binary operation,
> > > > parameter passing or value return when TARGET_SSE2 is not available.
> > > >
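> > > > > Also undef the macros which are used by libstdc++ to check the
> > > > > backend support of the _Float16/__bf16 types when TARGET_SSE2 is not
> > > > > available.  The libgcc macros are left alone; the libbid HFmode
> > > > > conversion files are built with -msse2 instead.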
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >         PR target/109504
> > > >         * config/i386/i386-builtins.cc
> > > >         (ix86_register_float16_builtin_type): Remove TARGET_SSE2.
> > > >         (ix86_register_bf16_builtin_type): Ditto.
> > > >         * config/i386/i386-c.cc (ix86_target_macros): When TARGET_SSE2
> > > >         isn't available, undef the macros which are used to check the
> > > >         backend support of the _Float16/__bf16 types when building
> > > >         libstdc++ and libgcc.
> > > >         * config/i386/i386.cc (construct_container): Issue errors for
> > > >         HFmode/BFmode when TARGET_SSE2 is not available.
> > > >         (function_value_32): Ditto.
> > > >         (ix86_scalar_mode_supported_p): Remove TARGET_SSE2 for HFmode/BFmode.
> > > >         (ix86_libgcc_floating_mode_supported_p): Ditto.
> > > >         (ix86_emit_support_tinfos): Adjust codes.
> > > >         (ix86_invalid_conversion): New function.
> > > >         (ix86_invalid_unary_op): Ditto.
> > > >         (ix86_invalid_binary_op): Ditto.
> > > >         (TARGET_INVALID_CONVERSION): Define.
> > > >         (TARGET_INVALID_UNARY_OP): Define.
> > > >         (TARGET_INVALID_BINARY_OP): Define.
> > > >         * config/i386/immintrin.h [__SSE2__]: Remove for fp16/bf16
> > > > >         related intrinsics header files.
> > > >         * config/i386/i386.h (VALID_SSE2_TYPE_MODE): New macro.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         * gcc.target/i386/pr109504.c: New test.
> > > >         * gcc.target/i386/sse2-bfloat16-1.c: Adjust error info.
> > > >         * gcc.target/i386/sse2-float16-1.c: Ditto.
> > > >         * gcc.target/i386/sse2-float16-4.c: New test.
> > > >         * gcc.target/i386/sse2-float16-5.c: New test.
> > > >         * g++.target/i386/float16-1.C: Adjust error info.
> > > >
> > > > libgcc/ChangeLog:
> > > >
> > > >         * config/i386/t-softfp: Add -msse2 to libbid HFtype related
> > > >         files.
>
> LGTM, if you need someone to rubber-stamp the patch. I'm not really
> versed in this part of the compiler, so please wait a day if someone
> has anything to say about the patch.
Thanks, pushed to trunk.
>
> Thanks,
> Uros.
>
> > > > ---
> > > >  gcc/config/i386/i386-builtins.cc              |   4 +-
> > > >  gcc/config/i386/i386-c.cc                     |  15 ++
> > > >  gcc/config/i386/i386.cc                       | 130 ++++++++++++++++--
> > > >  gcc/config/i386/i386.h                        |   4 +
> > > >  gcc/config/i386/immintrin.h                   |   4 -
> > > >  gcc/testsuite/g++.target/i386/float16-1.C     |   8 +-
> > > >  gcc/testsuite/gcc.target/i386/pr109504.c      |   6 +
> > > >  .../gcc.target/i386/sse2-bfloat16-1.c         |   8 +-
> > > >  .../gcc.target/i386/sse2-float16-1.c          |   8 +-
> > > >  .../gcc.target/i386/sse2-float16-4.c          |  25 ++++
> > > >  .../gcc.target/i386/sse2-float16-5.c          |  24 ++++
> > > >  libgcc/config/i386/t-softfp                   |   7 +
> > > >  12 files changed, 215 insertions(+), 28 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr109504.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > > >
> > > > diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> > > > index fc0c82b156e..1cdabfd3a0a 100644
> > > > --- a/gcc/config/i386/i386-builtins.cc
> > > > +++ b/gcc/config/i386/i386-builtins.cc
> > > > @@ -1367,7 +1367,7 @@ ix86_register_float16_builtin_type (void)
> > > >    else
> > > >      ix86_float16_type_node = float16_type_node;
> > > >
> > > > -  if (!maybe_get_identifier ("_Float16") && TARGET_SSE2)
> > > > +  if (!maybe_get_identifier ("_Float16"))
> > > >      lang_hooks.types.register_builtin_type (ix86_float16_type_node,
> > > >                                             "_Float16");
> > > >  }
> > > > @@ -1385,7 +1385,7 @@ ix86_register_bf16_builtin_type (void)
> > > >    else
> > > >      ix86_bf16_type_node = bfloat16_type_node;
> > > >
> > > > -  if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
> > > > +  if (!maybe_get_identifier ("__bf16"))
> > > >      lang_hooks.types.register_builtin_type (ix86_bf16_type_node, "__bf16");
> > > >  }
> > > >
> > > > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> > > > index e7bd7cc706c..2f83c9981e1 100644
> > > > --- a/gcc/config/i386/i386-c.cc
> > > > +++ b/gcc/config/i386/i386-c.cc
> > > > @@ -817,6 +817,21 @@ ix86_target_macros (void)
> > > >    if (!TARGET_80387)
> > > >      cpp_define (parse_in, "_SOFT_FLOAT");
> > > >
> > > > > +  /* HFmode/BFmode is supported without depending on any ISA
> > > > +     in scalar_mode_supported_p and libgcc_floating_mode_supported_p,
> > > > +     but according to psABI, they're really supported w/ SSE2 and above.
> > > > +     Since libstdc++ uses __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__
> > > > +     for backend support of the types, undef the macros to avoid
> > > > +     build failure, see PR109504.  */
> > > > +  if (!TARGET_SSE2)
> > > > +    {
> > > > +      if (c_dialect_cxx () && cxx_dialect > cxx20)
> > > > +       {
> > > > +         cpp_undef (parse_in, "__STDCPP_FLOAT16_T__");
> > > > +         cpp_undef (parse_in, "__STDCPP_BFLOAT16_T__");
> > > > +       }
> > > > +    }
> > > > +
> > > >    if (TARGET_LONG_DOUBLE_64)
> > > >      cpp_define (parse_in, "__LONG_DOUBLE_64__");
> > > >
> > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > > > index fbd33a6bfd1..633a0f41e60 100644
> > > > --- a/gcc/config/i386/i386.cc
> > > > +++ b/gcc/config/i386/i386.cc
> > > > @@ -2651,7 +2651,8 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> > > >
> > > >    /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
> > > >       some less clueful developer tries to use floating-point anyway.  */
> > > > -  if (needed_sseregs && !TARGET_SSE)
> > > > +  if (needed_sseregs
> > > > +      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
> > > >      {
> > > >        /* Return early if we shouldn't raise an error for invalid
> > > >          calls.  */
> > > > @@ -2661,13 +2662,19 @@ construct_container (machine_mode mode, machine_mode orig_mode,
> > > >         {
> > > >           if (!issued_sse_ret_error)
> > > >             {
> > > > -             error ("SSE register return with SSE disabled");
> > > > +             if (VALID_SSE2_TYPE_MODE (mode))
> > > > +               error ("SSE register return with SSE2 disabled");
> > > > +             else
> > > > +               error ("SSE register return with SSE disabled");
> > > >               issued_sse_ret_error = true;
> > > >             }
> > > >         }
> > > >        else if (!issued_sse_arg_error)
> > > >         {
> > > > -         error ("SSE register argument with SSE disabled");
> > > > +         if (VALID_SSE2_TYPE_MODE (mode))
> > > > +           error ("SSE register argument with SSE2 disabled");
> > > > +         else
> > > > +           error ("SSE register argument with SSE disabled");
> > > >           issued_sse_arg_error = true;
> > > >         }
> > > >        return NULL;
> > > > @@ -4022,13 +4029,26 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
> > > >
> > > >    /* Return __bf16/ _Float16/_Complex _Foat16 by sse register.  */
> > > >    if (mode == HFmode || mode == BFmode)
> > > > -    regno = FIRST_SSE_REG;
> > > > +    {
> > > > +      if (!TARGET_SSE2)
> > > > +       {
> > > > +         error ("SSE register return with SSE2 disabled");
> > > > +         regno = AX_REG;
> > > > +       }
> > > > +      else
> > > > +       regno = FIRST_SSE_REG;
> > > > +    }
> > > > +
> > > >    if (mode == HCmode)
> > > >      {
> > > > +      if (!TARGET_SSE2)
> > > > +       error ("SSE register return with SSE2 disabled");
> > > > +
> > > >        rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
> > > >        XVECEXP (ret, 0, 0)
> > > >         = gen_rtx_EXPR_LIST (VOIDmode,
> > > > -                            gen_rtx_REG (SImode, FIRST_SSE_REG),
> > > > +                            gen_rtx_REG (SImode,
> > > > +                                         TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
> > > >                              GEN_INT (0));
> > > >        return ret;
> > > >      }
> > > > @@ -22459,7 +22479,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
> > > >      return default_decimal_float_supported_p ();
> > > >    else if (mode == TFmode)
> > > >      return true;
> > > > -  else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> > > > +  else if (mode == HFmode || mode == BFmode)
> > > >      return true;
> > > >    else
> > > >      return default_scalar_mode_supported_p (mode);
> > > > @@ -22475,7 +22495,7 @@ ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
> > > >       be defined by the C front-end for AVX512FP16 intrinsics.  We will
> > > >       issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
> > > >       enabled.  */
> > > > -  return (((mode == HFmode || mode == BFmode) && TARGET_SSE2)
> > > > +  return ((mode == HFmode || mode == BFmode)
> > > >           ? true
> > > >           : default_libgcc_floating_mode_supported_p (mode));
> > > >  }
> > > > @@ -22805,9 +22825,10 @@ ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
> > > >
> > > >    if (!TARGET_SSE2)
> > > >      {
> > > > -      gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
> > > > -      float16_type_node = ix86_float16_type_node;
> > > > -      bfloat16_type_node = ix86_bf16_type_node;
> > > > +      if (!float16_type_node)
> > > > +       float16_type_node = ix86_float16_type_node;
> > > > +      if (!bfloat16_type_node)
> > > > +       bfloat16_type_node = ix86_bf16_type_node;
> > > >        callback (float16_type_node);
> > > >        callback (bfloat16_type_node);
> > > >        float16_type_node = NULL_TREE;
> > > > @@ -24259,6 +24280,86 @@ ix86_init_libfuncs (void)
> > > >  #endif
> > > >  }
> > > >
> > > > +/* Return the diagnostic message string if conversion from FROMTYPE to
> > > > +   TOTYPE is not allowed, NULL otherwise.  */
> > > > +
> > > > +static const char *
> > > > +ix86_invalid_conversion (const_tree fromtype, const_tree totype)
> > > > +{
> > > > +  machine_mode from_mode = element_mode (fromtype);
> > > > +  machine_mode to_mode = element_mode (totype);
> > > > +
> > > > +  if (!TARGET_SSE2 && from_mode != to_mode)
> > > > +    {
> > > > > +      /* Do not allow conversions to/from BFmode/HFmode scalar types
> > > > +        when TARGET_SSE2 is not available.  */
> > > > +      if (from_mode == BFmode)
> > > > +       return N_("invalid conversion from type %<__bf16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +      if (from_mode == HFmode)
> > > > +       return N_("invalid conversion from type %<_Float16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +      if (to_mode == BFmode)
> > > > +       return N_("invalid conversion to type %<__bf16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +      if (to_mode == HFmode)
> > > > +       return N_("invalid conversion to type %<_Float16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +    }
> > > > +
> > > > +  /* Conversion allowed.  */
> > > > +  return NULL;
> > > > +}
> > > > +
> > > > +/* Return the diagnostic message string if the unary operation OP is
> > > > +   not permitted on TYPE, NULL otherwise.  */
> > > > +
> > > > +static const char *
> > > > +ix86_invalid_unary_op (int op, const_tree type)
> > > > +{
> > > > +  machine_mode mmode = element_mode (type);
> > > > +  /* Reject all single-operand operations on BFmode/HFmode except for &
> > > > +     when TARGET_SSE2 is not available.  */
> > > > +  if (!TARGET_SSE2 && op != ADDR_EXPR)
> > > > +    {
> > > > +      if (mmode == BFmode)
> > > > +       return N_("operation not permitted on type %<__bf16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +      if (mmode == HFmode)
> > > > +       return N_("operation not permitted on type %<_Float16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +    }
> > > > +
> > > > +  /* Operation allowed.  */
> > > > +  return NULL;
> > > > +}
> > > > +
> > > > +/* Return the diagnostic message string if the binary operation OP is
> > > > +   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
> > > > +
> > > > +static const char *
> > > > +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
> > > > +                       const_tree type2)
> > > > +{
> > > > +  machine_mode type1_mode = element_mode (type1);
> > > > +  machine_mode type2_mode = element_mode (type2);
> > > > +  /* Reject all 2-operand operations on BFmode or HFmode
> > > > +     when TARGET_SSE2 is not available.  */
> > > > +  if (!TARGET_SSE2)
> > > > +    {
> > > > +      if (type1_mode == BFmode || type2_mode == BFmode)
> > > > +       return N_("operation not permitted on type %<__bf16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +
> > > > +      if (type1_mode == HFmode || type2_mode == HFmode)
> > > > +       return N_("operation not permitted on type %<_Float16%> "
> > > > +                 "without option %<-msse2%>");
> > > > +    }
> > > > +
> > > > +  /* Operation allowed.  */
> > > > +  return NULL;
> > > > +}
> > > > +
> > > >  /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
> > > >     FPU, assume that the fpcw is set to extended precision; when using
> > > >     only SSE, rounding is correct; when using both SSE and the FPU,
> > > > @@ -25248,6 +25349,15 @@ ix86_libgcc_floating_mode_supported_p
> > > >  #undef TARGET_MEMTAG_TAG_SIZE
> > > >  #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
> > > >
> > > > +#undef TARGET_INVALID_CONVERSION
> > > > +#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
> > > > +
> > > > +#undef TARGET_INVALID_UNARY_OP
> > > > +#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
> > > > +
> > > > +#undef TARGET_INVALID_BINARY_OP
> > > > +#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
> > > > +
> > > >  static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
> > > >  {
> > > >  #ifdef OPTION_GLIBC
> > > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > > > index 1da6dce8e0b..7e839bc5c7e 100644
> > > > --- a/gcc/config/i386/i386.h
> > > > +++ b/gcc/config/i386/i386.h
> > > > @@ -1046,6 +1046,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
> > > >  #define VALID_AVX512FP16_REG_MODE(MODE)                                        \
> > > >    ((MODE) == V8HFmode || (MODE) == V16HFmode || (MODE) == V32HFmode)
> > > >
> > > > +#define VALID_SSE2_TYPE_MODE(MODE)             \
> > > > +  ((MODE) == HFmode || (MODE) == BFmode                \
> > > > +   || (MODE) == HCmode || (MODE) == BCmode)
> > > > +
> > > >  #define VALID_SSE2_REG_MODE(MODE)                                      \
> > > >    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
> > > >     || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode   \
> > > > diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> > > > index b220d871942..cc78df56940 100644
> > > > --- a/gcc/config/i386/immintrin.h
> > > > +++ b/gcc/config/i386/immintrin.h
> > > > @@ -98,11 +98,9 @@
> > > >
> > > >  #include <avx512vp2intersectvlintrin.h>
> > > >
> > > > -#ifdef __SSE2__
> > > >  #include <avx512fp16intrin.h>
> > > >
> > > >  #include <avx512fp16vlintrin.h>
> > > > -#endif
> > > >
> > > >  #include <shaintrin.h>
> > > >
> > > > @@ -118,13 +116,11 @@
> > > >
> > > >  #include <vpclmulqdqintrin.h>
> > > >
> > > > -#ifdef __SSE2__
> > > >  #include <avx512bf16vlintrin.h>
> > > >
> > > >  #include <avx512bf16intrin.h>
> > > >
> > > >  #include <avxneconvertintrin.h>
> > > > -#endif
> > > >
> > > >  #include <amxtileintrin.h>
> > > >
> > > > diff --git a/gcc/testsuite/g++.target/i386/float16-1.C b/gcc/testsuite/g++.target/i386/float16-1.C
> > > > index f96b932b698..938852ee9ad 100644
> > > > --- a/gcc/testsuite/g++.target/i386/float16-1.C
> > > > +++ b/gcc/testsuite/g++.target/i386/float16-1.C
> > > > @@ -1,8 +1,8 @@
> > > >  /* { dg-do compile } */
> > > >  /* { dg-options "-O2 -mno-sse2" } */
> > > >
> > > > -_Float16       /* { dg-error "expected unqualified-id before '_Float16'" } */
> > > > -foo (_Float16 x)
> > > > +_Float16
> > > > +foo (_Float16 x)/* { dg-error "SSE register return with SSE2 disabled" } */
> > > >  {
> > > > -  return x;
> > > > -}              /* { dg-error "'_Float16' is not supported on this target" } */
> > > > +  return x;/* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/i386/pr109504.c b/gcc/testsuite/gcc.target/i386/pr109504.c
> > > > new file mode 100644
> > > > index 00000000000..fe5bcda10ad
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/pr109504.c
> > > > @@ -0,0 +1,6 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -mno-sse" } */
> > > > +
> > > > +#pragma GCC target("sse4.1")
> > > > +#include <immintrin.h>
> > > > +int main(){return 0;}
> > > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > > > index 612d55be826..717055bc9ad 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/sse2-bfloat16-1.c
> > > > @@ -1,8 +1,8 @@
> > > >  /* { dg-do compile } */
> > > >  /* { dg-options "-O2 -mno-sse2" } */
> > > >
> > > > -__bf16/* { dg-error "unknown type name '__bf16'" } */
> > > > -foo (__bf16 x) /* { dg-error "unknown type name '__bf16'" } */
> > > > -{
> > > > -  return x;
> > > > +__bf16
> > > > +foo (__bf16 x)  /* { dg-error "SSE register return with SSE2 disabled" } */
> > > > +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > > > +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
> > > >  }
> > > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > > > index 1b645eb499d..faf818df75f 100644
> > > > --- a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > > > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c
> > > > @@ -1,8 +1,8 @@
> > > >  /* { dg-do compile } */
> > > >  /* { dg-options "-O2 -mno-sse2" } */
> > > >
> > > > -_Float16/* { dg-error "is not supported on this target" } */
> > > > -foo (_Float16 x) /* { dg-error "is not supported on this target" } */
> > > > -{
> > > > -  return x;
> > > > +_Float16
> > > > +foo (_Float16 x) /* { dg-error "SSE register return with SSE2 disabled" } */
> > > > +{  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32 } } */
> > > > +  return x;  /* { dg-error "SSE register return with SSE2 disabled" "" { target ia32} } */
> > > >  }
> > > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-4.c b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > > > new file mode 100644
> > > > index 00000000000..64baf92ff56
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-4.c
> > > > @@ -0,0 +1,25 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -mno-sse2" } */
> > > > +
> > > > +_Float16 a;
> > > > +__bf16 c;
> > > > +_Complex _Float16 ac;
> > > > +
> > > > +void
> > > > +foo (_Float16* p)
> > > > +{
> > > > +  a = *p;
> > > > +}
> > > > +
> > > > +void
> > > > +foo1 (__bf16 *p)
> > > > +{
> > > > +  c = *p;
> > > > +}
> > > > +
> > > > +
> > > > +void
> > > > +foo2 (_Complex _Float16* p)
> > > > +{
> > > > +  ac = *p;
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-5.c b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > > > new file mode 100644
> > > > index 00000000000..c3ed23b8ab3
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-5.c
> > > > @@ -0,0 +1,24 @@
> > > > +/* { dg-do compile { target ia32} } */
> > > > +/* { dg-options "-O2 -mno-sse2" } */
> > > > +
> > > > +_Float16 a;
> > > > +__bf16 c;
> > > > +_Complex ac;
> > > > +void
> > > > +foo (_Float16 p)
> > > > +{
> > > > +  a = p;
> > > > +}
> > > > +
> > > > +void
> > > > +foo1 (__bf16 p)
> > > > +{
> > > > +  c = p;
> > > > +}
> > > > +
> > > > +
> > > > +void
> > > > +foo2 (_Complex p)
> > > > +{
> > > > +  ac = p;
> > > > +}
> > > > diff --git a/libgcc/config/i386/t-softfp b/libgcc/config/i386/t-softfp
> > > > index 69d0f819822..80d1fac121b 100644
> > > > --- a/libgcc/config/i386/t-softfp
> > > > +++ b/libgcc/config/i386/t-softfp
> > > > @@ -31,3 +31,10 @@ CFLAGS-trunchfbf2.c += -msse2
> > > >  CFLAGS-eqhf2.c += -msse2
> > > >  CFLAGS-_divhc3.c += -msse2
> > > >  CFLAGS-_mulhc3.c += -msse2
> > > > +
> > > > +CFLAGS-_hf_to_sd.c += -msse2
> > > > +CFLAGS-_hf_to_dd.c += -msse2
> > > > +CFLAGS-_hf_to_td.c += -msse2
> > > > +CFLAGS-_sd_to_hf.c += -msse2
> > > > +CFLAGS-_dd_to_hf.c += -msse2
> > > > +CFLAGS-_td_to_hf.c += -msse2
> > > > --
> > > > 2.39.1.388.g2fc9e9ca3c
> > > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
> >
> >
> >
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2.
  2023-07-19  5:58           ` Hongtao Liu
@ 2023-07-19 10:51             ` Jakub Jelinek
  2023-07-20  4:50               ` [PATCH] Fix fp16 related testcase failure for i686 liuhongt
  0 siblings, 1 reply; 12+ messages in thread
From: Jakub Jelinek @ 2023-07-19 10:51 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: Uros Bizjak, liuhongt, gcc-patches, hjl.tools

On Wed, Jul 19, 2023 at 01:58:21PM +0800, Hongtao Liu wrote:
> > LGTM, if you need someone to rubber-stamp the patch. I'm not really
> > versed in this part of the compiler, so please wait a day if someone
> > has anything to say about the patch.
> Thanks, pushed to trunk.

I see some regressions most likely with this change on i686-linux,
in particular:
+FAIL: gcc.dg/pr107547.c (test for excess errors)
+FAIL: gcc.dg/torture/floatn-convert.c   -O0  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -O0  compilation failed to produce executable
+FAIL: gcc.dg/torture/floatn-convert.c   -O1  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -O1  compilation failed to produce executable
+FAIL: gcc.dg/torture/floatn-convert.c   -O2  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -O2  compilation failed to produce executable
+FAIL: gcc.dg/torture/floatn-convert.c   -O2 -flto  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -O2 -flto  compilation failed to produce executable
+FAIL: gcc.dg/torture/floatn-convert.c   -O2 -flto -flto-partition=none  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -O2 -flto -flto-partition=none  compilation failed to produce executable
+FAIL: gcc.dg/torture/floatn-convert.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  compilation failed to produce executable
+FAIL: gcc.dg/torture/floatn-convert.c   -O3 -g  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -O3 -g  compilation failed to produce executable
+FAIL: gcc.dg/torture/floatn-convert.c   -Os  (test for excess errors)
+UNRESOLVED: gcc.dg/torture/floatn-convert.c   -Os  compilation failed to produce executable
+FAIL: gcc.target/i386/float16-7.c  (test for errors, line 7)

Perhaps we need to tweak
gcc/testsuite/lib/target-supports.exp (add_options_for_float16)
so that it adds -msse2 for i?86-*-* x86_64-*-* (that would likely
fix up floatn-convert) and for the others perhaps
/* { dg-add-options float16 } */
?

	Jakub


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] Fix fp16 related testcase failure for i686.
  2023-07-19 10:51             ` Jakub Jelinek
@ 2023-07-20  4:50               ` liuhongt
  0 siblings, 0 replies; 12+ messages in thread
From: liuhongt @ 2023-07-20  4:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: jakub

> I see some regressions most likely with this change on i686-linux,
> in particular:
> +FAIL: gcc.dg/pr107547.c (test for excess errors)
> +FAIL: gcc.dg/torture/floatn-convert.c  -O0 (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -O0 compilation failed to produce executable
> +FAIL: gcc.dg/torture/floatn-convert.c  -O1 (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -O1 compilation failed to produce executable
> +FAIL: gcc.dg/torture/floatn-convert.c  -O2 (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -O2 compilation failed to produce executable
> +FAIL: gcc.dg/torture/floatn-convert.c  -O2 -flto (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -O2 -flto compilation failed to produce executable
> +FAIL: gcc.dg/torture/floatn-convert.c  -O2 -flto -flto-partition=none (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -O2 -flto -flto-partition=none compilation failed to produce executable
> +FAIL: gcc.dg/torture/floatn-convert.c  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions compilation failed to produce executable
> +FAIL: gcc.dg/torture/floatn-convert.c  -O3 -g (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -O3 -g compilation failed to produce executable
> +FAIL: gcc.dg/torture/floatn-convert.c  -Os (test for excess errors)
> +UNRESOLVED: gcc.dg/torture/floatn-convert.c  -Os compilation failed to produce executable
> +FAIL: gcc.target/i386/float16-7.c (test for errors, line 7)
>

> Perhaps we need to tweak
> gcc/testsuite/lib/target-supports.exp (add_options_for_float16)
> so that it adds -msse2 for i?86-*-* x86_64-*-* (that would likely
> fix up floatn-convert) and for the others perhaps
> /* { dg-add-options float16 } */
> ?

I've verified that the change fixes those failures.
Ready to push to trunk if there are no objections.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr107547.c: Add { dg-add-options float16 }.
	* gcc.target/i386/float16-7.c: Add -msse2 to dg-options.
	* lib/target-supports.exp (add_options_for_float16): Add
	-msse2 for i?86-*-* || x86_64-*-*.
---
 gcc/testsuite/gcc.dg/pr107547.c           | 1 +
 gcc/testsuite/gcc.target/i386/float16-7.c | 2 +-
 gcc/testsuite/lib/target-supports.exp     | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr107547.c b/gcc/testsuite/gcc.dg/pr107547.c
index c6992c8674b..7cd68afc0af 100644
--- a/gcc/testsuite/gcc.dg/pr107547.c
+++ b/gcc/testsuite/gcc.dg/pr107547.c
@@ -1,6 +1,7 @@
 /* PR tree-optimization/107547 */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
+/* { dg-add-options float16 } */
 
 int x;
 
diff --git a/gcc/testsuite/gcc.target/i386/float16-7.c b/gcc/testsuite/gcc.target/i386/float16-7.c
index 86641afeba9..660021b6ccc 100644
--- a/gcc/testsuite/gcc.target/i386/float16-7.c
+++ b/gcc/testsuite/gcc.target/i386/float16-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mfpmath=387 -fexcess-precision=16" } */
+/* { dg-options "-O2 -msse2 -mfpmath=387 -fexcess-precision=16" } */
 /* { dg-excess-errors "'-fexcess-precision=16' is not compatible with '-mfpmath=387'" } */
 _Float16
 foo (_Float16 a, _Float16 b)
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 8ea0d9feb1c..42024474091 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3487,6 +3487,9 @@ proc add_options_for_float16 { flags } {
     if { [istarget arm*-*-*] } {
 	return "$flags -mfp16-format=ieee"
     }
+    if { [istarget i?86-*-*] || [istarget x86_64-*-*] } {
+	return "$flags -msse2"
+    }
     return "$flags"
 }
 
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-07-20  4:50 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-19  7:15 [PATCH] [i386] Support type _Float16/__bf16 independent of SSE2 liuhongt
2023-04-19 16:43 ` Mike Stump
2023-04-20 12:18 ` Jakub Jelinek
2023-04-21 13:53   ` [PATCH 1/2] " liuhongt
2023-04-21 13:53     ` [PATCH 2/2] [i386] def_or_undef __STDCPP_FLOAT16_T__ and __STDCPP_BFLOAT16_T__ for target attribute/pragmas liuhongt
2023-05-15  1:21       ` Hongtao Liu
2023-05-15  1:20     ` [PATCH 1/2] [i386] Support type _Float16/__bf16 independent of SSE2 Hongtao Liu
2023-07-17  8:35       ` Hongtao Liu
2023-07-17 11:38         ` Uros Bizjak
2023-07-19  5:58           ` Hongtao Liu
2023-07-19 10:51             ` Jakub Jelinek
2023-07-20  4:50               ` [PATCH] Fix fp16 related testcase failure for i686 liuhongt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).