diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 21100c1087d..43331134301 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -35,6 +35,7 @@ #include // for stderr #endif #include +#include #include #include #include @@ -203,9 +204,170 @@ template // }}} template using _SizeConstant = integral_constant; +// constexpr feature detection{{{ +constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; +constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; +constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; +constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; +constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; +constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; +constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; +constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; +constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; +constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; +constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; +constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; +constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; +constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; +constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; +constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; +constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; +constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; +constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; +constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ; +constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; +constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW; +constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; +constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; +constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG; +constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2; +constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI; +constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA; +constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD; +constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI; +constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ; +constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT; + +constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; +constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; +constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; +constexpr inline bool __support_neon_float = +#if defined __GCC_IEC_559 + __GCC_IEC_559 == 0; +#elif defined __FAST_MATH__ + true; +#else + false; +#endif + +#ifdef _ARCH_PWR10 +constexpr inline bool __have_power10vec = true; +#else +constexpr inline bool __have_power10vec = false; +#endif +#ifdef __POWER9_VECTOR__ +constexpr inline bool __have_power9vec = true; +#else +constexpr inline bool __have_power9vec = false; +#endif +#if defined __POWER8_VECTOR__ +constexpr inline bool __have_power8vec = true; +#else +constexpr inline bool __have_power8vec = __have_power9vec; +#endif +#if defined __VSX__ +constexpr inline bool __have_power_vsx = true; +#else +constexpr inline bool __have_power_vsx = 
__have_power8vec; +#endif +#if defined __ALTIVEC__ +constexpr inline bool __have_power_vmx = true; +#else +constexpr inline bool __have_power_vmx = __have_power_vsx; +#endif + +// }}} namespace __detail { + constexpr std::uint_least64_t + __floating_point_flags() + { + std::uint_least64_t __flags = 0; + if constexpr (math_errhandling & MATH_ERREXCEPT) + __flags |= 1; +#ifdef __FAST_MATH__ + __flags |= 1 << 1; +#elif __FINITE_MATH_ONLY__ + __flags |= 2 << 1; +#elif __GCC_IEC_559 < 2 + __flags |= 3 << 1; +#endif + __flags |= (__FLT_EVAL_METHOD__ + 1) << 3; + return __flags; + } + + constexpr std::uint_least64_t + __machine_flags() + { + if constexpr (__have_mmx || __have_sse) + return __have_mmx + | (__have_sse << 1) + | (__have_sse2 << 2) + | (__have_sse3 << 3) + | (__have_ssse3 << 4) + | (__have_sse4_1 << 5) + | (__have_sse4_2 << 6) + | (__have_xop << 7) + | (__have_avx << 8) + | (__have_avx2 << 9) + | (__have_bmi << 10) + | (__have_bmi2 << 11) + | (__have_lzcnt << 12) + | (__have_sse4a << 13) + | (__have_fma << 14) + | (__have_fma4 << 15) + | (__have_f16c << 16) + | (__have_popcnt << 17) + | (__have_avx512f << 18) + | (__have_avx512dq << 19) + | (__have_avx512vl << 20) + | (__have_avx512bw << 21) + | (__have_avx512bitalg << 22) + | (__have_avx512vbmi2 << 23) + | (__have_avx512vbmi << 24) + | (__have_avx512ifma << 25) + | (__have_avx512cd << 26) + | (__have_avx512vnni << 27) + | (__have_avx512vpopcntdq << 28) + | (__have_avx512vp2intersect << 29); + else if constexpr (__have_neon) + return __have_neon + | (__have_neon_a32 << 1) + | (__have_neon_a64 << 2) + | (__have_neon_a64 << 2) + | (__support_neon_float << 3); + else if constexpr (__have_power_vmx) + return __have_power_vmx + | (__have_power_vsx << 1) + | (__have_power8vec << 2) + | (__have_power9vec << 3) + | (__have_power10vec << 4); + else + return 0; + } + + namespace + { + struct _OdrEnforcer {}; + } + + template + struct _MachineFlagsTemplate {}; + + /**@internal + * Use this type as default template argument to all function templates that + * are not declared always_inline. It ensures, that a function + * specialization, which the compiler decides not to inline, has a unique symbol + * (_OdrEnforcer) or a symbol matching the machine/architecture flags + * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where + * users link TUs compiled with different flags. This is especially important + * for using simd in libraries. 
+ */ + using __odr_helper + = conditional_t<__machine_flags() == 0, _OdrEnforcer, + _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>; + struct _Minimum { template @@ -469,71 +631,6 @@ template template inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value; -// }}} -// constexpr feature detection{{{ -constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; -constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; -constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; -constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; -constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; -constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; -constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; -constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; -constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; -constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; -constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; -constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; -constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; -constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; -constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; -constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; -constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; -constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; -constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; -constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ; -constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; -constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW; -constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; -constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; - -constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; -constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; -constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; -constexpr inline bool __support_neon_float = -#if defined __GCC_IEC_559 - __GCC_IEC_559 == 0; -#elif defined __FAST_MATH__ - true; -#else - false; -#endif - -#ifdef _ARCH_PWR10 -constexpr inline bool __have_power10vec = true; -#else -constexpr inline bool __have_power10vec = false; -#endif -#ifdef __POWER9_VECTOR__ -constexpr inline bool __have_power9vec = true; -#else -constexpr inline bool __have_power9vec = false; -#endif -#if defined __POWER8_VECTOR__ -constexpr inline bool __have_power8vec = true; -#else -constexpr inline bool __have_power8vec = __have_power9vec; -#endif -#if defined __VSX__ -constexpr inline bool __have_power_vsx = true; -#else -constexpr inline bool __have_power_vsx = __have_power8vec; -#endif -#if defined __ALTIVEC__ -constexpr inline bool __have_power_vmx = true; -#else -constexpr inline bool __have_power_vmx = __have_power_vsx; -#endif - // }}} // __is_scalar_abi {{{ template @@ -3984,7 +4081,7 @@ template // }}} // concat(simd...) {{{ -template +template inline _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>> concat(const simd<_Tp, _As>&... 
__xs) @@ -4567,6 +4664,7 @@ template template == simd_size_v<_Tp, _Abi>>> + _GLIBCXX_SIMD_ALWAYS_INLINE operator simd_mask<_Up, _A2>() && { using namespace std::experimental::__proposed; @@ -4801,121 +4899,153 @@ find_last_set(_ExactBool) // }}} // _SimdIntOperators{{{1 -template +template class _SimdIntOperators {}; -template - class _SimdIntOperators<_V, _Impl, true> +template + class _SimdIntOperators<_V, _Tp, _Abi, true> { + using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl; + _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const { return *static_cast(this); } - template + template _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V - _S_make_derived(_Tp&& __d) - { return {__private_init, static_cast<_Tp&&>(__d)}; } + _S_make_derived(_Up&& __d) + { return {__private_init, static_cast<_Up&&>(__d)}; } public: - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator%=(_V& __lhs, const _V& __x) { return __lhs = __lhs % __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator&=(_V& __lhs, const _V& __x) { return __lhs = __lhs & __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator|=(_V& __lhs, const _V& __x) { return __lhs = __lhs | __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator^=(_V& __lhs, const _V& __x) { return __lhs = __lhs ^ __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator<<=(_V& __lhs, const _V& __x) { return __lhs = __lhs << __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator>>=(_V& __lhs, const _V& __x) { return __lhs = __lhs >> __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator<<=(_V& __lhs, int __x) { return __lhs = __lhs << __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator>>=(_V& __lhs, int __x) { return __lhs = __lhs >> __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator%(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_modulus(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator&(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_and(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator|(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_or(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator^(const _V& __x, const _V& __y) { return 
_SimdIntOperators::_S_make_derived( _Impl::_S_bit_xor(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator<<(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_shift_left(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator>>(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_shift_right(__data(__x), __data(__y))); } - template - _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y) - { - using _Tp = typename _VV::value_type; - if (__y < 0) - __invoke_ub("The behavior is undefined if the right operand of a " - "shift operation is negative. [expr.shift]\nA shift by " - "%d was requested", - __y); - if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) - __invoke_ub( - "The behavior is undefined if the right operand of a " - "shift operation is greater than or equal to the width of the " - "promoted left operand. [expr.shift]\nA shift by %d was requested", - __y); - return _SimdIntOperators::_S_make_derived( - _Impl::_S_bit_shift_left(__data(__x), __y)); - } + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator<<(const _V& __x, int __y) + { + if (__y < 0) + __invoke_ub("The behavior is undefined if the right operand of a " + "shift operation is negative. [expr.shift]\nA shift by " + "%d was requested", + __y); + if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) + __invoke_ub( + "The behavior is undefined if the right operand of a " + "shift operation is greater than or equal to the width of the " + "promoted left operand. [expr.shift]\nA shift by %d was requested", + __y); + return _SimdIntOperators::_S_make_derived( + _Impl::_S_bit_shift_left(__data(__x), __y)); + } - template - _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y) - { - using _Tp = typename _VV::value_type; - if (__y < 0) - __invoke_ub( - "The behavior is undefined if the right operand of a shift " - "operation is negative. [expr.shift]\nA shift by %d was requested", - __y); - if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) - __invoke_ub( - "The behavior is undefined if the right operand of a shift " - "operation is greater than or equal to the width of the promoted " - "left operand. [expr.shift]\nA shift by %d was requested", - __y); - return _SimdIntOperators::_S_make_derived( - _Impl::_S_bit_shift_right(__data(__x), __y)); - } + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator>>(const _V& __x, int __y) + { + if (__y < 0) + __invoke_ub( + "The behavior is undefined if the right operand of a shift " + "operation is negative. [expr.shift]\nA shift by %d was requested", + __y); + if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) + __invoke_ub( + "The behavior is undefined if the right operand of a shift " + "operation is greater than or equal to the width of the promoted " + "left operand. 
[expr.shift]\nA shift by %d was requested", + __y); + return _SimdIntOperators::_S_make_derived( + _Impl::_S_bit_shift_right(__data(__x), __y)); + } // unary operators (for integral _Tp) - _GLIBCXX_SIMD_CONSTEXPR _V operator~() const + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR + _V + operator~() const { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; } }; @@ -4924,7 +5054,7 @@ template // simd {{{ template class simd : public _SimdIntOperators< - simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl, + simd<_Tp, _Abi>, _Tp, _Abi, conjunction, typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>, public _SimdTraits<_Tp, _Abi>::_SimdBase @@ -4938,7 +5068,7 @@ template public: using _Impl = typename _Traits::_SimdImpl; friend _Impl; - friend _SimdIntOperators; + friend _SimdIntOperators; using value_type = _Tp; using reference = _SmartReference<_MemberType, _Impl, value_type>; diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h index 8cd338e313f..55fea77d4ab 100644 --- a/libstdc++-v3/include/experimental/bits/simd_builtin.h +++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h @@ -50,7 +50,8 @@ template > //}}} // __vector_permute{{{ // Index == -1 requests zeroing of the output element -template > +template , + typename = __detail::__odr_helper> _Tp __vector_permute(_Tp __x) { @@ -62,7 +63,8 @@ template > // }}} // __vector_shuffle{{{ // Index == -1 requests zeroing of the output element -template > +template , + typename = __detail::__odr_helper> _Tp __vector_shuffle(_Tp __x, _Tp __y) { @@ -820,10 +822,12 @@ template // _SimdBase / base class for simd, providing extra conversions {{{ struct _SimdBase2 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __to_intrin(static_cast*>(this)->_M_data); } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __vector_type_t<_Tp, _Np>() const { return static_cast*>(this)->_M_data.__builtin(); @@ -832,6 +836,7 @@ template struct _SimdBase1 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __data(*static_cast*>(this)); } }; @@ -844,11 +849,13 @@ template // _MaskBase {{{ struct _MaskBase2 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return static_cast*>(this) ->_M_data.__intrin(); } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __vector_type_t<_Tp, _Np>() const { return static_cast*>(this)->_M_data._M_data; @@ -857,6 +864,7 @@ template struct _MaskBase1 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __data(*static_cast*>(this)); } }; @@ -874,7 +882,9 @@ template _Up _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _MaskCastType(_Up __x) : _M_data(__x) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _MaskMember() const { return _M_data; } }; @@ -887,7 +897,9 @@ template _SimdMember _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _SimdMember() const { return _M_data; } }; @@ -898,8 +910,11 @@ template _SimdMember _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType2(_Bp __b) : _M_data(__b) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _SimdMember() const { return _M_data; } }; @@ -913,14 +928,14 @@ template struct _CommonImplX86; struct _CommonImplNeon; struct _CommonImplBuiltin; 
-template struct _SimdImplBuiltin; -template struct _MaskImplBuiltin; -template struct _SimdImplX86; -template struct _MaskImplX86; -template struct _SimdImplNeon; -template struct _MaskImplNeon; -template struct _SimdImplPpc; -template struct _MaskImplPpc; +template struct _SimdImplBuiltin; +template struct _MaskImplBuiltin; +template struct _SimdImplX86; +template struct _MaskImplX86; +template struct _SimdImplNeon; +template struct _MaskImplNeon; +template struct _SimdImplPpc; +template struct _MaskImplPpc; // simd_abi::_VecBuiltin {{{ template @@ -1369,7 +1384,7 @@ struct _CommonImplBuiltin // }}} // _SimdImplBuiltin {{{1 -template +template struct _SimdImplBuiltin { // member types {{{2 @@ -2618,7 +2633,7 @@ struct _MaskImplBuiltinMixin }; // _MaskImplBuiltin {{{1 -template +template struct _MaskImplBuiltin : _MaskImplBuiltinMixin { using _MaskImplBuiltinMixin::_S_to_bits; @@ -2953,4 +2968,4 @@ _GLIBCXX_SIMD_END_NAMESPACE #endif // __cplusplus >= 201703L #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_ -// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=80 +// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100 diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h index 1e75812d098..78ad33f74e4 100644 --- a/libstdc++-v3/include/experimental/bits/simd_detail.h +++ b/libstdc++-v3/include/experimental/bits/simd_detail.h @@ -172,6 +172,46 @@ #else #define _GLIBCXX_SIMD_HAVE_AVX512BW 0 #endif +#ifdef __AVX512BITALG__ +#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0 +#endif +#ifdef __AVX512VBMI2__ +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0 +#endif +#ifdef __AVX512VBMI__ +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0 +#endif +#ifdef __AVX512IFMA__ +#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0 +#endif +#ifdef __AVX512CD__ +#define _GLIBCXX_SIMD_HAVE_AVX512CD 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512CD 0 +#endif +#ifdef __AVX512VNNI__ +#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0 +#endif +#ifdef __AVX512VPOPCNTDQ__ +#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0 +#endif +#ifdef __AVX512VP2INTERSECT__ +#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0 +#endif #if _GLIBCXX_SIMD_HAVE_SSE #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1 diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h index dc2fb90b9b2..5a742ed52e1 100644 --- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h +++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h @@ -201,6 +201,7 @@ template }; template + _GLIBCXX_SIMD_INTRINSIC __tuple_element_meta<_Tp, _Abi, _Offset> __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) { return {}; } @@ -230,11 +231,13 @@ template struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; template + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(_Tp& __base) { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } template + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(const _Tp& __base) { @@ -243,6 +246,7 @@ template } template + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) { @@ -251,6 +255,7 @@ template } template + 
_GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) { @@ -586,6 +591,7 @@ template return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; } + _GLIBCXX_SIMD_INTRINSIC _Tp operator[](size_t __i) const noexcept { if constexpr (_S_tuple_size == 1) @@ -608,6 +614,7 @@ template } } + _GLIBCXX_SIMD_INTRINSIC void _M_set(size_t __i, _Tp __val) noexcept { if constexpr (_S_tuple_size == 1) @@ -627,6 +634,7 @@ template private: // _M_subscript_read/_write {{{ + _GLIBCXX_SIMD_INTRINSIC _Tp _M_subscript_read([[maybe_unused]] size_t __i) const noexcept { if constexpr (__is_vectorizable_v<_FirstType>) @@ -635,6 +643,7 @@ template return first[__i]; } + _GLIBCXX_SIMD_INTRINSIC void _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept { if constexpr (__is_vectorizable_v<_FirstType>) @@ -1033,9 +1042,11 @@ template >> _Tp _M_data; using _TT = __remove_cvref_t<_Tp>; + _GLIBCXX_SIMD_INTRINSIC operator _TT() { return _M_data; } + _GLIBCXX_SIMD_INTRINSIC operator _TT&() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1043,6 +1054,7 @@ template >> return _M_data; } + _GLIBCXX_SIMD_INTRINSIC operator _TT*() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1050,13 +1062,16 @@ template >> return &_M_data; } - constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd) {} + _GLIBCXX_SIMD_INTRINSIC + constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd) {} template + _GLIBCXX_SIMD_INTRINSIC operator simd() { return {__private_init, _M_data}; } template + _GLIBCXX_SIMD_INTRINSIC operator simd&() { return *reinterpret_cast*>( @@ -1064,6 +1079,7 @@ template >> } template + _GLIBCXX_SIMD_INTRINSIC operator simd*() { return reinterpret_cast*>( @@ -1081,14 +1097,18 @@ template _Tp _M_data; fixed_size_simd<_TT, 1> _M_fd; - constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} + _GLIBCXX_SIMD_INTRINSIC + constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} + _GLIBCXX_SIMD_INTRINSIC ~__autocvt_to_simd() { _M_data = __data(_M_fd).first; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1>() { return _M_fd; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1> &() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1096,6 +1116,7 @@ template return _M_fd; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1> *() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1107,8 +1128,8 @@ template // }}} struct _CommonImplFixedSize; -template struct _SimdImplFixedSize; -template struct _MaskImplFixedSize; +template struct _SimdImplFixedSize; +template struct _MaskImplFixedSize; // simd_abi::_Fixed {{{ template struct simd_abi::_Fixed @@ -1172,12 +1193,15 @@ template { // The following ensures, function arguments are passed via the stack. 
// This is important for ABI compatibility across TU boundaries + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdBase(const _SimdBase&) {} _SimdBase() = default; + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator const _SimdMember &() const { return static_cast*>(this)->_M_data; } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator array<_Tp, _Np>() const { array<_Tp, _Np> __r; @@ -1198,8 +1222,11 @@ template // _SimdCastType {{{ struct _SimdCastType { + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType(const array<_Tp, _Np>&); + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator const _SimdMember &() const { return _M_data; } private: @@ -1237,7 +1264,7 @@ struct _CommonImplFixedSize // _SimdImplFixedSize {{{1 // fixed_size should not inherit from _SimdMathFallback in order for // specializations in the used _SimdTuple Abis to get used -template +template struct _SimdImplFixedSize { // member types {{{2 @@ -1794,7 +1821,7 @@ template }; // _MaskImplFixedSize {{{1 -template +template struct _MaskImplFixedSize { static_assert( diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h index 61af9fc67af..01061a75a5e 100644 --- a/libstdc++-v3/include/experimental/bits/simd_math.h +++ b/libstdc++-v3/include/experimental/bits/simd_math.h @@ -60,6 +60,7 @@ template template ())), _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t, _R> \ __name(simd<_Tp, _Abi> __x) \ { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; } @@ -125,6 +126,7 @@ template < \ typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \ typename _R = _Math_return_type_t< \ decltype(std::__name(declval(), _Arg2::declval())), _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t, _R> \ __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \ { \ @@ -155,6 +157,7 @@ template (), _Arg2::declval(), \ _Arg3::declval())), \ _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t, _R> \ __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \ const typename _Arg3::type& __z) \ @@ -399,6 +402,7 @@ template // }}} // __extract_exponent_as_int {{{ template + _GLIBCXX_SIMD_INTRINSIC rebind_simd_t> __extract_exponent_as_int(const simd<_Tp, _Abi>& __v) { @@ -421,7 +425,8 @@ template -> decltype(__impl_fun(static_cast<_Args&&>(__args)...)) { return __impl_fun(static_cast<_Args&&>(__args)...); } -template +template inline auto __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun, _Args&&... __args) @@ -457,7 +462,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp) * Fix sign. 
*/ // cos{{{ -template +template enable_if_t, simd<_Tp, _Abi>> cos(const simd<_Tp, _Abi>& __x) { @@ -503,7 +508,7 @@ template //}}} // sin{{{ -template +template enable_if_t, simd<_Tp, _Abi>> sin(const simd<_Tp, _Abi>& __x) { @@ -565,6 +570,7 @@ _GLIBCXX_SIMD_MATH_CALL_(expm1) // frexp {{{ #if _GLIBCXX_SIMD_X86INTRIN template + _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np> __getexp(_SimdWrapper<_Tp, _Np> __x) { @@ -593,6 +599,7 @@ template } template + _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np> __getmant_avx512(_SimdWrapper<_Tp, _Np> __x) { @@ -633,7 +640,7 @@ template * The return value will be in the range [0.5, 1.0[ * The @p __e value will be an integer defining the power-of-two exponent */ -template +template enable_if_t, simd<_Tp, _Abi>> frexp(const simd<_Tp, _Abi>& __x, _Samesize>* __exp) { @@ -738,7 +745,7 @@ _GLIBCXX_SIMD_MATH_CALL_(log2) //}}} // logb{{{ -template +template enable_if_t::value, simd<_Tp, _Abi>> logb(const simd<_Tp, _Abi>& __x) { @@ -813,7 +820,7 @@ template } //}}} -template +template enable_if_t, simd<_Tp, _Abi>> modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr) { @@ -847,6 +854,7 @@ _GLIBCXX_SIMD_MATH_CALL_(fabs) // [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to // allow signed integral _Tp template + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t && is_signed_v<_Tp>, simd<_Tp, _Abi>> abs(const simd<_Tp, _Abi>& __x) { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; } @@ -929,7 +937,7 @@ template __data(__args)...)}; } -template +template __remove_cvref_t<_VV> __hypot(_VV __x, _VV __y) { @@ -1067,7 +1075,7 @@ template _GLIBCXX_SIMD_CVTING2(hypot) - template + template __remove_cvref_t<_VV> __hypot(_VV __x, _VV __y, _VV __z) { @@ -1268,7 +1276,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp) _GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp) _GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*) -template +template enable_if_t, simd<_Tp, _Abi>> copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) { @@ -1306,12 +1314,14 @@ _GLIBCXX_SIMD_MATH_CALL_(isfinite) // `int isinf(double)`. 
template > + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t, _R> isinf(simd<_Tp, _Abi> __x) { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; } template > + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t, _R> isnan(simd<_Tp, _Abi> __x) { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; } @@ -1319,6 +1329,7 @@ template + _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask<_Tp, _Abi> signbit(simd<_Tp, _Abi> __x) { @@ -1366,7 +1377,7 @@ simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, */ // special math {{{ -template +template enable_if_t, simd<_Tp, _Abi>> assoc_laguerre(const fixed_size_simd>& __n, const fixed_size_simd>& __m, @@ -1377,7 +1388,7 @@ template }); } -template +template enable_if_t, simd<_Tp, _Abi>> assoc_legendre(const fixed_size_simd>& __n, const fixed_size_simd>& __m, @@ -1401,7 +1412,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp) _GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp) _GLIBCXX_SIMD_MATH_CALL_(expint) -template +template enable_if_t, simd<_Tp, _Abi>> hermite(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1410,7 +1421,7 @@ template [&](auto __i) { return std::hermite(__n[__i], __x[__i]); }); } -template +template enable_if_t, simd<_Tp, _Abi>> laguerre(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1419,7 +1430,7 @@ template [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); }); } -template +template enable_if_t, simd<_Tp, _Abi>> legendre(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1430,7 +1441,7 @@ template _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta) -template +template enable_if_t, simd<_Tp, _Abi>> sph_bessel(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1439,7 +1450,7 @@ template [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); }); } -template +template enable_if_t, simd<_Tp, _Abi>> sph_legendre(const fixed_size_simd>& __l, const fixed_size_simd>& __m, @@ -1450,7 +1461,7 @@ template }); } -template +template enable_if_t, simd<_Tp, _Abi>> sph_neumann(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) diff --git a/libstdc++-v3/include/experimental/bits/simd_neon.h b/libstdc++-v3/include/experimental/bits/simd_neon.h index 7f472e88649..bbd26835d9c 100644 --- a/libstdc++-v3/include/experimental/bits/simd_neon.h +++ b/libstdc++-v3/include/experimental/bits/simd_neon.h @@ -44,7 +44,7 @@ struct _CommonImplNeon : _CommonImplBuiltin // }}} // _SimdImplNeon {{{ -template +template struct _SimdImplNeon : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -390,7 +390,7 @@ struct _MaskImplNeonMixin // }}} // _MaskImplNeon {{{ -template +template struct _MaskImplNeon : _MaskImplNeonMixin, _MaskImplBuiltin<_Abi> { using _MaskImplBuiltinMixin::_S_to_maskvector; diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h index ef52d129a85..4143bafa80e 100644 --- a/libstdc++-v3/include/experimental/bits/simd_ppc.h +++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h @@ -35,7 +35,7 @@ _GLIBCXX_SIMD_BEGIN_NAMESPACE // _SimdImplPpc {{{ -template +template struct _SimdImplPpc : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -117,7 +117,7 @@ template // }}} // _MaskImplPpc {{{ -template +template struct _MaskImplPpc : _MaskImplBuiltin<_Abi> { using _Base = _MaskImplBuiltin<_Abi>; diff --git a/libstdc++-v3/include/experimental/bits/simd_scalar.h b/libstdc++-v3/include/experimental/bits/simd_scalar.h index 48e13f6c719..b23011ca6c9 100644 --- a/libstdc++-v3/include/experimental/bits/simd_scalar.h +++ 
b/libstdc++-v3/include/experimental/bits/simd_scalar.h @@ -155,7 +155,8 @@ struct _SimdImplScalar // _S_masked_load {{{2 template - static inline _Tp _S_masked_load(_Tp __merge, bool __k, + _GLIBCXX_SIMD_INTRINSIC + static _Tp _S_masked_load(_Tp __merge, bool __k, const _Up* __mem) noexcept { if (__k) @@ -165,83 +166,97 @@ struct _SimdImplScalar // _S_store {{{2 template - static inline void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept + _GLIBCXX_SIMD_INTRINSIC + static void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept { __mem[0] = static_cast<_Up>(__v); } // _S_masked_store {{{2 template - static inline void _S_masked_store(const _Tp __v, _Up* __mem, + _GLIBCXX_SIMD_INTRINSIC + static void _S_masked_store(const _Tp __v, _Up* __mem, const bool __k) noexcept { if (__k) __mem[0] = __v; } // _S_negate {{{2 template - static constexpr inline bool _S_negate(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr bool _S_negate(_Tp __x) noexcept { return !__x; } // _S_reduce {{{2 template - static constexpr inline _Tp + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_reduce(const simd<_Tp, simd_abi::scalar>& __x, const _BinaryOperation&) { return __x._M_data; } // _S_min, _S_max {{{2 template - static constexpr inline _Tp _S_min(const _Tp __a, const _Tp __b) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_min(const _Tp __a, const _Tp __b) { return std::min(__a, __b); } template - static constexpr inline _Tp _S_max(const _Tp __a, const _Tp __b) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_max(const _Tp __a, const _Tp __b) { return std::max(__a, __b); } // _S_complement {{{2 template - static constexpr inline _Tp _S_complement(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_complement(_Tp __x) noexcept { return static_cast<_Tp>(~__x); } // _S_unary_minus {{{2 template - static constexpr inline _Tp _S_unary_minus(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_unary_minus(_Tp __x) noexcept { return static_cast<_Tp>(-__x); } // arithmetic operators {{{2 template - static constexpr inline _Tp _S_plus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_plus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) + __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_minus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_minus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) - __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_multiplies(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_multiplies(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) * __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_divides(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_divides(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) / __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_modulus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_modulus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) % __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_bit_and(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_and(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -254,7 +269,8 @@ struct _SimdImplScalar } template - static 
constexpr inline _Tp _S_bit_or(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_or(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -267,7 +283,8 @@ struct _SimdImplScalar } template - static constexpr inline _Tp _S_bit_xor(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_xor(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -280,11 +297,13 @@ struct _SimdImplScalar } template - static constexpr inline _Tp _S_bit_shift_left(_Tp __x, int __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_shift_left(_Tp __x, int __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) << __y); } template - static constexpr inline _Tp _S_bit_shift_right(_Tp __x, int __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_shift_right(_Tp __x, int __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) >> __y); } // math {{{2 @@ -553,11 +572,13 @@ struct _SimdImplScalar // _S_increment & _S_decrement{{{2 template - constexpr static inline void _S_increment(_Tp& __x) + _GLIBCXX_SIMD_INTRINSIC + constexpr static void _S_increment(_Tp& __x) { ++__x; } template - constexpr static inline void _S_decrement(_Tp& __x) + _GLIBCXX_SIMD_INTRINSIC + constexpr static void _S_decrement(_Tp& __x) { --__x; } @@ -582,6 +603,7 @@ struct _SimdImplScalar // smart_reference access {{{2 template + _GLIBCXX_SIMD_INTRINSIC constexpr static void _S_set(_Tp& __v, [[maybe_unused]] int __i, _Up&& __x) noexcept { @@ -677,25 +699,32 @@ struct _MaskImplScalar } // logical and bitwise operators {{{2 + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_logical_and(bool __x, bool __y) { return __x && __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_logical_or(bool __x, bool __y) { return __x || __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_not(bool __x) { return !__x; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_and(bool __x, bool __y) { return __x && __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_or(bool __x, bool __y) { return __x || __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_xor(bool __x, bool __y) { return __x != __y; } // smart_reference access {{{2 + _GLIBCXX_SIMD_INTRINSIC constexpr static void _S_set(bool& __k, [[maybe_unused]] int __i, bool __x) noexcept { diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h index 34633c096b1..e010740b44c 100644 --- a/libstdc++-v3/include/experimental/bits/simd_x86.h +++ b/libstdc++-v3/include/experimental/bits/simd_x86.h @@ -822,7 +822,7 @@ struct _CommonImplX86 : _CommonImplBuiltin // }}} // _SimdImplX86 {{{ -template +template struct _SimdImplX86 : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -4241,7 +4241,7 @@ struct _MaskImplX86Mixin // }}} // _MaskImplX86 {{{ -template +template struct _MaskImplX86 : _MaskImplX86Mixin, _MaskImplBuiltin<_Abi> { using _MaskImplX86Mixin::_S_to_bits;
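
Note (editor, not part of the patch): the core mechanism above is that every non-always_inline function template gains a defaulted template parameter `__odr_helper`, which is either the internal-linkage `_OdrEnforcer` or `_MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>`, so that TUs compiled with different -m or floating-point flags instantiate distinctly mangled symbols instead of colliding at link time. The following is a minimal standalone sketch of that idea, assuming hypothetical names (machine_flags, OdrEnforcer, MachineFlagsTemplate, widened_add) chosen only for illustration; it is not the library's code.

    // C++17 sketch: encode per-TU compile flags in a default template argument
    // so that out-of-line instantiations from differently compiled TUs get
    // different mangled names (avoiding ODR violations on linking).
    #include <cstdint>
    #include <type_traits>

    namespace demo {
      constexpr std::uint_least64_t machine_flags()
      {
        std::uint_least64_t f = 0;
    #ifdef __AVX2__
        f |= 1;   // set only in TUs compiled with -mavx2
    #endif
    #ifdef __FMA__
        f |= 2;   // set only in TUs compiled with -mfma
    #endif
        return f;
      }

      // Internal linkage: a distinct type per TU, used when no flags are set.
      namespace { struct OdrEnforcer {}; }

      template <std::uint_least64_t> struct MachineFlagsTemplate {};

      using odr_helper
        = std::conditional_t<machine_flags() == 0, OdrEnforcer,
                             MachineFlagsTemplate<machine_flags()>>;

      // Not always_inline: if the compiler emits it out of line, the symbol
      // still differs between TUs compiled with different flags.
      template <typename T, typename = odr_helper>
      T widened_add(T a, T b)
      { return a + b; }
    }

    int main()
    { return demo::widened_add(1, 2) == 3 ? 0 : 1; }

The complementary half of the patch is that everything which must not depend on `__odr_helper` (conversion operators, the scalar/builtin implementation hooks, the math wrappers) is instead marked `_GLIBCXX_SIMD_ALWAYS_INLINE` or `_GLIBCXX_SIMD_INTRINSIC`, so those functions are never emitted as shared out-of-line symbols in the first place.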