diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 21100c1087d..43331134301 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -35,6 +35,7 @@ #include // for stderr #endif #include +#include #include #include #include @@ -203,9 +204,170 @@ template // }}} template using _SizeConstant = integral_constant; +// constexpr feature detection{{{ +constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; +constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; +constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; +constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; +constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; +constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; +constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; +constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; +constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; +constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; +constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; +constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; +constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; +constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; +constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; +constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; +constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; +constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; +constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; +constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ; +constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; +constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW; +constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; +constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; +constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG; +constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2; +constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI; +constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA; +constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD; +constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI; +constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ; +constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT; + +constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; +constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; +constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; +constexpr inline bool __support_neon_float = +#if defined __GCC_IEC_559 + __GCC_IEC_559 == 0; +#elif defined __FAST_MATH__ + true; +#else + false; +#endif + +#ifdef _ARCH_PWR10 +constexpr inline bool __have_power10vec = true; +#else +constexpr inline bool __have_power10vec = false; +#endif +#ifdef __POWER9_VECTOR__ +constexpr inline bool __have_power9vec = true; +#else +constexpr inline bool __have_power9vec = false; +#endif +#if defined __POWER8_VECTOR__ +constexpr inline bool __have_power8vec = true; +#else +constexpr inline bool __have_power8vec = __have_power9vec; +#endif +#if defined __VSX__ +constexpr inline bool __have_power_vsx = true; +#else +constexpr inline bool __have_power_vsx = 
__have_power8vec; +#endif +#if defined __ALTIVEC__ +constexpr inline bool __have_power_vmx = true; +#else +constexpr inline bool __have_power_vmx = __have_power_vsx; +#endif + +// }}} namespace __detail { + constexpr std::uint_least64_t + __floating_point_flags() + { + std::uint_least64_t __flags = 0; + if constexpr (math_errhandling & MATH_ERREXCEPT) + __flags |= 1; +#ifdef __FAST_MATH__ + __flags |= 1 << 1; +#elif __FINITE_MATH_ONLY__ + __flags |= 2 << 1; +#elif __GCC_IEC_559 < 2 + __flags |= 3 << 1; +#endif + __flags |= (__FLT_EVAL_METHOD__ + 1) << 3; + return __flags; + } + + constexpr std::uint_least64_t + __machine_flags() + { + if constexpr (__have_mmx || __have_sse) + return __have_mmx + | (__have_sse << 1) + | (__have_sse2 << 2) + | (__have_sse3 << 3) + | (__have_ssse3 << 4) + | (__have_sse4_1 << 5) + | (__have_sse4_2 << 6) + | (__have_xop << 7) + | (__have_avx << 8) + | (__have_avx2 << 9) + | (__have_bmi << 10) + | (__have_bmi2 << 11) + | (__have_lzcnt << 12) + | (__have_sse4a << 13) + | (__have_fma << 14) + | (__have_fma4 << 15) + | (__have_f16c << 16) + | (__have_popcnt << 17) + | (__have_avx512f << 18) + | (__have_avx512dq << 19) + | (__have_avx512vl << 20) + | (__have_avx512bw << 21) + | (__have_avx512bitalg << 22) + | (__have_avx512vbmi2 << 23) + | (__have_avx512vbmi << 24) + | (__have_avx512ifma << 25) + | (__have_avx512cd << 26) + | (__have_avx512vnni << 27) + | (__have_avx512vpopcntdq << 28) + | (__have_avx512vp2intersect << 29); + else if constexpr (__have_neon) + return __have_neon + | (__have_neon_a32 << 1) + | (__have_neon_a64 << 2) + | (__have_neon_a64 << 2) + | (__support_neon_float << 3); + else if constexpr (__have_power_vmx) + return __have_power_vmx + | (__have_power_vsx << 1) + | (__have_power8vec << 2) + | (__have_power9vec << 3) + | (__have_power10vec << 4); + else + return 0; + } + + namespace + { + struct _OdrEnforcer {}; + } + + template + struct _MachineFlagsTemplate {}; + + /**@internal + * Use this type as default template argument to all function templates that + * are not declared always_inline. It ensures, that a function + * specialization, which the compiler decides not to inline, has a unique symbol + * (_OdrEnforcer) or a symbol matching the machine/architecture flags + * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where + * users link TUs compiled with different flags. This is especially important + * for using simd in libraries. 
+ */ + using __odr_helper + = conditional_t<__machine_flags() == 0, _OdrEnforcer, + _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>; + struct _Minimum { template @@ -469,71 +631,6 @@ template template inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value; -// }}} -// constexpr feature detection{{{ -constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; -constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; -constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; -constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; -constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; -constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; -constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; -constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; -constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; -constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; -constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; -constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; -constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; -constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; -constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; -constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; -constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; -constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; -constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; -constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ; -constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; -constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW; -constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; -constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; - -constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; -constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; -constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; -constexpr inline bool __support_neon_float = -#if defined __GCC_IEC_559 - __GCC_IEC_559 == 0; -#elif defined __FAST_MATH__ - true; -#else - false; -#endif - -#ifdef _ARCH_PWR10 -constexpr inline bool __have_power10vec = true; -#else -constexpr inline bool __have_power10vec = false; -#endif -#ifdef __POWER9_VECTOR__ -constexpr inline bool __have_power9vec = true; -#else -constexpr inline bool __have_power9vec = false; -#endif -#if defined __POWER8_VECTOR__ -constexpr inline bool __have_power8vec = true; -#else -constexpr inline bool __have_power8vec = __have_power9vec; -#endif -#if defined __VSX__ -constexpr inline bool __have_power_vsx = true; -#else -constexpr inline bool __have_power_vsx = __have_power8vec; -#endif -#if defined __ALTIVEC__ -constexpr inline bool __have_power_vmx = true; -#else -constexpr inline bool __have_power_vmx = __have_power_vsx; -#endif - // }}} // __is_scalar_abi {{{ template @@ -3984,7 +4081,7 @@ template // }}} // concat(simd...) {{{ -template +template inline _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>> concat(const simd<_Tp, _As>&... 
__xs) @@ -4567,6 +4664,7 @@ template template == simd_size_v<_Tp, _Abi>>> + _GLIBCXX_SIMD_ALWAYS_INLINE operator simd_mask<_Up, _A2>() && { using namespace std::experimental::__proposed; @@ -4801,121 +4899,153 @@ find_last_set(_ExactBool) // }}} // _SimdIntOperators{{{1 -template +template class _SimdIntOperators {}; -template - class _SimdIntOperators<_V, _Impl, true> +template + class _SimdIntOperators<_V, _Tp, _Abi, true> { + using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl; + _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const { return *static_cast(this); } - template + template _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V - _S_make_derived(_Tp&& __d) - { return {__private_init, static_cast<_Tp&&>(__d)}; } + _S_make_derived(_Up&& __d) + { return {__private_init, static_cast<_Up&&>(__d)}; } public: - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator%=(_V& __lhs, const _V& __x) { return __lhs = __lhs % __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator&=(_V& __lhs, const _V& __x) { return __lhs = __lhs & __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator|=(_V& __lhs, const _V& __x) { return __lhs = __lhs | __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator^=(_V& __lhs, const _V& __x) { return __lhs = __lhs ^ __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator<<=(_V& __lhs, const _V& __x) { return __lhs = __lhs << __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator>>=(_V& __lhs, const _V& __x) { return __lhs = __lhs >> __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator<<=(_V& __lhs, int __x) { return __lhs = __lhs << __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V& + operator>>=(_V& __lhs, int __x) { return __lhs = __lhs >> __x; } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator%(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_modulus(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator&(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_and(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator|(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_or(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator^(const _V& __x, const _V& __y) { return 
_SimdIntOperators::_S_make_derived( _Impl::_S_bit_xor(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator<<(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_shift_left(__data(__x), __data(__y))); } - _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y) + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator>>(const _V& __x, const _V& __y) { return _SimdIntOperators::_S_make_derived( _Impl::_S_bit_shift_right(__data(__x), __data(__y))); } - template - _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y) - { - using _Tp = typename _VV::value_type; - if (__y < 0) - __invoke_ub("The behavior is undefined if the right operand of a " - "shift operation is negative. [expr.shift]\nA shift by " - "%d was requested", - __y); - if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) - __invoke_ub( - "The behavior is undefined if the right operand of a " - "shift operation is greater than or equal to the width of the " - "promoted left operand. [expr.shift]\nA shift by %d was requested", - __y); - return _SimdIntOperators::_S_make_derived( - _Impl::_S_bit_shift_left(__data(__x), __y)); - } + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator<<(const _V& __x, int __y) + { + if (__y < 0) + __invoke_ub("The behavior is undefined if the right operand of a " + "shift operation is negative. [expr.shift]\nA shift by " + "%d was requested", + __y); + if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) + __invoke_ub( + "The behavior is undefined if the right operand of a " + "shift operation is greater than or equal to the width of the " + "promoted left operand. [expr.shift]\nA shift by %d was requested", + __y); + return _SimdIntOperators::_S_make_derived( + _Impl::_S_bit_shift_left(__data(__x), __y)); + } - template - _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y) - { - using _Tp = typename _VV::value_type; - if (__y < 0) - __invoke_ub( - "The behavior is undefined if the right operand of a shift " - "operation is negative. [expr.shift]\nA shift by %d was requested", - __y); - if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) - __invoke_ub( - "The behavior is undefined if the right operand of a shift " - "operation is greater than or equal to the width of the promoted " - "left operand. [expr.shift]\nA shift by %d was requested", - __y); - return _SimdIntOperators::_S_make_derived( - _Impl::_S_bit_shift_right(__data(__x), __y)); - } + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend + _V + operator>>(const _V& __x, int __y) + { + if (__y < 0) + __invoke_ub( + "The behavior is undefined if the right operand of a shift " + "operation is negative. [expr.shift]\nA shift by %d was requested", + __y); + if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) + __invoke_ub( + "The behavior is undefined if the right operand of a shift " + "operation is greater than or equal to the width of the promoted " + "left operand. 
[expr.shift]\nA shift by %d was requested", + __y); + return _SimdIntOperators::_S_make_derived( + _Impl::_S_bit_shift_right(__data(__x), __y)); + } // unary operators (for integral _Tp) - _GLIBCXX_SIMD_CONSTEXPR _V operator~() const + _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR + _V + operator~() const { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; } }; @@ -4924,7 +5054,7 @@ template // simd {{{ template class simd : public _SimdIntOperators< - simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl, + simd<_Tp, _Abi>, _Tp, _Abi, conjunction, typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>, public _SimdTraits<_Tp, _Abi>::_SimdBase @@ -4938,7 +5068,7 @@ template public: using _Impl = typename _Traits::_SimdImpl; friend _Impl; - friend _SimdIntOperators; + friend _SimdIntOperators; using value_type = _Tp; using reference = _SmartReference<_MemberType, _Impl, value_type>; diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h index 8cd338e313f..55fea77d4ab 100644 --- a/libstdc++-v3/include/experimental/bits/simd_builtin.h +++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h @@ -50,7 +50,8 @@ template > //}}} // __vector_permute{{{ // Index == -1 requests zeroing of the output element -template > +template , + typename = __detail::__odr_helper> _Tp __vector_permute(_Tp __x) { @@ -62,7 +63,8 @@ template > // }}} // __vector_shuffle{{{ // Index == -1 requests zeroing of the output element -template > +template , + typename = __detail::__odr_helper> _Tp __vector_shuffle(_Tp __x, _Tp __y) { @@ -820,10 +822,12 @@ template // _SimdBase / base class for simd, providing extra conversions {{{ struct _SimdBase2 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __to_intrin(static_cast*>(this)->_M_data); } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __vector_type_t<_Tp, _Np>() const { return static_cast*>(this)->_M_data.__builtin(); @@ -832,6 +836,7 @@ template struct _SimdBase1 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __data(*static_cast*>(this)); } }; @@ -844,11 +849,13 @@ template // _MaskBase {{{ struct _MaskBase2 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return static_cast*>(this) ->_M_data.__intrin(); } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __vector_type_t<_Tp, _Np>() const { return static_cast*>(this)->_M_data._M_data; @@ -857,6 +864,7 @@ template struct _MaskBase1 { + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator __intrinsic_type_t<_Tp, _Np>() const { return __data(*static_cast*>(this)); } }; @@ -874,7 +882,9 @@ template _Up _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _MaskCastType(_Up __x) : _M_data(__x) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _MaskMember() const { return _M_data; } }; @@ -887,7 +897,9 @@ template _SimdMember _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _SimdMember() const { return _M_data; } }; @@ -898,8 +910,11 @@ template _SimdMember _M_data; public: + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType2(_Bp __b) : _M_data(__b) {} + _GLIBCXX_SIMD_ALWAYS_INLINE operator _SimdMember() const { return _M_data; } }; @@ -913,14 +928,14 @@ template struct _CommonImplX86; struct _CommonImplNeon; struct _CommonImplBuiltin; 
-template struct _SimdImplBuiltin; -template struct _MaskImplBuiltin; -template struct _SimdImplX86; -template struct _MaskImplX86; -template struct _SimdImplNeon; -template struct _MaskImplNeon; -template struct _SimdImplPpc; -template struct _MaskImplPpc; +template struct _SimdImplBuiltin; +template struct _MaskImplBuiltin; +template struct _SimdImplX86; +template struct _MaskImplX86; +template struct _SimdImplNeon; +template struct _MaskImplNeon; +template struct _SimdImplPpc; +template struct _MaskImplPpc; // simd_abi::_VecBuiltin {{{ template @@ -1369,7 +1384,7 @@ struct _CommonImplBuiltin // }}} // _SimdImplBuiltin {{{1 -template +template struct _SimdImplBuiltin { // member types {{{2 @@ -2618,7 +2633,7 @@ struct _MaskImplBuiltinMixin }; // _MaskImplBuiltin {{{1 -template +template struct _MaskImplBuiltin : _MaskImplBuiltinMixin { using _MaskImplBuiltinMixin::_S_to_bits; @@ -2953,4 +2968,4 @@ _GLIBCXX_SIMD_END_NAMESPACE #endif // __cplusplus >= 201703L #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_ -// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=80 +// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100 diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h index 1e75812d098..78ad33f74e4 100644 --- a/libstdc++-v3/include/experimental/bits/simd_detail.h +++ b/libstdc++-v3/include/experimental/bits/simd_detail.h @@ -172,6 +172,46 @@ #else #define _GLIBCXX_SIMD_HAVE_AVX512BW 0 #endif +#ifdef __AVX512BITALG__ +#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0 +#endif +#ifdef __AVX512VBMI2__ +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0 +#endif +#ifdef __AVX512VBMI__ +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0 +#endif +#ifdef __AVX512IFMA__ +#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0 +#endif +#ifdef __AVX512CD__ +#define _GLIBCXX_SIMD_HAVE_AVX512CD 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512CD 0 +#endif +#ifdef __AVX512VNNI__ +#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0 +#endif +#ifdef __AVX512VPOPCNTDQ__ +#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0 +#endif +#ifdef __AVX512VP2INTERSECT__ +#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1 +#else +#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0 +#endif #if _GLIBCXX_SIMD_HAVE_SSE #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1 diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h index dc2fb90b9b2..5a742ed52e1 100644 --- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h +++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h @@ -201,6 +201,7 @@ template }; template + _GLIBCXX_SIMD_INTRINSIC __tuple_element_meta<_Tp, _Abi, _Offset> __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) { return {}; } @@ -230,11 +231,13 @@ template struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; template + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(_Tp& __base) { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } template + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(const _Tp& __base) { @@ -243,6 +246,7 @@ template } template + _GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) { @@ -251,6 +255,7 @@ template } template + 
_GLIBCXX_SIMD_INTRINSIC decltype(auto) __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) { @@ -586,6 +591,7 @@ template return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; } + _GLIBCXX_SIMD_INTRINSIC _Tp operator[](size_t __i) const noexcept { if constexpr (_S_tuple_size == 1) @@ -608,6 +614,7 @@ template } } + _GLIBCXX_SIMD_INTRINSIC void _M_set(size_t __i, _Tp __val) noexcept { if constexpr (_S_tuple_size == 1) @@ -627,6 +634,7 @@ template private: // _M_subscript_read/_write {{{ + _GLIBCXX_SIMD_INTRINSIC _Tp _M_subscript_read([[maybe_unused]] size_t __i) const noexcept { if constexpr (__is_vectorizable_v<_FirstType>) @@ -635,6 +643,7 @@ template return first[__i]; } + _GLIBCXX_SIMD_INTRINSIC void _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept { if constexpr (__is_vectorizable_v<_FirstType>) @@ -1033,9 +1042,11 @@ template >> _Tp _M_data; using _TT = __remove_cvref_t<_Tp>; + _GLIBCXX_SIMD_INTRINSIC operator _TT() { return _M_data; } + _GLIBCXX_SIMD_INTRINSIC operator _TT&() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1043,6 +1054,7 @@ template >> return _M_data; } + _GLIBCXX_SIMD_INTRINSIC operator _TT*() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1050,13 +1062,16 @@ template >> return &_M_data; } - constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd) {} + _GLIBCXX_SIMD_INTRINSIC + constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd) {} template + _GLIBCXX_SIMD_INTRINSIC operator simd() { return {__private_init, _M_data}; } template + _GLIBCXX_SIMD_INTRINSIC operator simd&() { return *reinterpret_cast*>( @@ -1064,6 +1079,7 @@ template >> } template + _GLIBCXX_SIMD_INTRINSIC operator simd*() { return reinterpret_cast*>( @@ -1081,14 +1097,18 @@ template _Tp _M_data; fixed_size_simd<_TT, 1> _M_fd; - constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} + _GLIBCXX_SIMD_INTRINSIC + constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} + _GLIBCXX_SIMD_INTRINSIC ~__autocvt_to_simd() { _M_data = __data(_M_fd).first; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1>() { return _M_fd; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1> &() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1096,6 +1116,7 @@ template return _M_fd; } + _GLIBCXX_SIMD_INTRINSIC operator fixed_size_simd<_TT, 1> *() { static_assert(is_lvalue_reference<_Tp>::value, ""); @@ -1107,8 +1128,8 @@ template // }}} struct _CommonImplFixedSize; -template struct _SimdImplFixedSize; -template struct _MaskImplFixedSize; +template struct _SimdImplFixedSize; +template struct _MaskImplFixedSize; // simd_abi::_Fixed {{{ template struct simd_abi::_Fixed @@ -1172,12 +1193,15 @@ template { // The following ensures, function arguments are passed via the stack. 
// This is important for ABI compatibility across TU boundaries + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdBase(const _SimdBase&) {} _SimdBase() = default; + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator const _SimdMember &() const { return static_cast*>(this)->_M_data; } + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator array<_Tp, _Np>() const { array<_Tp, _Np> __r; @@ -1198,8 +1222,11 @@ template // _SimdCastType {{{ struct _SimdCastType { + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType(const array<_Tp, _Np>&); + _GLIBCXX_SIMD_ALWAYS_INLINE _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} + _GLIBCXX_SIMD_ALWAYS_INLINE explicit operator const _SimdMember &() const { return _M_data; } private: @@ -1237,7 +1264,7 @@ struct _CommonImplFixedSize // _SimdImplFixedSize {{{1 // fixed_size should not inherit from _SimdMathFallback in order for // specializations in the used _SimdTuple Abis to get used -template +template struct _SimdImplFixedSize { // member types {{{2 @@ -1794,7 +1821,7 @@ template }; // _MaskImplFixedSize {{{1 -template +template struct _MaskImplFixedSize { static_assert( diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h index 61af9fc67af..01061a75a5e 100644 --- a/libstdc++-v3/include/experimental/bits/simd_math.h +++ b/libstdc++-v3/include/experimental/bits/simd_math.h @@ -60,6 +60,7 @@ template template ())), _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t, _R> \ __name(simd<_Tp, _Abi> __x) \ { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; } @@ -125,6 +126,7 @@ template < \ typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \ typename _R = _Math_return_type_t< \ decltype(std::__name(declval(), _Arg2::declval())), _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t, _R> \ __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \ { \ @@ -155,6 +157,7 @@ template (), _Arg2::declval(), \ _Arg3::declval())), \ _Tp, _Abi>> \ + _GLIBCXX_SIMD_ALWAYS_INLINE \ enable_if_t, _R> \ __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \ const typename _Arg3::type& __z) \ @@ -399,6 +402,7 @@ template // }}} // __extract_exponent_as_int {{{ template + _GLIBCXX_SIMD_INTRINSIC rebind_simd_t> __extract_exponent_as_int(const simd<_Tp, _Abi>& __v) { @@ -421,7 +425,8 @@ template -> decltype(__impl_fun(static_cast<_Args&&>(__args)...)) { return __impl_fun(static_cast<_Args&&>(__args)...); } -template +template inline auto __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun, _Args&&... __args) @@ -457,7 +462,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp) * Fix sign. 
*/ // cos{{{ -template +template enable_if_t, simd<_Tp, _Abi>> cos(const simd<_Tp, _Abi>& __x) { @@ -503,7 +508,7 @@ template //}}} // sin{{{ -template +template enable_if_t, simd<_Tp, _Abi>> sin(const simd<_Tp, _Abi>& __x) { @@ -565,6 +570,7 @@ _GLIBCXX_SIMD_MATH_CALL_(expm1) // frexp {{{ #if _GLIBCXX_SIMD_X86INTRIN template + _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np> __getexp(_SimdWrapper<_Tp, _Np> __x) { @@ -593,6 +599,7 @@ template } template + _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np> __getmant_avx512(_SimdWrapper<_Tp, _Np> __x) { @@ -633,7 +640,7 @@ template * The return value will be in the range [0.5, 1.0[ * The @p __e value will be an integer defining the power-of-two exponent */ -template +template enable_if_t, simd<_Tp, _Abi>> frexp(const simd<_Tp, _Abi>& __x, _Samesize>* __exp) { @@ -738,7 +745,7 @@ _GLIBCXX_SIMD_MATH_CALL_(log2) //}}} // logb{{{ -template +template enable_if_t::value, simd<_Tp, _Abi>> logb(const simd<_Tp, _Abi>& __x) { @@ -813,7 +820,7 @@ template } //}}} -template +template enable_if_t, simd<_Tp, _Abi>> modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr) { @@ -847,6 +854,7 @@ _GLIBCXX_SIMD_MATH_CALL_(fabs) // [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to // allow signed integral _Tp template + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t && is_signed_v<_Tp>, simd<_Tp, _Abi>> abs(const simd<_Tp, _Abi>& __x) { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; } @@ -929,7 +937,7 @@ template __data(__args)...)}; } -template +template __remove_cvref_t<_VV> __hypot(_VV __x, _VV __y) { @@ -1067,7 +1075,7 @@ template _GLIBCXX_SIMD_CVTING2(hypot) - template + template __remove_cvref_t<_VV> __hypot(_VV __x, _VV __y, _VV __z) { @@ -1268,7 +1276,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp) _GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp) _GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*) -template +template enable_if_t, simd<_Tp, _Abi>> copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) { @@ -1306,12 +1314,14 @@ _GLIBCXX_SIMD_MATH_CALL_(isfinite) // `int isinf(double)`. 
template > + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t, _R> isinf(simd<_Tp, _Abi> __x) { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; } template > + _GLIBCXX_SIMD_ALWAYS_INLINE enable_if_t, _R> isnan(simd<_Tp, _Abi> __x) { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; } @@ -1319,6 +1329,7 @@ template + _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask<_Tp, _Abi> signbit(simd<_Tp, _Abi> __x) { @@ -1366,7 +1377,7 @@ simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, */ // special math {{{ -template +template enable_if_t, simd<_Tp, _Abi>> assoc_laguerre(const fixed_size_simd>& __n, const fixed_size_simd>& __m, @@ -1377,7 +1388,7 @@ template }); } -template +template enable_if_t, simd<_Tp, _Abi>> assoc_legendre(const fixed_size_simd>& __n, const fixed_size_simd>& __m, @@ -1401,7 +1412,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp) _GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp) _GLIBCXX_SIMD_MATH_CALL_(expint) -template +template enable_if_t, simd<_Tp, _Abi>> hermite(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1410,7 +1421,7 @@ template [&](auto __i) { return std::hermite(__n[__i], __x[__i]); }); } -template +template enable_if_t, simd<_Tp, _Abi>> laguerre(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1419,7 +1430,7 @@ template [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); }); } -template +template enable_if_t, simd<_Tp, _Abi>> legendre(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1430,7 +1441,7 @@ template _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta) -template +template enable_if_t, simd<_Tp, _Abi>> sph_bessel(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) @@ -1439,7 +1450,7 @@ template [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); }); } -template +template enable_if_t, simd<_Tp, _Abi>> sph_legendre(const fixed_size_simd>& __l, const fixed_size_simd>& __m, @@ -1450,7 +1461,7 @@ template }); } -template +template enable_if_t, simd<_Tp, _Abi>> sph_neumann(const fixed_size_simd>& __n, const simd<_Tp, _Abi>& __x) diff --git a/libstdc++-v3/include/experimental/bits/simd_neon.h b/libstdc++-v3/include/experimental/bits/simd_neon.h index 7f472e88649..bbd26835d9c 100644 --- a/libstdc++-v3/include/experimental/bits/simd_neon.h +++ b/libstdc++-v3/include/experimental/bits/simd_neon.h @@ -44,7 +44,7 @@ struct _CommonImplNeon : _CommonImplBuiltin // }}} // _SimdImplNeon {{{ -template +template struct _SimdImplNeon : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -390,7 +390,7 @@ struct _MaskImplNeonMixin // }}} // _MaskImplNeon {{{ -template +template struct _MaskImplNeon : _MaskImplNeonMixin, _MaskImplBuiltin<_Abi> { using _MaskImplBuiltinMixin::_S_to_maskvector; diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h index ef52d129a85..4143bafa80e 100644 --- a/libstdc++-v3/include/experimental/bits/simd_ppc.h +++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h @@ -35,7 +35,7 @@ _GLIBCXX_SIMD_BEGIN_NAMESPACE // _SimdImplPpc {{{ -template +template struct _SimdImplPpc : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -117,7 +117,7 @@ template // }}} // _MaskImplPpc {{{ -template +template struct _MaskImplPpc : _MaskImplBuiltin<_Abi> { using _Base = _MaskImplBuiltin<_Abi>; diff --git a/libstdc++-v3/include/experimental/bits/simd_scalar.h b/libstdc++-v3/include/experimental/bits/simd_scalar.h index 48e13f6c719..b23011ca6c9 100644 --- a/libstdc++-v3/include/experimental/bits/simd_scalar.h +++ 
b/libstdc++-v3/include/experimental/bits/simd_scalar.h @@ -155,7 +155,8 @@ struct _SimdImplScalar // _S_masked_load {{{2 template - static inline _Tp _S_masked_load(_Tp __merge, bool __k, + _GLIBCXX_SIMD_INTRINSIC + static _Tp _S_masked_load(_Tp __merge, bool __k, const _Up* __mem) noexcept { if (__k) @@ -165,83 +166,97 @@ struct _SimdImplScalar // _S_store {{{2 template - static inline void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept + _GLIBCXX_SIMD_INTRINSIC + static void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept { __mem[0] = static_cast<_Up>(__v); } // _S_masked_store {{{2 template - static inline void _S_masked_store(const _Tp __v, _Up* __mem, + _GLIBCXX_SIMD_INTRINSIC + static void _S_masked_store(const _Tp __v, _Up* __mem, const bool __k) noexcept { if (__k) __mem[0] = __v; } // _S_negate {{{2 template - static constexpr inline bool _S_negate(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr bool _S_negate(_Tp __x) noexcept { return !__x; } // _S_reduce {{{2 template - static constexpr inline _Tp + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_reduce(const simd<_Tp, simd_abi::scalar>& __x, const _BinaryOperation&) { return __x._M_data; } // _S_min, _S_max {{{2 template - static constexpr inline _Tp _S_min(const _Tp __a, const _Tp __b) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_min(const _Tp __a, const _Tp __b) { return std::min(__a, __b); } template - static constexpr inline _Tp _S_max(const _Tp __a, const _Tp __b) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_max(const _Tp __a, const _Tp __b) { return std::max(__a, __b); } // _S_complement {{{2 template - static constexpr inline _Tp _S_complement(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_complement(_Tp __x) noexcept { return static_cast<_Tp>(~__x); } // _S_unary_minus {{{2 template - static constexpr inline _Tp _S_unary_minus(_Tp __x) noexcept + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_unary_minus(_Tp __x) noexcept { return static_cast<_Tp>(-__x); } // arithmetic operators {{{2 template - static constexpr inline _Tp _S_plus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_plus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) + __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_minus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_minus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) - __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_multiplies(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_multiplies(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) * __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_divides(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_divides(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) / __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_modulus(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_modulus(_Tp __x, _Tp __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) % __promote_preserving_unsigned(__y)); } template - static constexpr inline _Tp _S_bit_and(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_and(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -254,7 +269,8 @@ struct _SimdImplScalar } template - static 
constexpr inline _Tp _S_bit_or(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_or(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -267,7 +283,8 @@ struct _SimdImplScalar } template - static constexpr inline _Tp _S_bit_xor(_Tp __x, _Tp __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_xor(_Tp __x, _Tp __y) { if constexpr (is_floating_point_v<_Tp>) { @@ -280,11 +297,13 @@ struct _SimdImplScalar } template - static constexpr inline _Tp _S_bit_shift_left(_Tp __x, int __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_shift_left(_Tp __x, int __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) << __y); } template - static constexpr inline _Tp _S_bit_shift_right(_Tp __x, int __y) + _GLIBCXX_SIMD_INTRINSIC + static constexpr _Tp _S_bit_shift_right(_Tp __x, int __y) { return static_cast<_Tp>(__promote_preserving_unsigned(__x) >> __y); } // math {{{2 @@ -553,11 +572,13 @@ struct _SimdImplScalar // _S_increment & _S_decrement{{{2 template - constexpr static inline void _S_increment(_Tp& __x) + _GLIBCXX_SIMD_INTRINSIC + constexpr static void _S_increment(_Tp& __x) { ++__x; } template - constexpr static inline void _S_decrement(_Tp& __x) + _GLIBCXX_SIMD_INTRINSIC + constexpr static void _S_decrement(_Tp& __x) { --__x; } @@ -582,6 +603,7 @@ struct _SimdImplScalar // smart_reference access {{{2 template + _GLIBCXX_SIMD_INTRINSIC constexpr static void _S_set(_Tp& __v, [[maybe_unused]] int __i, _Up&& __x) noexcept { @@ -677,25 +699,32 @@ struct _MaskImplScalar } // logical and bitwise operators {{{2 + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_logical_and(bool __x, bool __y) { return __x && __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_logical_or(bool __x, bool __y) { return __x || __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_not(bool __x) { return !__x; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_and(bool __x, bool __y) { return __x && __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_or(bool __x, bool __y) { return __x || __y; } + _GLIBCXX_SIMD_INTRINSIC static constexpr bool _S_bit_xor(bool __x, bool __y) { return __x != __y; } // smart_reference access {{{2 + _GLIBCXX_SIMD_INTRINSIC constexpr static void _S_set(bool& __k, [[maybe_unused]] int __i, bool __x) noexcept { diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h index 34633c096b1..e010740b44c 100644 --- a/libstdc++-v3/include/experimental/bits/simd_x86.h +++ b/libstdc++-v3/include/experimental/bits/simd_x86.h @@ -822,7 +822,7 @@ struct _CommonImplX86 : _CommonImplBuiltin // }}} // _SimdImplX86 {{{ -template +template struct _SimdImplX86 : _SimdImplBuiltin<_Abi> { using _Base = _SimdImplBuiltin<_Abi>; @@ -4241,7 +4241,7 @@ struct _MaskImplX86Mixin // }}} // _MaskImplX86 {{{ -template +template struct _MaskImplX86 : _MaskImplX86Mixin, _MaskImplBuiltin<_Abi> { using _MaskImplX86Mixin::_S_to_bits;
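
Note (editor, not part of the patch): the core mechanism above is that every non-always_inline function template gains a defaulted template parameter `__odr_helper`, which is either the internal-linkage `_OdrEnforcer` or `_MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>`, so that TUs compiled with different -m or floating-point flags instantiate distinctly mangled symbols instead of colliding at link time. The following is a minimal standalone sketch of that idea, assuming hypothetical names (machine_flags, OdrEnforcer, MachineFlagsTemplate, widened_add) chosen only for illustration; it is not the library's code.

    // C++17 sketch: encode per-TU compile flags in a default template argument
    // so that out-of-line instantiations from differently compiled TUs get
    // different mangled names (avoiding ODR violations on linking).
    #include <cstdint>
    #include <type_traits>

    namespace demo {
      constexpr std::uint_least64_t machine_flags()
      {
        std::uint_least64_t f = 0;
    #ifdef __AVX2__
        f |= 1;   // set only in TUs compiled with -mavx2
    #endif
    #ifdef __FMA__
        f |= 2;   // set only in TUs compiled with -mfma
    #endif
        return f;
      }

      // Internal linkage: a distinct type per TU, used when no flags are set.
      namespace { struct OdrEnforcer {}; }

      template <std::uint_least64_t> struct MachineFlagsTemplate {};

      using odr_helper
        = std::conditional_t<machine_flags() == 0, OdrEnforcer,
                             MachineFlagsTemplate<machine_flags()>>;

      // Not always_inline: if the compiler emits it out of line, the symbol
      // still differs between TUs compiled with different flags.
      template <typename T, typename = odr_helper>
      T widened_add(T a, T b)
      { return a + b; }
    }

    int main()
    { return demo::widened_add(1, 2) == 3 ? 0 : 1; }

The complementary half of the patch is that everything which must not depend on `__odr_helper` (conversion operators, the scalar/builtin implementation hooks, the math wrappers) is instead marked `_GLIBCXX_SIMD_ALWAYS_INLINE` or `_GLIBCXX_SIMD_INTRINSIC`, so those functions are never emitted as shared out-of-line symbols in the first place.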