diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 99c07313f59a..e1b17c621b34 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -91,7 +91,7 @@
 constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
 #else
 constexpr inline int __sve_vectorized_size_bytes = 0;
-#endif 
+#endif
 
 namespace simd_abi {
 // simd_abi forward declarations {{{
@@ -381,7 +381,7 @@ __machine_flags()
          | (__have_neon_a64 << 2)
          | (__have_neon_a64 << 2)
          | (__support_neon_float << 3)
-         | (__have_sve << 4); 
+         | (__have_sve << 4);
   else if constexpr (__have_power_vmx)
     return __have_power_vmx
          | (__have_power_vsx << 1)
@@ -4180,7 +4180,7 @@ split(const simd<_Tp, _Ap>& __x)
     constexpr size_t _N0 = _SL::template _S_at<0>();
     using _V = __deduced_simd<_Tp, _N0>;
 
-    auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 
+    auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
       {
        return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -4584,7 +4584,7 @@ _S_determine_best_abi()
            // 1. The ABI tag is valid for _Tp
            // 2. The storage overhead is no more than padding to fill the next
            //    power-of-2 number of bytes
-           if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp> 
+           if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
                            && ((__is_sve_abi<_A0<_Bytes>>() && __have_sve
                                   && (_Np <= __sve_vectorized_size_bytes/sizeof(_Tp)))
                                 || (__fullsize / 2 < _Np)) )
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index b0ffe339569d..d65e833f7851 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -2376,94 +2376,95 @@ _S_isnormal(_SimdWrapper<_Tp, _Np> __x)
       _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
       _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
       {
-       if constexpr(__have_sve)
-         {
-           __fixed_size_storage_t<int, _Np> __r{};
-           __execute_n_times<_Np>(
-             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-               __r._M_set(__i, std::fpclassify(__x[__i]));
-             });
-           return __r;
-         }
-       else {
-       using _I = __int_for_sizeof_t<_Tp>;
-       const auto __xn
-         = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
-       constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
-       _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
-         = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
-
-       _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
-         = __vector_broadcast<_NI, _I>(FP_NORMAL);
+       if constexpr(__have_sve)
+         {
+           __fixed_size_storage_t<int, _Np> __r{};
+           __execute_n_times<_Np>(
+             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+               __r._M_set(__i, std::fpclassify(__x[__i]));
+             });
+           return __r;
+         }
+       else
+         {
+           using _I = __int_for_sizeof_t<_Tp>;
+           const auto __xn
+             = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
+           constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
+             = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
+
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
+             = __vector_broadcast<_NI, _I>(FP_NORMAL);
 #if !__FINITE_MATH_ONLY__
-       _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
-         = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
-       _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
-         = __vector_broadcast<_NI, _I>(FP_NAN);
-       _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
-         = __vector_broadcast<_NI, _I>(FP_INFINITE);
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
+             = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
+             = __vector_broadcast<_NI, _I>(FP_NAN);
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
+             = __vector_broadcast<_NI, _I>(FP_INFINITE);
 #endif
 #ifndef __FAST_MATH__
-       _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
-         = __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
+             = __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
 #endif
-       _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
-         = __vector_broadcast<_NI, _I>(FP_ZERO);
+           _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
+             = __vector_broadcast<_NI, _I>(FP_ZERO);
 
-       __vector_type_t<_I, _NI>
-         __tmp = __xn < __minn
-  #ifdef __FAST_MATH__
-                   ? __fp_zero
-  #else
-                   ? (__xn == 0 ? __fp_zero : __fp_subnormal)
-  #endif
-  #if __FINITE_MATH_ONLY__
-                   : __fp_normal;
-  #else
-                   : (__xn < __infn ? __fp_normal
-                                    : (__xn == __infn ? __fp_infinite : __fp_nan));
-  #endif
+           __vector_type_t<_I, _NI>
+             __tmp = __xn < __minn
+#ifdef __FAST_MATH__
+                       ? __fp_zero
+#else
+                       ? (__xn == 0 ? __fp_zero : __fp_subnormal)
+#endif
+#if __FINITE_MATH_ONLY__
+                       : __fp_normal;
+#else
+                       : (__xn < __infn ? __fp_normal
+                                        : (__xn == __infn ? __fp_infinite : __fp_nan));
+#endif
 
-       if constexpr (sizeof(_I) == sizeof(int))
-         {
-           using _FixedInt = __fixed_size_storage_t<int, _Np>;
-           const auto __as_int = __vector_bitcast<int>(__tmp);
-           if constexpr (_FixedInt::_S_tuple_size == 1)
-             return {__as_int};
-           else if constexpr (_FixedInt::_S_tuple_size == 2
-                                && is_same_v<
-                                     typename _FixedInt::_SecondType::_FirstAbi,
-                                     simd_abi::scalar>)
-             return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
-           else if constexpr (_FixedInt::_S_tuple_size == 2)
-             return {__extract<0, 2>(__as_int),
-                     __auto_bitcast(__extract<1, 2>(__as_int))};
+           if constexpr (sizeof(_I) == sizeof(int))
+             {
+               using _FixedInt = __fixed_size_storage_t<int, _Np>;
+               const auto __as_int = __vector_bitcast<int>(__tmp);
+               if constexpr (_FixedInt::_S_tuple_size == 1)
+                 return {__as_int};
+               else if constexpr (_FixedInt::_S_tuple_size == 2
+                                    && is_same_v<
+                                         typename _FixedInt::_SecondType::_FirstAbi,
+                                         simd_abi::scalar>)
+                 return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
+               else if constexpr (_FixedInt::_S_tuple_size == 2)
+                 return {__extract<0, 2>(__as_int),
+                         __auto_bitcast(__extract<1, 2>(__as_int))};
+               else
+                 __assert_unreachable<_Tp>();
+             }
+           else if constexpr (_Np == 2 && sizeof(_I) == 8
+                                && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
+             {
+               const auto __aslong = __vector_bitcast<_LLong>(__tmp);
+               return {int(__aslong[0]), {int(__aslong[1])}};
+             }
+#if _GLIBCXX_SIMD_X86INTRIN
+           else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
+                                && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
+             return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
+                                     __to_intrin(__hi128(__tmp)))};
+           else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
+                                && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
+             return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
+#endif // _GLIBCXX_SIMD_X86INTRIN
+           else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
+             return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
+                       [](auto... __l) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                         return __make_wrapper<int>(__l...);
+                       })};
            else
              __assert_unreachable<_Tp>();
          }
-       else if constexpr (_Np == 2 && sizeof(_I) == 8
-                            && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
-         {
-           const auto __aslong = __vector_bitcast<_LLong>(__tmp);
-           return {int(__aslong[0]), {int(__aslong[1])}};
-         }
-#if _GLIBCXX_SIMD_X86INTRIN
-       else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
-                            && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
-         return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
-                                 __to_intrin(__hi128(__tmp)))};
-       else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
-                            && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
-         return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
-#endif // _GLIBCXX_SIMD_X86INTRIN
-       else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
-         return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
-                   [](auto... __l) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-                     return __make_wrapper<int>(__l...);
-                   })};
-       else
-         __assert_unreachable<_Tp>();
-       }
       }
 
       // _S_increment & _S_decrement{{{2
@@ -2796,22 +2797,22 @@ _S_convert(simd_mask<_Up, _UAbi> __x)
          return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits());
        }
       else
-       {
-       if constexpr(__is_sve_abi<_UAbi>())
-         {
-           simd_mask<_Tp> __r(false);
-           constexpr size_t __min_size = std::min(__r.size(), __x.size());
-           __execute_n_times<__min_size>(
-             [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-               __r[__i] = __x[__i];
-             });
-           return __data(__r);
-         }
-       else
-         return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
-                                                      _S_size<_Tp>>(
-                  __data(__x));
-       }
+       {
+         if constexpr(__is_sve_abi<_UAbi>())
+           {
+             simd_mask<_Tp> __r(false);
+             constexpr size_t __min_size = std::min(__r.size(), __x.size());
+             __execute_n_times<__min_size>(
+               [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+                 __r[__i] = __x[__i];
+               });
+             return __data(__r);
+           }
+         else
+           return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
+                                                        _S_size<_Tp>>(
+                    __data(__x));
+       }
     }
 
     // }}}
     // _S_masked_load {{{2
diff --git a/libstdc++-v3/include/experimental/bits/simd_converter.h b/libstdc++-v3/include/experimental/bits/simd_converter.h
index 78ccc027fbb2..03fb3d28ab12 100644
--- a/libstdc++-v3/include/experimental/bits/simd_converter.h
+++ b/libstdc++-v3/include/experimental/bits/simd_converter.h
@@ -30,14 +30,14 @@ _GLIBCXX_SIMD_BEGIN_NAMESPACE
 template <typename _Arg, typename _Ret, typename _To, size_t _Np>
-_Ret __converter_fallback(_Arg __a)
+  _Ret __converter_fallback(_Arg __a)
   {
-  _Ret __ret{};
-  __execute_n_times<_Np>(
+    _Ret __ret{};
+    __execute_n_times<_Np>(
       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-      __ret._M_set(__i, static_cast<_To>(__a[__i]));
-    });
-  return __ret;
+        __ret._M_set(__i, static_cast<_To>(__a[__i]));
+      });
+    return __ret;
   }
 
 // _SimdConverter scalar -> scalar {{{
@@ -76,8 +76,7 @@ struct _SimdConverter
       __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
       is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
       conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
-      && !(__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
-      >>
+      && !(__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())>>
   {
     using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
     using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
@@ -93,13 +92,13 @@ struct _SimdConverter
 // _SimdConverter "native 1" -> "native 2" {{{
 template <typename _From, typename _AFrom, typename _To, typename _ATo>
   struct _SimdConverter<
-    _From, _AFrom, _To, _ATo,
-    enable_if_t<!disjunction_v<__is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
-    is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
-    conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
-    && (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
-    >>
+    _From, _AFrom, _To, _ATo,
+    enable_if_t<!disjunction_v<__is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
+                               is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
+                               conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
+                  && (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
+    >>
   {
     using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
     using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
@@ -145,8 +144,8 @@ struct _SimdConverter<_From, simd_abi::fixed_size<_Np>
       if constexpr (is_same_v<_From, _To>)
        return __x;
 
-      // fallback to sequential when sve is available 
-      else if constexpr (__have_sve) 
+      // fallback to sequential when sve is available
+      else if constexpr (__have_sve)
        return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
 
      // special case (optimize) int signedness casts
@@ -313,12 +312,12 @@ static_assert(
       "_SimdConverter to fixed_size only works for equal element counts");
 
      using _Ret = __fixed_size_storage_t<_To, _Np>;
-     using _Arg = typename _SimdTraits<_From, _Ap>::_SimdMember; 
+     using _Arg = typename _SimdTraits<_From, _Ap>::_SimdMember;
 
      _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
      operator()(_Arg __x) const noexcept
      {
-       if constexpr (__have_sve) 
+       if constexpr (__have_sve)
         return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
        else if constexpr (_Ret::_S_tuple_size == 1)
         return {__vector_convert<typename _Ret::_FirstType::_BuiltinType>(__x)};
@@ -357,12 +356,13 @@ static_assert(
       "_SimdConverter to fixed_size only works for equal element counts");
 
      using _Arg = __fixed_size_storage_t<_From, _Np>;
-     using _Ret = typename _SimdTraits<_To, _Ap>::_SimdMember; 
+     using _Ret = typename _SimdTraits<_To, _Ap>::_SimdMember;
 
      _GLIBCXX_SIMD_INTRINSIC constexpr
-     _Ret operator()(const _Arg& __x) const noexcept
+     _Ret
+     operator()(const _Arg& __x) const noexcept
      {
-       if constexpr(__have_sve) 
+       if constexpr(__have_sve)
         return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
        else if constexpr (_Arg::_S_tuple_size == 1)
         return __vector_convert<__vector_type_t<_To, _Np>>(__x.first);
diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h
index 769256d6992b..bf515e5145de 100644
--- a/libstdc++-v3/include/experimental/bits/simd_math.h
+++ b/libstdc++-v3/include/experimental/bits/simd_math.h
@@ -652,17 +652,17 @@ frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
        (*__exp)[0] = __tmp;
        return __r;
      }
-    else if constexpr (__is_sve_abi<_Abi>()) 
+    else if constexpr (__is_sve_abi<_Abi>())
      {
-      simd<_Tp, _Abi> __r;
-      __execute_n_times<simd_size_v<_Tp, _Abi>>(
-        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-          int __tmp;
-          const auto __ri = std::frexp(__x[__i], &__tmp);
-          (*__exp)[__i] = __tmp;
-          __r[__i] = __ri;
-        });
-      return __r;
+       simd<_Tp, _Abi> __r;
+       __execute_n_times<simd_size_v<_Tp, _Abi>>(
+         [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+           int __tmp;
+           const auto __ri = std::frexp(__x[__i], &__tmp);
+           (*__exp)[__i] = __tmp;
+           __r[__i] = __ri;
+         });
+       return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
@@ -1147,7 +1147,8 @@ hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
       _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
 
 #ifndef __FAST_MATH__
-      if constexpr (_V::size() > 1 && (__is_neon_abi<_Abi>() && __have_neon && !__have_neon_a32))
+      if constexpr (_V::size() > 1
+                      && __is_neon_abi<_Abi>() && __have_neon && !__have_neon_a32)
        { // With ARMv7 NEON, we have no subnormals and must use slightly
          // different strategy
          const _V __hi_exp = __hi & __inf;
diff --git a/libstdc++-v3/include/experimental/bits/simd_sve.h b/libstdc++-v3/include/experimental/bits/simd_sve.h
index 123242a3a625..511da08aafc5 100644
--- a/libstdc++-v3/include/experimental/bits/simd_sve.h
+++ b/libstdc++-v3/include/experimental/bits/simd_sve.h
@@ -573,7 +573,7 @@ __intrin() const
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator[](_SizeConstant<__i>) const
      {
-       return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i); 
+       return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i);
      }
 
      _GLIBCXX_SIMD_INTRINSIC constexpr void
@@ -831,7 +831,7 @@ _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x, _SveMaskWrapper<sizeof(_Tp), _Np> __k)
      using _SUp = __get_sve_value_type_t<_Up>;
      using _STp = __get_sve_value_type_t<_Tp>;
-      
+
      _SUp* __up = reinterpret_cast<_SUp*>(__p);
 
      if constexpr (std::is_same_v<_Tp, _Up>)
@@ -839,19 +839,19 @@ _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x, _SveMaskWrapper<sizeof(_Tp), _Np> __k)
      else if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up>
                           && (sizeof(_Tp) > sizeof(_Up)))
        {
-         if constexpr (std::is_same_v<_SUp, int8_t> && std::is_signed_v<_STp>)
-           return svst1b(__k._M_data, __up, __x);
-         if constexpr (std::is_same_v<_SUp, uint8_t> && std::is_unsigned_v<_STp>)
-           return svst1b(__k._M_data, __up, __x);
-         if constexpr (std::is_same_v<_SUp, int16_t> && std::is_signed_v<_STp>)
-           return svst1h(__k._M_data, __up, __x);
-         if constexpr (std::is_same_v<_SUp, uint16_t> && std::is_unsigned_v<_STp>)
-           return svst1h(__k._M_data, __up, __x);
-         if constexpr (std::is_same_v<_SUp, int32_t> && std::is_signed_v<_STp>)
-           return svst1w(__k._M_data, __up, __x);
-         if constexpr (std::is_same_v<_SUp, uint32_t> && std::is_unsigned_v<_STp>)
-           return svst1w(__k._M_data, __up, __x);
-       }
+         if constexpr (std::is_same_v<_SUp, int8_t> && std::is_signed_v<_STp>)
+           return svst1b(__k._M_data, __up, __x);
+         if constexpr (std::is_same_v<_SUp, uint8_t> && std::is_unsigned_v<_STp>)
+           return svst1b(__k._M_data, __up, __x);
+         if constexpr (std::is_same_v<_SUp, int16_t> && std::is_signed_v<_STp>)
+           return svst1h(__k._M_data, __up, __x);
+         if constexpr (std::is_same_v<_SUp, uint16_t> && std::is_unsigned_v<_STp>)
+           return svst1h(__k._M_data, __up, __x);
+         if constexpr (std::is_same_v<_SUp, int32_t> && std::is_signed_v<_STp>)
+           return svst1w(__k._M_data, __up, __x);
+         if constexpr (std::is_same_v<_SUp, uint32_t> && std::is_unsigned_v<_STp>)
+           return svst1w(__k._M_data, __up, __x);
+       }
 
      __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
        if (__k[__i])
@@ -991,42 +991,42 @@ for (size_t __ri = 1; __ri != __i; __ri++)
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, plus<>)
      {
-       return svaddv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data); 
+       return svaddv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_and<>)
      {
-       return svandv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data); 
+       return svandv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_or<>)
      {
-       return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data); 
+       return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, bit_xor<>)
      {
-       return sveorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data); 
+       return sveorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Maximum())
      {
-       return svmaxv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data); 
+       return svmaxv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Minimum())
      {
-       return svminv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data); 
+       return svminv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
      }
 
    template <typename _Tp, typename _BinaryOperation>
@@ -1082,7 +1082,7 @@ _S_unary_minus(_SveSimdWrapper<_Tp, _Np> __x) noexcept
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_plus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
-      { return __x._M_data + __y._M_data; } 
+      { return __x._M_data + __y._M_data; }
 
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
@@ -1097,19 +1097,21 @@ _S_multiplies(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_divides(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
-      { 
-        __sve_vector_type_t<_Tp, _Np> __y_padded = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
-                          __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
-        return __x._M_data / __y_padded;
+      {
+        __sve_vector_type_t<_Tp, _Np> __y_padded
+          = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
+                  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
+        return __x._M_data / __y_padded;
      }
 
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
      _S_modulus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
-      { 
-        __sve_vector_type_t<_Tp, _Np> __y_padded = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
-                          __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
-        return __x._M_data % __y_padded;
+      {
+        __sve_vector_type_t<_Tp, _Np> __y_padded
+          = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
+                  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
+        return __x._M_data % __y_padded;
      }
 
    template <typename _Tp, size_t _Np>
@@ -1412,14 +1414,14 @@ _S_fma(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y,
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_fmax(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
-       return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 
+       return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }
 
    template <typename _Tp, size_t _Np>
      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
      _S_fmin(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
      {
-       return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data); 
+       return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
      }
 
    template <typename _Tp, size_t _Np>
@@ -1594,28 +1596,28 @@ _S_broadcast(bool __x)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        __sve_bool_type __tr = __sve_vector_type<_Tp, _Np>::__sve_active_mask();
-       __sve_bool_type __fl = svpfalse_b();;
+       __sve_bool_type __fl = svpfalse_b();
        return __x ? __tr : __fl;
      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
      _S_load(const bool* __mem)
-      { 
+      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
-       const uint8_t* __p = reinterpret_cast<const uint8_t*>(__mem);
-       __sve_bool_type __u8_active_mask = __sve_vector_type<uint8_t, _Np>::__sve_active_mask();
-       __sve_vector_type_t<uint8_t, _Np> __u8_vec_mask_load = svld1(__u8_active_mask, __p);
-       __sve_bool_type __u8_mask = svcmpne(__u8_active_mask, __u8_vec_mask_load, 0);
+       const uint8_t* __p = reinterpret_cast<const uint8_t*>(__mem);
+       __sve_bool_type __u8_active_mask = __sve_vector_type<uint8_t, _Np>::__sve_active_mask();
+       __sve_vector_type_t<uint8_t, _Np> __u8_vec_mask_load = svld1(__u8_active_mask, __p);
+       __sve_bool_type __u8_mask = svcmpne(__u8_active_mask, __u8_vec_mask_load, 0);
 
-       __sve_bool_type __tp_mask = __u8_mask;
-       for (size_t __up_size = 1; __up_size != sizeof(_Tp); __up_size *= 2)
-         {
-           __tp_mask = svunpklo(__tp_mask);
-         }
+       __sve_bool_type __tp_mask = __u8_mask;
+       for (size_t __up_size = 1; __up_size != sizeof(_Tp); __up_size *= 2)
+         {
+           __tp_mask = svunpklo(__tp_mask);
+         }
 
        _SveMaskWrapper<sizeof(_Tp), simd_size_v<_Tp, _Abi>> __r{__tp_mask};
-       return __r; 
+       return __r;
      }
 
@@ -1639,8 +1641,9 @@ _S_masked_load(_SveMaskWrapper<_Bits, _Np> __merge, _SveMaskWrapper<_Bits, _Np>
      _GLIBCXX_SIMD_INTRINSIC static constexpr void
      _S_store(_SveMaskWrapper<_Bits, _Np> __v, bool* __mem) noexcept
      {
-       __execute_n_times<_Np>([&](auto __i)
-         _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; });
+       __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+         __mem[__i] = __v[__i];
+       });
      }
 
@@ -1659,8 +1662,9 @@ _S_masked_store(const _SveMaskWrapper<_Bits, _Np> __v, bool* __mem,
      _S_to_bits(_SveMaskWrapper<_Bits, _Np> __x)
      {
        _ULLong __r = 0;
-       __execute_n_times<_Np>(
-         [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r |= _ULLong(__x[__i]) << __i; });
+       __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+         __r |= _ULLong(__x[__i]) << __i;
+       });
        return __r;
      }
 
@@ -1669,8 +1673,9 @@ _S_to_bits(_SveMaskWrapper<_Bits, _Np> __x)
      _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>)
      {
        _SveMaskWrapper<sizeof(_Tp), _Np> __r;
-       __execute_n_times<_Np>([&](auto __i)
-         _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __bits[__i]); });
+       __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+         __r._M_set(__i, __bits[__i]);
+       });
        return __r;
      }
 
@@ -1730,8 +1735,9 @@ for (size_t __up_size = sizeof(_Up); __up_size != sizeof(_Tp); __up_size *= 2)
      _S_convert(_BitMask<_Np, _Sanitized> __x)
      {
        _MaskMember<_Tp> __r{};
-       __execute_n_times<_Np>([&](auto __i)
-         _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __x[__i]); });
+       __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+         __r._M_set(__i, __x[__i]);
+       });
        return __r;
      }
 
@@ -1831,12 +1837,18 @@ _S_all_of(simd_mask<_Tp, _Abi> __k)
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_any_of(simd_mask<_Tp, _Abi> __k)
-      { return svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(), __k._M_data); }
+      {
+        return svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(),
+                           __k._M_data);
+      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static bool
      _S_none_of(simd_mask<_Tp, _Abi> __k)
-      { return !svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(), __k._M_data); }
+      {
+        return !svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(),
+                            __k._M_data);
+      }
 
@@ -1849,7 +1861,10 @@ _S_some_of(simd_mask<_Tp, _Abi> __k)
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
      _S_find_first_set(simd_mask<_Tp, _Abi> __k)
-      { return svclastb(svpfirst(__k._M_data, svpfalse()), -1, __sve_mask_type<sizeof(_Tp)>::__index0123); }
+      {
+        return svclastb(svpfirst(__k._M_data, svpfalse()),
+                        -1, __sve_mask_type<sizeof(_Tp)>::__index0123);
+      }
 
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC static int
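
For reference, the element-wise fallback pattern this patch touches in several places (__converter_fallback, the SVE branch of _S_fpclassify, and the SVE mask _S_convert) reduces to the following minimal, self-contained sketch. The names and types here (converter_fallback, std::array storage) are simplified stand-ins for illustration, not the library's internals; the real code stores elements in _SimdWrapper/_SveSimdWrapper and uses __execute_n_times<_Np> with _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA so the per-element loop fully unrolls and inlines.

#include <array>
#include <cstddef>

// Convert each element of 'from' to To with a scalar cast, one lane at a
// time -- the generic fallback used when no vector conversion is available.
template <typename To, typename FromV, std::size_t Np>
  std::array<To, Np>
  converter_fallback(const FromV& from)
  {
    std::array<To, Np> ret{};
    for (std::size_t i = 0; i < Np; ++i)
      ret[i] = static_cast<To>(from[i]);   // like __ret._M_set(__i, static_cast<_To>(__a[__i]))
    return ret;
  }

int main()
{
  std::array<float, 4> f{1.5f, 2.5f, 3.5f, 4.5f};
  auto ints = converter_fallback<int, std::array<float, 4>, 4>(f);
  return ints[3] == 4 ? 0 : 1;  // truncating float->int conversion per lane
}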