Hi, I applied and did extended testing on x86_64 (no regressions) and aarch64 using qemu testing SVE 256, 512, and 1024. Looks good! While going through the applied patch I noticed a few style issues that I simply turned into a patch (attached). A few comments inline. Sorry for not seeing these before. On Friday, 9 February 2024 15:28:10 CET Srinivas Yadav Singanaboina wrote: > diff --git a/libstdc++-v3/include/experimental/bits/simd.h > b/libstdc++-v3/include/experimental/bits/simd.h index > 90523ea57dc..d274cd740fe 100644 > --- a/libstdc++-v3/include/experimental/bits/simd.h > +++ b/libstdc++-v3/include/experimental/bits/simd.h > @@ -39,12 +39,16 @@ > #include > #include > #include > +#include > > #if _GLIBCXX_SIMD_X86INTRIN > #include > #elif _GLIBCXX_SIMD_HAVE_NEON > #include > #endif > +#if _GLIBCXX_SIMD_HAVE_SVE > +#include > +#endif > > /** @ingroup ts_simd > * @{ > @@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double; > using __m512i [[__gnu__::__vector_size__(64)]] = long long; > #endif > > +#if _GLIBCXX_SIMD_HAVE_SVE > +constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / > 8; +#else > +constexpr inline int __sve_vectorized_size_bytes = 0; > +#endif > + > namespace simd_abi { > // simd_abi forward declarations {{{ > // implementation details: > @@ -108,6 +118,9 @@ template > template > struct _VecBltnBtmsk; > > +template > + struct _SveAbi; > + > template > using _VecN = _VecBuiltin; > > @@ -123,6 +136,9 @@ template > template > using _Neon = _VecBuiltin<_UsedBytes>; > > +template > + using _Sve = _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>; > + > // implementation-defined: > using __sse = _Sse<>; > using __avx = _Avx<>; > @@ -130,6 +146,7 @@ using __avx512 = _Avx512<>; > using __neon = _Neon<>; > using __neon128 = _Neon<16>; > using __neon64 = _Neon<8>; > +using __sve = _Sve<>; > > // standard: > template > @@ -250,6 +267,8 @@ constexpr inline bool __support_neon_float = > false; > #endif > > +constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE; > + > #ifdef _ARCH_PWR10 > constexpr inline bool __have_power10vec = true; > #else > @@ -356,12 +375,13 @@ namespace __detail > > | (__have_avx512vnni << 27) > | (__have_avx512vpopcntdq << 28) > | (__have_avx512vp2intersect << 29); > > - else if constexpr (__have_neon) > + else if constexpr (__have_neon || __have_sve) > return __have_neon > > | (__have_neon_a32 << 1) > | (__have_neon_a64 << 2) > | (__have_neon_a64 << 2) > > - | (__support_neon_float << 3); > + | (__support_neon_float << 3) > + | (__have_sve << 4); This is not enough. This should list all feature flags that might have a (significant enough) influence on code-gen in inline functions (that are not always_inline). AFAIU at least __ARM_FEATURE_SVE2 is necessary. But I assume __ARM_FEATURE_SVE2_BITPERM, __ARM_FEATURE_SVE_BITS, __ARM_FEATURE_SVE_MATMUL_INT8, and __ARM_FEATURE_SVE_VECTOR_OPERATORS are also relevant. Maybe more? > [...] 
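(Still on the feature-flags point above.) Roughly what I have in mind, as an untested sketch; the bit positions are arbitrary, they only need to be unique and stable, and encoding __ARM_FEATURE_SVE_BITS directly keeps TUs built with different -msve-vector-bits apart:

      else if constexpr (__have_neon || __have_sve)
        return __have_neon
                 | (__have_neon_a32 << 1)
                 | (__have_neon_a64 << 2)
                 | (__support_neon_float << 3)
                 | (__have_sve << 4)
#ifdef __ARM_FEATURE_SVE2
                 | (1ULL << 5)
#endif
#ifdef __ARM_FEATURE_SVE2_BITPERM
                 | (1ULL << 6)
#endif
#ifdef __ARM_FEATURE_SVE_MATMUL_INT8
                 | (1ULL << 7)
#endif
#if __ARM_FEATURE_SVE_BITS > 0
                 // vector width in units of 128 bits
                 | ((__ARM_FEATURE_SVE_BITS / 128ULL) << 8)
#endif
                 ;

Plus a bit for __ARM_FEATURE_SVE_VECTOR_OPERATORS and whatever else turns out to matter for code-gen.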
bits/simd.h: > // fall back to fixed_size only if scalar and native ABIs don't match > template > struct __deduce_fixed_size_fallback {}; > > +template > + struct __no_sve_deduce_fixed_size_fallback {}; > + > template > struct __deduce_fixed_size_fallback<_Tp, _Np, > enable_if_t::template _S_is_valid_v<_Tp>>> > { using type = simd_abi::fixed_size<_Np>; }; > > +template > + struct __no_sve_deduce_fixed_size_fallback<_Tp, _Np, > + enable_if_t::template _S_is_valid_v<_Tp>>> > + { using type = simd_abi::fixed_size<_Np>; }; > + > template > struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {}; > > +template > + struct __no_sve_deduce_impl : public > __no_sve_deduce_fixed_size_fallback<_Tp, _Np> {}; I believe you don't need __no_sve_deduce_fixed_size_fallback. Simply derive __no_sve_deduce_impl from __deduce_fixed_size_fallback. No? > diff --git a/libstdc++-v3/include/experimental/bits/simd_converter.h > b/libstdc++-v3/include/experimental/bits/simd_converter.h index > 3160e251632..b233d2c70a5 100644 > --- a/libstdc++-v3/include/experimental/bits/simd_converter.h > +++ b/libstdc++-v3/include/experimental/bits/simd_converter.h > @@ -28,6 +28,18 @@ > #if __cplusplus >= 201703L > > _GLIBCXX_SIMD_BEGIN_NAMESPACE > + > +template > +_Ret __converter_fallback(_Arg __a) > + { > + _Ret __ret{}; > + __execute_n_times<_Np>( > + [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { > + __ret._M_set(__i, static_cast<_To>(__a[__i])); > + }); > + return __ret; > + } > + > // _SimdConverter scalar -> scalar {{{ > template > struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::scalar, > @@ -56,14 +68,16 @@ template > }; > > // }}} > -// _SimdConverter "native 1" -> "native 2" {{{ > +// _SimdConverter "native non-sve 1" -> "native non-sve 2" {{{ > template > struct _SimdConverter< > _From, _AFrom, _To, _ATo, > enable_if_t __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>, > is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>, > - conjunction, is_same<_AFrom, _ATo>>>>> > + conjunction, is_same<_AFrom, _ATo>>> > + && !(__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>()) > + >> > { > using _Arg = typename _AFrom::template __traits<_From>::_SimdMember; > using _Ret = typename _ATo::template __traits<_To>::_SimdMember; > @@ -75,6 +89,26 @@ template typename _ATo> { return __vector_convert<_V>(__a, __more...); } > }; > > +// }}} > +// _SimdConverter "native 1" -> "native 2" {{{ > +template > + struct _SimdConverter< > + _From, _AFrom, _To, _ATo, > + enable_if_t + __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>, > + is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>, > + conjunction, is_same<_AFrom, _ATo>>> > + && (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>()) > + >> > + { > + using _Arg = typename _AFrom::template __traits<_From>::_SimdMember; > + using _Ret = typename _ATo::template __traits<_To>::_SimdMember; > + > + _GLIBCXX_SIMD_INTRINSIC constexpr _Ret > + operator()(_Arg __x) const noexcept > + { return __converter_fallback<_Arg, _Ret, _To, simd_size_v<_To, > _ATo>>(__x); } + }; > + I'd prefer if you could solve this with a constexpr-if in operator() instead of making the enable_if condition even longer. Feel free to static_assert(sizeof...(_More) == 0) in the SVE branch. (Why is it unnecessary, though?) 
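I.e. keep only the one partial specialization and branch inside, something like this (untested, and modulo the exact _V typedef you already have there):

    using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
    using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
    using _V = __vector_type_t<_To, simd_size_v<_To, _ATo>>;

    template <typename... _More>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
      operator()(_Arg __x, _More... __more) const noexcept
      {
        if constexpr (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
          {
            static_assert(sizeof...(_More) == 0);
            return __converter_fallback<_Arg, _Ret, _To,
                                        simd_size_v<_To, _ATo>>(__x);
          }
        else
          return __vector_convert<_V>(__x, __more...);
      }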
> // }}} > // _SimdConverter scalar -> fixed_size<1> {{{1 > template > @@ -111,6 +145,10 @@ template > if constexpr (is_same_v<_From, _To>) > return __x; > > + // fallback to sequential when sve is available > + else if constexpr (__have_sve) > + return __converter_fallback<_Arg, _Ret, _To, _Np>(__x); > + At least the next three cases should all work, no? Or is the point that this fallback leads to better code-gen with SVE? > diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h > b/libstdc++-v3/include/experimental/bits/simd_detail.h index > 1fb77866bb2..52fdf7149bf 100644 > --- a/libstdc++-v3/include/experimental/bits/simd_detail.h > +++ b/libstdc++-v3/include/experimental/bits/simd_detail.h > @@ -61,6 +61,11 @@ > #else > #define _GLIBCXX_SIMD_HAVE_NEON_A64 0 > #endif > +#if (__ARM_FEATURE_SVE_BITS > 0 && __ARM_FEATURE_SVE_VECTOR_OPERATORS==1) > +#define _GLIBCXX_SIMD_HAVE_SVE 1 > +#else > +#define _GLIBCXX_SIMD_HAVE_SVE 0 > +#endif > //}}} > // x86{{{ > #ifdef __MMX__ > @@ -267,7 +272,7 @@ > #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0) > #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1) > > -#if __STRICT_ANSI__ || defined __clang__ > +#if _GLIBCXX_SIMD_HAVE_SVE || __STRICT_ANSI__ || defined __clang__ > #define _GLIBCXX_SIMD_CONSTEXPR > #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const This is something I'd like to see resolved. (But not necessary for this patch, IMHO.) Even if some parts of the SVE interface can't be used in constant expressions, it must be possible to work around those with `if (__builtin_is_constant_evaluated())` branches. For C++26 we will have to do this, because the std::simd interface is fully constexpr. > diff --git a/libstdc++-v3/include/experimental/bits/simd_sve.h > b/libstdc++-v3/include/experimental/bits/simd_sve.h new file mode 100644 > index 00000000000..123242a3a62 > --- /dev/null > +++ b/libstdc++-v3/include/experimental/bits/simd_sve.h [...] 
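(Regarding the __builtin_is_constant_evaluated() remark above.) The shape I mean is the following, as a minimal standalone sketch; it needs <arm_sve.h> and an SVE target, and the hard part for this patch is of course giving _SveSimdWrapper a representation that is usable in constant expressions, not the branching itself:

    constexpr int
    __first_lane_sum(int __a, int __b)   // name invented for the example
    {
      if (__builtin_is_constant_evaluated())
        return __a + __b;                // plain C++ on the constant-evaluation path
      else
        // the run-time path is free to use SVE intrinsics
        return static_cast<int>(
                 svaddv(svwhilelt_b32(0, 1), svdup_s32(__a + __b)));
    }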
> +template > + struct __sve_vector_type > + {}; > + > +template > + using __sve_vector_type_t = typename __sve_vector_type<_Tp, _Np>::type; > + > +template > + struct __sve_vector_type > + { > + typedef svint8_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(int8_t __dup) > + { return svdup_s8(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b8(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svuint8_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(uint8_t __dup) > + { return svdup_u8(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b8(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svint16_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(int16_t __dup) > + { return svdup_s16(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b16(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svuint16_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(uint16_t __dup) > + { return svdup_u16(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b16(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svint32_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(int32_t __dup) > + { return svdup_s32(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b32(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svuint32_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(uint32_t __dup) > + { return svdup_u32(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b32(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svint64_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(int64_t __dup) > + { return svdup_s64(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b64(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svuint64_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(uint64_t __dup) > + { return svdup_u64(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b64(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svfloat32_t __sve_vlst_type > 
__attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(float __dup) > + { return svdup_f32(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b32(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + { > + typedef svfloat64_t __sve_vlst_type > __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); + > + inline static __sve_vlst_type > + __sve_broadcast(double __dup) > + { return svdup_f64(__dup); } > + > + inline static __sve_bool_type > + __sve_active_mask() > + { return svwhilelt_b64(size_t(0), _Np); }; > + > + using type = __sve_vlst_type; > + }; > + > +template > + struct __sve_vector_type > + : __sve_vector_type<__get_sve_value_type_t, _Np> > + {}; > + > +template > + struct __sve_vector_type > + : __sve_vector_type<__get_sve_value_type_t, _Np> > + {}; > + > +template > + struct __sve_vector_type > + : __sve_vector_type<__get_sve_value_type_t, _Np> > + {}; > + > +template > + struct __sve_vector_type > + : __sve_vector_type<__get_sve_value_type_t, _Np> > + {}; > + > +template > + struct __sve_vector_type > + : __sve_vector_type<__get_sve_value_type_t, _Np> > + {}; > + > +template > + struct __sve_vector_type > + : __sve_vector_type<__get_sve_value_type_t, _Np> > + {}; Please replace the last 6 partial specializations with a generic implementation of the primary template: template struct __sve_vector_type : __sve_vector_type<__get_sve_value_type_t, _Np> {}; This avoids issues on platforms that define (u)int64_t as (unsigned) long long and is simpler in any case. [...] > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np> > + _S_load(const _Up* __p, _SveMaskWrapper __k) > + { > + using _STp = __get_sve_value_type_t<_Tp>; > + using _SUp = __get_sve_value_type_t<_Up>; > + using _V = __sve_vector_type_t<_Tp, _Np>; > + const _SUp* __up = reinterpret_cast(__p); > + > + if constexpr (std::is_same_v<_Tp, _Up>) > + return _V(svld1(__k._M_data, __up)); > + if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up> > + && (sizeof(_Tp) > sizeof(_Up))) > + { > + if constexpr (std::is_same_v<_SUp, int8_t>) > + { > + if constexpr (std::is_same_v<_STp, int16_t>) > + return _V(svld1sb_s16(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint16_t>) > + return _V(svld1sb_u16(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, int32_t>) > + return _V(svld1sb_s32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint32_t>) > + return _V(svld1sb_u32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, int64_t>) > + return _V(svld1sb_s64(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint64_t>) > + return _V(svld1sb_u64(__k._M_data, __up)); > + } > + if constexpr (std::is_same_v<_SUp, uint8_t>) > + { > + if constexpr (std::is_same_v<_STp, int16_t>) > + return _V(svld1ub_s16(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint16_t>) > + return _V(svld1ub_u16(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, int32_t>) > + return _V(svld1ub_s32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint32_t>) > + return _V(svld1ub_u32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, int64_t>) > + return _V(svld1ub_s64(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint64_t>) > + return _V(svld1ub_u64(__k._M_data, __up)); > + } > + if constexpr (std::is_same_v<_SUp, int16_t>) > + { > + if constexpr 
(std::is_same_v<_STp, int32_t>) > + return _V(svld1sh_s32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint32_t>) > + return _V(svld1sh_u32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, int64_t>) > + return _V(svld1sh_s64(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint64_t>) > + return _V(svld1sh_u64(__k._M_data, __up)); > + } > + if constexpr (std::is_same_v<_SUp, uint16_t>) > + { > + if constexpr (std::is_same_v<_STp, int32_t>) > + return _V(svld1uh_s32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint32_t>) > + return _V(svld1uh_u32(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, int64_t>) > + return _V(svld1uh_s64(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint64_t>) > + return _V(svld1uh_u64(__k._M_data, __up)); > + } > + if constexpr (std::is_same_v<_SUp, int32_t>) > + { > + if constexpr (std::is_same_v<_STp, int64_t>) > + return _V(svld1sw_s64(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint64_t>) > + return _V(svld1sw_u64(__k._M_data, __up)); > + } > + if constexpr (std::is_same_v<_SUp, uint32_t>) > + { > + if constexpr (std::is_same_v<_STp, int64_t>) > + return _V(svld1uw_s64(__k._M_data, __up)); > + if constexpr (std::is_same_v<_STp, uint64_t>) > + return _V(svld1uw_u64(__k._M_data, __up)); > + } > + } > + return __generate_from_n_evaluations<_Np, __sve_vector_type_t<_Tp, > _Np>>( > + [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { > + return __k[__i] ? static_cast<_Tp>(__p[__i]) : _Tp{}; > + }); Fine for now, because this unlikely to be used much anyway. But I'd prefer to see masked vector load(s) + vector conversion(s) at some point. > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x, > _SveMaskWrapper __k) + { > + using _SUp = __get_sve_value_type_t<_Up>; > + using _STp = __get_sve_value_type_t<_Tp>; > + > + _SUp* __up = reinterpret_cast<_SUp*>(__p); > + > + if constexpr (std::is_same_v<_Tp, _Up>) > + return svst1(__k._M_data, __up, __x); > + if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up> > + && (sizeof(_Tp) > sizeof(_Up))) > + { > + if constexpr (std::is_same_v<_SUp, int8_t> && std::is_signed_v<_STp>) > + return svst1b(__k._M_data, __up, __x); > + if constexpr (std::is_same_v<_SUp, uint8_t> && > std::is_unsigned_v<_STp>) + return svst1b(__k._M_data, __up, __x); > + if constexpr (std::is_same_v<_SUp, int16_t> && std::is_signed_v<_STp>) > + return svst1h(__k._M_data, __up, __x); > + if constexpr (std::is_same_v<_SUp, uint16_t> && > std::is_unsigned_v<_STp>) + return svst1h(__k._M_data, __up, __x); > + if constexpr (std::is_same_v<_SUp, int32_t> && std::is_signed_v<_STp>) > + return svst1w(__k._M_data, __up, __x); > + if constexpr (std::is_same_v<_SUp, uint32_t> && > std::is_unsigned_v<_STp>) + return svst1w(__k._M_data, __up, __x); > + } > + > + __execute_n_times<_Np>([&](auto __i) > _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + if (__k[__i]) > + __p[__i] = static_cast<_Up>(__x[__i]); > + }); Same as for converting masked loads... 
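For the record, what I'd eventually like to see here is a masked vector load followed by a vector conversion instead of the element-wise loop, e.g. (untested, sketched only for the case of equal lane width):

      // hypothetical extra branch in _S_load, _Up = int32_t -> _Tp = float
      if constexpr (std::is_same_v<_SUp, int32_t> && std::is_same_v<_STp, float32_t>)
        return _V(svcvt_f32_x(__k._M_data, svld1(__k._M_data, __up)));

The store direction would be the mirror image: convert first, then svst1 (or svst1b/h/w for narrowing).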
> + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np> > + _S_blend(_SveMaskWrapper __k, _SveSimdWrapper<_Tp, > _Np> __at0, + _SveSimdWrapper<_Tp, _Np> __at1) > + { return svsel(__k._M_data, __at1._M_data, __at0._M_data); } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem) > + { > + __execute_n_times<_Np>([&](auto __i) > _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + __mem[__i] = __x[__i]; > + }); > + } > +}; > + > +template > + struct _SimdImplSve > + { > + template > + using _MaskMember = typename _Abi::template _MaskMember<_Tp>; > + > + template > + using _SimdMember = typename _Abi::template > __traits<_Tp>::_SimdMember; + > + using _CommonImpl = typename _Abi::_CommonImpl; > + using _SuperImpl = typename _Abi::_SimdImpl; > + using _MaskImpl = typename _Abi::_MaskImpl; > + > + template > + static constexpr size_t _S_full_size = _Abi::template > _S_full_size<_Tp>; + > + template > + static constexpr size_t _S_size = _Abi::template _S_size<_Tp>; > + > + template > + using _TypeTag = _Tp*; > + > + using abi_type = _Abi; > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr auto > + _S_broadcast(_Tp __x) noexcept > + { > + return __sve_vector_type<_Tp, __sve_vectorized_size_bytes / sizeof(_Tp)> > + ::__sve_broadcast(__x); > + } > + > + template > + inline static constexpr _SimdMember<_Tp> > + _S_generator(_Fp&& __gen, _TypeTag<_Tp>) > + { > + constexpr size_t _Np = _S_size<_Tp>; > + _SveSimdWrapper<_Tp, _Np> __ret; > + __execute_n_times<_S_size<_Tp>>( > + [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __ret._M_set(__i, > __gen(__i)); }); + return __ret; > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> > + _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept > + { > + constexpr size_t _Np = _S_size<_Tp>; > + _SimdMember<_Tp> __ret = _CommonImpl::template _S_load<_Tp, _Up, _Np>( > + __mem, _SveMaskWrapper{ > + __sve_vector_type<_Tp, _Np>::__sve_active_mask()}); > + return __ret; > + } > + > + template > + static constexpr inline _SveSimdWrapper<_Tp, _Np> > + _S_masked_load(_SveSimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> > __k, const _Up* __mem) + noexcept > + { > + __sve_vector_type_t<_Tp, _Np> __v > + = _CommonImpl::template _S_load<_Tp, _Up, _Np>(__mem, __k); > + __sve_vector_type_t<_Tp, _Np> __ret = svsel(__k._M_data, __v, > __merge._M_data); + return __ret; > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept > + { > + constexpr size_t _Np = _S_size<_Tp>; > + _CommonImpl::template _S_store<_Tp, _Up, _Np>( > + __mem, __v, __sve_vector_type<_Tp, _Np>::__sve_active_mask()); > + } > + > + template > + static constexpr inline void > + _S_masked_store(const _SveSimdWrapper<_Tp, _Np> __v, _Up* __mem, > + const _SveMaskWrapper __k) noexcept > + { _CommonImpl::template _S_store<_Tp, _Up, _Np>(__mem, __v, __k); } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> > + _S_negate(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { > + return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, + __sve_vector_type<_Tp, > _Np>::__sve_broadcast(_Tp{})); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp > + _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op) > + { > + auto __x_data = __x._M_data; > + constexpr size_t _Np = simd_size_v<_Tp, _Abi>; > + using __sve_vec_t = 
__sve_vector_type_t<_Tp, _Np>; > + std::size_t __i = __x.size(); > + for (; (__i % 2) != 1; __i /= 2) > + { > + __x_data = __binary_op(simd<_Tp, _Abi>( > + __private_init, _SveSimdWrapper<_Tp, _Np>( > + __sve_vec_t(svuzp1(__x_data, __x_data)))), > + simd<_Tp, _Abi>( > + __private_init, _SveSimdWrapper<_Tp, _Np>( > + __sve_vec_t(svuzp2(__x_data, __x_data)))) > + )._M_data; > + } > + _Tp __res = __x_data[0]; > + for (size_t __ri = 1; __ri != __i; __ri++) > + __res = __binary_op(__x_data[__ri], __res); > + return __res; > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp > + _S_reduce(simd<_Tp, _Abi> __x, plus<>) > + { > + return svaddv(__sve_vector_type<_Tp, > _S_size<_Tp>>::__sve_active_mask(), __x._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp > + _S_reduce(simd<_Tp, _Abi> __x, bit_and<>) > + { > + return svandv(__sve_vector_type<_Tp, > _S_size<_Tp>>::__sve_active_mask(), __x._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp > + _S_reduce(simd<_Tp, _Abi> __x, bit_or<>) > + { > + return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), > __x._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp > + _S_reduce(simd<_Tp, _Abi> __x, bit_xor<>) > + { > + return sveorv(__sve_vector_type<_Tp, > _S_size<_Tp>>::__sve_active_mask(), __x._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp > + _S_reduce(simd<_Tp, _Abi> __x, __detail::_Maximum()) > + { > + return svmaxv(__sve_vector_type<_Tp, > _S_size<_Tp>>::__sve_active_mask(), __x._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp > + _S_reduce(simd<_Tp, _Abi> __x, __detail::_Minimum()) > + { > + return svminv(__sve_vector_type<_Tp, > _S_size<_Tp>>::__sve_active_mask(), __x._M_data); + } > + > + template > + _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr > + __sve_vector_type_t<_Tp, _Np> > + _S_min(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b) > + { > + return svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __a._M_data, __b._M_data); + } > + > + template > + _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr > + __sve_vector_type_t<_Tp, _Np> > + _S_max(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __b) > + { > + return svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __a._M_data, __b._M_data); + } > + > + template > + _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr > + pair<_SveSimdWrapper<_Tp, _Np>, _SveSimdWrapper<_Tp, _Np>> > + _S_minmax(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> > __b) + { > + return { > + svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __a._M_data, > __b._M_data), + svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __a._M_data, __b._M_data) + }; > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_complement(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { > + if constexpr (is_floating_point_v<_Tp>) > + { > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + return __sve_reinterpret_cast<_Tp>( > + svnot_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + __sve_reinterpret_cast<_Ip>(__x))); > + } > + else > + return svnot_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _SveSimdWrapper<_Tp, _Np> > + _S_unary_minus(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { > + 
return svmul_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, + static_cast<_Tp>(-1)); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_plus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { return __x._M_data + __y._M_data; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_minus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { return __x._M_data - __y._M_data; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_multiplies(_SveSimdWrapper<_Tp, _Np> __x, > _SveSimdWrapper<_Tp, _Np> __y) + { return __x._M_data * __y._M_data; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_divides(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + __sve_vector_type_t<_Tp, _Np> __y_padded = > svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), + > __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1)); + > return __x._M_data / __y_padded; > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_modulus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + __sve_vector_type_t<_Tp, _Np> __y_padded = > svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), + > __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1)); + > return __x._M_data % __y_padded; > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_bit_and(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + if constexpr (is_floating_point_v<_Tp>) > + { > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + return __sve_reinterpret_cast<_Tp>( > + svand_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + __sve_reinterpret_cast<_Ip>(__x), > __sve_reinterpret_cast<_Ip>(__y))); + } > + else > + return svand_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + __x._M_data, __y._M_data); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_bit_or(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + if constexpr (is_floating_point_v<_Tp>) > + { > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + return __sve_reinterpret_cast<_Tp>( > + svorr_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + __sve_reinterpret_cast<_Ip>(__x), > __sve_reinterpret_cast<_Ip>(__y))); + } > + else > + return svorr_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + __x._M_data, __y._M_data); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_bit_xor(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + if constexpr (is_floating_point_v<_Tp>) > + { > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + return __sve_reinterpret_cast<_Tp>( > + sveor_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + __sve_reinterpret_cast<_Ip>(__x), > __sve_reinterpret_cast<_Ip>(__y))); + } > + else > + return sveor_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + __x._M_data, __y._M_data); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static __sve_vector_type_t<_Tp, _Np> > + _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { return __x._M_data << __y._M_data; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static 
__sve_vector_type_t<_Tp, _Np> > + _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x, > _SveSimdWrapper<_Tp, _Np> __y) + { return __x._M_data >> __y._M_data; > } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, int __y) > + { return __x._M_data << __y; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, > _Np> + _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x, int __y) > + { return __x._M_data >> __y; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_increment(_SveSimdWrapper<_Tp, _Np>& __x) > + { __x = __x._M_data + 1; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_decrement(_SveSimdWrapper<_Tp, _Np>& __x) > + { __x = __x._M_data - 1; } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> > + _S_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> > __y) + { > + return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> > + _S_not_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + return svcmpne(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> > + _S_less(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) > + { > + return svcmplt(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> > + _S_less_equal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + return svcmple(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data); + } > + > + // simd.math > +#define _GLIBCXX_SIMD_MATH_FALLBACK(__name) > \ + template typename... _More> \ + static > _SveSimdWrapper<_Tp, _Np> _S_##__name(const _SveSimdWrapper<_Tp, _Np>& __x, > \ + const _More&... __more) \ > + { > \ + _SveSimdWrapper<_Tp, _Np> __r; > \ > + __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { > \ + __r._M_set(__i, __name(__x[__i], __more[__i]...)); > \ + }); > \ + return __r; > \ > + } > + > +#define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name) > \ + template > \ + static auto > _S_##__name(const _Tp& __x, const _More&... 
__more) > \ + { > \ + return __fixed_size_storage_t<_RetTp, > _Tp::_S_size>::_S_generate( \ + [&] (auto __meta) > _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ + > return __meta._S_generator( > \ + [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { > \ + return __name(__x[__meta._S_offset + __i], > \ + __more[__meta._S_offset + __i]...); > \ + }, static_cast<_RetTp*>(nullptr)); > \ + }); > \ + } > + > + _GLIBCXX_SIMD_MATH_FALLBACK(acos) > + _GLIBCXX_SIMD_MATH_FALLBACK(asin) > + _GLIBCXX_SIMD_MATH_FALLBACK(atan) > + _GLIBCXX_SIMD_MATH_FALLBACK(atan2) > + _GLIBCXX_SIMD_MATH_FALLBACK(cos) > + _GLIBCXX_SIMD_MATH_FALLBACK(sin) > + _GLIBCXX_SIMD_MATH_FALLBACK(tan) > + _GLIBCXX_SIMD_MATH_FALLBACK(acosh) > + _GLIBCXX_SIMD_MATH_FALLBACK(asinh) > + _GLIBCXX_SIMD_MATH_FALLBACK(atanh) > + _GLIBCXX_SIMD_MATH_FALLBACK(cosh) > + _GLIBCXX_SIMD_MATH_FALLBACK(sinh) > + _GLIBCXX_SIMD_MATH_FALLBACK(tanh) > + _GLIBCXX_SIMD_MATH_FALLBACK(exp) > + _GLIBCXX_SIMD_MATH_FALLBACK(exp2) > + _GLIBCXX_SIMD_MATH_FALLBACK(expm1) > + _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb) > + _GLIBCXX_SIMD_MATH_FALLBACK(log) > + _GLIBCXX_SIMD_MATH_FALLBACK(log10) > + _GLIBCXX_SIMD_MATH_FALLBACK(log1p) > + _GLIBCXX_SIMD_MATH_FALLBACK(log2) > + _GLIBCXX_SIMD_MATH_FALLBACK(logb) > + > + // modf implemented in simd_math.h > + _GLIBCXX_SIMD_MATH_FALLBACK(scalbn) > + _GLIBCXX_SIMD_MATH_FALLBACK(scalbln) > + _GLIBCXX_SIMD_MATH_FALLBACK(cbrt) > + _GLIBCXX_SIMD_MATH_FALLBACK(pow) > + _GLIBCXX_SIMD_MATH_FALLBACK(erf) > + _GLIBCXX_SIMD_MATH_FALLBACK(erfc) > + _GLIBCXX_SIMD_MATH_FALLBACK(lgamma) > + _GLIBCXX_SIMD_MATH_FALLBACK(tgamma) > + > + _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint) > + _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint) > + > + _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround) > + _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround) > + > + _GLIBCXX_SIMD_MATH_FALLBACK(fmod) > + _GLIBCXX_SIMD_MATH_FALLBACK(remainder) > + > + template > + static _SveSimdWrapper<_Tp, _Np> > + _S_remquo(const _SveSimdWrapper<_Tp, _Np> __x, const > _SveSimdWrapper<_Tp, _Np> __y, + __fixed_size_storage_t* __z) > + { > + _SveSimdWrapper<_Tp, _Np> __r{}; > + __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { > + int __tmp; > + __r._M_set(__i, remquo(__x[__i], __y[__i], &__tmp)); > + __z->_M_set(__i, __tmp); > + }); > + return __r; > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t > + _S_fpclassify(_SveSimdWrapper<_Tp, _Np> __x) > + { > + __fixed_size_storage_t __r{}; > + __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { > + __r._M_set(__i, std::fpclassify(__x[__i])); > + }); > + return __r; > + } > + > + // copysign in simd_math.h > + _GLIBCXX_SIMD_MATH_FALLBACK(nextafter) > + _GLIBCXX_SIMD_MATH_FALLBACK(fdim) > + > +#undef _GLIBCXX_SIMD_MATH_FALLBACK > +#undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET > + > + template > + static constexpr _MaskMember<_Tp> > + __fp_cmp(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> > __y, _Op __op) + { > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + using _VI = __sve_vector_type_t<_Ip, _Np>; > + using _WI = _SveSimdWrapper<_Ip, _Np>; > + const _WI __fmv = __sve_vector_type<_Ip, > _Np>::__sve_broadcast(__finite_max_v<_Ip>); + const _WI __zerov = > __sve_vector_type<_Ip, _Np>::__sve_broadcast(0); + const _WI __xn = > _VI(__sve_reinterpret_cast<_Ip>(__x)); > + const _WI __yn = _VI(__sve_reinterpret_cast<_Ip>(__y)); > + > + const _WI __xp > + = svsel(_S_less(__xn, __zerov), 
_S_unary_minus(_WI(_S_bit_and(__xn, > __fmv))), __xn); + const _WI __yp > + = svsel(_S_less(__yn, __zerov), _S_unary_minus(_WI(_S_bit_and(__yn, > __fmv))), __yn); + return svbic_z(__sve_vector_type<_Ip, > _Np>::__sve_active_mask(), __op(__xp, __yp)._M_data, + > _SuperImpl::_S_isunordered(__x, __y)._M_data); > + } > + > + template > + static constexpr _MaskMember<_Tp> > + _S_isgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> > __y) noexcept + { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { > return _S_less(__yp, __xp); }); } + > + template > + static constexpr _MaskMember<_Tp> > + _S_isgreaterequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) noexcept + { return __fp_cmp(__x, __y, [](auto __xp, auto > __yp) { return _S_less_equal(__yp, __xp); }); } + > + template > + static constexpr _MaskMember<_Tp> > + _S_isless(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> > __y) noexcept + { return __fp_cmp(__x, __y, [](auto __xp, auto __yp) { > return _S_less(__xp, __yp); }); } + > + template > + static constexpr _MaskMember<_Tp> > + _S_islessequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) noexcept + { return __fp_cmp(__x, __y, [](auto __xp, auto > __yp) { return _S_less_equal(__xp, __yp); }); } + > + template > + static constexpr _MaskMember<_Tp> > + _S_islessgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) noexcept + { > + return svbic_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > + _SuperImpl::_S_not_equal_to(__x, __y)._M_data, > + _SuperImpl::_S_isunordered(__x, __y)._M_data); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_abs(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_fabs(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_sqrt(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svsqrt_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_ldexp(_SveSimdWrapper<_Tp, _Np> __x, __fixed_size_storage_t _Np> __y) noexcept + { > + auto __sve_register = __y.first; > + if constexpr (std::is_same_v<_Tp, float>) > + return svscale_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, + __sve_register._M_data); > + else > + { > + __sve_vector_type_t __sve_d_register = > svunpklo(__sve_register); + return svscale_z(__sve_vector_type<_Tp, > _Np>::__sve_active_mask(), __x._M_data, + __sve_d_register); > + } > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_fma(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y, > + _SveSimdWrapper<_Tp, _Np> __z) > + { > + return svmad_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data, + __z._M_data); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_fmax(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) > + { > + return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_fmin(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y) > + { 
> + return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> > + _S_isfinite([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x) > + { > +#if __FINITE_MATH_ONLY__ > + return __sve_vector_type_t<_Tp, _Np>::__sve_all_true_mask(); > +#else > + // if all exponent bits are set, __x is either inf or NaN > + > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + const __sve_vector_type_t<_Ip, _Np> __absn = > __sve_reinterpret_cast<_Ip>(_S_abs(__x)); + const __sve_vector_type_t<_Ip, > _Np> __maxn > + = __sve_reinterpret_cast<_Ip>( > + __sve_vector_type<_Tp, _Np>::__sve_broadcast(__finite_max_v<_Tp>)); > + > + return _S_less_equal(_SveSimdWrapper<_Ip, _Np>{__absn}, > _SveSimdWrapper<_Ip, _Np>{__maxn}); +#endif > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> > + _S_isinf([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x) > + { > +#if __FINITE_MATH_ONLY__ > + return {}; // false > +#else > + return _S_equal_to<_Tp, _Np>(_S_abs(__x), > _S_broadcast(__infinity_v<_Tp>)); +#endif > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> > + _S_isnan([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x) > + { > +#if __FINITE_MATH_ONLY__ > + return {}; // false > +#else > + return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __x._M_data); +#endif > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> > + _S_isnormal(_SveSimdWrapper<_Tp, _Np> __x) > + { > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + using _V = __sve_vector_type_t<_Ip, _Np>; > + using _VW = _SveSimdWrapper<_Ip, _Np>; > + > + const _V __absn = __sve_reinterpret_cast<_Ip>(_S_abs(__x)); > + const _V __minn = __sve_reinterpret_cast<_Ip>( > + __sve_vector_type<_Tp, _Np>::__sve_broadcast(__norm_min_v<_Tp>)); > +#if __FINITE_MATH_ONLY__ > + return _S_greater_equal(_VW{__absn}, _VW{__minn}); > +#else > + const _V __maxn = __sve_reinterpret_cast<_Ip>( > + __sve_vector_type<_Tp, _Np>::__sve_broadcast(__finite_max_v<_Tp>)); > + return _MaskImpl::_S_bit_and(_S_less_equal(_VW{__minn}, _VW{__absn}), > + _S_less_equal(_VW{__absn}, _VW{__maxn})); > +#endif > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> > + _S_signbit(_SveSimdWrapper<_Tp, _Np> __x) > + { > + using _Ip = __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>; > + using _V = __sve_vector_type_t<_Ip, _Np>; > + using _VW = _SveSimdWrapper<_Ip, _Np>; > + > + const _V __xn = __sve_reinterpret_cast<_Ip>(__x); > + const _V __zeron = __sve_vector_type<_Ip, _Np>::__sve_broadcast(0); > + return _S_less(_VW{__xn}, _VW{__zeron}); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> > + _S_isunordered(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, > _Np> __y) + { > + return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data, __y._M_data); + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_nearbyint(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svrinti_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_rint(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return _SuperImpl::_S_nearbyint(__x); } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_trunc(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svrintz_z(__sve_vector_type<_Tp, 
_Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_round(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svrinta_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_floor(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svrintm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np> > + _S_ceil(_SveSimdWrapper<_Tp, _Np> __x) noexcept > + { return svrintp_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), > __x._M_data); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, > _SveSimdWrapper<_Tp, _Np>& __lhs, + > __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs) > + { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k, > _SveSimdWrapper<_Tp, _Np>& __lhs, + __type_identity_t<_Tp> __rhs) > + { __lhs = _CommonImpl::_S_blend(__k, __lhs, __data(simd<_Tp, > _Abi>(__rhs))); } + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k, > _SveSimdWrapper<_Tp, _Np>& __lhs, + const > __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs, _Op __op) + { > + __lhs = _CommonImpl::_S_blend(__k, __lhs, > + _SveSimdWrapper<_Tp, _Np>(__op(_SuperImpl{}, __lhs, __rhs))); > + } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k, > _SveSimdWrapper<_Tp, _Np>& __lhs, + const __type_identity_t<_Tp> __rhs, > _Op __op) > + { _S_masked_cassign(__k, __lhs, _S_broadcast(__rhs), __op); } > + > + template > + _GLIBCXX_SIMD_INTRINSIC static constexpr void > + _S_set(_SveSimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept > + { __v._M_set(__i, static_cast<_Up&&>(__x)); } > + > + template