Hi, In my previous email I accidentally forgot to add libstdc++ mailing list, hence I am sending this again. The following patch adds ARM SVE support to std::experimental::simd. libstdc++-v3/ChangeLog: * include/Makefile.am: Add simd_sve.h. * include/Makefile.in: Add simd_sve.h. * include/experimental/bits/simd.h: Add new SveAbi. * include/experimental/bits/simd_builtin.h: Use __no_sve_deduce_t to support existing Neon Abi. * include/experimental/bits/simd_converter.h: Convert sequentially when sve is available. * include/experimental/bits/simd_detail.h: Define sve specific macro. * include/experimental/bits/simd_math.h: Fallback frexp to execute sequentially when sve is available, to handle fixed_size_simd return type that always uses sve. * include/experimental/simd: Include bits/simd_sve.h. * testsuite/experimental/simd/tests/bits/main.h: Enable testing for sve128, sve256, sve512. * include/experimental/bits/simd_sve.h: New file. Signed-off-by: Srinivas Yadav Singanaboina vasu.srinivasvasu.14@gmail.com --- libstdc++-v3/include/Makefile.am | 1 + libstdc++-v3/include/Makefile.in | 1 + libstdc++-v3/include/experimental/bits/simd.h | 131 +- .../include/experimental/bits/simd_builtin.h | 35 +- .../experimental/bits/simd_converter.h | 57 +- .../include/experimental/bits/simd_detail.h | 7 +- .../include/experimental/bits/simd_math.h | 14 +- .../include/experimental/bits/simd_sve.h | 1818 +++++++++++++++++ libstdc++-v3/include/experimental/simd | 3 + .../experimental/simd/tests/bits/main.h | 3 + 10 files changed, 2039 insertions(+), 31 deletions(-) create mode 100644 libstdc++-v3/include/experimental/bits/simd_sve.h diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index 6209f390e08..1170cb047a6 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -826,6 +826,7 @@ experimental_bits_headers = \ ${experimental_bits_srcdir}/simd_neon.h \ ${experimental_bits_srcdir}/simd_ppc.h \ 
${experimental_bits_srcdir}/simd_scalar.h \ + ${experimental_bits_srcdir}/simd_sve.h \ ${experimental_bits_srcdir}/simd_x86.h \ ${experimental_bits_srcdir}/simd_x86_conversions.h \ ${experimental_bits_srcdir}/string_view.tcc \ diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index 596fa0d2390..bc44582a2da 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -1172,6 +1172,7 @@ experimental_bits_headers = \ ${experimental_bits_srcdir}/simd_neon.h \ ${experimental_bits_srcdir}/simd_ppc.h \ ${experimental_bits_srcdir}/simd_scalar.h \ + ${experimental_bits_srcdir}/simd_sve.h \ ${experimental_bits_srcdir}/simd_x86.h \ ${experimental_bits_srcdir}/simd_x86_conversions.h \ ${experimental_bits_srcdir}/string_view.tcc \ diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 90523ea57dc..95fd92784b2 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -39,12 +39,16 @@ #include #include #include +#include #if _GLIBCXX_SIMD_X86INTRIN #include #elif _GLIBCXX_SIMD_HAVE_NEON #include #endif +#if _GLIBCXX_SIMD_HAVE_SVE +#include +#endif /** @ingroup ts_simd * @{ @@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double; using __m512i [[__gnu__::__vector_size__(64)]] = long long; #endif +#if _GLIBCXX_SIMD_HAVE_SVE +constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8; +#else +constexpr inline int __sve_vectorized_size_bytes = 0; +#endif + namespace simd_abi { // simd_abi forward declarations {{{ // implementation details: @@ -108,6 +118,9 @@ template template struct _VecBltnBtmsk; +template + struct _SveAbi; + template using _VecN = _VecBuiltin; @@ -123,6 +136,9 @@ template template using _Neon = _VecBuiltin<_UsedBytes>; +template + using _Sve = _SveAbi<_UsedBytes>; + // implementation-defined: using __sse = _Sse<>; using __avx = _Avx<>; @@ -130,6 +146,7 @@ 
using __avx512 = _Avx512<>; using __neon = _Neon<>; using __neon128 = _Neon<16>; using __neon64 = _Neon<8>; +using __sve = _Sve<>; // standard: template @@ -250,6 +267,8 @@ constexpr inline bool __support_neon_float = false; #endif +constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE; + #ifdef _ARCH_PWR10 constexpr inline bool __have_power10vec = true; #else @@ -356,12 +375,13 @@ namespace __detail | (__have_avx512vnni << 27) | (__have_avx512vpopcntdq << 28) | (__have_avx512vp2intersect << 29); - else if constexpr (__have_neon) + else if constexpr (__have_neon || __have_sve) return __have_neon | (__have_neon_a32 << 1) | (__have_neon_a64 << 2) | (__have_neon_a64 << 2) - | (__support_neon_float << 3); + | (__support_neon_float << 3) + | (__have_sve << 4); else if constexpr (__have_power_vmx) return __have_power_vmx | (__have_power_vsx << 1) @@ -733,6 +753,16 @@ template return _Bytes <= 16 && is_same_v, _Abi>; } +// }}} +// __is_sve_abi {{{ +template + constexpr bool + __is_sve_abi() + { + constexpr auto _Bytes = __abi_bytes_v<_Abi>; + return _Bytes <= __sve_vectorized_size_bytes && is_same_v, _Abi>; + } + // }}} // __make_dependent_t {{{ template @@ -998,6 +1028,9 @@ template template using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>; +template + struct _SveSimdWrapper; + // }}} // __is_simd_wrapper {{{ template @@ -2858,6 +2891,8 @@ template constexpr size_t __bytes = __vectorized_sizeof<_Tp>(); if constexpr (__bytes == sizeof(_Tp)) return static_cast(nullptr); + else if constexpr (__have_sve) + return static_cast<_SveAbi<__sve_vectorized_size_bytes>*>(nullptr); else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64)) return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr); else @@ -2951,6 +2986,9 @@ template > template struct __deduce_impl; +template + struct __no_sve_deduce_impl; + namespace simd_abi { /** * @tparam _Tp The requested `value_type` for the elements. 
@@ -2965,6 +3003,12 @@ template template using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type; + +template + struct __no_sve_deduce : __no_sve_deduce_impl<_Tp, _Np> {}; + +template + using __no_sve_deduce_t = typename __no_sve_deduce<_Tp, _Np, _Abis...>::type; } // namespace simd_abi // }}}2 @@ -2974,13 +3018,23 @@ template template struct rebind_simd<_Tp, simd<_Up, _Abi>, - void_t, _Abi>>> - { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; + void_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>> + { using type = simd<_Tp, std::conditional_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>; + }; template struct rebind_simd<_Tp, simd_mask<_Up, _Abi>, - void_t, _Abi>>> - { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; + void_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>> + { using type = simd_mask<_Tp, std::conditional_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>; + }; template using rebind_simd_t = typename rebind_simd<_Tp, _V>::type; @@ -3243,7 +3297,7 @@ template else if constexpr (_Tp::size() == 1) return __x[0]; else if constexpr (sizeof(_Tp) == sizeof(__x) - && !__is_fixed_size_abi_v<_Ap>) + && !__is_fixed_size_abi_v<_Ap> && !__is_sve_abi<_Ap>()) return {__private_init, __vector_bitcast( _Ap::_S_masked(__data(__x))._M_data)}; @@ -4004,18 +4058,29 @@ template & __x) { using _Tp = typename _V::value_type; + + auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( + [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + return _V([&](auto __j) constexpr 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+                         { return __x[__i * _V::size() + __j]; });
+               });
+    };
+
     if constexpr (_Parts == 1)
       {
         return {simd_cast<_V>(__x)};
       }
     else if (__x._M_is_constprop())
       {
-        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
-                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-                   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
-                             { return __x[__i * _V::size() + __j]; });
-                 });
+        return __gen_fallback();
       }
+#if _GLIBCXX_SIMD_HAVE_SVE
+    else if constexpr(__is_sve_abi<_Ap>()) // constexpr predicate function: it must be called
+      {
+        return __gen_fallback();
+      }
+#endif
     else if constexpr (
       __is_fixed_size_abi_v<_Ap>
       && (is_same_v
@@ -4115,7 +4180,8 @@ template
     constexpr size_t _N0 = _SL::template _S_at<0>();
     using _V = __deduced_simd<_Tp, _N0>;
 
-    if (__x._M_is_constprop())
+    auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+    {
       return __generate_from_n_evaluations(
                [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                  using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
@@ -4124,6 +4190,14 @@ template
                               return __x[__offset + __j];
                             });
                });
+    };
+
+    if (__x._M_is_constprop())
+      return __gen_fallback(); // the replaced code returned this value; keep doing so
+#if _GLIBCXX_SIMD_HAVE_SVE
+    else if constexpr (__have_sve)
+      return __gen_fallback(); // with SVE enabled, always use the element-wise generator
+#endif
     else if constexpr (_Np == _N0)
       {
         static_assert(sizeof...(_Sizes) == 1);
@@ -4510,8 +4584,10 @@ template