From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7857) id 024DB3858C55; Wed, 27 Mar 2024 14:15:10 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 024DB3858C55 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1711548911; bh=xK1vYhh0SB/7+WQpRgryB7wLSMhWxJl4Yc/iRSyA7fs=; h=From:To:Subject:Date:From; b=aqHe+UfGhWW3wo6ckX2/VZ5n8dV3QrkOiNAHaNM2obu5kbLiG6ZcAF2/oTleQnC/L yM9QvwceM5zZRD8EgNcbzBOv3Rrhsn0WKVDO83y3p3FW1p8tDrZi21ST8nSzov/fbs DAmz/dN44yRi2syZPdJPdXY+CFrw7RlgjRInj1J8= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Matthias Kretz To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r14-9688] libstdc++: add ARM SVE support to std::experimental::simd X-Act-Checkin: gcc X-Git-Author: Srinivas Yadav Singanaboina X-Git-Refname: refs/heads/master X-Git-Oldrev: 0b02da5b99e89347f5f8bf875ec8318f84adff18 X-Git-Newrev: 9ac3119fec81fb64d11dee8f853145f937389366 Message-Id: <20240327141511.024DB3858C55@sourceware.org> Date: Wed, 27 Mar 2024 14:15:10 +0000 (GMT) List-Id: https://gcc.gnu.org/g:9ac3119fec81fb64d11dee8f853145f937389366 commit r14-9688-g9ac3119fec81fb64d11dee8f853145f937389366 Author: Srinivas Yadav Singanaboina Date: Sat Mar 16 19:04:35 2024 +0000 libstdc++: add ARM SVE support to std::experimental::simd libstdc++-v3/ChangeLog: * include/Makefile.am: Add simd_sve.h. * include/Makefile.in: Add simd_sve.h. * include/experimental/bits/simd.h: Add new SveAbi. * include/experimental/bits/simd_builtin.h: Use __no_sve_deduce_t to support existing Neon Abi. * include/experimental/bits/simd_converter.h: Convert sequentially when sve is available. * include/experimental/bits/simd_detail.h: Define sve specific macro. * include/experimental/bits/simd_math.h: Fallback frexp to execute sequntially when sve is available, to handle fixed_size_simd return type that always uses sve. * include/experimental/simd: Include bits/simd_sve.h. * testsuite/experimental/simd/tests/bits/main.h: Enable testing for sve128, sve256, sve512. * include/experimental/bits/simd_sve.h: New file. Signed-off-by: Srinivas Yadav Singanaboina Diff: --- libstdc++-v3/include/Makefile.am | 1 + libstdc++-v3/include/Makefile.in | 1 + libstdc++-v3/include/experimental/bits/simd.h | 135 +- .../include/experimental/bits/simd_builtin.h | 180 +- .../include/experimental/bits/simd_converter.h | 57 +- .../include/experimental/bits/simd_detail.h | 12 +- libstdc++-v3/include/experimental/bits/simd_math.h | 15 +- libstdc++-v3/include/experimental/bits/simd_sve.h | 1852 ++++++++++++++++++++ libstdc++-v3/include/experimental/simd | 3 + .../testsuite/experimental/simd/tests/bits/main.h | 3 + 10 files changed, 2155 insertions(+), 104 deletions(-) diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index cb902de36ae..422a0f4bd0a 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -835,6 +835,7 @@ experimental_bits_headers = \ ${experimental_bits_srcdir}/simd_neon.h \ ${experimental_bits_srcdir}/simd_ppc.h \ ${experimental_bits_srcdir}/simd_scalar.h \ + ${experimental_bits_srcdir}/simd_sve.h \ ${experimental_bits_srcdir}/simd_x86.h \ ${experimental_bits_srcdir}/simd_x86_conversions.h \ ${experimental_bits_srcdir}/string_view.tcc \ diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index 9357087acb4..9fd4ab4848c 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -1181,6 +1181,7 @@ experimental_bits_headers = \ ${experimental_bits_srcdir}/simd_neon.h \ ${experimental_bits_srcdir}/simd_ppc.h \ ${experimental_bits_srcdir}/simd_scalar.h \ + ${experimental_bits_srcdir}/simd_sve.h \ ${experimental_bits_srcdir}/simd_x86.h \ ${experimental_bits_srcdir}/simd_x86_conversions.h \ ${experimental_bits_srcdir}/string_view.tcc \ diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 974377c6798..03c2e17a326 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -39,12 +39,16 @@ #include #include #include +#include #if _GLIBCXX_SIMD_X86INTRIN #include #elif _GLIBCXX_SIMD_HAVE_NEON #include #endif +#if _GLIBCXX_SIMD_HAVE_SVE +#include +#endif /** @ingroup ts_simd * @{ @@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double; using __m512i [[__gnu__::__vector_size__(64)]] = long long; #endif +#if _GLIBCXX_SIMD_HAVE_SVE +constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8; +#else +constexpr inline int __sve_vectorized_size_bytes = 0; +#endif + namespace simd_abi { // simd_abi forward declarations {{{ // implementation details: @@ -108,6 +118,9 @@ template template struct _VecBltnBtmsk; +template + struct _SveAbi; + template using _VecN = _VecBuiltin; @@ -123,6 +136,9 @@ template template using _Neon = _VecBuiltin<_UsedBytes>; +template + using _Sve = _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>; + // implementation-defined: using __sse = _Sse<>; using __avx = _Avx<>; @@ -130,6 +146,7 @@ using __avx512 = _Avx512<>; using __neon = _Neon<>; using __neon128 = _Neon<16>; using __neon64 = _Neon<8>; +using __sve = _Sve<>; // standard: template @@ -250,6 +267,9 @@ constexpr inline bool __support_neon_float = false; #endif +constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE; +constexpr inline bool __have_sve2 = _GLIBCXX_SIMD_HAVE_SVE2; + #ifdef _ARCH_PWR10 constexpr inline bool __have_power10vec = true; #else @@ -356,12 +376,14 @@ namespace __detail | (__have_avx512vnni << 27) | (__have_avx512vpopcntdq << 28) | (__have_avx512vp2intersect << 29); - else if constexpr (__have_neon) + else if constexpr (__have_neon || __have_sve) return __have_neon | (__have_neon_a32 << 1) | (__have_neon_a64 << 2) | (__have_neon_a64 << 2) - | (__support_neon_float << 3); + | (__support_neon_float << 3) + | (__have_sve << 4) + | (__have_sve2 << 5); else if constexpr (__have_power_vmx) return __have_power_vmx | (__have_power_vsx << 1) @@ -733,6 +755,16 @@ template return _Bytes <= 16 && is_same_v, _Abi>; } +// }}} +// __is_sve_abi {{{ +template + constexpr bool + __is_sve_abi() + { + constexpr auto _Bytes = __abi_bytes_v<_Abi>; + return _Bytes <= __sve_vectorized_size_bytes && is_same_v, _Abi>; + } + // }}} // __make_dependent_t {{{ template @@ -998,6 +1030,9 @@ template template using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>; +template + struct _SveSimdWrapper; + // }}} // __is_simd_wrapper {{{ template @@ -2830,7 +2865,8 @@ namespace simd_abi { // most of simd_abi is defined in simd_detail.h template inline constexpr int max_fixed_size - = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32; + = ((__have_avx512bw && sizeof(_Tp) == 1) + || (__have_sve && __sve_vectorized_size_bytes/sizeof(_Tp) >= 64)) ? 64 : 32; // compatible {{{ #if defined __x86_64__ || defined __aarch64__ @@ -2858,6 +2894,8 @@ template constexpr size_t __bytes = __vectorized_sizeof<_Tp>(); if constexpr (__bytes == sizeof(_Tp)) return static_cast(nullptr); + else if constexpr (__have_sve) + return static_cast<_SveAbi<__sve_vectorized_size_bytes>*>(nullptr); else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64)) return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr); else @@ -2951,6 +2989,9 @@ template > template struct __deduce_impl; +template + struct __no_sve_deduce_impl; + namespace simd_abi { /** * @tparam _Tp The requested `value_type` for the elements. @@ -2965,6 +3006,12 @@ template template using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type; + +template + struct __no_sve_deduce : __no_sve_deduce_impl<_Tp, _Np> {}; + +template + using __no_sve_deduce_t = typename __no_sve_deduce<_Tp, _Np, _Abis...>::type; } // namespace simd_abi // }}}2 @@ -2974,13 +3021,27 @@ template template struct rebind_simd<_Tp, simd<_Up, _Abi>, - void_t, _Abi>>> - { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; + void_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>> + { + using type = simd<_Tp, std::conditional_t< + !__is_sve_abi<_Abi>(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>; + }; template struct rebind_simd<_Tp, simd_mask<_Up, _Abi>, - void_t, _Abi>>> - { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; + void_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>> + { + using type = simd_mask<_Tp, std::conditional_t< + !__is_sve_abi<_Abi>(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>; + }; template using rebind_simd_t = typename rebind_simd<_Tp, _V>::type; @@ -3243,7 +3304,7 @@ template else if constexpr (_Tp::size() == 1) return __x[0]; else if constexpr (sizeof(_Tp) == sizeof(__x) - && !__is_fixed_size_abi_v<_Ap>) + && !__is_fixed_size_abi_v<_Ap> && !__is_sve_abi<_Ap>()) return {__private_init, __vector_bitcast( _Ap::_S_masked(__data(__x))._M_data)}; @@ -4004,18 +4065,29 @@ template & __x) { using _Tp = typename _V::value_type; + + auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( + [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA + { return __x[__i * _V::size() + __j]; }); + }); + }; + if constexpr (_Parts == 1) { return {simd_cast<_V>(__x)}; } else if (__x._M_is_constprop()) { - return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( - [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { - return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA - { return __x[__i * _V::size() + __j]; }); - }); + return __gen_fallback(); } +#if _GLIBCXX_SIMD_HAVE_SVE + else if constexpr(__is_sve_abi<_Ap>) + { + return __gen_fallback(); + } +#endif else if constexpr ( __is_fixed_size_abi_v<_Ap> && (is_same_v @@ -4115,7 +4187,8 @@ template constexpr size_t _N0 = _SL::template _S_at<0>(); using _V = __deduced_simd<_Tp, _N0>; - if (__x._M_is_constprop()) + auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA + { return __generate_from_n_evaluations( [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>; @@ -4124,6 +4197,14 @@ template return __x[__offset + __j]; }); }); + }; + + if (__x._M_is_constprop()) + __gen_fallback(); +#if _GLIBCXX_SIMD_HAVE_SVE + else if constexpr (__have_sve) + __gen_fallback(); +#endif else if constexpr (_Np == _N0) { static_assert(sizeof...(_Sizes) == 1); @@ -4510,8 +4591,11 @@ template