From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-pg1-x530.google.com (mail-pg1-x530.google.com [IPv6:2607:f8b0:4864:20::530]) by sourceware.org (Postfix) with ESMTPS id D64CE3858D32; Fri, 24 Nov 2023 15:59:31 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org D64CE3858D32 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org D64CE3858D32 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=2607:f8b0:4864:20::530 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1700841581; cv=none; b=a8aXRY5THy80zbM559hDZku7BmFRXSq8/fd7aIrTGVRMBuSt3fVzSmhH+SDDqpd/MLCLg+ksPaT7XnW3KAg7brnQHdEUrN5ES/M61drzKw6cHcXRDqa7QRCfXyuw3m8PI9Sx5/vvXw5ZveGmZTIbYI93XCKexOQX5P3gW3OEaQk= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1700841581; c=relaxed/simple; bh=0cun8FG0VUhYf0oNg0nmdu0h/WceVg5fM2cpmAXGKH0=; h=DKIM-Signature:MIME-Version:From:Date:Message-ID:Subject:To; b=BKML+Jp70+bGM37ksB/YT3jEZfrTZlE/0bSnIbeEQnvTJTPiW/tfq8eZ+6EPcxjmdeUqdOI7hRHB9NiIe+g4YPxKS42Eh8APO6YFQ4cOWZ1JzhmuXtW6E+So/3j1JjGnl6UIaFq/fy5VdrUeIxYz9dzWiU65VsWtUpl7zTn4E/A= ARC-Authentication-Results: i=1; server2.sourceware.org Received: by mail-pg1-x530.google.com with SMTP id 41be03b00d2f7-5be30d543c4so1471141a12.2; Fri, 24 Nov 2023 07:59:31 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1700841569; x=1701446369; darn=gcc.gnu.org; h=to:subject:message-id:date:from:mime-version:from:to:cc:subject :date:message-id:reply-to; bh=Vy18xuuoNgHCZW1udS4Tg4eYFfyAJtuBPts1DfhCgAY=; b=gmGJ+6LuD84+rBcALXuigDp+UNDX0zyrS4j4A0fF/uRV+i6iuytP9B/xjWj+Wqa5d6 BrsES+EktyOQy+AlcLXMAPYV/t7z96Z/0fhr8DhjYcFDwAdz8wvGnQULBFXnHeC2SS5q iTEgN0pIXggHXPsDemzh9LsqpT5ddOhy/+gFTLdNX1KbLL9ltHdtDzUfSizl2I+IKbMe 3nRNNBnVp6xzq9OR82qThdYFNQNu1Sr/n6US1Z/My5AEjTlnrSA0Tgk+Oc8cdkIANw7s 
y3SHaCZDp6UgKMW+yFiPCnVpI7OlgXvVTUqLIHBFMkGPetKVjMeczOPqbrl88XlPvdDW uPlg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1700841569; x=1701446369; h=to:subject:message-id:date:from:mime-version:x-gm-message-state :from:to:cc:subject:date:message-id:reply-to; bh=Vy18xuuoNgHCZW1udS4Tg4eYFfyAJtuBPts1DfhCgAY=; b=F+3dn5okUsE2XkG07WncKc7Qtu/8Kq51MDjc7rQyIpQ6YU7W6hAI80TYnQjivqj/rZ eVfAwoAJZbniWqwxteNcih5exrL4ovFT7KsoQwcXj0n2K/kUfOixBaTvxGTBoVFC33X6 2wE7ZJ9846D91H1yor19wdph9OPSZ9FGXx+7KWXNCOMogdfj2a2l6zvJOx3ggLzp7I8o jYGQ9/tXR3snP0vo7gmpUod0WpK74ZDbUnT06Q+L6UkEq2UUs+fRGHx/NSSrlhT8DqcJ ZLczvGKEtxVgePic2WZoy68CufLDRJB8xSZrSq9uHYcf0Lo56Y3p22iS11b5H74wQGQ+ mlug== X-Gm-Message-State: AOJu0Yxbx1m8cAL+Mszi/GITvBzrLjC+58VchFzUgE7HWOi6fxpHVrAg sz3KVS5X6WqJhZF2+//1OkeTkbR9mndAwYnlAaJI7Vz8gFk= X-Google-Smtp-Source: AGHT+IGWhMJiQFHKWYFbj2PH3OTV+wjRdFrfh2tjzPVpudu9ZjTEKN9toKKJAMogpZ+BCabo5//dztSNxik/I7N9CSk= X-Received: by 2002:a17:90b:3a82:b0:280:aa7b:fbe8 with SMTP id om2-20020a17090b3a8200b00280aa7bfbe8mr3714492pjb.32.1700841568267; Fri, 24 Nov 2023 07:59:28 -0800 (PST) MIME-Version: 1.0 From: Srinivas Yadav Date: Fri, 24 Nov 2023 09:59:16 -0600 Message-ID: Subject: [PATCH] libstdc++: add ARM SVE support to std::experimental::simd To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org Content-Type: multipart/alternative; boundary="0000000000007ef60b060ae80954" X-Spam-Status: No, score=-9.1 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,FREEMAIL_REPLY,GIT_PATCH_0,HTML_MESSAGE,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: --0000000000007ef60b060ae80954 Content-Type: text/plain; charset="UTF-8" Hi, In my previous email I accidentally forgot to add the libstdc++ mailing list, hence I am sending this again. 
The following patch adds ARM SVE support to std::experimental::simd. libstdc++-v3/ChangeLog: * include/Makefile.am: Add simd_sve.h. * include/Makefile.in: Add simd_sve.h. * include/experimental/bits/simd.h: Add new SveAbi. * include/experimental/bits/simd_builtin.h: Use __no_sve_deduce_t to support existing Neon Abi. * include/experimental/bits/simd_converter.h: Convert sequentially when sve is available. * include/experimental/bits/simd_detail.h: Define sve specific macro. * include/experimental/bits/simd_math.h: Fallback frexp to execute sequentially when sve is available, to handle fixed_size_simd return type that always uses sve. * include/experimental/simd: Include bits/simd_sve.h. * testsuite/experimental/simd/tests/bits/main.h: Enable testing for sve128, sve256, sve512. * include/experimental/bits/simd_sve.h: New file. Signed-off-by: Srinivas Yadav Singanaboina vasu.srinivasvasu.14@gmail.com --- libstdc++-v3/include/Makefile.am | 1 + libstdc++-v3/include/Makefile.in | 1 + libstdc++-v3/include/experimental/bits/simd.h | 131 +- .../include/experimental/bits/simd_builtin.h | 35 +- .../experimental/bits/simd_converter.h | 57 +- .../include/experimental/bits/simd_detail.h | 7 +- .../include/experimental/bits/simd_math.h | 14 +- .../include/experimental/bits/simd_sve.h | 1818 +++++++++++++++++ libstdc++-v3/include/experimental/simd | 3 + .../experimental/simd/tests/bits/main.h | 3 + 10 files changed, 2039 insertions(+), 31 deletions(-) create mode 100644 libstdc++-v3/include/experimental/bits/simd_sve.h diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index 6209f390e08..1170cb047a6 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -826,6 +826,7 @@ experimental_bits_headers = \ ${experimental_bits_srcdir}/simd_neon.h \ ${experimental_bits_srcdir}/simd_ppc.h \ ${experimental_bits_srcdir}/simd_scalar.h \ + ${experimental_bits_srcdir}/simd_sve.h \ ${experimental_bits_srcdir}/simd_x86.h \ 
${experimental_bits_srcdir}/simd_x86_conversions.h \ ${experimental_bits_srcdir}/string_view.tcc \ diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index 596fa0d2390..bc44582a2da 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -1172,6 +1172,7 @@ experimental_bits_headers = \ ${experimental_bits_srcdir}/simd_neon.h \ ${experimental_bits_srcdir}/simd_ppc.h \ ${experimental_bits_srcdir}/simd_scalar.h \ + ${experimental_bits_srcdir}/simd_sve.h \ ${experimental_bits_srcdir}/simd_x86.h \ ${experimental_bits_srcdir}/simd_x86_conversions.h \ ${experimental_bits_srcdir}/string_view.tcc \ diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 90523ea57dc..95fd92784b2 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -39,12 +39,16 @@ #include #include #include +#include #if _GLIBCXX_SIMD_X86INTRIN #include #elif _GLIBCXX_SIMD_HAVE_NEON #include #endif +#if _GLIBCXX_SIMD_HAVE_SVE +#include +#endif /** @ingroup ts_simd * @{ @@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double; using __m512i [[__gnu__::__vector_size__(64)]] = long long; #endif +#if _GLIBCXX_SIMD_HAVE_SVE +constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8; +#else +constexpr inline int __sve_vectorized_size_bytes = 0; +#endif + namespace simd_abi { // simd_abi forward declarations {{{ // implementation details: @@ -108,6 +118,9 @@ template template struct _VecBltnBtmsk; +template + struct _SveAbi; + template using _VecN = _VecBuiltin; @@ -123,6 +136,9 @@ template template using _Neon = _VecBuiltin<_UsedBytes>; +template + using _Sve = _SveAbi<_UsedBytes>; + // implementation-defined: using __sse = _Sse<>; using __avx = _Avx<>; @@ -130,6 +146,7 @@ using __avx512 = _Avx512<>; using __neon = _Neon<>; using __neon128 = _Neon<16>; using __neon64 = _Neon<8>; +using __sve = _Sve<>; 
// standard: template @@ -250,6 +267,8 @@ constexpr inline bool __support_neon_float = false; #endif +constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE; + #ifdef _ARCH_PWR10 constexpr inline bool __have_power10vec = true; #else @@ -356,12 +375,13 @@ namespace __detail | (__have_avx512vnni << 27) | (__have_avx512vpopcntdq << 28) | (__have_avx512vp2intersect << 29); - else if constexpr (__have_neon) + else if constexpr (__have_neon || __have_sve) return __have_neon | (__have_neon_a32 << 1) | (__have_neon_a64 << 2) | (__have_neon_a64 << 2) - | (__support_neon_float << 3); + | (__support_neon_float << 3) + | (__have_sve << 4); else if constexpr (__have_power_vmx) return __have_power_vmx | (__have_power_vsx << 1) @@ -733,6 +753,16 @@ template return _Bytes <= 16 && is_same_v, _Abi>; } +// }}} +// __is_sve_abi {{{ +template + constexpr bool + __is_sve_abi() + { + constexpr auto _Bytes = __abi_bytes_v<_Abi>; + return _Bytes <= __sve_vectorized_size_bytes && is_same_v, _Abi>; + } + // }}} // __make_dependent_t {{{ template @@ -998,6 +1028,9 @@ template template using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>; +template + struct _SveSimdWrapper; + // }}} // __is_simd_wrapper {{{ template @@ -2858,6 +2891,8 @@ template constexpr size_t __bytes = __vectorized_sizeof<_Tp>(); if constexpr (__bytes == sizeof(_Tp)) return static_cast(nullptr); + else if constexpr (__have_sve) + return static_cast<_SveAbi<__sve_vectorized_size_bytes>*>(nullptr); else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64)) return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr); else @@ -2951,6 +2986,9 @@ template > template struct __deduce_impl; +template + struct __no_sve_deduce_impl; + namespace simd_abi { /** * @tparam _Tp The requested `value_type` for the elements. 
@@ -2965,6 +3003,12 @@ template template using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type; + +template + struct __no_sve_deduce : __no_sve_deduce_impl<_Tp, _Np> {}; + +template + using __no_sve_deduce_t = typename __no_sve_deduce<_Tp, _Np, _Abis...>::type; } // namespace simd_abi // }}}2 @@ -2974,13 +3018,23 @@ template template struct rebind_simd<_Tp, simd<_Up, _Abi>, - void_t, _Abi>>> - { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; + void_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>> + { using type = simd<_Tp, std::conditional_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>; + }; template struct rebind_simd<_Tp, simd_mask<_Up, _Abi>, - void_t, _Abi>>> - { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; + void_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>> + { using type = simd_mask<_Tp, std::conditional_t(), + simd_abi::__no_sve_deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>, + simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>; + }; template using rebind_simd_t = typename rebind_simd<_Tp, _V>::type; @@ -3243,7 +3297,7 @@ template else if constexpr (_Tp::size() == 1) return __x[0]; else if constexpr (sizeof(_Tp) == sizeof(__x) - && !__is_fixed_size_abi_v<_Ap>) + && !__is_fixed_size_abi_v<_Ap> && !__is_sve_abi<_Ap>()) return {__private_init, __vector_bitcast( _Ap::_S_masked(__data(__x))._M_data)}; @@ -4004,18 +4058,29 @@ template & __x) { using _Tp = typename _V::value_type; + + auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( + [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { + return _V([&](auto __j) constexpr 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA + { return __x[__i * _V::size() + __j]; }); + }); + }; + if constexpr (_Parts == 1) { return {simd_cast<_V>(__x)}; } else if (__x._M_is_constprop()) { - return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( - [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { - return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA - { return __x[__i * _V::size() + __j]; }); - }); + return __gen_fallback(); } +#if _GLIBCXX_SIMD_HAVE_SVE + else if constexpr(__is_sve_abi<_Ap>) + { + return __gen_fallback(); + } +#endif else if constexpr ( __is_fixed_size_abi_v<_Ap> && (is_same_v @@ -4115,7 +4180,8 @@ template constexpr size_t _N0 = _SL::template _S_at<0>(); using _V = __deduced_simd<_Tp, _N0>; - if (__x._M_is_constprop()) + auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA + { return __generate_from_n_evaluations( [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>; @@ -4124,6 +4190,14 @@ template return __x[__offset + __j]; }); }); + }; + + if (__x._M_is_constprop()) + __gen_fallback(); +#if _GLIBCXX_SIMD_HAVE_SVE + else if constexpr (__have_sve) + __gen_fallback(); +#endif else if constexpr (_Np == _N0) { static_assert(sizeof...(_Sizes) == 1); @@ -4510,8 +4584,10 @@ template