public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Matthias Kretz <mkretz@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r13-6333] libstdc++: More efficient masked inc-/decrement implementation Date: Fri, 24 Feb 2023 18:40:45 +0000 (GMT) [thread overview] Message-ID: <20230224184045.D11923857C45@sourceware.org> (raw) https://gcc.gnu.org/g:6ce55180d494b616e2e3e68ffedfe9007e42ca06 commit r13-6333-g6ce55180d494b616e2e3e68ffedfe9007e42ca06 Author: Matthias Kretz <m.kretz@gsi.de> Date: Mon Feb 20 16:33:31 2023 +0100 libstdc++: More efficient masked inc-/decrement implementation Signed-off-by: Matthias Kretz <m.kretz@gsi.de> libstdc++-v3/ChangeLog: PR libstdc++/108856 * include/experimental/bits/simd_builtin.h (_SimdImplBuiltin::_S_masked_unary): More efficient implementation of masked inc-/decrement for integers and floats without AVX2. * include/experimental/bits/simd_x86.h (_SimdImplX86::_S_masked_unary): New. Use AVX512 masked subtract builtins for masked inc-/decrement. Diff: --- .../include/experimental/bits/simd_builtin.h | 27 ++++++++- libstdc++-v3/include/experimental/bits/simd_x86.h | 68 ++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h index 9736db65e01..0cf5c9897cd 100644 --- a/libstdc++-v3/include/experimental/bits/simd_builtin.h +++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h @@ -2546,8 +2546,31 @@ template <typename _Abi, typename> _Op<decltype(__vv)> __op; if (__k._M_is_constprop_all_of()) return __data(__op(__vv)); - else - return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv))); + else if constexpr (is_same_v<_Op<void>, __increment<void>>) + { + static_assert(not std::is_same_v<_K, bool>); + if constexpr (is_integral_v<_Tp>) + // Take a shortcut knowing that __k is an integer vector with values -1 or 0. 
+ return __v._M_data - __vector_bitcast<_Tp>(__k._M_data); + else if constexpr (not __have_avx2) + return __v._M_data + + __vector_bitcast<_Tp>(__k._M_data & __builtin_bit_cast( + _K, _Tp(1))); + // starting with AVX2 it is more efficient to blend after add + } + else if constexpr (is_same_v<_Op<void>, __decrement<void>>) + { + static_assert(not std::is_same_v<_K, bool>); + if constexpr (is_integral_v<_Tp>) + // Take a shortcut knowing that __k is an integer vector with values -1 or 0. + return __v._M_data + __vector_bitcast<_Tp>(__k._M_data); + else if constexpr (not __have_avx2) + return __v._M_data + - __vector_bitcast<_Tp>(__k._M_data & __builtin_bit_cast( + _K, _Tp(1))); + // starting with AVX2 it is more efficient to blend after sub + } + return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv))); } //}}}2 diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h index cd642cb3290..8872ca301b9 100644 --- a/libstdc++-v3/include/experimental/bits/simd_x86.h +++ b/libstdc++-v3/include/experimental/bits/simd_x86.h @@ -3462,6 +3462,74 @@ template <typename _Abi, typename> } //}}} }}} + template <template <typename> class _Op, typename _Tp, typename _K, + size_t _Np> + _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> + _S_masked_unary(const _SimdWrapper<_K, _Np> __k, + const _SimdWrapper<_Tp, _Np> __v) + { + if (__k._M_is_constprop_none_of()) + return __v; + else if (__k._M_is_constprop_all_of()) + { + auto __vv = _Base::_M_make_simd(__v); + _Op<decltype(__vv)> __op; + return __data(__op(__vv)); + } + else if constexpr (__is_bitmask_v<decltype(__k)> + && (is_same_v<_Op<void>, __increment<void>> + || is_same_v<_Op<void>, __decrement<void>>)) + { + // optimize masked unary increment and decrement as masked sub +/-1 + constexpr int __pm_one + = is_same_v<_Op<void>, __increment<void>> ? 
-1 : 1; + if constexpr (is_integral_v<_Tp>) + { + constexpr bool __lp64 = sizeof(long) == sizeof(long long); + using _Ip = std::make_signed_t<_Tp>; + using _Up = std::conditional_t< + std::is_same_v<_Ip, long>, + std::conditional_t<__lp64, long long, int>, + std::conditional_t< + std::is_same_v<_Ip, signed char>, char, _Ip>>; + const auto __value = __vector_bitcast<_Up>(__v._M_data); +#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr) \ + if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width) \ + return __vector_bitcast<_Tp>(__builtin_ia32_##_Instr##_mask(__value, \ + __vector_broadcast<_Np>(_Up(__pm_one)), __value, __k._M_data)) + _GLIBCXX_SIMD_MASK_SUB(1, 64, psubb512); + _GLIBCXX_SIMD_MASK_SUB(1, 32, psubb256); + _GLIBCXX_SIMD_MASK_SUB(1, 16, psubb128); + _GLIBCXX_SIMD_MASK_SUB(2, 64, psubw512); + _GLIBCXX_SIMD_MASK_SUB(2, 32, psubw256); + _GLIBCXX_SIMD_MASK_SUB(2, 16, psubw128); + _GLIBCXX_SIMD_MASK_SUB(4, 64, psubd512); + _GLIBCXX_SIMD_MASK_SUB(4, 32, psubd256); + _GLIBCXX_SIMD_MASK_SUB(4, 16, psubd128); + _GLIBCXX_SIMD_MASK_SUB(8, 64, psubq512); + _GLIBCXX_SIMD_MASK_SUB(8, 32, psubq256); + _GLIBCXX_SIMD_MASK_SUB(8, 16, psubq128); +#undef _GLIBCXX_SIMD_MASK_SUB + } + else + { +#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr) \ + if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width) \ + return __builtin_ia32_##_Instr##_mask( \ + __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \ + __k._M_data, _MM_FROUND_CUR_DIRECTION) + _GLIBCXX_SIMD_MASK_SUB(4, 64, subps512); + _GLIBCXX_SIMD_MASK_SUB(4, 32, subps256); + _GLIBCXX_SIMD_MASK_SUB(4, 16, subps128); + _GLIBCXX_SIMD_MASK_SUB(8, 64, subpd512); + _GLIBCXX_SIMD_MASK_SUB(8, 32, subpd256); + _GLIBCXX_SIMD_MASK_SUB(8, 16, subpd128); +#undef _GLIBCXX_SIMD_MASK_SUB + } + } + else + return _Base::template _S_masked_unary<_Op>(__k, __v); + } }; // }}}
reply other threads:[~2023-02-24 18:40 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230224184045.D11923857C45@sourceware.org \ --to=mkretz@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ --cc=libstdc++-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).