From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7857) id D11923857C45; Fri, 24 Feb 2023 18:40:45 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D11923857C45 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1677264045; bh=T5Gh06HNUA43DXZtNA1MwLWd07cEJKu2F4aEbpW9zH4=; h=From:To:Subject:Date:From; b=NaKa4MvdlonFBY/JobE55xBhV8MJRj5gkLLBf9QMQsycZ1Nk7KBR6LUi92zNyVFiU NOIJV3yUpFuUgEWXTEOYnrA8sUpc+cZR21G/XE0Kga5H8ol+nhDdOjQc9Iuo5g6Dcr cw/1jCcQm84/aUnjGmpC3c95CvX5A8dcO2zArbi8= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Matthias Kretz To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r13-6333] libstdc++: More efficient masked inc-/decrement implementation X-Act-Checkin: gcc X-Git-Author: Matthias Kretz X-Git-Refname: refs/heads/master X-Git-Oldrev: 2dd68cddbc745de2c4a4b4982673122716c8231c X-Git-Newrev: 6ce55180d494b616e2e3e68ffedfe9007e42ca06 Message-Id: <20230224184045.D11923857C45@sourceware.org> Date: Fri, 24 Feb 2023 18:40:45 +0000 (GMT) List-Id: https://gcc.gnu.org/g:6ce55180d494b616e2e3e68ffedfe9007e42ca06 commit r13-6333-g6ce55180d494b616e2e3e68ffedfe9007e42ca06 Author: Matthias Kretz Date: Mon Feb 20 16:33:31 2023 +0100 libstdc++: More efficient masked inc-/decrement implementation Signed-off-by: Matthias Kretz libstdc++-v3/ChangeLog: PR libstdc++/108856 * include/experimental/bits/simd_builtin.h (_SimdImplBuiltin::_S_masked_unary): More efficient implementation of masked inc-/decrement for integers and floats without AVX2. * include/experimental/bits/simd_x86.h (_SimdImplX86::_S_masked_unary): New. Use AVX512 masked subtract builtins for masked inc-/decrement. Diff: --- .../include/experimental/bits/simd_builtin.h | 27 ++++++++- libstdc++-v3/include/experimental/bits/simd_x86.h | 68 ++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h index 9736db65e01..0cf5c9897cd 100644 --- a/libstdc++-v3/include/experimental/bits/simd_builtin.h +++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h @@ -2546,8 +2546,31 @@ template _Op __op; if (__k._M_is_constprop_all_of()) return __data(__op(__vv)); - else - return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv))); + else if constexpr (is_same_v<_Op, __increment>) + { + static_assert(not std::is_same_v<_K, bool>); + if constexpr (is_integral_v<_Tp>) + // Take a shortcut knowing that __k is an integer vector with values -1 or 0. + return __v._M_data - __vector_bitcast<_Tp>(__k._M_data); + else if constexpr (not __have_avx2) + return __v._M_data + + __vector_bitcast<_Tp>(__k._M_data & __builtin_bit_cast( + _K, _Tp(1))); + // starting with AVX2 it is more efficient to blend after add + } + else if constexpr (is_same_v<_Op, __decrement>) + { + static_assert(not std::is_same_v<_K, bool>); + if constexpr (is_integral_v<_Tp>) + // Take a shortcut knowing that __k is an integer vector with values -1 or 0. + return __v._M_data + __vector_bitcast<_Tp>(__k._M_data); + else if constexpr (not __have_avx2) + return __v._M_data + - __vector_bitcast<_Tp>(__k._M_data & __builtin_bit_cast( + _K, _Tp(1))); + // starting with AVX2 it is more efficient to blend after sub + } + return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv))); } //}}}2 diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h index cd642cb3290..8872ca301b9 100644 --- a/libstdc++-v3/include/experimental/bits/simd_x86.h +++ b/libstdc++-v3/include/experimental/bits/simd_x86.h @@ -3462,6 +3462,74 @@ template } //}}} }}} + template