From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7857) id 7869E3858C31; Wed, 27 Mar 2024 14:15:21 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 7869E3858C31 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1711548921; bh=WPkmPDfVmglDiPTUef0GHZO5Zwm3LkZ7tdXHE6TTGY8=; h=From:To:Subject:Date:From; b=dWOm4MYzoanlGtdFkWzfvOubOKdh/V2gcUXF/kgFOzDJcLXrIlwvX4gCTmlrO0OcD TljkTHoiio7N5sXJZW6dB6RWFbmi3Zqwo5b2DlFtpQRCflJI2UAx8ovyM69pyB7V3G xoV9pgcRM2rKYBKwOLInZjamIlJCfU0pNKA/eu+A= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Matthias Kretz To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r14-9690] libstdc++: Add masked ++/-- implementation for sizeof < 16 X-Act-Checkin: gcc X-Git-Author: Matthias Kretz X-Git-Refname: refs/heads/master X-Git-Oldrev: 0ac2c0f0687b321ab54de271d788b4e0a287b4e2 X-Git-Newrev: a6c630c314b099f64d79055964d88b257459cf13 Message-Id: <20240327141521.7869E3858C31@sourceware.org> Date: Wed, 27 Mar 2024 14:15:21 +0000 (GMT) List-Id: https://gcc.gnu.org/g:a6c630c314b099f64d79055964d88b257459cf13 commit r14-9690-ga6c630c314b099f64d79055964d88b257459cf13 Author: Matthias Kretz Date: Wed Mar 27 13:41:25 2024 +0100 libstdc++: Add masked ++/-- implementation for sizeof < 16 This resolves further failures (-Wreturn-type warnings) and test failures for where-* tests targeting AVX-512. Signed-off-by: Matthias Kretz libstdc++-v3/ChangeLog: * include/experimental/bits/simd_x86.h (_S_masked_unary): Cast inputs < 16 bytes to 16 byte vectors before calling the right subtraction builtin. Before returning, truncate to the return vector type. Diff: --- libstdc++-v3/include/experimental/bits/simd_x86.h | 24 +++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h index 6b414486fee..517c4b4a5be 100644 --- a/libstdc++-v3/include/experimental/bits/simd_x86.h +++ b/libstdc++-v3/include/experimental/bits/simd_x86.h @@ -3508,6 +3508,9 @@ template #ifdef __clang__ return __movm<_Np, _Tp>(__k._M_data) ? __v._M_data - __pm_one : __v._M_data; #else // __clang__ + using _TV = __vector_type_t<_Tp, _Np>; + constexpr size_t __bytes = sizeof(__v) < 16 ? 16 : sizeof(__v); + constexpr size_t __width = __bytes / sizeof(_Tp); if constexpr (is_integral_v<_Tp>) { constexpr bool __lp64 = sizeof(long) == sizeof(long long); @@ -3517,11 +3520,11 @@ template std::conditional_t<__lp64, long long, int>, std::conditional_t< std::is_same_v<_Ip, signed char>, char, _Ip>>; - const auto __value = __vector_bitcast<_Up>(__v._M_data); + const auto __value = __intrin_bitcast<__vector_type_t<_Up, __width>>(__v._M_data); #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr) \ - if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width) \ - return __vector_bitcast<_Tp>(__builtin_ia32_##_Instr##_mask(__value, \ - __vector_broadcast<_Np>(_Up(__pm_one)), __value, __k._M_data)) + if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width) \ + return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask(__value, \ + __vector_broadcast<__width>(_Up(__pm_one)), __value, __k._M_data)) _GLIBCXX_SIMD_MASK_SUB(1, 64, psubb512); _GLIBCXX_SIMD_MASK_SUB(1, 32, psubb256); _GLIBCXX_SIMD_MASK_SUB(1, 16, psubb128); @@ -3538,16 +3541,17 @@ template } else { + const auto __value = __intrin_bitcast<__vector_type_t<_Tp, __width>>(__v._M_data); #define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr) \ - if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width) \ + if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width) \ return __builtin_ia32_##_Instr##_mask( \ - __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \ + __value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \ __k._M_data, _MM_FROUND_CUR_DIRECTION) #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr) \ - if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width) \ - return __builtin_ia32_##_Instr##_mask( \ - __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \ - __k._M_data) + if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width) \ + return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask( \ + __value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \ + __k._M_data)) _GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512); _GLIBCXX_SIMD_MASK_SUB(4, 32, subps256); _GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);