From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 65B2A3858430; Wed, 15 May 2024 10:04:51 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 65B2A3858430 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1715767491; bh=KDbZg7LZAnzIS7mp5OQirya9SSzXuxkIuAgxvv53dK8=; h=From:To:Subject:Date:In-Reply-To:References:From; b=OgeHBSBcqvHqf0c7mAHzaJbHkc2A5nSE7JVyXPNSKEya5bJ4+6HV3NZc3/LFvEioK CnLWkKRHyXqKFt0FSMuWqYFgsGewzrvjbHNRlLe/oaPwmnltaGdJcoKdWVI3G9M96b 6xQ2YSY6ePE4SieAhevCPCCEfprE1/6344yGf4Vs= From: "mkretz at gcc dot gnu.org" To: gcc-bugs@gcc.gnu.org Subject: [Bug libstdc++/114958] use __builtin_shufflevector for std::experimental::simd split and concat (at least the common cases) to enable better optimizations Date: Wed, 15 May 2024 10:04:50 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: libstdc++ X-Bugzilla-Version: 15.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: mkretz at gcc dot gnu.org X-Bugzilla-Status: ASSIGNED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: mkretz at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114958 --- Comment #6 from Matthias Kretz (Vir) --- The last commit introduced a regression on i686 where __builtin_shufflevector was producing MMX vectors (which can mess up the FPU). Untested patch which resolves the issue: libstdc++-v3/ChangeLog: PR libstdc++/114958 * include/experimental/bits/simd.h (__as_vector): Don't use vector_size(8) on __i386__. 
(__vec_shuffle): Never return MMX vectors, widen to 16 bytes instead. (concat): Fix padding calculation to pick up widening logic from __as_vector. diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 6a6fd4f109d..63951df488c 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -1665,7 +1665,12 @@ __as_vector(_V __x) { static_assert(is_simd<_V>::value); using _Tp =3D typename _V::value_type; +#ifdef __i386__ + constexpr auto __bytes =3D sizeof(_Tp) =3D=3D 8 ? 16 : sizeof(_= Tp); + using _RV [[__gnu__::__vector_size__(__bytes)]] =3D _Tp; +#else using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] =3D _Tp; +#endif return _RV{__data(__x)}; } } @@ -2081,11 +2086,14 @@ __not(_Tp __a) noexcept // }}} // __vec_shuffle{{{ template - _GLIBCXX_SIMD_INTRINSIC constexpr auto + _GLIBCXX_SIMD_INTRINSIC constexpr + __vector_type_t::value_type, sizeof...(_Is)> __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm) { constexpr int _N0 =3D sizeof(__x) / sizeof(__x[0]); constexpr int _N1 =3D sizeof(__y) / sizeof(__y[0]); + using _Tp =3D typename _VectorTraits<_T0>::value_type; + using _RV [[maybe_unused]] =3D __vector_type_t<_Tp, sizeof...(_Is)>; #if __has_builtin(__builtin_shufflevector) #ifdef __clang__ // Clang requires _T0 =3D=3D _T1 @@ -2105,14 +2113,23 @@ __not(_Tp __a) noexcept }); else #endif - return __builtin_shufflevector(__x, __y, [=3D] { - constexpr int __j =3D __idx_perm(_Is); - static_assert(__j < _N0 + _N1); - return __j; - }()...); + { + const auto __r =3D __builtin_shufflevector(__x, __y, [=3D] { + constexpr int __j =3D __idx_perm(_Is); + static_assert(__j < _N0 + _N1); + return __j; + }()...); +#ifdef __i386__ + if constexpr (sizeof(__r) =3D=3D sizeof(_RV)) + return __r; + else + return _RV {__r[_Is]...}; +#else + return __r; +#endif + } #else - using _Tp =3D __remove_cvref_t; - return __vector_type_t<_Tp, sizeof...(_Is)> { 
+ return _RV { [=3D]() -> _Tp { constexpr int __j =3D __idx_perm(_Is); static_assert(__j < _N0 + _N1); @@ -4393,9 +4410,9 @@ for (unsigned __j =3D 0; __j < __i; ++__j) __vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(), [](int __i) { constexpr int __sizes[2] =3D {int(simd_size_v<_Tp, _As>)...}; - constexpr int __padding0 - =3D sizeof(__vector_type_t<_Tp, __sizes[0= ]>) / sizeof(_Tp) - - __sizes[0]; + constexpr int __vsizes[2] + =3D {int(sizeof(__as_vector(__xs)) / sizeof(_Tp))...}; + constexpr int __padding0 =3D __vsizes[0] - __sizes[0]; return __i >=3D _Np ? -1 : __i < __sizes[0]= ? __i : __i + __padding0; })}; }=