* [PATCH] libstdc++: Use __builtin_shufflevector for simd split and concat
@ 2024-05-07 13:41 Matthias Kretz
2024-05-13 10:43 ` Jonathan Wakely
0 siblings, 1 reply; 2+ messages in thread
From: Matthias Kretz @ 2024-05-07 13:41 UTC (permalink / raw)
To: gcc-patches, libstdc++
[-- Attachment #1: Type: text/plain, Size: 2072 bytes --]
Tested on x86_64-linux-gnu and aarch64-linux-gnu, and with Clang 18 on
x86_64-linux-gnu.
OK for trunk and backport(s)?
---------------------- 8< ----------------------------
Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
libstdc++-v3/ChangeLog:
PR libstdc++/114958
* include/experimental/bits/simd.h (__as_vector): Return scalar
simd as one-element vector. Return vector from single-vector
fixed_size simd.
(__vec_shuffle): New.
(__extract_part): Adjust return type signature.
(split): Use __extract_part for any split into non-fixed_size
simds.
(concat): If the return type stores a single vector, use
__vec_shuffle (which calls __builtin_shufflevector) to produce
the return value.
* include/experimental/bits/simd_builtin.h
(__shift_elements_right): Removed.
(__extract_part): Return single elements directly. Use
__vec_shuffle (which calls __builtin_shufflevector) for all
non-trivial cases.
* include/experimental/bits/simd_fixed_size.h (__extract_part):
Return single elements directly.
* testsuite/experimental/simd/pr114958.cc: New test.
---
libstdc++-v3/include/experimental/bits/simd.h | 161 +++++++++++++-----
.../include/experimental/bits/simd_builtin.h | 152 +----------------
.../experimental/bits/simd_fixed_size.h | 4 +-
.../testsuite/experimental/simd/pr114958.cc | 20 +++
4 files changed, 145 insertions(+), 192 deletions(-)
create mode 100644 libstdc++-v3/testsuite/experimental/simd/pr114958.cc
--
──────────────────────────────────────────────────────────────────────────
Dr. Matthias Kretz https://mattkretz.github.io
GSI Helmholtz Centre for Heavy Ion Research https://gsi.de
stdₓ::simd
──────────────────────────────────────────────────────────────────────────
[-- Attachment #2: 0001-libstdc-Use-__builtin_shufflevector-for-simd-split-a.patch --]
[-- Type: text/x-patch, Size: 17846 bytes --]
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 6ef9c955cfa..6a6fd4f109d 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1651,7 +1651,24 @@ __as_vector(_V __x)
if constexpr (__is_vector_type_v<_V>)
return __x;
else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
- return __data(__x)._M_data;
+ {
+ if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
+ {
+ static_assert(is_simd<_V>::value);
+ static_assert(_V::abi_type::template __traits<
+ typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
+ return __as_vector(__data(__x).first);
+ }
+ else if constexpr (_V::size() > 1)
+ return __data(__x)._M_data;
+ else
+ {
+ static_assert(is_simd<_V>::value);
+ using _Tp = typename _V::value_type;
+ using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+ return _RV{__data(__x)};
+ }
+ }
else if constexpr (__is_vectorizable_v<_V>)
return __vector_type_t<_V, 2>{__x};
else
@@ -2061,6 +2078,60 @@ __not(_Tp __a) noexcept
return ~__a;
}
+// }}}
+// __vec_shuffle{{{
+template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
+ _GLIBCXX_SIMD_INTRINSIC constexpr auto
+ __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
+ {
+ constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
+ constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+#if __has_builtin(__builtin_shufflevector)
+#ifdef __clang__
+ // Clang requires _T0 == _T1
+ if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
+ return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
+ else if constexpr (sizeof(__x) > sizeof(__y))
+ return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
+ else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
+ return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
+ __i = __idx_perm(__i);
+ return __i < _N0 ? __i : __i - _N0 + _N1;
+ });
+ else if constexpr (sizeof(__x) < sizeof(__y))
+ return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
+ __i = __idx_perm(__i);
+ return __i < _N0 ? __i : __i - _N0 + _N1;
+ });
+ else
+#endif
+ return __builtin_shufflevector(__x, __y, [=] {
+ constexpr int __j = __idx_perm(_Is);
+ static_assert(__j < _N0 + _N1);
+ return __j;
+ }()...);
+#else
+ using _Tp = __remove_cvref_t<decltype(__x[0])>;
+ return __vector_type_t<_Tp, sizeof...(_Is)> {
+ [=]() -> _Tp {
+ constexpr int __j = __idx_perm(_Is);
+ static_assert(__j < _N0 + _N1);
+ if constexpr (__j < 0)
+ return 0;
+ else if constexpr (__j < _N0)
+ return __x[__j];
+ else
+ return __y[__j - _N0];
+ }()...
+ };
+#endif
+ }
+
+template <typename _T0, typename _Fun, typename _Seq>
+ _GLIBCXX_SIMD_INTRINSIC constexpr auto
+ __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
+ { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
+
// }}}
// __concat{{{
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
@@ -3947,7 +4018,7 @@ clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap
// __extract_part {{{
template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
_GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
- _SimdWrapper<_Tp, _Np / _Total * _Combine>
+ conditional_t<_Np == _Total and _Combine == 1, _Tp, _SimdWrapper<_Tp, _Np / _Total * _Combine>>
__extract_part(const _SimdWrapper<_Tp, _Np> __x);
template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
@@ -4231,48 +4302,21 @@ static_assert(
__split_wrapper(_SL::template _S_pop_front<1>(),
__data(__x).second));
}
- else if constexpr ((!is_same_v<simd_abi::scalar,
- simd_abi::deduce_t<_Tp, _Sizes>> && ...)
- && (!__is_fixed_size_abi_v<
- simd_abi::deduce_t<_Tp, _Sizes>> && ...))
+ else if constexpr ((!__is_fixed_size_abi_v<simd_abi::deduce_t<_Tp, _Sizes>> && ...))
{
- if constexpr (((_Sizes * 2 == _Np) && ...))
- return {{__private_init, __extract_part<0, 2>(__data(__x))},
- {__private_init, __extract_part<1, 2>(__data(__x))}};
- else if constexpr (is_same_v<_SizeList<_Sizes...>,
- _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
- return {{__private_init, __extract_part<0, 3>(__data(__x))},
- {__private_init, __extract_part<1, 3>(__data(__x))},
- {__private_init, __extract_part<2, 3>(__data(__x))}};
- else if constexpr (is_same_v<_SizeList<_Sizes...>,
- _SizeList<2 * _Np / 3, _Np / 3>>)
- return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
- {__private_init, __extract_part<2, 3>(__data(__x))}};
- else if constexpr (is_same_v<_SizeList<_Sizes...>,
- _SizeList<_Np / 3, 2 * _Np / 3>>)
- return {{__private_init, __extract_part<0, 3>(__data(__x))},
- {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
- else if constexpr (is_same_v<_SizeList<_Sizes...>,
- _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
- return {{__private_init, __extract_part<0, 2>(__data(__x))},
- {__private_init, __extract_part<2, 4>(__data(__x))},
- {__private_init, __extract_part<3, 4>(__data(__x))}};
- else if constexpr (is_same_v<_SizeList<_Sizes...>,
- _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
- return {{__private_init, __extract_part<0, 4>(__data(__x))},
- {__private_init, __extract_part<1, 4>(__data(__x))},
- {__private_init, __extract_part<1, 2>(__data(__x))}};
- else if constexpr (is_same_v<_SizeList<_Sizes...>,
- _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
- return {{__private_init, __extract_part<0, 4>(__data(__x))},
- {__private_init, __extract_center(__data(__x))},
- {__private_init, __extract_part<3, 4>(__data(__x))}};
- else if constexpr (((_Sizes * 4 == _Np) && ...))
- return {{__private_init, __extract_part<0, 4>(__data(__x))},
- {__private_init, __extract_part<1, 4>(__data(__x))},
- {__private_init, __extract_part<2, 4>(__data(__x))},
- {__private_init, __extract_part<3, 4>(__data(__x))}};
- // else fall through
+ constexpr array<size_t, sizeof...(_Sizes)> __size = {_Sizes...};
+ return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
+ [&](auto __i) constexpr {
+ constexpr size_t __offset = [&]() {
+ size_t __r = 0;
+ for (unsigned __j = 0; __j < __i; ++__j)
+ __r += __size[__j];
+ return __r;
+ }();
+ return __deduced_simd<_Tp, __size[__i]>(
+ __private_init,
+ __extract_part<__offset, _Np, __size[__i]>(__data(__x)));
+ });
}
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
const __may_alias<_Tp>* const __element_ptr
@@ -4334,14 +4378,37 @@ static_assert(
simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
concat(const simd<_Tp, _As>&... __xs)
{
- using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
+ constexpr int _Np = (simd_size_v<_Tp, _As> + ...);
+ using _Abi = simd_abi::deduce_t<_Tp, _Np>;
+ using _Rp = simd<_Tp, _Abi>;
+ using _RW = typename _SimdTraits<_Tp, _Abi>::_SimdMember;
if constexpr (sizeof...(__xs) == 1)
return simd_cast<_Rp>(__xs...);
else if ((... && __xs._M_is_constprop()))
- return simd<_Tp,
- simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
- [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
+ return _Rp([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
{ return __subscript_in_pack<__i>(__xs...); });
+ else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 2)
+ {
+ return {__private_init,
+ __vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(),
+ [](int __i) {
+ constexpr int __sizes[2] = {int(simd_size_v<_Tp, _As>)...};
+ constexpr int __padding0
+ = sizeof(__vector_type_t<_Tp, __sizes[0]>) / sizeof(_Tp)
+ - __sizes[0];
+ return __i >= _Np ? -1 : __i < __sizes[0] ? __i : __i + __padding0;
+ })};
+ }
+ else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 3)
+ return [](const auto& __x0, const auto& __x1, const auto& __x2)
+ _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+ return concat(concat(__x0, __x1), __x2);
+ }(__xs...);
+ else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) > 3)
+ return [](const auto& __x0, const auto& __x1, const auto&... __rest)
+ _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+ return concat(concat(__x0, __x1), concat(__rest...));
+ }(__xs...);
else
{
_Rp __r{};
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 4ceeb423894..505f8083794 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -92,124 +92,16 @@ __wrapper_bitcast(_SimdWrapper<_Up, _M> __x)
return __intrin_bitcast<__vector_type_t<_Tp, _Np>>(__x._M_data);
}
-// }}}
-// __shift_elements_right{{{
-// if (__shift % 2ⁿ == 0) => the low n Bytes are correct
-template <unsigned __shift, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
- _GLIBCXX_SIMD_INTRINSIC _Tp
- __shift_elements_right(_Tp __v)
- {
- [[maybe_unused]] const auto __iv = __to_intrin(__v);
- static_assert(__shift <= sizeof(_Tp));
- if constexpr (__shift == 0)
- return __v;
- else if constexpr (__shift == sizeof(_Tp))
- return _Tp();
-#if _GLIBCXX_SIMD_X86INTRIN // {{{
- else if constexpr (__have_sse && __shift == 8
- && _TVT::template _S_is<float, 4>)
- return _mm_movehl_ps(__iv, __iv);
- else if constexpr (__have_sse2 && __shift == 8
- && _TVT::template _S_is<double, 2>)
- return _mm_unpackhi_pd(__iv, __iv);
- else if constexpr (__have_sse2 && sizeof(_Tp) == 16)
- return reinterpret_cast<typename _TVT::type>(
- _mm_srli_si128(reinterpret_cast<__m128i>(__iv), __shift));
- else if constexpr (__shift == 16 && sizeof(_Tp) == 32)
- {
- /*if constexpr (__have_avx && _TVT::template _S_is<double, 4>)
- return _mm256_permute2f128_pd(__iv, __iv, 0x81);
- else if constexpr (__have_avx && _TVT::template _S_is<float, 8>)
- return _mm256_permute2f128_ps(__iv, __iv, 0x81);
- else if constexpr (__have_avx)
- return reinterpret_cast<typename _TVT::type>(
- _mm256_permute2f128_si256(__iv, __iv, 0x81));
- else*/
- return __zero_extend(__hi128(__v));
- }
- else if constexpr (__have_avx2 && sizeof(_Tp) == 32 && __shift < 16)
- {
- const auto __vll = __vector_bitcast<_LLong>(__v);
- return reinterpret_cast<typename _TVT::type>(
- _mm256_alignr_epi8(_mm256_permute2x128_si256(__vll, __vll, 0x81),
- __vll, __shift));
- }
- else if constexpr (__have_avx && sizeof(_Tp) == 32 && __shift < 16)
- {
- const auto __vll = __vector_bitcast<_LLong>(__v);
- return reinterpret_cast<typename _TVT::type>(
- __concat(_mm_alignr_epi8(__hi128(__vll), __lo128(__vll), __shift),
- _mm_srli_si128(__hi128(__vll), __shift)));
- }
- else if constexpr (sizeof(_Tp) == 32 && __shift > 16)
- return __zero_extend(__shift_elements_right<__shift - 16>(__hi128(__v)));
- else if constexpr (sizeof(_Tp) == 64 && __shift == 32)
- return __zero_extend(__hi256(__v));
- else if constexpr (__have_avx512f && sizeof(_Tp) == 64)
- {
- if constexpr (__shift >= 48)
- return __zero_extend(
- __shift_elements_right<__shift - 48>(__extract<3, 4>(__v)));
- else if constexpr (__shift >= 32)
- return __zero_extend(
- __shift_elements_right<__shift - 32>(__hi256(__v)));
- else if constexpr (__shift % 8 == 0)
- return reinterpret_cast<typename _TVT::type>(
- _mm512_alignr_epi64(__m512i(), __intrin_bitcast<__m512i>(__v),
- __shift / 8));
- else if constexpr (__shift % 4 == 0)
- return reinterpret_cast<typename _TVT::type>(
- _mm512_alignr_epi32(__m512i(), __intrin_bitcast<__m512i>(__v),
- __shift / 4));
- else if constexpr (__have_avx512bw && __shift < 16)
- {
- const auto __vll = __vector_bitcast<_LLong>(__v);
- return reinterpret_cast<typename _TVT::type>(
- _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __vll, 0xf9),
- __vll, __shift));
- }
- else if constexpr (__have_avx512bw && __shift < 32)
- {
- const auto __vll = __vector_bitcast<_LLong>(__v);
- return reinterpret_cast<typename _TVT::type>(
- _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __m512i(), 0xee),
- _mm512_shuffle_i32x4(__vll, __vll, 0xf9),
- __shift - 16));
- }
- else
- __assert_unreachable<_Tp>();
- }
- /*
- } else if constexpr (__shift % 16 == 0 && sizeof(_Tp) == 64)
- return __auto_bitcast(__extract<__shift / 16, 4>(__v));
- */
-#endif // _GLIBCXX_SIMD_X86INTRIN }}}
- else
- {
- constexpr int __chunksize = __shift % 8 == 0 ? 8
- : __shift % 4 == 0 ? 4
- : __shift % 2 == 0 ? 2
- : 1;
- auto __w = __vector_bitcast<__int_with_sizeof_t<__chunksize>>(__v);
- using _Up = decltype(__w);
- return __intrin_bitcast<_Tp>(
- __call_with_n_evaluations<(sizeof(_Tp) - __shift) / __chunksize>(
- [](auto... __chunks) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
- return _Up{__chunks...};
- }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
- return __w[__shift / __chunksize + __i];
- }));
- }
- }
-
// }}}
// __extract_part(_SimdWrapper<_Tp, _Np>) {{{
template <int _Index, int _Total, int _Combine, typename _Tp, size_t _Np>
_GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
- _SimdWrapper<_Tp, _Np / _Total * _Combine>
+ conditional_t<_Np == _Total and _Combine == 1, _Tp, _SimdWrapper<_Tp, _Np / _Total * _Combine>>
__extract_part(const _SimdWrapper<_Tp, _Np> __x)
{
- if constexpr (_Index % 2 == 0 && _Total % 2 == 0 && _Combine % 2 == 0)
+ if constexpr (_Np == _Total and _Combine == 1)
+ return __x[_Index];
+ else if constexpr (_Index % 2 == 0 && _Total % 2 == 0 && _Combine % 2 == 0)
return __extract_part<_Index / 2, _Total / 2, _Combine / 2>(__x);
else
{
@@ -235,39 +127,11 @@ __extract_part(const _SimdWrapper<_Tp, _Np> __x)
return __x;
else if constexpr (_Index == 0)
return __intrin_bitcast<_R>(__as_vector(__x));
-#if _GLIBCXX_SIMD_X86INTRIN // {{{
- else if constexpr (sizeof(__x) == 32
- && __return_size * sizeof(_Tp) <= 16)
- {
- constexpr size_t __bytes_to_skip = __values_to_skip * sizeof(_Tp);
- if constexpr (__bytes_to_skip == 16)
- return __vector_bitcast<_Tp, __return_size>(
- __hi128(__as_vector(__x)));
- else
- return __vector_bitcast<_Tp, __return_size>(
- _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
- __lo128(__vector_bitcast<_LLong>(__x)),
- __bytes_to_skip));
- }
-#endif // _GLIBCXX_SIMD_X86INTRIN }}}
- else if constexpr (_Index > 0
- && (__values_to_skip % __return_size != 0
- || sizeof(_R) >= 8)
- && (__values_to_skip + __return_size) * sizeof(_Tp)
- <= 64
- && sizeof(__x) >= 16)
- return __intrin_bitcast<_R>(
- __shift_elements_right<__values_to_skip * sizeof(_Tp)>(
- __as_vector(__x)));
else
- {
- _R __r = {};
- __builtin_memcpy(&__r,
- reinterpret_cast<const char*>(&__x)
- + sizeof(_Tp) * __values_to_skip,
- __return_size * sizeof(_Tp));
- return __r;
- }
+ return __vec_shuffle(__as_vector(__x), make_index_sequence<__bit_ceil(__return_size)>(),
+ [](size_t __i) {
+ return __i + __values_to_skip;
+ });
}
}
diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
index 40885521297..bdfeefd0632 100644
--- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
+++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
@@ -927,7 +927,9 @@ __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x)
using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>;
// handle (optimize) the simple cases
- if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size)
+ if constexpr (__return_size == 1)
+ return __x[integral_constant<size_t, __values_to_skip>()];
+ else if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size)
return __x.first._M_data;
else if constexpr (_Index == 0 && _Total == _Combine)
return __x;
diff --git a/libstdc++-v3/testsuite/experimental/simd/pr114958.cc b/libstdc++-v3/testsuite/experimental/simd/pr114958.cc
new file mode 100644
index 00000000000..94c9e0a2d18
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr114958.cc
@@ -0,0 +1,20 @@
+// { dg-options "-std=c++17" }
+// { dg-do compile { target x86_64-*-* } }
+// { dg-require-effective-target c++17 }
+// { dg-additional-options "-march=x86-64-v3" { target x86_64-*-* } }
+// { dg-require-cmath "" }
+// { dg-final { scan-assembler-times "vperm(q|pd)\[\\t \]+\\\$144" 1 } }
+
+#include <experimental/simd>
+
+namespace stdx = std::experimental;
+
+using T = std::uint64_t;
+using V = stdx::simd<T, stdx::simd_abi::_VecBuiltin<32>>;
+using V1 = stdx::simd<T, stdx::simd_abi::scalar>;
+
+V perm(V data)
+{
+ auto [carry, _] = stdx::split<3, 1>(data);
+ return concat(V1(), carry);
+}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] libstdc++: Use __builtin_shufflevector for simd split and concat
2024-05-07 13:41 [PATCH] libstdc++: Use __builtin_shufflevector for simd split and concat Matthias Kretz
@ 2024-05-13 10:43 ` Jonathan Wakely
0 siblings, 0 replies; 2+ messages in thread
From: Jonathan Wakely @ 2024-05-13 10:43 UTC (permalink / raw)
To: Matthias Kretz; +Cc: gcc-patches, libstdc++
On Tue, 7 May 2024 at 14:42, Matthias Kretz <m.kretz@gsi.de> wrote:
>
> Tested on x86_64-linux-gnu and aarch64-linux-gnu, and with Clang 18 on
> x86_64-linux-gnu.
>
> OK for trunk and backport(s)?
OK for all.
>
> ---------------------- 8< ----------------------------
>
> Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/114958
> * include/experimental/bits/simd.h (__as_vector): Return scalar
> simd as one-element vector. Return vector from single-vector
> fixed_size simd.
> (__vec_shuffle): New.
> (__extract_part): Adjust return type signature.
> (split): Use __extract_part for any split into non-fixed_size
> simds.
> (concat): If the return type stores a single vector, use
> __vec_shuffle (which calls __builtin_shufflevector) to produce
> the return value.
> * include/experimental/bits/simd_builtin.h
> (__shift_elements_right): Removed.
> (__extract_part): Return single elements directly. Use
> __vec_shuffle (which calls __builtin_shufflevector) for all
> non-trivial cases.
> * include/experimental/bits/simd_fixed_size.h (__extract_part):
> Return single elements directly.
> * testsuite/experimental/simd/pr114958.cc: New test.
> ---
> libstdc++-v3/include/experimental/bits/simd.h | 161 +++++++++++++-----
> .../include/experimental/bits/simd_builtin.h | 152 +----------------
> .../experimental/bits/simd_fixed_size.h | 4 +-
> .../testsuite/experimental/simd/pr114958.cc | 20 +++
> 4 files changed, 145 insertions(+), 192 deletions(-)
> create mode 100644 libstdc++-v3/testsuite/experimental/simd/pr114958.cc
>
>
> --
> ──────────────────────────────────────────────────────────────────────────
> Dr. Matthias Kretz https://mattkretz.github.io
> GSI Helmholtz Centre for Heavy Ion Research https://gsi.de
> stdₓ::simd
> ──────────────────────────────────────────────────────────────────────────
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-05-13 10:43 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-07 13:41 [PATCH] libstdc++: Use __builtin_shufflevector for simd split and concat Matthias Kretz
2024-05-13 10:43 ` Jonathan Wakely
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).