public inbox for libstdc++-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-7076] libstdc++: Fix mask reduction of simd_mask<double> on POWER7
@ 2021-02-03 15:50 Jonathan Wakely
0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2021-02-03 15:50 UTC (permalink / raw)
To: gcc-cvs, libstdc++-cvs
https://gcc.gnu.org/g:81c2c32de9c1058c33fcf77ada31186b4ae1f1fe
commit r11-7076-g81c2c32de9c1058c33fcf77ada31186b4ae1f1fe
Author: Matthias Kretz <kretz@kde.org>
Date: Wed Feb 3 15:49:30 2021 +0000
libstdc++: Fix mask reduction of simd_mask<double> on POWER7
POWER7 does not support __vector long long reductions, making the
generic _S_popcount implementation ill-formed. Specializing _S_popcount
for PPC allows optimization and avoids the issue.
libstdc++-v3/ChangeLog:
* include/experimental/bits/simd.h: Add __have_power10vec
conditional on _ARCH_PWR10.
* include/experimental/bits/simd_builtin.h: Forward declare
_MaskImplPpc and use it as _MaskImpl when __ALTIVEC__ is
defined.
(_MaskImplBuiltin::_S_some_of): Call _S_popcount from the
_SuperImpl for optimizations and correctness.
* include/experimental/bits/simd_ppc.h: Add _MaskImplPpc.
(_MaskImplPpc::_S_popcount): Implement via vec_cntm for POWER10.
Otherwise, for element types with sizeof >= sizeof(int), use
-vec_sums divided by sizeof(_Tp) / sizeof(int). For narrower element
types, use -vec_sums(vec_sum4s(...)) to sum all mask entries.
Diff:
---
libstdc++-v3/include/experimental/bits/simd.h | 5 ++++
.../include/experimental/bits/simd_builtin.h | 6 ++--
libstdc++-v3/include/experimental/bits/simd_ppc.h | 35 +++++++++++++++++++++-
3 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 149396d6f82..becd1d6a4bb 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -477,6 +477,11 @@ constexpr inline bool __support_neon_float =
false;
#endif
+#ifdef _ARCH_PWR10
+constexpr inline bool __have_power10vec = true;
+#else
+constexpr inline bool __have_power10vec = false;
+#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index efca65fa6e3..7f728a10488 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -920,6 +920,7 @@ template <typename _Abi> struct _MaskImplX86;
template <typename _Abi> struct _SimdImplNeon;
template <typename _Abi> struct _MaskImplNeon;
template <typename _Abi> struct _SimdImplPpc;
+template <typename _Abi> struct _MaskImplPpc;
// simd_abi::_VecBuiltin {{{
template <int _UsedBytes>
@@ -959,10 +960,11 @@ template <int _UsedBytes>
using _CommonImpl = _CommonImplBuiltin;
#ifdef __ALTIVEC__
using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>;
+ using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>;
#else
using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>;
-#endif
using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>;
+#endif
#endif
// }}}
@@ -2899,7 +2901,7 @@ template <typename _Abi>
_GLIBCXX_SIMD_INTRINSIC static bool
_S_some_of(simd_mask<_Tp, _Abi> __k)
{
- const int __n_true = _S_popcount(__k);
+ const int __n_true = _SuperImpl::_S_popcount(__k);
return __n_true > 0 && __n_true < int(_S_size<_Tp>);
}
diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h
index b92fc19ccb8..ef52d129a85 100644
--- a/libstdc++-v3/include/experimental/bits/simd_ppc.h
+++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h
@@ -30,6 +30,7 @@
#ifndef __ALTIVEC__
#error "simd_ppc.h may only be included when AltiVec/VMX is available"
#endif
+#include <altivec.h>
_GLIBCXX_SIMD_BEGIN_NAMESPACE
@@ -114,10 +115,42 @@ template <typename _Abi>
// }}}
};
+// }}}
+// _MaskImplPpc {{{
+template <typename _Abi>
+ struct _MaskImplPpc : _MaskImplBuiltin<_Abi>
+ {
+ using _Base = _MaskImplBuiltin<_Abi>;
+
+ // _S_popcount {{{
+ template <typename _Tp>
+ _GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k)
+ {
+ const auto __kv = __as_vector(__k);
+ if constexpr (__have_power10vec)
+ {
+ return vec_cntm(__to_intrin(__kv), 1);
+ }
+ else if constexpr (sizeof(_Tp) >= sizeof(int))
+ {
+ using _Intrin = __intrinsic_type16_t<int>;
+ const int __sum = -vec_sums(__intrin_bitcast<_Intrin>(__kv), _Intrin())[3];
+ return __sum / (sizeof(_Tp) / sizeof(int));
+ }
+ else
+ {
+ const auto __summed_to_int = vec_sum4s(__to_intrin(__kv), __intrinsic_type16_t<int>());
+ return -vec_sums(__summed_to_int, __intrinsic_type16_t<int>())[3];
+ }
+ }
+
+ // }}}
+ };
+
// }}}
_GLIBCXX_SIMD_END_NAMESPACE
#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
-// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
+// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-02-03 15:50 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-03 15:50 [gcc r11-7076] libstdc++: Fix mask reduction of simd_mask<double> on POWER7 Jonathan Wakely
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).