public inbox for libstdc++-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-7076] libstdc++: Fix mask reduction of simd_mask<double> on POWER7
@ 2021-02-03 15:50 Jonathan Wakely
  0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2021-02-03 15:50 UTC (permalink / raw)
  To: gcc-cvs, libstdc++-cvs

https://gcc.gnu.org/g:81c2c32de9c1058c33fcf77ada31186b4ae1f1fe

commit r11-7076-g81c2c32de9c1058c33fcf77ada31186b4ae1f1fe
Author: Matthias Kretz <kretz@kde.org>
Date:   Wed Feb 3 15:49:30 2021 +0000

    libstdc++: Fix mask reduction of simd_mask<double> on POWER7
    
    POWER7 does not support __vector long long reductions, making the
    generic _S_popcount implementation ill-formed. Specializing _S_popcount
    for PPC allows optimization and avoids the issue.
    
    libstdc++-v3/ChangeLog:
    
            * include/experimental/bits/simd.h: Add __have_power10vec
            conditional on _ARCH_PWR10.
            * include/experimental/bits/simd_builtin.h: Forward declare
            _MaskImplPpc and use it as _MaskImpl when __ALTIVEC__ is
            defined.
            (_MaskImplBuiltin::_S_some_of): Call _S_popcount from the
            _SuperImpl for optimizations and correctness.
            * include/experimental/bits/simd_ppc.h: Add _MaskImplPpc.
            (_MaskImplPpc::_S_popcount): Implement via vec_cntm for POWER10.
            Otherwise, for >=int use -vec_sums divided by a sizeof factor.
            For <int use -vec_sums(vec_sum4s(...)) to sum all mask entries.

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h      |  5 ++++
 .../include/experimental/bits/simd_builtin.h       |  6 ++--
 libstdc++-v3/include/experimental/bits/simd_ppc.h  | 35 +++++++++++++++++++++-
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 149396d6f82..becd1d6a4bb 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -477,6 +477,11 @@ constexpr inline bool __support_neon_float =
   false;
 #endif
 
+#ifdef _ARCH_PWR10
+constexpr inline bool __have_power10vec = true;
+#else
+constexpr inline bool __have_power10vec = false;
+#endif
 #ifdef __POWER9_VECTOR__
 constexpr inline bool __have_power9vec = true;
 #else
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index efca65fa6e3..7f728a10488 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -920,6 +920,7 @@ template <typename _Abi> struct _MaskImplX86;
 template <typename _Abi> struct _SimdImplNeon;
 template <typename _Abi> struct _MaskImplNeon;
 template <typename _Abi> struct _SimdImplPpc;
+template <typename _Abi> struct _MaskImplPpc;
 
 // simd_abi::_VecBuiltin {{{
 template <int _UsedBytes>
@@ -959,10 +960,11 @@ template <int _UsedBytes>
     using _CommonImpl = _CommonImplBuiltin;
 #ifdef __ALTIVEC__
     using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>;
+    using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>;
 #else
     using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>;
-#endif
     using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>;
+#endif
 #endif
 
     // }}}
@@ -2899,7 +2901,7 @@ template <typename _Abi>
       _GLIBCXX_SIMD_INTRINSIC static bool
       _S_some_of(simd_mask<_Tp, _Abi> __k)
       {
-	const int __n_true = _S_popcount(__k);
+	const int __n_true = _SuperImpl::_S_popcount(__k);
 	return __n_true > 0 && __n_true < int(_S_size<_Tp>);
       }
 
diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h
index b92fc19ccb8..ef52d129a85 100644
--- a/libstdc++-v3/include/experimental/bits/simd_ppc.h
+++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h
@@ -30,6 +30,7 @@
 #ifndef __ALTIVEC__
 #error "simd_ppc.h may only be included when AltiVec/VMX is available"
 #endif
+#include <altivec.h>
 
 _GLIBCXX_SIMD_BEGIN_NAMESPACE
 
@@ -114,10 +115,42 @@ template <typename _Abi>
     // }}}
   };
 
+// }}}
+// _MaskImplPpc {{{
+template <typename _Abi>
+  struct _MaskImplPpc : _MaskImplBuiltin<_Abi>
+  {
+    using _Base = _MaskImplBuiltin<_Abi>;
+
+    // _S_popcount {{{
+    template <typename _Tp>
+      _GLIBCXX_SIMD_INTRINSIC static int _S_popcount(simd_mask<_Tp, _Abi> __k)
+      {
+	const auto __kv = __as_vector(__k);
+	if constexpr (__have_power10vec)
+	  {
+	    return vec_cntm(__to_intrin(__kv), 1);
+	  }
+	else if constexpr (sizeof(_Tp) >= sizeof(int))
+	  {
+	    using _Intrin = __intrinsic_type16_t<int>;
+	    const int __sum = -vec_sums(__intrin_bitcast<_Intrin>(__kv), _Intrin())[3];
+	    return __sum / (sizeof(_Tp) / sizeof(int));
+	  }
+	else
+	  {
+	    const auto __summed_to_int = vec_sum4s(__to_intrin(__kv), __intrinsic_type16_t<int>());
+	    return -vec_sums(__summed_to_int, __intrinsic_type16_t<int>())[3];
+	  }
+      }
+
+    // }}}
+  };
+
 // }}}
 
 _GLIBCXX_SIMD_END_NAMESPACE
 #endif // __cplusplus >= 201703L
 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_PPC_H_
 
-// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
+// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-02-03 15:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-03 15:50 [gcc r11-7076] libstdc++: Fix mask reduction of simd_mask<double> on POWER7 Jonathan Wakely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).