public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "mkretz at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug libstdc++/114958] use __builtin_shufflevector for std::experimental::simd split and concat (at least the common cases) to enable better optimizations
Date: Wed, 15 May 2024 10:04:50 +0000	[thread overview]
Message-ID: <bug-114958-4-fOGDDxLSAu@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-114958-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114958

--- Comment #6 from Matthias Kretz (Vir) <mkretz at gcc dot gnu.org> ---
The last commit introduced a regression on i686, where __builtin_shufflevector
was producing 8-byte MMX vectors (which can corrupt x87 FPU state, because the
MMX registers alias the x87 register stack). Here is an untested patch that
resolves the issue:

libstdc++-v3/ChangeLog:

        PR libstdc++/114958
        * include/experimental/bits/simd.h (__as_vector): Don't use
        vector_size(8) on __i386__.
        (__vec_shuffle): Never return MMX vectors, widen to 16 bytes
        instead.
        (concat): Fix padding calculation to pick up widening logic from
        __as_vector.

diff --git a/libstdc++-v3/include/experimental/bits/simd.h
b/libstdc++-v3/include/experimental/bits/simd.h
index 6a6fd4f109d..63951df488c 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1665,7 +1665,12 @@ __as_vector(_V __x)
          {
            static_assert(is_simd<_V>::value);
            using _Tp = typename _V::value_type;
+#ifdef __i386__
+           constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
+           using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
+#else
            using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+#endif
            return _RV{__data(__x)};
          }
       }
@@ -2081,11 +2086,14 @@ __not(_Tp __a) noexcept
 // }}}
 // __vec_shuffle{{{
 template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
-  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  _GLIBCXX_SIMD_INTRINSIC constexpr
+  __vector_type_t<typename _VectorTraits<_T0>::value_type, sizeof...(_Is)>
   __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun
__idx_perm)
   {
     constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
     constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+    using _Tp = typename _VectorTraits<_T0>::value_type;
+    using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
 #if __has_builtin(__builtin_shufflevector)
 #ifdef __clang__
     // Clang requires _T0 == _T1
@@ -2105,14 +2113,23 @@ __not(_Tp __a) noexcept
             });
     else
 #endif
-      return __builtin_shufflevector(__x, __y, [=] {
-              constexpr int __j = __idx_perm(_Is);
-              static_assert(__j < _N0 + _N1);
-              return __j;
-            }()...);
+      {
+       const auto __r = __builtin_shufflevector(__x, __y, [=] {
+                          constexpr int __j = __idx_perm(_Is);
+                          static_assert(__j < _N0 + _N1);
+                          return __j;
+                        }()...);
+#ifdef __i386__
+       if constexpr (sizeof(__r) == sizeof(_RV))
+         return __r;
+       else
+         return _RV {__r[_Is]...};
+#else
+       return __r;
+#endif
+      }
 #else
-    using _Tp = __remove_cvref_t<decltype(__x[0])>;
-    return __vector_type_t<_Tp, sizeof...(_Is)> {
+    return _RV {
       [=]() -> _Tp {
        constexpr int __j = __idx_perm(_Is);
        static_assert(__j < _N0 + _N1);
@@ -4393,9 +4410,9 @@ for (unsigned __j = 0; __j < __i; ++__j)
                __vec_shuffle(__as_vector(__xs)...,
std::make_index_sequence<_RW::_S_full_size>(),
                              [](int __i) {
                                constexpr int __sizes[2] =
{int(simd_size_v<_Tp, _As>)...};
-                               constexpr int __padding0
-                                 = sizeof(__vector_type_t<_Tp, __sizes[0]>) /
sizeof(_Tp)
-                                     - __sizes[0];
+                               constexpr int __vsizes[2]
+                                 = {int(sizeof(__as_vector(__xs)) /
sizeof(_Tp))...};
+                               constexpr int __padding0 = __vsizes[0] -
__sizes[0];
                                return __i >= _Np ? -1 : __i < __sizes[0] ? __i
: __i + __padding0;
                              })};
       }

  parent reply	other threads:[~2024-05-15 10:04 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-06 10:29 [Bug libstdc++/114958] New: " mkretz at gcc dot gnu.org
2024-05-06 10:35 ` [Bug libstdc++/114958] " mkretz at gcc dot gnu.org
2024-05-06 10:45 ` jakub at gcc dot gnu.org
2024-05-06 11:08 ` mkretz at gcc dot gnu.org
2024-05-06 11:24 ` jakub at gcc dot gnu.org
2024-05-13 11:41 ` cvs-commit at gcc dot gnu.org
2024-05-15 10:04 ` mkretz at gcc dot gnu.org [this message]
2024-05-29  7:03 ` mkretz at gcc dot gnu.org
2024-06-10  5:58 ` cvs-commit at gcc dot gnu.org
2024-06-11  9:50 ` cvs-commit at gcc dot gnu.org
2024-06-20 12:44 ` cvs-commit at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-114958-4-fOGDDxLSAu@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).