From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <redi@sourceware.org>
Received: by sourceware.org (Postfix, from userid 2181)
 id 30094385DC33; Thu, 24 Jun 2021 13:34:13 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 30094385DC33
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Jonathan Wakely <redi@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org
Subject: [gcc r12-1780] libstdc++: Improve copysign(simd) codegen
X-Act-Checkin: gcc
X-Git-Author: Matthias Kretz <m.kretz@gsi.de>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 07ba52849ffca26a3d461f94921b23a9cdbaea7f
X-Git-Newrev: 0237aa8c706f09bb2568224afcdac40b5e47c3e1
Message-Id: <20210624133413.30094385DC33@sourceware.org>
Date: Thu, 24 Jun 2021 13:34:13 +0000 (GMT)
X-BeenThere: libstdc++-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Libstdc++-cvs mailing list <libstdc++-cvs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/libstdc++-cvs>,
 <mailto:libstdc++-cvs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/libstdc++-cvs/>
List-Help: <mailto:libstdc++-cvs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/libstdc++-cvs>,
 <mailto:libstdc++-cvs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Thu, 24 Jun 2021 13:34:13 -0000

https://gcc.gnu.org/g:0237aa8c706f09bb2568224afcdac40b5e47c3e1

commit r12-1780-g0237aa8c706f09bb2568224afcdac40b5e47c3e1
Author: Matthias Kretz <m.kretz@gsi.de>
Date:   Thu Jun 24 14:20:13 2021 +0100

    libstdc++: Improve copysign(simd) codegen
    
    This also resolves a test failure on aarch64 with -ffast-math and
    fixed_size<N> with large N.
    
    Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
    
    libstdc++-v3/ChangeLog:
    
            * include/experimental/bits/simd.h: Add missing operator~
            overload for simd<floating-point> to __float_bitwise_operators.
            * include/experimental/bits/simd_builtin.h
            (_SimdImplBuiltin::_S_complement): Bitcast to int (and back) to
            implement complement for floating-point vectors.
            * include/experimental/bits/simd_fixed_size.h
            (_SimdImplFixedSize::_S_copysign): New function, forwarding to
            copysign implementation of _SimdTuple members.
            * include/experimental/bits/simd_math.h (copysign): Call
            _SimdImpl::_S_copysign for fixed_size arguments. Simplify
            generic copysign implementation using the new ~ operator.

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h            | 6 ++++++
 libstdc++-v3/include/experimental/bits/simd_builtin.h    | 7 ++++++-
 libstdc++-v3/include/experimental/bits/simd_fixed_size.h | 2 +-
 libstdc++-v3/include/experimental/bits/simd_math.h       | 4 +++-
 4 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 59ddf3cc958..163f1b574e2 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -5189,6 +5189,12 @@ template <typename _Tp, typename _Ap>
     return {__private_init,
 	    _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))};
   }
+
+template <typename _Tp, typename _Ap>
+  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
+  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
+  operator~(const simd<_Tp, _Ap>& __a)
+  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
 } // namespace __float_bitwise_operators }}}
 
 _GLIBCXX_SIMD_END_NAMESPACE
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index e986ee91620..8cd338e313f 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1632,7 +1632,12 @@ template <typename _Abi>
     template <typename _Tp, size_t _Np>
       _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
       _S_complement(_SimdWrapper<_Tp, _Np> __x) noexcept
-      { return ~__x._M_data; }
+      {
+	if constexpr (is_floating_point_v<_Tp>)
+	  return __vector_bitcast<_Tp>(~__vector_bitcast<__int_for_sizeof_t<_Tp>>(__x));
+	else
+	  return ~__x._M_data;
+      }
 
     // _S_unary_minus {{{2
     template <typename _Tp, size_t _Np>
diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
index 2722055c899..7c2c1df77c8 100644
--- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
+++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
@@ -1663,7 +1663,7 @@ template <int _Np>
     _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp)
     _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod)
     _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder)
-    // copysign in simd_math.h
+    _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, copysign)
     _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter)
     _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim)
     _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax)
diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h
index 4799803a200..d954e761eee 100644
--- a/libstdc++-v3/include/experimental/bits/simd_math.h
+++ b/libstdc++-v3/include/experimental/bits/simd_math.h
@@ -1304,6 +1304,8 @@ template <typename _Tp, typename _Abi>
   {
     if constexpr (simd_size_v<_Tp, _Abi> == 1)
       return std::copysign(__x[0], __y[0]);
+    else if constexpr (__is_fixed_size_abi_v<_Abi>)
+      return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))};
     else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
       // Remove this case once __bit_cast is implemented via __builtin_bit_cast.
       // It is necessary, because __signmask below cannot be computed at compile
@@ -1315,7 +1317,7 @@ template <typename _Tp, typename _Abi>
 	using _V = simd<_Tp, _Abi>;
 	using namespace std::experimental::__float_bitwise_operators;
 	_GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
-	return (__x & (__x ^ __signmask)) | (__y & __signmask);
+	return (__x & ~__signmask) | (__y & __signmask);
       }
   }