From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lxmtout1.gsi.de (lxmtout1.gsi.de [140.181.3.111]) by sourceware.org (Postfix) with ESMTPS id 995BE3838011; Tue, 8 Jun 2021 12:11:27 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 995BE3838011 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=gsi.de Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gsi.de Received: from localhost (localhost [127.0.0.1]) by lxmtout1.gsi.de (Postfix) with ESMTP id A81262050D0A; Tue, 8 Jun 2021 14:11:26 +0200 (CEST) X-Virus-Scanned: Debian amavisd-new at lxmtout1.gsi.de Received: from lxmtout1.gsi.de ([127.0.0.1]) by localhost (lxmtout1.gsi.de [127.0.0.1]) (amavisd-new, port 10024) with LMTP id FPz3Eji6stxQ; Tue, 8 Jun 2021 14:11:26 +0200 (CEST) Received: from srvex3.campus.gsi.de (unknown [10.10.4.16]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-SHA256 (128/128 bits)) (No client certificate requested) by lxmtout1.gsi.de (Postfix) with ESMTPS id 8D3452050D00; Tue, 8 Jun 2021 14:11:26 +0200 (CEST) Received: from excalibur.localnet (140.181.3.12) by srvex3.campus.gsi.de (10.10.4.16) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256_P256) id 15.1.2242.10; Tue, 8 Jun 2021 14:11:26 +0200 From: Matthias Kretz To: , Subject: [PATCH 01/11] libstdc++: Improve copysign codegen Date: Tue, 8 Jun 2021 14:11:25 +0200 Message-ID: <1909428.ZsUzbmURod@excalibur> Organization: GSI Helmholtzzentrum =?UTF-8?B?ZsO8cg==?= Schwerionenforschung In-Reply-To: <270527782.u9WJ3AIrlG@excalibur> References: <270527782.u9WJ3AIrlG@excalibur> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="nextPart22545888.vB34APBZVq" Content-Transfer-Encoding: 7Bit X-Originating-IP: [140.181.3.12] X-ClientProxiedBy: srvex4.Campus.gsi.de (10.10.4.36) To srvex3.campus.gsi.de (10.10.4.16) X-Spam-Status: No, score=-10.3 required=5.0 tests=BAYES_00, BODY_8BITS, GIT_PATCH_0, KAM_DMARC_STATUS, SPF_HELO_PASS, SPF_PASS, TXREP, URIBL_SBL, URIBL_SBL_A autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 08 Jun 2021 12:11:29 -0000 --nextPart22545888.vB34APBZVq Content-Transfer-Encoding: base64 Content-Type: text/plain; charset="UTF-8" CgpGcm9tOiBNYXR0aGlhcyBLcmV0eiA8a3JldHpAa2RlLm9yZz4KClRoaXMgYWxzbyByZXNvbHZl cyBhIHRlc3QgZmFpbHVyZSBvbiBhYXJjaDY0IHdpdGggLWZmYXN0LW1hdGggYW5kCmZpeGVkX3Np emU8Tj4gd2l0aCBsYXJnZSBOLgoKU2lnbmVkLW9mZi1ieTogTWF0dGhpYXMgS3JldHogPG0ua3Jl dHpAZ3NpLmRlPgoKbGlic3RkYysrLXYzL0NoYW5nZUxvZzoKCgkqIGluY2x1ZGUvZXhwZXJpbWVu dGFsL2JpdHMvc2ltZC5oOiBBZGQgbWlzc2luZyBvcGVyYXRvcn4KCW92ZXJsb2FkIGZvciBzaW1k PGZsb2F0aW5nLXBvaW50PiB0byBfX2Zsb2F0X2JpdHdpc2Vfb3BlcmF0b3JzLgoJKiBpbmNsdWRl L2V4cGVyaW1lbnRhbC9iaXRzL3NpbWRfYnVpbHRpbi5oCgkoX1NpbWRJbXBsQnVpbHRpbjo6X1Nf Y29tcGxlbWVudCk6IEJpdGNhc3QgdG8gaW50IChhbmQgYmFjaykgdG8KCWltcGxlbWVudCBjb21w bGVtZW50IGZvciBmbG9hdGluZy1wb2ludCB2ZWN0b3JzLgoJKiBpbmNsdWRlL2V4cGVyaW1lbnRh bC9iaXRzL3NpbWRfZml4ZWRfc2l6ZS5oCgkoX1NpbWRJbXBsRml4ZWRTaXplOjpfU19jb3B5c2ln bik6IE5ldyBmdW5jdGlvbiwgZm9yd2FyZGluZyB0bwoJY29weXNpZ24gaW1wbGVtZW50YXRpb24g b2YgX1NpbWRUdXBsZSBtZW1iZXJzLgoJKiBpbmNsdWRlL2V4cGVyaW1lbnRhbC9iaXRzL3NpbWRf bWF0aC5oIChjb3B5c2lnbik6IENhbGwKCV9TaW1kSW1wbDo6X1NfY29weXNpZ24gZm9yIGZpeGVk X3NpemUgYXJndW1lbnRzLiBTaW1wbGlmeQoJZ2VuZXJpYyBjb3B5c2lnbiBpbXBsZW1lbnRhdGlv biB1c2luZyB0aGUgbmV3IH4gb3BlcmF0b3IuCi0tLQogbGlic3RkYysrLXYzL2luY2x1ZGUvZXhw ZXJpbWVudGFsL2JpdHMvc2ltZC5oICAgICAgICAgICAgfCA2ICsrKysrKwogbGlic3RkYysrLXYz L2luY2x1ZGUvZXhwZXJpbWVudGFsL2JpdHMvc2ltZF9idWlsdGluLmggICAgfCA3ICsrKysrKy0K IGxpYnN0ZGMrKy12My9pbmNsdWRlL2V4cGVyaW1lbnRhbC9iaXRzL3NpbWRfZml4ZWRfc2l6ZS5o IHwgMiArLQogbGlic3RkYysrLXYzL2luY2x1ZGUvZXhwZXJpbWVudGFsL2JpdHMvc2ltZF9tYXRo LmggICAgICAgfCA0ICsrKy0KIDQgZmlsZXMgY2hhbmdlZCwgMTYgaW5zZXJ0aW9ucygrKSwgMyBk ZWxldGlvbnMoLSkKCgotLQrilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDi lIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDi lIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDi lIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDi lIDilIDilIDilIAKIERyLiBNYXR0aGlhcyBLcmV0eiAgICAgICAgICAgICAgICAgICAgICAgICAg IGh0dHBzOi8vbWF0dGtyZXR6LmdpdGh1Yi5pbwogR1NJIEhlbG1ob2x0eiBDZW50cmUgZm9yIEhl YXZ5IElvbiBSZXNlYXJjaCAgICAgICAgICAgICAgIGh0dHBzOi8vZ3NpLmRlCiBzdGQ6OmV4cGVy aW1lbnRhbDo6c2ltZCAgICAgICAgICAgICAgaHR0cHM6Ly9naXRodWIuY29tL1ZjRGV2ZWwvc3Rk LXNpbWQK4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA 4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA 4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA 4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA --nextPart22545888.vB34APBZVq Content-Disposition: inline; filename="0001-libstdc-Improve-copysign-codegen.patch" Content-Transfer-Encoding: 7Bit Content-Type: text/x-patch; charset="utf-8"; name="0001-libstdc-Improve-copysign-codegen.patch" diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index 59ddf3cc958..163f1b574e2 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -5189,6 +5189,12 @@ template return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; } + +template + _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR + enable_if_t, simd<_Tp, _Ap>> + operator~(const simd<_Tp, _Ap>& __a) + { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; } } // namespace __float_bitwise_operators }}} _GLIBCXX_SIMD_END_NAMESPACE diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h index e986ee91620..8cd338e313f 100644 --- a/libstdc++-v3/include/experimental/bits/simd_builtin.h +++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h @@ -1632,7 +1632,12 @@ template template _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> _S_complement(_SimdWrapper<_Tp, _Np> __x) noexcept - { return ~__x._M_data; } + { + if constexpr (is_floating_point_v<_Tp>) + return __vector_bitcast<_Tp>(~__vector_bitcast<__int_for_sizeof_t<_Tp>>(__x)); + else + return ~__x._M_data; + } // _S_unary_minus {{{2 template diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h index 2722055c899..7c2c1df77c8 100644 --- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h +++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h @@ -1663,7 +1663,7 @@ template _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp) _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod) _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder) - // copysign in simd_math.h + _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, copysign) _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter) _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim) _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax) diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h index 4799803a200..d954e761eee 100644 --- a/libstdc++-v3/include/experimental/bits/simd_math.h +++ b/libstdc++-v3/include/experimental/bits/simd_math.h @@ -1304,6 +1304,8 @@ template { if constexpr (simd_size_v<_Tp, _Abi> == 1) return std::copysign(__x[0], __y[0]); + else if constexpr (__is_fixed_size_abi_v<_Abi>) + return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))}; else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12) // Remove this case once __bit_cast is implemented via __builtin_bit_cast. // It is necessary, because __signmask below cannot be computed at compile @@ -1315,7 +1317,7 @@ template using _V = simd<_Tp, _Abi>; using namespace std::experimental::__float_bitwise_operators; _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1); - return (__x & (__x ^ __signmask)) | (__y & __signmask); + return (__x & ~__signmask) | (__y & __signmask); } } --nextPart22545888.vB34APBZVq--