From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lxmtout1.gsi.de (lxmtout1.gsi.de [140.181.3.111]) by sourceware.org (Postfix) with ESMTPS id D244F398B87F; Wed, 27 Jan 2021 20:41:45 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org D244F398B87F Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=gsi.de Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=M.Kretz@gsi.de Received: from localhost (localhost [127.0.0.1]) by lxmtout1.gsi.de (Postfix) with ESMTP id 055A22050D05; Wed, 27 Jan 2021 21:41:45 +0100 (CET) X-Virus-Scanned: Debian amavisd-new at lxmtout1.gsi.de Received: from lxmtout1.gsi.de ([127.0.0.1]) by localhost (lxmtout1.gsi.de [127.0.0.1]) (amavisd-new, port 10024) with LMTP id D3Nsi3KTsh3w; Wed, 27 Jan 2021 21:41:44 +0100 (CET) Received: from srvex3.campus.gsi.de (srvex3.campus.gsi.de [10.10.4.16]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-SHA256 (128/128 bits)) (No client certificate requested) by lxmtout1.gsi.de (Postfix) with ESMTPS id E07002050D00; Wed, 27 Jan 2021 21:41:44 +0100 (CET) Received: from excalibur.localnet (140.181.3.12) by srvex3.campus.gsi.de (10.10.4.16) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256_P256) id 15.1.2106.2; Wed, 27 Jan 2021 21:41:44 +0100 From: Matthias Kretz To: , Subject: [PATCH 02/16] Fix NEON intrinsic types usage Date: Wed, 27 Jan 2021 21:41:44 +0100 Message-ID: <5803344.vAW0rqQIy0@excalibur> Organization: GSI Helmholtzzentrum =?UTF-8?B?ZsO8cg==?= Schwerionenforschung In-Reply-To: <4667217.5jz8CO7rxU@excalibur> References: <4667217.5jz8CO7rxU@excalibur> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="UTF-8" X-Originating-IP: [140.181.3.12] X-ClientProxiedBy: srvex3.Campus.gsi.de (10.10.4.16) To srvex3.campus.gsi.de (10.10.4.16) X-Spam-Status: No, score=-13.4 required=5.0 tests=BAYES_00, BODY_8BITS, GIT_PATCH_0, KAM_DMARC_STATUS, SPF_HELO_PASS, SPF_PASS, 
TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: libstdc++@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libstdc++ mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 27 Jan 2021 20:41:47 -0000 =46rom: Matthias Kretz Intrinsic types for NEON now differ from gnu::vector_size types. This requires explicit specializations for __intrinsic_type and a new __is_intrinsic_type trait. libstdc++-v3/ChangeLog: * include/experimental/bits/simd.h (__is_intrinsic_type): New internal type trait. Alias for __is_vector_type on x86. (_VectorTraitsImpl): Enable for __intrinsic_type in addition to __vector_type. (__intrin_bitcast): Allow casting to & from vector & intrinsic types. (__intrinsic_type): Explicitly specialize for NEON intrinsic vector types. =2D-- libstdc++-v3/include/experimental/bits/simd.h | 70 +++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/ include/experimental/bits/simd.h index 00eec50d64f..d56176210df 100644 =2D-- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -1379,13 +1379,35 @@ template template inline constexpr bool __is_vector_type_v =3D __is_vector_type<_Tp>::valu= e; =20 +// }}} +// __is_intrinsic_type {{{ +#if _GLIBCXX_SIMD_HAVE_SSE_ABI +template + using __is_intrinsic_type =3D __is_vector_type<_Tp>; +#else // not SSE (x86) +template > + struct __is_intrinsic_type : false_type {}; + +template + struct __is_intrinsic_type< + _Tp, void_t()[0])>,=20 sizeof(_Tp)>::type>> + : is_same<_Tp, typename __intrinsic_type< + remove_reference_t()[0])>, + sizeof(_Tp)>::type> {}; +#endif + +template + inline constexpr bool __is_intrinsic_type_v =3D=20 __is_intrinsic_type<_Tp>::value; + // }}} // _VectorTraits{{{ template > struct 
_VectorTraitsImpl; =20 template =2D struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>>> + struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp> + || __is_intrinsic_type_v<_Tp>>> { using type =3D _Tp; using value_type =3D remove_reference_t()[0])>; @@ -1457,7 +1479,8 @@ template _GLIBCXX_SIMD_INTRINSIC constexpr _To __intrin_bitcast(_From __v) { =2D static_assert(__is_vector_type_v<_From> && __is_vector_type_v<_To>); + static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_Fro= m>) + && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>)); if constexpr (sizeof(_To) =3D=3D sizeof(_From)) return reinterpret_cast<_To>(__v); else if constexpr (sizeof(_From) > sizeof(_To)) @@ -2183,16 +2206,55 @@ template #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI // __intrinsic_type (ARM){{{ #if _GLIBCXX_SIMD_HAVE_NEON +template <> + struct __intrinsic_type + { using type =3D float32x2_t; }; + +template <> + struct __intrinsic_type + { using type =3D float32x4_t; }; + +#if _GLIBCXX_SIMD_HAVE_NEON_A64 +template <> + struct __intrinsic_type + { using type =3D float64x1_t; }; + +template <> + struct __intrinsic_type + { using type =3D float64x2_t; }; +#endif + +#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) = =20 \ +template <> = =20 \ + struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, = =20 \ + _Np * _Bits / 8, void> \ + { using type =3D int##_Bits##x##_Np##_t; }; = =20 \ +template <> = =20 \ + struct __intrinsic_type>,= =20 \ + _Np * _Bits / 8, void> \ + { using type =3D uint##_Bits##x##_Np##_t; } +_GLIBCXX_SIMD_ARM_INTRIN(8, 8); +_GLIBCXX_SIMD_ARM_INTRIN(8, 16); +_GLIBCXX_SIMD_ARM_INTRIN(16, 4); +_GLIBCXX_SIMD_ARM_INTRIN(16, 8); +_GLIBCXX_SIMD_ARM_INTRIN(32, 2); +_GLIBCXX_SIMD_ARM_INTRIN(32, 4); +_GLIBCXX_SIMD_ARM_INTRIN(64, 1); +_GLIBCXX_SIMD_ARM_INTRIN(64, 2); +#undef _GLIBCXX_SIMD_ARM_INTRIN + template struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <=3D 16>> { =2D static constexpr int _S_VBytes =3D _Bytes 
<=3D 8 ? 8 : 16; + static constexpr int _SVecBytes =3D _Bytes <=3D 8 ? 8 : 16; using _Ip =3D __int_for_sizeof_t<_Tp>; using _Up =3D conditional_t< is_floating_point_v<_Tp>, _Tp, conditional_t, make_unsigned_t<_Ip>, _Ip>>; =2D using type [[__gnu__::__vector_size__(_S_VBytes)]] =3D _Up; + static_assert(!is_same_v<_Tp, _Up> || _SVecBytes !=3D _Bytes, + "should use explicit specialization above"); + using type =3D typename __intrinsic_type<_Up, _SVecBytes>::type; }; #endif // _GLIBCXX_SIMD_HAVE_NEON =20 =2D-=20 =E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2= =94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94= =80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80= =E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2= =94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94= =80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80= =E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2= =94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94= =80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80 Dr. 
Matthias Kretz https://mattkretz.github.io GSI Helmholtz Centre for Heavy Ion Research https://gsi.de std::experimental::simd https://github.com/VcDevel/std-simd =E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2= =94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94= =80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80= =E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2= =94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94= =80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80= =E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2= =94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94= =80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80