From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=19cO=KO=gsi.de=M.Kretz@sourceware.org>
Received: from lxmtout1.gsi.de (lxmtout1.gsi.de [140.181.3.111])
	by sourceware.org (Postfix) with ESMTPS id 1E96F3858D35;
	Fri,  8 Mar 2024 09:57:25 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 1E96F3858D35
Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gsi.de
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gsi.de
ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 1E96F3858D35
Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=140.181.3.111
ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1709891854; cv=none;
	b=pPHsM2uOmu74HTDQ9PJtLfkT8yYJlZ55cMXBtHmY/hLPwKJsxXm32SViIz7mIXHVcFr5sycyMFBEEk4sitzCYFJx6xpvRaJIiEjHTY73kacALeY8Sc+KUA5EFALH/jYVCH/bgopx57uDYikVG+0orzP6qaxjtgn4spHq0kN4q6A=
ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key;
	t=1709891854; c=relaxed/simple;
	bh=D6vlFkoW6bWpeKqppZhZqCKfEiW2uWwKGDX1sExgSes=;
	h=From:To:Subject:Date:Message-ID:MIME-Version; b=h/Mz4mjcUPjpHlj9NyLfFTbnrWOtson5vr8CNp5KZQhflMmFLaCG3fSX2lCMFI8eb0JXkIFZLPXVoBY16WlUiPRmL4SWO+tjN5GYWDPeOxvnUifxAaCcGAvERZML22mKpmLiwxu95PLa2Th3M1dLvswadERb4+lhyfq+mlp1Ngo=
ARC-Authentication-Results: i=1; server2.sourceware.org
Received: from localhost (localhost [127.0.0.1])
	by lxmtout1.gsi.de (Postfix) with ESMTP id C341A2051044;
	Fri,  8 Mar 2024 10:57:23 +0100 (CET)
X-Virus-Scanned: Debian amavisd-new at lxmtout1.gsi.de
Received: from lxmtout1.gsi.de ([127.0.0.1])
	by localhost (lxmtout1.gsi.de [127.0.0.1]) (amavisd-new, port 10024)
	with LMTP id n3yJHeoYyRsR; Fri,  8 Mar 2024 10:57:23 +0100 (CET)
Received: from srvEX6.campus.gsi.de (unknown [10.10.4.96])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by lxmtout1.gsi.de (Postfix) with ESMTPS id A114E2051040;
	Fri,  8 Mar 2024 10:57:23 +0100 (CET)
Received: from minbar.localnet (140.181.3.12) by srvEX6.campus.gsi.de
 (10.10.4.96) with Microsoft SMTP Server (version=TLS1_2,
 cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.40; Fri, 8 Mar
 2024 10:57:23 +0100
From: Matthias Kretz <m.kretz@gsi.de>
To: <libstdc++@gcc.gnu.org>, <gcc-patches@gcc.gnu.org>,
	<richard.sandiford@arm.com>, Srinivas Yadav Singanaboina
	<vasusrinivas.vasu14@gmail.com>
Subject: Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd
Date: Fri, 8 Mar 2024 10:57:22 +0100
Message-ID: <5282839.4XsnlVU6TS@minbar>
Organization: GSI Helmholtzzentrum =?UTF-8?B?ZsO8cg==?= Schwerionenforschung
In-Reply-To: <20240209142810.97817-1-vasu.srinivasvasu.14@gmail.com>
References: <mpt8r62kwxi.fsf@arm.com>
 <20240209142810.97817-1-vasu.srinivasvasu.14@gmail.com>
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="nextPart11647824.VV5PYv0bhD"
Content-Transfer-Encoding: 7Bit
X-Originating-IP: [140.181.3.12]
X-ClientProxiedBy: srvex5.Campus.gsi.de (10.10.4.95) To srvEX6.campus.gsi.de
 (10.10.4.96)
X-Spam-Status: No, score=-11.6 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,SPF_HELO_PASS,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org
List-Id: <libstdc++.gcc.gnu.org>

--nextPart11647824.VV5PYv0bhD
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="UTF-8"

Hi,

I applied and did extended testing on x86_64 (no regressions) and aarch64=20
using qemu testing SVE 256, 512, and 1024. Looks good!

While going through the applied patch I noticed a few style issues that I=20
simply turned into a patch (attached).

A few comments inline. Sorry for not seeing these before.

On Friday, 9 February 2024 15:28:10 CET Srinivas Yadav Singanaboina wrote:
> diff --git a/libstdc++-v3/include/experimental/bits/simd.h
> b/libstdc++-v3/include/experimental/bits/simd.h index
> 90523ea57dc..d274cd740fe 100644
> --- a/libstdc++-v3/include/experimental/bits/simd.h
> +++ b/libstdc++-v3/include/experimental/bits/simd.h
> @@ -39,12 +39,16 @@
>  #include <functional>
>  #include <iosfwd>
>  #include <utility>
> +#include <algorithm>
>=20
>  #if _GLIBCXX_SIMD_X86INTRIN
>  #include <x86intrin.h>
>  #elif _GLIBCXX_SIMD_HAVE_NEON
>  #include <arm_neon.h>
>  #endif
> +#if _GLIBCXX_SIMD_HAVE_SVE
> +#include <arm_sve.h>
> +#endif
>=20
>  /** @ingroup ts_simd
>   * @{
> @@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] =3D dou=
ble;
>  using __m512i [[__gnu__::__vector_size__(64)]] =3D long long;
>  #endif
>=20
> +#if _GLIBCXX_SIMD_HAVE_SVE
> +constexpr inline int __sve_vectorized_size_bytes =3D __ARM_FEATURE_SVE_B=
ITS /
> 8; +#else
> +constexpr inline int __sve_vectorized_size_bytes =3D 0;
> +#endif
> +
>  namespace simd_abi {
>  // simd_abi forward declarations {{{
>  // implementation details:
> @@ -108,6 +118,9 @@ template <int _UsedBytes>
>  template <int _UsedBytes>
>    struct _VecBltnBtmsk;
>=20
> +template <int _UsedBytes, int _TotalBytes =3D __sve_vectorized_size_byte=
s>
> +  struct _SveAbi;
> +
>  template <typename _Tp, int _Np>
>    using _VecN =3D _VecBuiltin<sizeof(_Tp) * _Np>;
>=20
> @@ -123,6 +136,9 @@ template <int _UsedBytes =3D 64>
>  template <int _UsedBytes =3D 16>
>    using _Neon =3D _VecBuiltin<_UsedBytes>;
>=20
> +template <int _UsedBytes =3D __sve_vectorized_size_bytes>
> +  using _Sve =3D _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>;
> +
>  // implementation-defined:
>  using __sse =3D _Sse<>;
>  using __avx =3D _Avx<>;
> @@ -130,6 +146,7 @@ using __avx512 =3D _Avx512<>;
>  using __neon =3D _Neon<>;
>  using __neon128 =3D _Neon<16>;
>  using __neon64 =3D _Neon<8>;
> +using __sve =3D _Sve<>;
>=20
>  // standard:
>  template <typename _Tp, size_t _Np, typename...>
> @@ -250,6 +267,8 @@ constexpr inline bool __support_neon_float =3D
>    false;
>  #endif
>=20
> +constexpr inline bool __have_sve =3D _GLIBCXX_SIMD_HAVE_SVE;
> +
>  #ifdef _ARCH_PWR10
>  constexpr inline bool __have_power10vec =3D true;
>  #else
> @@ -356,12 +375,13 @@ namespace __detail
>=20
>  		 | (__have_avx512vnni         << 27)
>  		 | (__have_avx512vpopcntdq    << 28)
>  		 | (__have_avx512vp2intersect << 29);
>=20
> -    else if constexpr (__have_neon)
> +    else if constexpr (__have_neon || __have_sve)
>        return __have_neon
>=20
>  	       | (__have_neon_a32 << 1)
>  	       | (__have_neon_a64 << 2)
>  	       | (__have_neon_a64 << 2)
>=20
> -	       | (__support_neon_float << 3);
> +	       | (__support_neon_float << 3)
> +         | (__have_sve << 4);

This is not enough. This should list all feature flags that might have a=20
(significant enough) influence on code-gen in inline functions (that are no=
t=20
always_inline). AFAIU at least __ARM_FEATURE_SVE2 is necessary. But I assum=
e=20
__ARM_FEATURE_SVE2_BITPERM, __ARM_FEATURE_SVE_BITS,=20
__ARM_FEATURE_SVE_MATMUL_INT8, and __ARM_FEATURE_SVE_VECTOR_OPERATORS are a=
lso=20
relevant. Maybe more?

> [...]
bits/simd.h:

>  // fall back to fixed_size only if scalar and native ABIs don't match
>  template <typename _Tp, size_t _Np, typename =3D void>
>    struct __deduce_fixed_size_fallback {};
>=20
> +template <typename _Tp, size_t _Np, typename =3D void>
> +  struct __no_sve_deduce_fixed_size_fallback {};
> +
>  template <typename _Tp, size_t _Np>
>    struct __deduce_fixed_size_fallback<_Tp, _Np,
>      enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
>    { using type =3D simd_abi::fixed_size<_Np>; };
>=20
> +template <typename _Tp, size_t _Np>
> +  struct __no_sve_deduce_fixed_size_fallback<_Tp, _Np,
> +    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
> +  { using type =3D simd_abi::fixed_size<_Np>; };
> +
>  template <typename _Tp, size_t _Np, typename>
>    struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {=
};
>=20
> +template <typename _Tp, size_t _Np, typename>
> +  struct __no_sve_deduce_impl : public
> __no_sve_deduce_fixed_size_fallback<_Tp, _Np> {};

I believe you don't need __no_sve_deduce_fixed_size_fallback. Simply derive=
=20
__no_sve_deduce_impl from __deduce_fixed_size_fallback. No?


> diff --git a/libstdc++-v3/include/experimental/bits/simd_converter.h
> b/libstdc++-v3/include/experimental/bits/simd_converter.h index
> 3160e251632..b233d2c70a5 100644
> --- a/libstdc++-v3/include/experimental/bits/simd_converter.h
> +++ b/libstdc++-v3/include/experimental/bits/simd_converter.h
> @@ -28,6 +28,18 @@
>  #if __cplusplus >=3D 201703L
>=20
>  _GLIBCXX_SIMD_BEGIN_NAMESPACE
> +
> +template <typename _Arg, typename _Ret, typename _To, size_t _Np>
> +_Ret __converter_fallback(_Arg __a)
> +  {
> +  _Ret __ret{};
> +  __execute_n_times<_Np>(
> +      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
> +        __ret._M_set(__i, static_cast<_To>(__a[__i]));
> +    });
> +  return __ret;
> +  }
> +
>  // _SimdConverter scalar -> scalar {{{
>  template <typename _From, typename _To>
>    struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::scalar,
> @@ -56,14 +68,16 @@ template <typename _From, typename _To, typename _Abi>
>    };
>=20
>  // }}}
> -// _SimdConverter "native 1" -> "native 2" {{{
> +// _SimdConverter "native non-sve 1" -> "native non-sve 2" {{{
>  template <typename _From, typename _To, typename _AFrom, typename _ATo>
>    struct _SimdConverter<
>      _From, _AFrom, _To, _ATo,
>      enable_if_t<!disjunction_v<
>        __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
>        is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
> -      conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>>>
> +      conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
> +	  && !(__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
> +	  >>
>    {
>      using _Arg =3D typename _AFrom::template __traits<_From>::_SimdMembe=
r;
>      using _Ret =3D typename _ATo::template __traits<_To>::_SimdMember;
> @@ -75,6 +89,26 @@ template <typename _From, typename _To, typename _AFro=
m,
> typename _ATo> { return __vector_convert<_V>(__a, __more...); }
>    };
>=20
> +// }}}
> +// _SimdConverter "native 1" -> "native 2" {{{
> +template <typename _From, typename _To, typename _AFrom, typename _ATo>
> +  struct _SimdConverter<
> +    _From, _AFrom, _To, _ATo,
> +    enable_if_t<!disjunction_v<
> +      __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
> +      is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
> +      conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
> +	  && (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
> +	  >>
> +  {
> +    using _Arg =3D typename _AFrom::template __traits<_From>::_SimdMembe=
r;
> +    using _Ret =3D typename _ATo::template __traits<_To>::_SimdMember;
> +
> +    _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
> +    operator()(_Arg __x) const noexcept
> +    { return __converter_fallback<_Arg, _Ret, _To, simd_size_v<_To,
> _ATo>>(__x); } +  };
> +

I'd prefer if you could solve this with a constexpr-if in operator() instea=
d=20
of making the enable_if condition even longer. Feel free to=20
static_assert(sizeof...(_More) =3D=3D 0) in the SVE branch. (Why is it=20
unnecessary, though?)

>  // }}}
>  // _SimdConverter scalar -> fixed_size<1> {{{1
>  template <typename _From, typename _To>
> @@ -111,6 +145,10 @@ template <typename _From, typename _To, int _Np>
>        if constexpr (is_same_v<_From, _To>)
>  	return __x;
>=20
> +	  // fallback to sequential when sve is available
> +	  else if constexpr (__have_sve)
> +	return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
> +

At least the next three cases should all work, no? Or is the point that thi=
s=20
fallback leads to better code-gen with SVE?

> diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h
> b/libstdc++-v3/include/experimental/bits/simd_detail.h index
> 1fb77866bb2..52fdf7149bf 100644
> --- a/libstdc++-v3/include/experimental/bits/simd_detail.h
> +++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
> @@ -61,6 +61,11 @@
>  #else
>  #define _GLIBCXX_SIMD_HAVE_NEON_A64 0
>  #endif
> +#if (__ARM_FEATURE_SVE_BITS > 0 && __ARM_FEATURE_SVE_VECTOR_OPERATORS=3D=
=3D1)
> +#define _GLIBCXX_SIMD_HAVE_SVE 1
> +#else
> +#define _GLIBCXX_SIMD_HAVE_SVE 0
> +#endif
>  //}}}
>  // x86{{{
>  #ifdef __MMX__
> @@ -267,7 +272,7 @@
>  #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
>  #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
>=20
> -#if __STRICT_ANSI__ || defined __clang__
> +#if _GLIBCXX_SIMD_HAVE_SVE || __STRICT_ANSI__ || defined __clang__
>  #define _GLIBCXX_SIMD_CONSTEXPR
>  #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const

This is something I'd like to see resolved. (But not necessary for this pat=
ch,=20
IMHO.) Even if some parts of the SVE interface can't be used in constant=20
expressions, it must be possible to work around those with `if=20
(__builtin_is_constant_evaluated())` branches. For C++26 we will have to do=
=20
this, because the std::simd interface is fully constexpr.

> diff --git a/libstdc++-v3/include/experimental/bits/simd_sve.h
> b/libstdc++-v3/include/experimental/bits/simd_sve.h new file mode 100644
> index 00000000000..123242a3a62
> --- /dev/null
> +++ b/libstdc++-v3/include/experimental/bits/simd_sve.h
[...]
> +template <typename _Tp, size_t _Np>
> +  struct __sve_vector_type
> +  {};
> +
> +template <typename _Tp, size_t _Np>
> +  using __sve_vector_type_t =3D typename __sve_vector_type<_Tp, _Np>::ty=
pe;
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<int8_t, _Np>
> +  {
> +    typedef svint8_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(int8_t __dup)
> +    { return svdup_s8(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b8(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<uint8_t, _Np>
> +  {
> +    typedef svuint8_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(uint8_t __dup)
> +    { return svdup_u8(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b8(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<int16_t, _Np>
> +  {
> +    typedef svint16_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(int16_t __dup)
> +    { return svdup_s16(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b16(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<uint16_t, _Np>
> +  {
> +    typedef svuint16_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(uint16_t __dup)
> +    { return svdup_u16(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b16(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<int32_t, _Np>
> +  {
> +    typedef svint32_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(int32_t __dup)
> +    { return svdup_s32(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b32(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<uint32_t, _Np>
> +  {
> +    typedef svuint32_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(uint32_t __dup)
> +    { return svdup_u32(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b32(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<int64_t, _Np>
> +  {
> +    typedef svint64_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(int64_t __dup)
> +    { return svdup_s64(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b64(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<uint64_t, _Np>
> +  {
> +    typedef svuint64_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(uint64_t __dup)
> +    { return svdup_u64(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b64(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<float, _Np>
> +  {
> +    typedef svfloat32_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(float __dup)
> +    { return svdup_f32(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b32(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<double, _Np>
> +  {
> +    typedef svfloat64_t __sve_vlst_type
> __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))); +
> +    inline static __sve_vlst_type
> +    __sve_broadcast(double __dup)
> +    { return svdup_f64(__dup); }
> +
> +    inline static __sve_bool_type
> +    __sve_active_mask()
> +    { return svwhilelt_b64(size_t(0), _Np); };
> +
> +    using type =3D __sve_vlst_type;
> +  };
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<char, _Np>
> +  : __sve_vector_type<__get_sve_value_type_t<char>, _Np>
> +  {};
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<char16_t, _Np>
> +  : __sve_vector_type<__get_sve_value_type_t<char16_t>, _Np>
> +  {};
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<wchar_t, _Np>
> +  : __sve_vector_type<__get_sve_value_type_t<wchar_t>, _Np>
> +  {};
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<char32_t, _Np>
> +  : __sve_vector_type<__get_sve_value_type_t<char32_t>, _Np>
> +  {};
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<long long int, _Np>
> +  : __sve_vector_type<__get_sve_value_type_t<long long int>, _Np>
> +  {};
> +
> +template <size_t _Np>
> +  struct __sve_vector_type<long long unsigned int, _Np>
> +  : __sve_vector_type<__get_sve_value_type_t<long long unsigned int>, _N=
p>
> +  {};

Please replace the last 6 partial specializations with a generic=20
implementation of the primary template:

template <typename T, size_t _Np>
  struct __sve_vector_type
  : __sve_vector_type<__get_sve_value_type_t<T>, _Np>
  {};

This avoids issues on platforms that define (u)int64_t as (unsigned) long l=
ong=20
and is simpler in any case.

[...]
> +  template <typename _Tp, typename _Up, size_t _Np>
> +    _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _N=
p>
> +    _S_load(const _Up* __p, _SveMaskWrapper<sizeof(_Tp), _Np> __k)
> +    {
> +      using _STp =3D __get_sve_value_type_t<_Tp>;
> +      using _SUp =3D __get_sve_value_type_t<_Up>;
> +      using _V =3D __sve_vector_type_t<_Tp, _Np>;
> +      const _SUp* __up =3D reinterpret_cast<const _SUp*>(__p);
> +
> +      if constexpr (std::is_same_v<_Tp, _Up>)
> +	return _V(svld1(__k._M_data, __up));
> +      if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up>
> +		      && (sizeof(_Tp) > sizeof(_Up)))
> +	{
> +	  if constexpr (std::is_same_v<_SUp, int8_t>)
> +	    {
> +	      if constexpr (std::is_same_v<_STp, int16_t>)
> +		return _V(svld1sb_s16(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint16_t>)
> +		return _V(svld1sb_u16(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, int32_t>)
> +		return _V(svld1sb_s32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint32_t>)
> +		return _V(svld1sb_u32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, int64_t>)
> +		return _V(svld1sb_s64(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint64_t>)
> +		return _V(svld1sb_u64(__k._M_data, __up));
> +	    }
> +	  if constexpr (std::is_same_v<_SUp, uint8_t>)
> +	    {
> +	      if constexpr (std::is_same_v<_STp, int16_t>)
> +		return _V(svld1ub_s16(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint16_t>)
> +		return _V(svld1ub_u16(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, int32_t>)
> +		return _V(svld1ub_s32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint32_t>)
> +		return _V(svld1ub_u32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, int64_t>)
> +		return _V(svld1ub_s64(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint64_t>)
> +		return _V(svld1ub_u64(__k._M_data, __up));
> +	    }
> +	  if constexpr (std::is_same_v<_SUp, int16_t>)
> +	    {
> +	      if constexpr (std::is_same_v<_STp, int32_t>)
> +		return _V(svld1sh_s32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint32_t>)
> +		return _V(svld1sh_u32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, int64_t>)
> +		return _V(svld1sh_s64(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint64_t>)
> +		return _V(svld1sh_u64(__k._M_data, __up));
> +	    }
> +	  if constexpr (std::is_same_v<_SUp, uint16_t>)
> +	    {
> +	      if constexpr (std::is_same_v<_STp, int32_t>)
> +		return _V(svld1uh_s32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint32_t>)
> +		return _V(svld1uh_u32(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, int64_t>)
> +		return _V(svld1uh_s64(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint64_t>)
> +		return _V(svld1uh_u64(__k._M_data, __up));
> +	    }
> +	  if constexpr (std::is_same_v<_SUp, int32_t>)
> +	    {
> +	      if constexpr (std::is_same_v<_STp, int64_t>)
> +		return _V(svld1sw_s64(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint64_t>)
> +		return _V(svld1sw_u64(__k._M_data, __up));
> +	    }
> +	  if constexpr (std::is_same_v<_SUp, uint32_t>)
> +	    {
> +	      if constexpr (std::is_same_v<_STp, int64_t>)
> +		return _V(svld1uw_s64(__k._M_data, __up));
> +	      if constexpr (std::is_same_v<_STp, uint64_t>)
> +		return _V(svld1uw_u64(__k._M_data, __up));
> +	    }
> +	}
> +      return __generate_from_n_evaluations<_Np, __sve_vector_type_t<_Tp,
> _Np>>(
> +	       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
> +		 return __k[__i] ? static_cast<_Tp>(__p[__i]) : _Tp{};
> +	       });

=46ine for now, because this unlikely to be used much anyway. But I'd prefe=
r to=20
see masked vector load(s) + vector conversion(s) at some point.

> +    }
> +
> +  template <typename _Tp, typename _Up, size_t _Np>
> +    _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +    _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x,
> _SveMaskWrapper<sizeof(_Tp), _Np> __k) +    {
> +      using _SUp =3D __get_sve_value_type_t<_Up>;
> +      using _STp =3D __get_sve_value_type_t<_Tp>;
> +
> +      _SUp* __up =3D reinterpret_cast<_SUp*>(__p);
> +
> +      if constexpr (std::is_same_v<_Tp, _Up>)
> +	return svst1(__k._M_data, __up, __x);
> +      if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up>
> +		      && (sizeof(_Tp) > sizeof(_Up)))
> +	{
> +    if constexpr (std::is_same_v<_SUp, int8_t> && std::is_signed_v<_STp>)
> +      return svst1b(__k._M_data, __up, __x);
> +    if constexpr (std::is_same_v<_SUp, uint8_t> &&
> std::is_unsigned_v<_STp>) +      return svst1b(__k._M_data, __up, __x);
> +    if constexpr (std::is_same_v<_SUp, int16_t> && std::is_signed_v<_STp=
>)
> +      return svst1h(__k._M_data, __up, __x);
> +    if constexpr (std::is_same_v<_SUp, uint16_t> &&
> std::is_unsigned_v<_STp>) +      return svst1h(__k._M_data, __up, __x);
> +    if constexpr (std::is_same_v<_SUp, int32_t> && std::is_signed_v<_STp=
>)
> +      return svst1w(__k._M_data, __up, __x);
> +    if constexpr (std::is_same_v<_SUp, uint32_t> &&
> std::is_unsigned_v<_STp>) +      return svst1w(__k._M_data, __up, __x);
> +  }
> +
> +      __execute_n_times<_Np>([&](auto __i)
> _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { +	if (__k[__i])
> +	  __p[__i] =3D static_cast<_Up>(__x[__i]);
> +      });

Same as for converting masked loads...

> +    }
> +
> +  template <typename _Tp, size_t _Np>
> +    _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _N=
p>
> +    _S_blend(_SveMaskWrapper<sizeof(_Tp), _Np> __k, _SveSimdWrapper<_Tp,
> _Np> __at0, +	     _SveSimdWrapper<_Tp, _Np> __at1)
> +    { return svsel(__k._M_data, __at1._M_data, __at0._M_data); }
> +
> +  template <size_t _Np, bool _Sanitized>
> +    _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +    _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
> +    {
> +      __execute_n_times<_Np>([&](auto __i)
> _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { +	__mem[__i] =3D __x[__i];
> +      });
> +    }
> +};
> +
> +template <typename _Abi, typename>
> +  struct _SimdImplSve
> +  {
> +    template <typename _Tp>
> +      using _MaskMember =3D typename _Abi::template _MaskMember<_Tp>;
> +
> +    template <typename _Tp>
> +      using _SimdMember =3D typename _Abi::template
> __traits<_Tp>::_SimdMember; +
> +    using _CommonImpl =3D typename _Abi::_CommonImpl;
> +    using _SuperImpl =3D typename _Abi::_SimdImpl;
> +    using _MaskImpl =3D typename _Abi::_MaskImpl;
> +
> +    template <typename _Tp>
> +      static constexpr size_t _S_full_size =3D _Abi::template
> _S_full_size<_Tp>; +
> +    template <typename _Tp>
> +      static constexpr size_t _S_size =3D _Abi::template _S_size<_Tp>;
> +
> +    template <typename _Tp>
> +      using _TypeTag =3D _Tp*;
> +
> +    using abi_type =3D _Abi;
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr auto
> +      _S_broadcast(_Tp __x) noexcept
> +      {
> +	return __sve_vector_type<_Tp, __sve_vectorized_size_bytes /=20
sizeof(_Tp)>
> +		 ::__sve_broadcast(__x);
> +      }
> +
> +    template <typename _Fp, typename _Tp>
> +      inline static constexpr _SimdMember<_Tp>
> +      _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
> +      {
> +	constexpr size_t _Np =3D _S_size<_Tp>;
> +	_SveSimdWrapper<_Tp, _Np> __ret;
> +	__execute_n_times<_S_size<_Tp>>(
> +	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {=20
__ret._M_set(__i,
> __gen(__i)); }); +	return __ret;
> +      }
> +
> +    template <typename _Tp, typename _Up>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
> +      _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
> +      {
> +	constexpr size_t _Np =3D _S_size<_Tp>;
> +	_SimdMember<_Tp> __ret =3D _CommonImpl::template _S_load<_Tp, _Up,=20
_Np>(
> +				   __mem, _SveMaskWrapper<sizeof(_Tp),=20
_Np>{
> +				     __sve_vector_type<_Tp,=20
_Np>::__sve_active_mask()});
> +	return __ret;
> +      }
> +
> +    template <typename _Tp, size_t _Np, typename _Up>
> +      static constexpr inline _SveSimdWrapper<_Tp, _Np>
> +      _S_masked_load(_SveSimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp>
> __k, const _Up* __mem) +      noexcept
> +      {
> +	__sve_vector_type_t<_Tp, _Np> __v
> +	  =3D _CommonImpl::template _S_load<_Tp, _Up, _Np>(__mem, __k);
> +	__sve_vector_type_t<_Tp, _Np> __ret =3D svsel(__k._M_data, __v,
> __merge._M_data); +	return __ret;
> +      }
> +
> +    template <typename _Tp, typename _Up>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept
> +      {
> +	constexpr size_t _Np =3D _S_size<_Tp>;
> +	_CommonImpl::template _S_store<_Tp, _Up, _Np>(
> +	  __mem, __v, __sve_vector_type<_Tp, _Np>::__sve_active_mask());
> +      }
> +
> +    template <typename _Tp, typename _Up, size_t _Np>
> +      static constexpr inline void
> +      _S_masked_store(const _SveSimdWrapper<_Tp, _Np> __v, _Up* __mem,
> +		      const _SveMaskWrapper<sizeof(_Tp), _Np> __k) noexcept
> +      { _CommonImpl::template _S_store<_Tp, _Up, _Np>(__mem, __v, __k); }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
> +      _S_negate(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      {
> +	return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, +		       __sve_vector_type<_Tp,
> _Np>::__sve_broadcast(_Tp{}));
> +      }
> +
> +    template <typename _Tp, typename _BinaryOperation>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
> +      _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
> +      {
> +	auto __x_data =3D __x._M_data;
> +	constexpr size_t _Np =3D simd_size_v<_Tp, _Abi>;
> +	using __sve_vec_t =3D __sve_vector_type_t<_Tp, _Np>;
> +	std::size_t __i =3D __x.size();
> +	for (; (__i % 2) !=3D 1; __i /=3D 2)
> +	  {
> +	    __x_data =3D __binary_op(simd<_Tp, _Abi>(
> +				     __private_init, _SveSimdWrapper<_Tp,=20
_Np>(
> +						      =20
__sve_vec_t(svuzp1(__x_data, __x_data)))),
> +				   simd<_Tp, _Abi>(
> +				     __private_init, _SveSimdWrapper<_Tp,=20
_Np>(
> +						      =20
__sve_vec_t(svuzp2(__x_data, __x_data))))
> +				  )._M_data;
> +	  }
> +	_Tp __res =3D __x_data[0];
> +	for (size_t __ri =3D 1; __ri !=3D __i; __ri++)
> +	  __res =3D __binary_op(__x_data[__ri], __res);
> +	return __res;
> +      }
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
> +      _S_reduce(simd<_Tp, _Abi> __x, plus<>)
> +      {
> +    return svaddv(__sve_vector_type<_Tp,
> _S_size<_Tp>>::__sve_active_mask(), __x._M_data); +      }
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
> +      _S_reduce(simd<_Tp, _Abi> __x, bit_and<>)
> +      {
> +    return svandv(__sve_vector_type<_Tp,
> _S_size<_Tp>>::__sve_active_mask(), __x._M_data); +      }
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
> +      _S_reduce(simd<_Tp, _Abi> __x, bit_or<>)
> +      {
> +    return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask=
(),
> __x._M_data); +      }
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
> +      _S_reduce(simd<_Tp, _Abi> __x, bit_xor<>)
> +      {
> +    return sveorv(__sve_vector_type<_Tp,
> _S_size<_Tp>>::__sve_active_mask(), __x._M_data); +      }
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
> +      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Maximum())
> +      {
> +    return svmaxv(__sve_vector_type<_Tp,
> _S_size<_Tp>>::__sve_active_mask(), __x._M_data); +      }
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
> +      _S_reduce(simd<_Tp, _Abi> __x, __detail::_Minimum())
> +      {
> +    return svminv(__sve_vector_type<_Tp,
> _S_size<_Tp>>::__sve_active_mask(), __x._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
> +      __sve_vector_type_t<_Tp, _Np>
> +      _S_min(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __=
b)
> +      {
> +	return svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __a._M_data, __b._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
> +      __sve_vector_type_t<_Tp, _Np>
> +      _S_max(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np> __=
b)
> +      {
> +	return svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __a._M_data, __b._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
> +      pair<_SveSimdWrapper<_Tp, _Np>, _SveSimdWrapper<_Tp, _Np>>
> +      _S_minmax(_SveSimdWrapper<_Tp, _Np> __a, _SveSimdWrapper<_Tp, _Np>
> __b) +      {
> +	return {
> +	  svmin_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),=20
__a._M_data,
> __b._M_data), +	  svmax_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(=
),
> __a._M_data, __b._M_data) +	};
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_complement(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      {
> +	if constexpr (is_floating_point_v<_Tp>)
> +	  {
> +	    using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	    return __sve_reinterpret_cast<_Tp>(
> +		     svnot_z(__sve_vector_type<_Tp,=20
_Np>::__sve_active_mask(),
> +			     __sve_reinterpret_cast<_Ip>(__x)));
> +	  }
> +	else
> +	  return svnot_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _SveSimdWrapper<_Tp, _Np>
> +      _S_unary_minus(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      {
> +	return svmul_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, +		       static_cast<_Tp>(-1));
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_plus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      { return __x._M_data + __y._M_data; }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_minus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      { return __x._M_data - __y._M_data; }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_multiplies(_SveSimdWrapper<_Tp, _Np> __x,
> _SveSimdWrapper<_Tp, _Np> __y) +      { return __x._M_data * __y._M_data;=
 }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_divides(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +        __sve_vector_type_t<_Tp, _Np> __y_padded =3D
> svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), +                =
 =20
>    __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1)); +      =
=20
> return __x._M_data / __y_padded;
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_modulus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +        __sve_vector_type_t<_Tp, _Np> __y_padded =3D
> svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), +                =
 =20
>    __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1)); +      =
=20
> return __x._M_data % __y_padded;
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_bit_and(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +	if constexpr (is_floating_point_v<_Tp>)
> +	  {
> +	    using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	    return __sve_reinterpret_cast<_Tp>(
> +		     svand_x(__sve_vector_type<_Tp,=20
_Np>::__sve_active_mask(),
> +			     __sve_reinterpret_cast<_Ip>(__x),
> __sve_reinterpret_cast<_Ip>(__y))); +	  }
> +	else
> +	  return svand_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> +			 __x._M_data, __y._M_data);
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_bit_or(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +	if constexpr (is_floating_point_v<_Tp>)
> +	  {
> +	    using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	    return __sve_reinterpret_cast<_Tp>(
> +		     svorr_x(__sve_vector_type<_Tp,=20
_Np>::__sve_active_mask(),
> +			     __sve_reinterpret_cast<_Ip>(__x),
> __sve_reinterpret_cast<_Ip>(__y))); +	  }
> +	else
> +	  return svorr_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> +			 __x._M_data, __y._M_data);
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_bit_xor(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +	if constexpr (is_floating_point_v<_Tp>)
> +	  {
> +	    using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	    return __sve_reinterpret_cast<_Tp>(
> +		     sveor_x(__sve_vector_type<_Tp,=20
_Np>::__sve_active_mask(),
> +			     __sve_reinterpret_cast<_Ip>(__x),
> __sve_reinterpret_cast<_Ip>(__y))); +	  }
> +	else
> +	  return sveor_x(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> +			 __x._M_data, __y._M_data);
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static __sve_vector_type_t<_Tp, _Np>
> +      _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_=
Tp,
> _Np> __y) +      { return __x._M_data << __y._M_data; }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static __sve_vector_type_t<_Tp, _Np>
> +      _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x,
> _SveSimdWrapper<_Tp, _Np> __y) +      { return __x._M_data >> __y._M_data;
> }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_bit_shift_left(_SveSimdWrapper<_Tp, _Np> __x, int __y)
> +      { return __x._M_data << __y; }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp,
> _Np> +      _S_bit_shift_right(_SveSimdWrapper<_Tp, _Np> __x, int __y)
> +      { return __x._M_data >> __y; }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_increment(_SveSimdWrapper<_Tp, _Np>& __x)
> +      { __x =3D __x._M_data + 1; }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_decrement(_SveSimdWrapper<_Tp, _Np>& __x)
> +      { __x =3D __x._M_data - 1; }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
> +      _S_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _N=
p>
> __y) +      {
> +	return svcmpeq(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
> +      _S_not_equal_to(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +	return svcmpne(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
> +      _S_less(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> _=
_y)
> +      {
> +	return svcmplt(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
> +      _S_less_equal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +	return svcmple(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data); +      }
> +
> +    // simd.math
> +#define _GLIBCXX_SIMD_MATH_FALLBACK(__name)                             =
  =20
>                        \ +    template <typename _Tp, size_t _Np,
> typename... _More>                                         \ +      static
> _SveSimdWrapper<_Tp, _Np> _S_##__name(const _SveSimdWrapper<_Tp, _Np>& __=
x,
>           \ +						   const=20
_More&... __more)                         \
> +      {                                                                 =
  =20
>                        \ +	_SveSimdWrapper<_Tp, _Np> __r;                =
 =20
>                                           \
> +	__execute_n_times<_Np>([&](auto __i)=20
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
>                  \ +	  __r._M_set(__i, __name(__x[__i], __more[__i]...));=
 =20
>                                     \ +	});                              =
 =20
>                                                        \ +	return __r;   =
 =20
>                                                                          =
 \
> +      }
> +
> +#define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name)            =
  =20
>                        \ +    template <typename _Tp, typename... _More> =
 =20
>                                                  \ +      static auto
> _S_##__name(const _Tp& __x, const _More&... __more)                      =
 =20
>      \ +      {                                                          =
 =20
>                                \ +	return=20
__fixed_size_storage_t<_RetTp,
> _Tp::_S_size>::_S_generate(                          \ +		 [&]
(auto __meta)
> _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {                             \ +	=09
 =20
> return __meta._S_generator(                                              =
 =20
>     \ +			    [&](auto __i)=20
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {          =20
>          \ +			      return __name(__x[__meta._S_offset +=20
__i],           =20
>               \ +					   =20
__more[__meta._S_offset + __i]...);             =20
>      \ +			    }, static_cast<_RetTp*>(nullptr));                        =
 =20
>           \ +		 });                                                      =
 =20
>                       \ +      }
> +
> +    _GLIBCXX_SIMD_MATH_FALLBACK(acos)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(asin)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(atan)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(atan2)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(cos)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(sin)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(tan)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(acosh)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(asinh)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(atanh)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(cosh)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(sinh)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(tanh)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(exp)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(exp2)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(expm1)
> +    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(log)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(log10)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(log1p)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(log2)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(logb)
> +
> +    // modf implemented in simd_math.h
> +    _GLIBCXX_SIMD_MATH_FALLBACK(scalbn)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(scalbln)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(cbrt)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(pow)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(erf)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(erfc)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(lgamma)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(tgamma)
> +
> +    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint)
> +    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint)
> +
> +    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround)
> +    _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround)
> +
> +    _GLIBCXX_SIMD_MATH_FALLBACK(fmod)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(remainder)
> +
> +    template <typename _Tp, size_t _Np>
> +      static _SveSimdWrapper<_Tp, _Np>
> +      _S_remquo(const _SveSimdWrapper<_Tp, _Np> __x, const
> _SveSimdWrapper<_Tp, _Np> __y, +		__fixed_size_storage_t<int, _Np>*=20
__z)
> +      {
> +	_SveSimdWrapper<_Tp, _Np> __r{};
> +	__execute_n_times<_Np>([&](auto __i)=20
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
> +	  int __tmp;
> +	  __r._M_set(__i, remquo(__x[__i], __y[__i], &__tmp));
> +	  __z->_M_set(__i, __tmp);
> +	});
> +	return __r;
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
> +      _S_fpclassify(_SveSimdWrapper<_Tp, _Np> __x)
> +      {
> +	__fixed_size_storage_t<int, _Np> __r{};
> +	__execute_n_times<_Np>([&](auto __i)=20
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
> +	  __r._M_set(__i, std::fpclassify(__x[__i]));
> +	});
> +	return __r;
> +      }
> +
> +    // copysign in simd_math.h
> +    _GLIBCXX_SIMD_MATH_FALLBACK(nextafter)
> +    _GLIBCXX_SIMD_MATH_FALLBACK(fdim)
> +
> +#undef _GLIBCXX_SIMD_MATH_FALLBACK
> +#undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET
> +
> +    template <typename _Tp, size_t _Np, typename _Op>
> +      static constexpr _MaskMember<_Tp>
> +      __fp_cmp(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np>
> __y, _Op __op) +      {
> +	using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	using _VI =3D __sve_vector_type_t<_Ip, _Np>;
> +	using _WI =3D _SveSimdWrapper<_Ip, _Np>;
> +	const _WI __fmv =3D __sve_vector_type<_Ip,
> _Np>::__sve_broadcast(__finite_max_v<_Ip>); +	const _WI __zerov =3D
> __sve_vector_type<_Ip, _Np>::__sve_broadcast(0); +	const _WI __xn =3D
> _VI(__sve_reinterpret_cast<_Ip>(__x));
> +	const _WI __yn =3D _VI(__sve_reinterpret_cast<_Ip>(__y));
> +
> +	const _WI __xp
> +	  =3D svsel(_S_less(__xn, __zerov), _S_unary_minus(_WI(_S_bit_and(__xn,
> __fmv))), __xn); +	const _WI __yp
> +	  =3D svsel(_S_less(__yn, __zerov), _S_unary_minus(_WI(_S_bit_and(__yn,
> __fmv))), __yn); +	return svbic_z(__sve_vector_type<_Ip,
> _Np>::__sve_active_mask(), __op(__xp, __yp)._M_data, +		     =20
> _SuperImpl::_S_isunordered(__x, __y)._M_data);
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      static constexpr _MaskMember<_Tp>
> +      _S_isgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _=
Np>
> __y) noexcept +      { return __fp_cmp(__x, __y, [](auto __xp, auto __yp)=
 {
> return _S_less(__yp, __xp); }); } +
> +    template <typename _Tp, size_t _Np>
> +      static constexpr _MaskMember<_Tp>
> +      _S_isgreaterequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_=
Tp,
> _Np> __y) noexcept +      { return __fp_cmp(__x, __y, [](auto __xp, auto
> __yp) { return _S_less_equal(__yp, __xp); }); } +
> +    template <typename _Tp, size_t _Np>
> +      static constexpr _MaskMember<_Tp>
> +      _S_isless(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np>
> __y) noexcept +      { return __fp_cmp(__x, __y, [](auto __xp, auto __yp)=
 {
> return _S_less(__xp, __yp); }); } +
> +    template <typename _Tp, size_t _Np>
> +      static constexpr _MaskMember<_Tp>
> +      _S_islessequal(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) noexcept +      { return __fp_cmp(__x, __y, [](auto __xp, auto
> __yp) { return _S_less_equal(__xp, __yp); }); } +
> +    template <typename _Tp, size_t _Np>
> +      static constexpr _MaskMember<_Tp>
> +      _S_islessgreater(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_T=
p,
> _Np> __y) noexcept +      {
> +	return svbic_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> +		       _SuperImpl::_S_not_equal_to(__x, __y)._M_data,
> +		       _SuperImpl::_S_isunordered(__x, __y)._M_data);
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_abs(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_fabs(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svabs_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_sqrt(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svsqrt_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_ldexp(_SveSimdWrapper<_Tp, _Np> __x, __fixed_size_storage_t<int,
> _Np> __y) noexcept +      {
> +	auto __sve_register =3D __y.first;
> +	if constexpr (std::is_same_v<_Tp, float>)
> +	  return svscale_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, +			   __sve_register._M_data);
> +	else
> +	  {
> +	    __sve_vector_type_t<int64_t, _Np> __sve_d_register =3D
> svunpklo(__sve_register); +	    return svscale_z(__sve_vector_type<_Tp,
> _Np>::__sve_active_mask(), __x._M_data, +			    =20
__sve_d_register);
> +	  }
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_fma(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __=
y,
> +	     _SveSimdWrapper<_Tp, _Np> __z)
> +      {
> +	return svmad_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data, +		       __z._M_data);
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_fmax(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> _=
_y)
> +      {
> +  return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_fmin(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> _=
_y)
> +      {
> +  return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
> +      _S_isfinite([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x)
> +      {
> +#if __FINITE_MATH_ONLY__
> +	return __sve_vector_type_t<_Tp, _Np>::__sve_all_true_mask();
> +#else
> +	// if all exponent bits are set, __x is either inf or NaN
> +
> +	using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	const __sve_vector_type_t<_Ip, _Np> __absn =3D
> __sve_reinterpret_cast<_Ip>(_S_abs(__x)); +	const=20
__sve_vector_type_t<_Ip,
> _Np> __maxn
> +	  =3D __sve_reinterpret_cast<_Ip>(
> +	      __sve_vector_type<_Tp,=20
_Np>::__sve_broadcast(__finite_max_v<_Tp>));
> +
> +	return _S_less_equal(_SveSimdWrapper<_Ip, _Np>{__absn},
> _SveSimdWrapper<_Ip, _Np>{__maxn}); +#endif
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
> +      _S_isinf([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x)
> +      {
> +#if __FINITE_MATH_ONLY__
> +	return {}; // false
> +#else
> +	return _S_equal_to<_Tp, _Np>(_S_abs(__x),
> _S_broadcast(__infinity_v<_Tp>)); +#endif
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
> +      _S_isnan([[maybe_unused]] _SveSimdWrapper<_Tp, _Np> __x)
> +      {
> +#if __FINITE_MATH_ONLY__
> +	return {}; // false
> +#else
> +	return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __x._M_data); +#endif
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
> +      _S_isnormal(_SveSimdWrapper<_Tp, _Np> __x)
> +      {
> +	using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	using _V =3D __sve_vector_type_t<_Ip, _Np>;
> +	using _VW =3D _SveSimdWrapper<_Ip, _Np>;
> +
> +	const _V __absn =3D __sve_reinterpret_cast<_Ip>(_S_abs(__x));
> +	const _V __minn =3D __sve_reinterpret_cast<_Ip>(
> +			    __sve_vector_type<_Tp,=20
_Np>::__sve_broadcast(__norm_min_v<_Tp>));
> +#if __FINITE_MATH_ONLY__
> +	return _S_greater_equal(_VW{__absn}, _VW{__minn});
> +#else
> +	const _V __maxn =3D __sve_reinterpret_cast<_Ip>(
> +			    __sve_vector_type<_Tp,=20
_Np>::__sve_broadcast(__finite_max_v<_Tp>));
> +	return _MaskImpl::_S_bit_and(_S_less_equal(_VW{__minn}, _VW{__absn}),
> +				     _S_less_equal(_VW{__absn},=20
_VW{__maxn}));
> +#endif
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
> +      _S_signbit(_SveSimdWrapper<_Tp, _Np> __x)
> +      {
> +	using _Ip =3D __get_sve_value_type_t<__int_for_sizeof_t<_Tp>>;
> +	using _V =3D __sve_vector_type_t<_Ip, _Np>;
> +	using _VW =3D _SveSimdWrapper<_Ip, _Np>;
> +
> +	const _V __xn =3D __sve_reinterpret_cast<_Ip>(__x);
> +	const _V __zeron =3D __sve_vector_type<_Ip, _Np>::__sve_broadcast(0);
> +	return _S_less(_VW{__xn}, _VW{__zeron});
> +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
> +      _S_isunordered(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp,
> _Np> __y) +      {
> +	return svcmpuo(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
> __x._M_data, __y._M_data); +      }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_nearbyint(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svrinti_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(=
),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_rint(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return _SuperImpl::_S_nearbyint(__x); }
> +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_trunc(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svrintz_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(=
),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_round(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svrinta_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(=
),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_floor(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svrintm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(=
),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
> +      _S_ceil(_SveSimdWrapper<_Tp, _Np> __x) noexcept
> +      { return svrintp_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(=
),
> __x._M_data); } +
> +    template <typename _Tp, size_t _Bits, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k,
> _SveSimdWrapper<_Tp, _Np>& __lhs, +		     =20
> __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs)
> +      { __lhs =3D _CommonImpl::_S_blend(__k, __lhs, __rhs); }
> +
> +    template <typename _Tp, size_t _Bits, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_masked_assign(_SveMaskWrapper<_Bits, _Np> __k,
> _SveSimdWrapper<_Tp, _Np>& __lhs, +		      =20
__type_identity_t<_Tp> __rhs)
> +      { __lhs =3D _CommonImpl::_S_blend(__k, __lhs, __data(simd<_Tp,
> _Abi>(__rhs))); } +
> +    template <typename _Op, typename _Tp, size_t _Bits, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k,
> _SveSimdWrapper<_Tp, _Np>& __lhs, +			const
> __type_identity_t<_SveSimdWrapper<_Tp, _Np>> __rhs, _Op __op) +      {
> +	__lhs =3D _CommonImpl::_S_blend(__k, __lhs,
> +				      _SveSimdWrapper<_Tp,=20
_Np>(__op(_SuperImpl{}, __lhs, __rhs)));
> +      }
> +
> +    template <typename _Op, typename _Tp, size_t _Bits, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_masked_cassign(const _SveMaskWrapper<_Bits, _Np> __k,
> _SveSimdWrapper<_Tp, _Np>& __lhs, +			const=20
__type_identity_t<_Tp> __rhs,
> _Op __op)
> +      { _S_masked_cassign(__k, __lhs, _S_broadcast(__rhs), __op); }
> +
> +    template <typename _Tp, size_t _Np, typename _Up>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_set(_SveSimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept
> +      { __v._M_set(__i, static_cast<_Up&&>(__x)); }
> +
> +    template <template <typename> class _Op, typename _Tp, size_t _Bits,
> size_t _Np> +      _GLIBCXX_SIMD_INTRINSIC static constexpr
> _SveSimdWrapper<_Tp, _Np> +      _S_masked_unary(const
> _SveMaskWrapper<_Bits, _Np> __k, const _SveSimdWrapper<_Tp, _Np> __v) +  =
 =20
>  {
> +	auto __vv =3D simd<_Tp, _Abi>{__private_init, __v};
> +	_Op<decltype(__vv)> __op;
> +	return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv)));
> +      }
> +  };
> +
> +template <typename _Abi, typename>
> +  struct _MaskImplSve
> +  {
> +    template <typename _Tp>
> +      using _MaskMember =3D typename _Abi::template _MaskMember<_Tp>;
> +
> +    template <typename _Tp>
> +      using _TypeTag =3D _Tp*;
> +
> +    template <typename _Tp>
> +      static constexpr size_t _S_size =3D simd_size_v<_Tp, _Abi>;
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
> +      _S_broadcast(bool __x)
> +      {
> +	constexpr size_t _Np =3D simd_size_v<_Tp, _Abi>;
> +	__sve_bool_type __tr =3D __sve_vector_type<_Tp,=20
_Np>::__sve_active_mask();
> +	__sve_bool_type __fl =3D svpfalse_b();;
> +	return __x ? __tr : __fl;
> +      }
> +
> +    template <typename _Tp>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
> +      _S_load(const bool* __mem)
> +      {
> +	constexpr size_t _Np =3D simd_size_v<_Tp, _Abi>;
> +  const uint8_t* __p =3D reinterpret_cast<const uint8_t*>(__mem);
> +  __sve_bool_type __u8_active_mask =3D __sve_vector_type<uint8_t,
> _Np>::__sve_active_mask(); +  __sve_vector_type_t<uint8_t, _Np>
> __u8_vec_mask_load =3D svld1(__u8_active_mask, __p); +  __sve_bool_type
> __u8_mask =3D svcmpne(__u8_active_mask, __u8_vec_mask_load, 0); +
> +  __sve_bool_type __tp_mask =3D __u8_mask;
> +  for (size_t __up_size =3D 1; __up_size !=3D sizeof(_Tp); __up_size *=
=3D 2)
> +    {
> +  __tp_mask =3D svunpklo(__tp_mask);
> +    }
> +
> +	_SveMaskWrapper<sizeof(_Tp), simd_size_v<_Tp, _Abi>> __r{__tp_mask};
> +  return __r;
> +      }
> +
> +    template <size_t _Bits, size_t _Np>
> +      static inline _SveMaskWrapper<_Bits, _Np>
> +      _S_masked_load(_SveMaskWrapper<_Bits, _Np> __merge,
> _SveMaskWrapper<_Bits, _Np> __mask, +		     const bool* __mem)=20
noexcept
> +      {
> +	_SveMaskWrapper<_Bits, _Np> __r;
> +
> +	__execute_n_times<_Np>([&](auto __i)=20
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
> +	  if (__mask[__i])
> +	    __r._M_set(__i, __mem[__i]);
> +	  else
> +	    __r._M_set(__i, __merge[__i]);
> +	});
> +
> +	return __r;
> +      }
> +
> +    template <size_t _Bits, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_store(_SveMaskWrapper<_Bits, _Np> __v, bool* __mem) noexcept
> +      {
> +	__execute_n_times<_Np>([&](auto __i)
> +			      _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {=20
__mem[__i] =3D __v[__i]; });
> +      }
> +
> +    template <size_t _Bits, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr void
> +      _S_masked_store(const _SveMaskWrapper<_Bits, _Np> __v, bool* __mem,
> +		      const _SveMaskWrapper<_Bits, _Np> __k) noexcept
> +      {
> +	__execute_n_times<_Np>([&](auto __i)=20
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
> +	  if (__k[__i])
> +	    __mem[__i] =3D __v[__i];
> +	});
> +      }
> +
> +    template <size_t _Bits, size_t _Np>
> +      _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
> +      _S_to_bits(_SveMaskWrapper<_Bits, _Np> __x)
> +      {
> +	_ULLong __r =3D 0;
> +	__execute_n_times<_Np>(
> +	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r |=3D
> _ULLong(__x[__i]) << __i; }); +	return __r;

With -msve-vector-bits=3D1024 (or larger) this can fail (UB on shift) and l=
ose=20
information. This function is needed on conversion to fixed_size_simd_mask.=
=20
However, simd_fixed_size.h isn't ready for size > 64 either. While looking=
=20
deeper I found that you didn't adjust max_fixed_size in bits/simd.h. For no=
w=20
please bump max_fixed_size to 64 for __ARM_FEATURE_SVE_BITS >=3D 512. Don't=
 go=20
higher than 64, even though the spec is asking for it:

  Additionally, for every supported simd<T, Abi> (see 9.6.1), where Abi
  is an ABI tag that is not a specialization of simd_abi::fixed_size,
  N =3D=3D simd<T, Abi>::size() shall be supported.

I.e. the existence of simd<char, simd_abi::__sve> with simd_size_v<char,=20
simd_abi> =3D=3D 128 (for -msve-vector-bits=3D1024) asks for fixed_size_sim=
d<char,=20
128>. For now we can't conform.

[...]

=46rom my side, with the noted changes the patch is ready for merging.=20
@Jonathan, any chance for a green light before GCC 14.1?

=2D-=20
=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=
=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=
=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=
=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=AC=E2=94=80=E2=94=80=E2=
=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=
=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=
=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=
=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=
=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80
 Dr. Matthias Kretz           =E2=94=82 SDE =E2=80=94 Software Development =
for Experiments
 Senior Software Engineer,    =E2=94=82 =F0=9F=93=9E +49 6159 713084
 SIMD Expert,                 =E2=94=82 =F0=9F=93=A7 m.kretz@gsi.de
 ISO C++ Numerics Chair       =E2=94=82 =F0=9F=94=97 mattkretz.github.io
=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=
=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=
=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=
=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=B4=E2=94=80=E2=94=80=E2=
=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=
=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=
=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=
=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=
=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80=E2=94=80

GSI Helmholtzzentrum f=C3=BCr Schwerionenforschung GmbH
Planckstra=C3=9Fe 1, 64291 Darmstadt, Germany, www.gsi.de

Commercial Register / Handelsregister: Amtsgericht Darmstadt, HRB 1528
Managing Directors / Gesch=C3=A4ftsf=C3=BChrung:
Professor Dr. Paolo Giubellino, J=C3=B6rg Blaurock
Chairman of the GSI Supervisory Board / Vorsitzender des GSI-Aufsichtsrats:
Ministerialdirigent Dr. Volkmar Dietz

--nextPart11647824.VV5PYv0bhD
Content-Disposition: attachment;
	filename="v2-libstdc-add-ARM-SVE-support-to-std-experimental-simd-whitespace.diff"
Content-Transfer-Encoding: 7Bit
Content-Type: text/x-patch; charset="x-UTF_8J";
	name="v2-libstdc-add-ARM-SVE-support-to-std-experimental-simd-whitespace.diff"

diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 99c07313f59a..e1b17c621b34 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -91,7 +91,7 @@
 constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
 #else
 constexpr inline int __sve_vectorized_size_bytes = 0;
-#endif 
+#endif
 
 namespace simd_abi {
 // simd_abi forward declarations {{{
@@ -381,7 +381,7 @@ __machine_flags()
 	       | (__have_neon_a64 << 2)
 	       | (__have_neon_a64 << 2)
 	       | (__support_neon_float << 3)
-         | (__have_sve << 4);
+	       | (__have_sve << 4);
     else if constexpr (__have_power_vmx)
       return __have_power_vmx
 	       | (__have_power_vsx  << 1)
@@ -4180,7 +4180,7 @@ split(const simd<_Tp, _Ap>& __x)
     constexpr size_t _N0 = _SL::template _S_at<0>();
     using _V = __deduced_simd<_Tp, _N0>;
 
-    auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 
+    auto __gen_fallback = [&]() constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
     {
       return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
 	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -4584,7 +4584,7 @@ _S_determine_best_abi()
 	    // 1. The ABI tag is valid for _Tp
 	    // 2. The storage overhead is no more than padding to fill the next
 	    //    power-of-2 number of bytes
-	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp> 
+	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
             && ((__is_sve_abi<_A0<_Bytes>>() && __have_sve && (_Np <= __sve_vectorized_size_bytes/sizeof(_Tp)))
                 || (__fullsize / 2 < _Np))
         )
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index b0ffe339569d..d65e833f7851 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -2376,94 +2376,95 @@ _S_isnormal(_SimdWrapper<_Tp, _Np> __x)
       _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
       _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
       {
-		if constexpr(__have_sve)
-		{
-		__fixed_size_storage_t<int, _Np> __r{};
-		__execute_n_times<_Np>(
-			[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-				__r._M_set(__i, std::fpclassify(__x[__i]));
-			});
-		return __r;
-		}
-		else {
-	using _I = __int_for_sizeof_t<_Tp>;
-	const auto __xn
-	  = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
-	constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
-	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
-
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
-	  = __vector_broadcast<_NI, _I>(FP_NORMAL);
+	if constexpr(__have_sve)
+	  {
+	    __fixed_size_storage_t<int, _Np> __r{};
+	    __execute_n_times<_Np>(
+	      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+		__r._M_set(__i, std::fpclassify(__x[__i]));
+	      });
+	    return __r;
+	  }
+	else
+	  {
+	    using _I = __int_for_sizeof_t<_Tp>;
+	    const auto __xn
+	      = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
+	    constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
+	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
+	      = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
+
+	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
+	      = __vector_broadcast<_NI, _I>(FP_NORMAL);
 #if !__FINITE_MATH_ONLY__
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
-	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
-	  = __vector_broadcast<_NI, _I>(FP_NAN);
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
-	  = __vector_broadcast<_NI, _I>(FP_INFINITE);
+	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
+	      = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
+	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
+	      = __vector_broadcast<_NI, _I>(FP_NAN);
+	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
+	      = __vector_broadcast<_NI, _I>(FP_INFINITE);
 #endif
 #ifndef __FAST_MATH__
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
-	  = __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
+	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
+	      = __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
 #endif
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
-	  = __vector_broadcast<_NI, _I>(FP_ZERO);
+	    _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
+	      = __vector_broadcast<_NI, _I>(FP_ZERO);
 
-	__vector_type_t<_I, _NI>
-	  __tmp = __xn < __minn
-  #ifdef __FAST_MATH__
-		    ? __fp_zero
-  #else
-		    ? (__xn == 0 ? __fp_zero : __fp_subnormal)
-  #endif
-  #if __FINITE_MATH_ONLY__
-		    : __fp_normal;
-  #else
-		    : (__xn < __infn ? __fp_normal
-				     : (__xn == __infn ? __fp_infinite : __fp_nan));
-  #endif
+	    __vector_type_t<_I, _NI>
+	      __tmp = __xn < __minn
+#ifdef __FAST_MATH__
+			       ? __fp_zero
+#else
+			       ? (__xn == 0 ? __fp_zero : __fp_subnormal)
+#endif
+#if __FINITE_MATH_ONLY__
+			       : __fp_normal;
+#else
+			       : (__xn < __infn ? __fp_normal
+						: (__xn == __infn ? __fp_infinite : __fp_nan));
+#endif
 
-	if constexpr (sizeof(_I) == sizeof(int))
-	  {
-	    using _FixedInt = __fixed_size_storage_t<int, _Np>;
-	    const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
-	    if constexpr (_FixedInt::_S_tuple_size == 1)
-	      return {__as_int};
-	    else if constexpr (_FixedInt::_S_tuple_size == 2
-				 && is_same_v<
-				      typename _FixedInt::_SecondType::_FirstAbi,
-				      simd_abi::scalar>)
-	      return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
-	    else if constexpr (_FixedInt::_S_tuple_size == 2)
-	      return {__extract<0, 2>(__as_int),
-		      __auto_bitcast(__extract<1, 2>(__as_int))};
+	    if constexpr (sizeof(_I) == sizeof(int))
+	      {
+		using _FixedInt = __fixed_size_storage_t<int, _Np>;
+		const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
+		if constexpr (_FixedInt::_S_tuple_size == 1)
+		  return {__as_int};
+		else if constexpr (_FixedInt::_S_tuple_size == 2
+				     && is_same_v<
+					  typename _FixedInt::_SecondType::_FirstAbi,
+					  simd_abi::scalar>)
+		  return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
+		else if constexpr (_FixedInt::_S_tuple_size == 2)
+		  return {__extract<0, 2>(__as_int),
+			  __auto_bitcast(__extract<1, 2>(__as_int))};
+		else
+		  __assert_unreachable<_Tp>();
+	      }
+	    else if constexpr (_Np == 2 && sizeof(_I) == 8
+				 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
+	      {
+		const auto __aslong = __vector_bitcast<_LLong>(__tmp);
+		return {int(__aslong[0]), {int(__aslong[1])}};
+	      }
+#if _GLIBCXX_SIMD_X86INTRIN
+	    else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
+				 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
+	      return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
+				      __to_intrin(__hi128(__tmp)))};
+	    else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
+				 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
+	      return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
+#endif // _GLIBCXX_SIMD_X86INTRIN
+	    else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
+	      return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
+						  [](auto... __l) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+						    return __make_wrapper<int>(__l...);
+						  })};
 	    else
 	      __assert_unreachable<_Tp>();
 	  }
-	else if constexpr (_Np == 2 && sizeof(_I) == 8
-			     && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
-	  {
-	    const auto __aslong = __vector_bitcast<_LLong>(__tmp);
-	    return {int(__aslong[0]), {int(__aslong[1])}};
-	  }
-#if _GLIBCXX_SIMD_X86INTRIN
-	else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
-			     && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
-	  return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
-				  __to_intrin(__hi128(__tmp)))};
-	else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
-			     && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
-	  return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
-#endif // _GLIBCXX_SIMD_X86INTRIN
-	else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
-	  return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
-					      [](auto... __l) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-						return __make_wrapper<int>(__l...);
-					      })};
-	else
-	  __assert_unreachable<_Tp>();
-		}
       }
 
     // _S_increment & _S_decrement{{{2
@@ -2796,22 +2797,22 @@ _S_convert(simd_mask<_Up, _UAbi> __x)
 	      return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits());
 	  }
 	else
-	{
-		if constexpr(__is_sve_abi<_UAbi>())
-		{
-			simd_mask<_Tp> __r(false);
-			constexpr size_t __min_size = std::min(__r.size(), __x.size());
-			__execute_n_times<__min_size>(
-			[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-	  			__r[__i] = __x[__i];
-			});
-			return __data(__r);			
-		}
-		else 
-	  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
-						       _S_size<_Tp>>(
-	    __data(__x));
-      }
+	  {
+	    if constexpr(__is_sve_abi<_UAbi>())
+	      {
+		simd_mask<_Tp> __r(false);
+		constexpr size_t __min_size = std::min(__r.size(), __x.size());
+		__execute_n_times<__min_size>(
+		  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+		    __r[__i] = __x[__i];
+		  });
+		return __data(__r);			
+	      }
+	    else
+	      return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
+							   _S_size<_Tp>>(
+		       __data(__x));
+	  }
 	}
     // }}}
     // _S_masked_load {{{2
diff --git a/libstdc++-v3/include/experimental/bits/simd_converter.h b/libstdc++-v3/include/experimental/bits/simd_converter.h
index 78ccc027fbb2..03fb3d28ab12 100644
--- a/libstdc++-v3/include/experimental/bits/simd_converter.h
+++ b/libstdc++-v3/include/experimental/bits/simd_converter.h
@@ -30,14 +30,14 @@
 _GLIBCXX_SIMD_BEGIN_NAMESPACE
 
 template <typename _Arg, typename _Ret, typename _To, size_t _Np>
-_Ret __converter_fallback(_Arg __a)
+  _Ret __converter_fallback(_Arg __a)
   {
-  _Ret __ret{};
-  __execute_n_times<_Np>(
+    _Ret __ret{};
+    __execute_n_times<_Np>(
       [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-        __ret._M_set(__i, static_cast<_To>(__a[__i]));
-    });
-  return __ret;
+	__ret._M_set(__i, static_cast<_To>(__a[__i]));
+      });
+    return __ret;
   }
 
 // _SimdConverter scalar -> scalar {{{
@@ -76,8 +76,7 @@ struct _SimdConverter
       __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
       is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
       conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
-	  && !(__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
-	  >>
+		  && !(__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())>>
   {
     using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
     using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
@@ -93,13 +92,13 @@ struct _SimdConverter
 // _SimdConverter "native 1" -> "native 2" {{{
 template <typename _From, typename _To, typename _AFrom, typename _ATo>
   struct _SimdConverter<
-    _From, _AFrom, _To, _ATo,
-    enable_if_t<!disjunction_v<
-      __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
-      is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
-      conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
-	  && (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
-	  >>
+	   _From, _AFrom, _To, _ATo,
+	   enable_if_t<!disjunction_v<
+			  __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>,
+			  is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>,
+			  conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>
+			 && (__is_sve_abi<_AFrom>() || __is_sve_abi<_ATo>())
+	 >>
   {
     using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
     using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
@@ -145,8 +144,8 @@ struct _SimdConverter<_From, simd_abi::fixed_size<_Np>
       if constexpr (is_same_v<_From, _To>)
 	return __x;
 
-	  // fallback to sequential when sve is available
-	  else if constexpr (__have_sve)
+      // fallback to sequential when sve is available
+      else if constexpr (__have_sve)
 	return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
 
       // special case (optimize) int signedness casts
@@ -313,12 +312,12 @@ static_assert(
       "_SimdConverter to fixed_size only works for equal element counts");
 
     using _Ret = __fixed_size_storage_t<_To, _Np>;
-	using _Arg = typename _SimdTraits<_From, _Ap>::_SimdMember;
+    using _Arg = typename _SimdTraits<_From, _Ap>::_SimdMember;
 
     _GLIBCXX_SIMD_INTRINSIC constexpr _Ret
     operator()(_Arg __x) const noexcept
     {
-	  if constexpr (__have_sve)
+      if constexpr (__have_sve)
 	return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
       else if constexpr (_Ret::_S_tuple_size == 1)
 	return {__vector_convert<typename _Ret::_FirstType::_BuiltinType>(__x)};
@@ -357,12 +356,13 @@ static_assert(
       "_SimdConverter to fixed_size only works for equal element counts");
 
     using _Arg = __fixed_size_storage_t<_From, _Np>;
-	using _Ret = typename _SimdTraits<_To, _Ap>::_SimdMember;
+    using _Ret = typename _SimdTraits<_To, _Ap>::_SimdMember;
 
     _GLIBCXX_SIMD_INTRINSIC constexpr
-      _Ret operator()(const _Arg& __x) const noexcept
+    _Ret
+    operator()(const _Arg& __x) const noexcept
     {
-	  if constexpr(__have_sve)
+      if constexpr(__have_sve)
 	return __converter_fallback<_Arg, _Ret, _To, _Np>(__x);
       else if constexpr (_Arg::_S_tuple_size == 1)
 	return __vector_convert<__vector_type_t<_To, _Np>>(__x.first);
diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h
index 769256d6992b..bf515e5145de 100644
--- a/libstdc++-v3/include/experimental/bits/simd_math.h
+++ b/libstdc++-v3/include/experimental/bits/simd_math.h
@@ -652,17 +652,17 @@ frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
 	(*__exp)[0] = __tmp;
 	return __r;
       }
-      else if constexpr (__is_sve_abi<_Abi>())
+    else if constexpr (__is_sve_abi<_Abi>())
       {
-        simd<_Tp, _Abi> __r;
-        __execute_n_times<simd_size_v<_Tp, _Abi>>(
-        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-            	int __tmp;
-              const auto __ri = std::frexp(__x[__i], &__tmp);
-              (*__exp)[__i] = __tmp;
-              __r[__i] = __ri;
-        });
-        return __r;
+	simd<_Tp, _Abi> __r;
+	__execute_n_times<simd_size_v<_Tp, _Abi>>(
+	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+	    int __tmp;
+	    const auto __ri = std::frexp(__x[__i], &__tmp);
+	    (*__exp)[__i] = __tmp;
+	    __r[__i] = __ri;
+	  });
+	return __r;
       }
     else if constexpr (__is_fixed_size_abi_v<_Abi>)
       return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
@@ -1147,7 +1147,8 @@ hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
 	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
 
 #ifndef __FAST_MATH__
-	    if constexpr (_V::size() > 1 && (__is_neon_abi<_Abi>() && __have_neon && !__have_neon_a32))
+	    if constexpr (_V::size() > 1
+			    && __is_neon_abi<_Abi>() && __have_neon && !__have_neon_a32)
 	      { // With ARMv7 NEON, we have no subnormals and must use slightly
 		// different strategy
 		const _V __hi_exp = __hi & __inf;
diff --git a/libstdc++-v3/include/experimental/bits/simd_sve.h b/libstdc++-v3/include/experimental/bits/simd_sve.h
index 123242a3a625..511da08aafc5 100644
--- a/libstdc++-v3/include/experimental/bits/simd_sve.h
+++ b/libstdc++-v3/include/experimental/bits/simd_sve.h
@@ -573,7 +573,7 @@ __intrin() const
       _GLIBCXX_SIMD_INTRINSIC constexpr value_type
       operator[](_SizeConstant<__i>) const
       {
-        return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i);
+	return _BuiltinSveMaskType::__sve_mask_get(_M_data, __i);
       }
 
     _GLIBCXX_SIMD_INTRINSIC constexpr void
@@ -831,7 +831,7 @@ _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x, _SveMaskWrapper<sizeof(_Tp), _
     {
       using _SUp = __get_sve_value_type_t<_Up>;
       using _STp = __get_sve_value_type_t<_Tp>;
-      
+
       _SUp* __up = reinterpret_cast<_SUp*>(__p);
 
       if constexpr (std::is_same_v<_Tp, _Up>)
@@ -839,19 +839,19 @@ _S_store(_Up* __p, _SveSimdWrapper<_Tp, _Np> __x, _SveMaskWrapper<sizeof(_Tp), _
       if constexpr (std::is_integral_v<_Tp> && std::is_integral_v<_Up>
 		      && (sizeof(_Tp) > sizeof(_Up)))
 	{
-    if constexpr (std::is_same_v<_SUp, int8_t> && std::is_signed_v<_STp>)
-      return svst1b(__k._M_data, __up, __x);
-    if constexpr (std::is_same_v<_SUp, uint8_t> && std::is_unsigned_v<_STp>)
-      return svst1b(__k._M_data, __up, __x);
-    if constexpr (std::is_same_v<_SUp, int16_t> && std::is_signed_v<_STp>)
-      return svst1h(__k._M_data, __up, __x);
-    if constexpr (std::is_same_v<_SUp, uint16_t> && std::is_unsigned_v<_STp>)
-      return svst1h(__k._M_data, __up, __x);
-    if constexpr (std::is_same_v<_SUp, int32_t> && std::is_signed_v<_STp>)
-      return svst1w(__k._M_data, __up, __x);
-    if constexpr (std::is_same_v<_SUp, uint32_t> && std::is_unsigned_v<_STp>)
-      return svst1w(__k._M_data, __up, __x);
-  }
+	  if constexpr (std::is_same_v<_SUp, int8_t> && std::is_signed_v<_STp>)
+	    return svst1b(__k._M_data, __up, __x);
+	  if constexpr (std::is_same_v<_SUp, uint8_t> && std::is_unsigned_v<_STp>)
+	    return svst1b(__k._M_data, __up, __x);
+	  if constexpr (std::is_same_v<_SUp, int16_t> && std::is_signed_v<_STp>)
+	    return svst1h(__k._M_data, __up, __x);
+	  if constexpr (std::is_same_v<_SUp, uint16_t> && std::is_unsigned_v<_STp>)
+	    return svst1h(__k._M_data, __up, __x);
+	  if constexpr (std::is_same_v<_SUp, int32_t> && std::is_signed_v<_STp>)
+	    return svst1w(__k._M_data, __up, __x);
+	  if constexpr (std::is_same_v<_SUp, uint32_t> && std::is_unsigned_v<_STp>)
+	    return svst1w(__k._M_data, __up, __x);
+	}
 
       __execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 	if (__k[__i])
@@ -991,42 +991,42 @@ for (size_t __ri = 1; __ri != __i; __ri++)
       _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
       _S_reduce(simd<_Tp, _Abi> __x, plus<>)
       {
-    return svaddv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
+	return svaddv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
       }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
       _S_reduce(simd<_Tp, _Abi> __x, bit_and<>)
       {
-    return svandv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
+	return svandv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
       }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
       _S_reduce(simd<_Tp, _Abi> __x, bit_or<>)
       {
-    return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
+	return svorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
       }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
       _S_reduce(simd<_Tp, _Abi> __x, bit_xor<>)
       {
-    return sveorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
+	return sveorv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
       }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
       _S_reduce(simd<_Tp, _Abi> __x, __detail::_Maximum())
       {
-    return svmaxv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
+	return svmaxv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
       }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
       _S_reduce(simd<_Tp, _Abi> __x, __detail::_Minimum())
       {
-    return svminv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
+	return svminv(__sve_vector_type<_Tp, _S_size<_Tp>>::__sve_active_mask(), __x._M_data);
       }
 
     template <typename _Tp, size_t _Np>
@@ -1082,7 +1082,7 @@ _S_unary_minus(_SveSimdWrapper<_Tp, _Np> __x) noexcept
     template <typename _Tp, size_t _Np>
       _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
       _S_plus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
-      { return __x._M_data + __y._M_data; } 
+      { return __x._M_data + __y._M_data; }
 
     template <typename _Tp, size_t _Np>
       _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
@@ -1097,19 +1097,21 @@ _S_multiplies(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
     template <typename _Tp, size_t _Np>
       _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
       _S_divides(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
-      { 
-        __sve_vector_type_t<_Tp, _Np> __y_padded = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
-                      __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
-        return __x._M_data / __y_padded; 
+      {
+	__sve_vector_type_t<_Tp, _Np> __y_padded
+	  = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
+		  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
+	return __x._M_data / __y_padded;
       }
 
     template <typename _Tp, size_t _Np>
       _GLIBCXX_SIMD_INTRINSIC static constexpr __sve_vector_type_t<_Tp, _Np>
       _S_modulus(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
-      { 
-        __sve_vector_type_t<_Tp, _Np> __y_padded = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
-                      __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
-        return __x._M_data % __y_padded;
+      {
+	__sve_vector_type_t<_Tp, _Np> __y_padded
+	  = svsel(__sve_vector_type<_Tp, _Np>::__sve_active_mask(),
+		  __y._M_data, __sve_vector_type<_Tp, _Np>::__sve_broadcast(1));
+	return __x._M_data % __y_padded;
       }
 
     template <typename _Tp, size_t _Np>
@@ -1412,14 +1414,14 @@ _S_fma(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y,
       _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
       _S_fmax(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
       {
-  return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
+	return svmaxnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
       }
 
     template <typename _Tp, size_t _Np>
       _GLIBCXX_SIMD_INTRINSIC static _SveSimdWrapper<_Tp, _Np>
       _S_fmin(_SveSimdWrapper<_Tp, _Np> __x, _SveSimdWrapper<_Tp, _Np> __y)
       {
-  return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
+	return svminnm_z(__sve_vector_type<_Tp, _Np>::__sve_active_mask(), __x._M_data, __y._M_data);
       }
 
     template <typename _Tp, size_t _Np>
@@ -1594,28 +1596,28 @@ _S_broadcast(bool __x)
       {
 	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
 	__sve_bool_type __tr = __sve_vector_type<_Tp, _Np>::__sve_active_mask();
-	__sve_bool_type __fl = svpfalse_b();;
+	__sve_bool_type __fl = svpfalse_b();
 	return __x ? __tr : __fl;
       }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
       _S_load(const bool* __mem)
-      { 
+      {
 	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
-  const uint8_t* __p = reinterpret_cast<const uint8_t*>(__mem);
-  __sve_bool_type __u8_active_mask = __sve_vector_type<uint8_t, _Np>::__sve_active_mask();
-  __sve_vector_type_t<uint8_t, _Np> __u8_vec_mask_load = svld1(__u8_active_mask, __p);
-  __sve_bool_type __u8_mask = svcmpne(__u8_active_mask, __u8_vec_mask_load, 0);
+	const uint8_t* __p = reinterpret_cast<const uint8_t*>(__mem);
+	__sve_bool_type __u8_active_mask = __sve_vector_type<uint8_t, _Np>::__sve_active_mask();
+	__sve_vector_type_t<uint8_t, _Np> __u8_vec_mask_load = svld1(__u8_active_mask, __p);
+	__sve_bool_type __u8_mask = svcmpne(__u8_active_mask, __u8_vec_mask_load, 0);
 
-  __sve_bool_type __tp_mask = __u8_mask;
-  for (size_t __up_size = 1; __up_size != sizeof(_Tp); __up_size *= 2)
-    {
-  __tp_mask = svunpklo(__tp_mask);
-    }
+	__sve_bool_type __tp_mask = __u8_mask;
+	for (size_t __up_size = 1; __up_size != sizeof(_Tp); __up_size *= 2)
+	  {
+	    __tp_mask = svunpklo(__tp_mask);
+	  }
 
 	_SveMaskWrapper<sizeof(_Tp), simd_size_v<_Tp, _Abi>> __r{__tp_mask};
-  return __r;
+	return __r;
       }
 
     template <size_t _Bits, size_t _Np>
@@ -1639,8 +1641,9 @@ _S_masked_load(_SveMaskWrapper<_Bits, _Np> __merge, _SveMaskWrapper<_Bits, _Np>
       _GLIBCXX_SIMD_INTRINSIC static constexpr void
       _S_store(_SveMaskWrapper<_Bits, _Np> __v, bool* __mem) noexcept
       {
-	__execute_n_times<_Np>([&](auto __i)
-			      _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; });
+	__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+	  __mem[__i] = __v[__i];
+	});
       }
 
     template <size_t _Bits, size_t _Np>
@@ -1659,8 +1662,9 @@ _S_masked_store(const _SveMaskWrapper<_Bits, _Np> __v, bool* __mem,
       _S_to_bits(_SveMaskWrapper<_Bits, _Np> __x)
       {
 	_ULLong __r = 0;
-	__execute_n_times<_Np>(
-	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r |= _ULLong(__x[__i]) << __i; });
+	__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+	  __r |= _ULLong(__x[__i]) << __i;
+	});
 	return __r;
       }
 
@@ -1669,8 +1673,9 @@ _S_to_bits(_SveMaskWrapper<_Bits, _Np> __x)
       _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>)
       {
 	_SveMaskWrapper<sizeof(_Tp), _Np> __r;
-	__execute_n_times<_Np>([&](auto __i)
-			      _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __bits[__i]); });
+	__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+	  __r._M_set(__i, __bits[__i]);
+	});
 	return __r;
       }
 
@@ -1730,8 +1735,9 @@ for (size_t __up_size = sizeof(_Up); __up_size != sizeof(_Tp); __up_size *= 2)
       _S_convert(_BitMask<_Np, _Sanitized> __x)
       {
 	_MaskMember<_Tp> __r{};
-	__execute_n_times<_Np>([&](auto __i)
-			      _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __r._M_set(__i, __x[__i]); });
+	__execute_n_times<_Np>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+	  __r._M_set(__i, __x[__i]);
+	});
 	return __r;
       }
 
@@ -1831,12 +1837,18 @@ _S_all_of(simd_mask<_Tp, _Abi> __k)
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static bool
       _S_any_of(simd_mask<_Tp, _Abi> __k)
-      { return svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(), __k._M_data); }
+      {
+	return svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(),
+			   __k._M_data);
+      }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static bool
       _S_none_of(simd_mask<_Tp, _Abi> __k)
-      { return !svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(), __k._M_data); }
+      {
+	return !svptest_any(__sve_vector_type<_Tp, simd_size_v<_Tp, _Abi>>::__sve_active_mask(),
+			    __k._M_data);
+      }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static bool
@@ -1849,7 +1861,10 @@ _S_some_of(simd_mask<_Tp, _Abi> __k)
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static int
       _S_find_first_set(simd_mask<_Tp, _Abi> __k)
-      { return svclastb(svpfirst(__k._M_data, svpfalse()), -1, __sve_mask_type<sizeof(_Tp)>::__index0123); }
+      {
+	return svclastb(svpfirst(__k._M_data, svpfalse()),
+			-1, __sve_mask_type<sizeof(_Tp)>::__index0123);
+      }
 
     template <typename _Tp>
       _GLIBCXX_SIMD_INTRINSIC static int

--nextPart11647824.VV5PYv0bhD--