public inbox for libstdc++-cvs@sourceware.org help / color / mirror / Atom feed
From: Jonathan Wakely <redi@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r11-7083] libstdc++: Improve "find_first/last_set" for NEON Date: Wed, 3 Feb 2021 15:51:25 +0000 (GMT) [thread overview] Message-ID: <20210203155125.1CBC539C0745@sourceware.org> (raw) https://gcc.gnu.org/g:598876574184e745defee4b36dc2408068b7a22e commit r11-7083-g598876574184e745defee4b36dc2408068b7a22e Author: yaozhongxiao <yaozhongxiao@linux.alibaba.com> Date: Wed Feb 3 15:49:30 2021 +0000 libstdc++: Improve "find_first/last_set" for NEON The find_first_set and find_last_set methods are not optimal for NEON; they can be improved by synthesizing the result with horizontal adds (vaddv), which reduces the generated assembly code. In the following cases, vaddvq_s16 will generate 2 instructions but vpadd_s16 will generate 4 instructions: # vaddvq_s16 vaddvq_s16(__asint); // addv h0, v1.8h // smov w1, v0.h[0] # vpadd_s16 vpaddq_s16(vpaddq_s16(vpaddq_s16(__asint, __zero), __zero), __zero)[0] // addp v1.8h,v1.8h,v2.8h // addp v1.8h,v1.8h,v2.8h // addp v1.8h,v1.8h,v2.8h // smov w1, v1.h[0] # libstdc++-v3/ChangeLog: * include/experimental/bits/simd_neon.h: Replace repeated vpadd calls with a single vaddv for aarch64. 
Diff: --- libstdc++-v3/include/experimental/bits/simd_neon.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/include/experimental/bits/simd_neon.h b/libstdc++-v3/include/experimental/bits/simd_neon.h index 8bb21169c8d..7f472e88649 100644 --- a/libstdc++-v3/include/experimental/bits/simd_neon.h +++ b/libstdc++-v3/include/experimental/bits/simd_neon.h @@ -311,8 +311,7 @@ struct _MaskImplNeonMixin }); __asint &= __bitsel; #ifdef __aarch64__ - return vpaddq_s16(vpaddq_s16(vpaddq_s16(__asint, __zero), __zero), - __zero)[0]; + return vaddvq_s16(__asint); #else return vpadd_s16( vpadd_s16(vpadd_s16(__lo64(__asint), __hi64(__asint)), __zero), @@ -328,7 +327,7 @@ struct _MaskImplNeonMixin }); __asint &= __bitsel; #ifdef __aarch64__ - return vpaddq_s32(vpaddq_s32(__asint, __zero), __zero)[0]; + return vaddvq_s32(__asint); #else return vpadd_s32(vpadd_s32(__lo64(__asint), __hi64(__asint)), __zero)[0]; @@ -351,8 +350,12 @@ struct _MaskImplNeonMixin return static_cast<_I>(__i < _Np ? 1 << __i : 0); }); __asint &= __bitsel; +#ifdef __aarch64__ + return vaddv_s8(__asint); +#else return vpadd_s8(vpadd_s8(vpadd_s8(__asint, __zero), __zero), __zero)[0]; +#endif } else if constexpr (sizeof(_Tp) == 2) { @@ -362,12 +365,20 @@ struct _MaskImplNeonMixin return static_cast<_I>(__i < _Np ? 1 << __i : 0); }); __asint &= __bitsel; +#ifdef __aarch64__ + return vaddv_s16(__asint); +#else return vpadd_s16(vpadd_s16(__asint, __zero), __zero)[0]; +#endif } else if constexpr (sizeof(_Tp) == 4) { __asint &= __make_vector<_I>(0x1, 0x2); +#ifdef __aarch64__ + return vaddv_s32(__asint); +#else return vpadd_s32(__asint, __zero)[0]; +#endif } else __assert_unreachable<_Tp>();
reply other threads:[~2021-02-03 15:51 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210203155125.1CBC539C0745@sourceware.org \ --to=redi@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ --cc=libstdc++-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).