public inbox for libstdc++-cvs@sourceware.org
help / color / mirror / Atom feed
From: Jonathan Wakely <redi@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org
Subject: [gcc r11-7083] libstdc++: Improve "find_first/last_set" for NEON
Date: Wed,  3 Feb 2021 15:51:25 +0000 (GMT)	[thread overview]
Message-ID: <20210203155125.1CBC539C0745@sourceware.org> (raw)

https://gcc.gnu.org/g:598876574184e745defee4b36dc2408068b7a22e

commit r11-7083-g598876574184e745defee4b36dc2408068b7a22e
Author: yaozhongxiao <yaozhongxiao@linux.alibaba.com>
Date:   Wed Feb 3 15:49:30 2021 +0000

    libstdc++: Improve "find_first/last_set" for NEON
    
    The find_first_set and find_last_set methods are not optimal for NEON.
    They can be improved by synthesizing the result with horizontal adds
    (vaddv), which reduces the generated assembly code. In the following
    example, vaddvq_s16 generates 2 instructions but the repeated
    vpaddq_s16 calls generate 4 instructions:
    
     # vaddvq_s16
        vaddvq_s16(__asint);
        //  addv    h0, v1.8h
        //  smov    w1, v0.h[0]
     # vpaddq_s16
        vpaddq_s16(vpaddq_s16(vpaddq_s16(__asint, __zero), __zero), __zero)[0]
        // addp v1.8h,v1.8h,v2.8h
        // addp v1.8h,v1.8h,v2.8h
        // addp v1.8h,v1.8h,v2.8h
        // smov    w1, v1.h[0]
     #
    
    libstdc++-v3/ChangeLog:
    
            * include/experimental/bits/simd_neon.h: Replace repeated vpadd
            calls with a single vaddv for aarch64.

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_neon.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_neon.h b/libstdc++-v3/include/experimental/bits/simd_neon.h
index 8bb21169c8d..7f472e88649 100644
--- a/libstdc++-v3/include/experimental/bits/simd_neon.h
+++ b/libstdc++-v3/include/experimental/bits/simd_neon.h
@@ -311,8 +311,7 @@ struct _MaskImplNeonMixin
 		  });
 	      __asint &= __bitsel;
 #ifdef __aarch64__
-	      return vpaddq_s16(vpaddq_s16(vpaddq_s16(__asint, __zero), __zero),
-				__zero)[0];
+	      return vaddvq_s16(__asint);
 #else
 	      return vpadd_s16(
 		vpadd_s16(vpadd_s16(__lo64(__asint), __hi64(__asint)), __zero),
@@ -328,7 +327,7 @@ struct _MaskImplNeonMixin
 		  });
 	      __asint &= __bitsel;
 #ifdef __aarch64__
-	      return vpaddq_s32(vpaddq_s32(__asint, __zero), __zero)[0];
+	      return vaddvq_s32(__asint);
 #else
 	      return vpadd_s32(vpadd_s32(__lo64(__asint), __hi64(__asint)),
 			       __zero)[0];
@@ -351,8 +350,12 @@ struct _MaskImplNeonMixin
 		    return static_cast<_I>(__i < _Np ? 1 << __i : 0);
 		  });
 	      __asint &= __bitsel;
+#ifdef __aarch64__
+	      return vaddv_s8(__asint);
+#else
 	      return vpadd_s8(vpadd_s8(vpadd_s8(__asint, __zero), __zero),
 			      __zero)[0];
+#endif
 	    }
 	  else if constexpr (sizeof(_Tp) == 2)
 	    {
@@ -362,12 +365,20 @@ struct _MaskImplNeonMixin
 		    return static_cast<_I>(__i < _Np ? 1 << __i : 0);
 		  });
 	      __asint &= __bitsel;
+#ifdef __aarch64__
+	      return vaddv_s16(__asint);
+#else
 	      return vpadd_s16(vpadd_s16(__asint, __zero), __zero)[0];
+#endif
 	    }
 	  else if constexpr (sizeof(_Tp) == 4)
 	    {
 	      __asint &= __make_vector<_I>(0x1, 0x2);
+#ifdef __aarch64__
+	      return vaddv_s32(__asint);
+#else
 	      return vpadd_s32(__asint, __zero)[0];
+#endif
 	    }
 	  else
 	    __assert_unreachable<_Tp>();


                 reply	other threads:[~2021-02-03 15:51 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210203155125.1CBC539C0745@sourceware.org \
    --to=redi@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    --cc=libstdc++-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).