public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/101927] New: There is no vector mode popcount for aarch64
@ 2021-08-16  5:02 pinskia at gcc dot gnu.org
  0 siblings, 0 replies; only message in thread
From: pinskia at gcc dot gnu.org @ 2021-08-16  5:02 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101927

            Bug ID: 101927
           Summary: There is no vector mode popcount for aarch64
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: enhancement
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: pinskia at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64

Take:

#include <stdlib.h>
#include <stdint.h>

/* Return the number of bit positions in which the first L bytes of A and B
   differ: for each index the two bytes are XORed and the set bits of the
   result are counted, and those counts are summed.  Both pointers are
   restrict-qualified, so the caller promises the buffers do not alias.
   Returns 0 when L is 0.  */
size_t hd (const uint8_t *restrict a, const uint8_t *restrict b, size_t l) {
  size_t r = 0, x;
  for (x = 0; x < l; x++)
    /* The uint8_t operands are promoted to int before the XOR, so only the
       low 8 bits can be set and each iteration adds a value in 0..8.  */
    r += __builtin_popcount (a[x] ^ b[x]);

  return r;
}

At -O3, GCC does not vectorize this loop.
Clang/LLVM does:
.LBB0_5:                                // =>This Inner Loop Header: Depth=1
        ld1     { v3.b }[0], [x8]
        sub     x12, x8, #2
        ld1     { v5.b }[0], [x10]
        ld1     { v4.b }[0], [x12]
        sub     x12, x10, #2
        ld1     { v6.b }[0], [x12]
        add     x12, x8, #1
        ld1     { v3.b }[4], [x12]
        add     x12, x10, #1
        ld1     { v5.b }[4], [x12]
        sub     x12, x8, #1
        ld1     { v4.b }[4], [x12]
        sub     x12, x10, #1
        ld1     { v6.b }[4], [x12]
        eor     v3.8b, v5.8b, v3.8b
        ushll   v3.2d, v3.2s, #0
        and     v3.16b, v3.16b, v1.16b
        eor     v4.8b, v6.8b, v4.8b
        ushll   v4.2d, v4.2s, #0
        and     v4.16b, v4.16b, v1.16b
        cnt     v3.16b, v3.16b
        cnt     v4.16b, v4.16b
        uaddlp  v3.8h, v3.16b
        uaddlp  v4.8h, v4.16b
        uaddlp  v3.4s, v3.8h
        uaddlp  v4.4s, v4.8h
        add     x8, x8, #4
        subs    x11, x11, #4
        uadalp  v2.2d, v3.4s
        uadalp  v0.2d, v4.4s
        add     x10, x10, #4
        b.ne    .LBB0_5

------ CUT ----
Note: I think we could do better than this.

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-08-16  5:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-16  5:02 [Bug target/101927] New: There is no vector mode popcount for aarch64 pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).