public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/101579] New: Suboptimal codegen for __builtin_shufflevector
@ 2021-07-22 12:41 hjl.tools at gmail dot com
  2021-07-27 10:14 ` [Bug target/101579] " rguenth at gcc dot gnu.org
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: hjl.tools at gmail dot com @ 2021-07-22 12:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101579

            Bug ID: 101579
           Summary: Suboptimal codegen for __builtin_shufflevector
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
                CC: crazylht at gmail dot com
  Target Milestone: ---
            Target: i386,x86-64

For

---
typedef unsigned int __attribute__((__vector_size__ (32))) U;
typedef unsigned char __attribute__((__vector_size__ (64))) V;

V g;

U
foo (void)
{
  V v = __builtin_shufflevector (g, g,
                                 0, 1, 2, 0, 5, 1, 0, 1, 3, 2, 3, 0, 4, 3, 1, 2,
                                 2, 0, 4, 2, 3, 1, 1, 2, 3, 4, 1, 1, 0, 0, 5, 2,
                                 0, 3, 3, 3, 3, 4, 5, 0, 1, 5, 2, 1, 0, 1, 1, 2,
                                 3, 2, 0, 5, 4, 5, 1, 0, 1, 4, 4, 3, 4, 5, 2, 0);
  v ^= 255;
  V w = v + g;
  U u = ((union { V a; U b; }) w).b + ((union { V a; U b; }) w).b[1];
  return u;
}
---

GCC 12 -march=skylake -O2 generates

        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        vpcmpeqd        %ymm4, %ymm4, %ymm4
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        andq    $-64, %rsp
        subq    $72, %rsp
        movzbl  g+2(%rip), %ecx
        movzbl  g+1(%rip), %edx
        movzbl  g(%rip), %eax
        movzbl  g+3(%rip), %edi
        movzbl  g+5(%rip), %esi
        movzbl  g+4(%rip), %r8d
        vmovd   %ecx, %xmm7
        vmovd   %edi, %xmm0
        vmovd   %edx, %xmm1
        vmovd   %eax, %xmm5
        vmovdqa %xmm7, -72(%rsp)
        vpinsrb $1, %eax, %xmm7, %xmm11
        vmovd   %r8d, %xmm2
        vmovd   %esi, %xmm7
        vpinsrb $1, %edx, %xmm5, %xmm6
        vpinsrb $1, %edi, %xmm2, %xmm14
        vpinsrb $1, %ecx, %xmm1, %xmm12
        vpinsrb $1, %ecx, %xmm0, %xmm15
        vpinsrb $1, %edx, %xmm7, %xmm8
        vpinsrb $1, %eax, %xmm0, %xmm3
        vpunpcklwd      %xmm11, %xmm6, %xmm13
        vpunpcklwd      %xmm12, %xmm14, %xmm9
        vpunpcklwd      %xmm6, %xmm8, %xmm8
        vpunpcklwd      %xmm3, %xmm15, %xmm3
        vpunpckldq      %xmm8, %xmm13, %xmm8
        vpunpckldq      %xmm9, %xmm3, %xmm3
        vpunpcklqdq     %xmm3, %xmm8, %xmm3
        vpaddb  g(%rip), %ymm4, %ymm10
        vmovdqa %xmm14, -88(%rsp)
        vmovdqa %xmm3, -104(%rsp)
        vpinsrb $1, %r8d, %xmm0, %xmm9
        vpinsrb $1, %ecx, %xmm7, %xmm4
        vpinsrb $1, %ecx, %xmm2, %xmm3
        vpinsrb $1, %edx, %xmm0, %xmm14
        vpinsrb $1, %edx, %xmm1, %xmm8
        vpinsrb $1, %eax, %xmm5, %xmm13
        vpunpcklwd      %xmm4, %xmm13, %xmm13
        vpunpcklwd      %xmm8, %xmm9, %xmm8
        vpunpcklwd      %xmm3, %xmm11, %xmm3
        vpunpcklwd      %xmm12, %xmm14, %xmm14
        vmovdqa -104(%rsp), %xmm4
        vpunpckldq      %xmm13, %xmm8, %xmm8
        vpunpckldq      %xmm14, %xmm3, %xmm3
        vpunpcklqdq     %xmm8, %xmm3, %xmm3
        vmovdqa -72(%rsp), %xmm13
        vinserti128     $0x1, %xmm3, %ymm4, %ymm3
        vpsubb  %ymm3, %ymm10, %ymm10
        vmovdqa %ymm10, -56(%rsp)
        vmovdqa %xmm10, %xmm8
        vpinsrb $1, %esi, %xmm1, %xmm3
        vpinsrb $1, %edi, %xmm5, %xmm10
        vpinsrb $1, %edi, %xmm0, %xmm0
        vpinsrb $1, %eax, %xmm7, %xmm7
        vpinsrb $1, %edx, %xmm13, %xmm13
        vpunpcklwd      %xmm13, %xmm3, %xmm3
        vpunpcklwd      %xmm0, %xmm10, %xmm0
        vpunpcklwd      %xmm7, %xmm9, %xmm9
        vpunpcklwd      %xmm12, %xmm6, %xmm6
        vpunpckldq      %xmm6, %xmm3, %xmm6
        vpunpckldq      %xmm9, %xmm0, %xmm0
        vpunpcklqdq     %xmm6, %xmm0, %xmm0
        vpinsrb $1, %eax, %xmm1, %xmm6
        vpinsrb $1, %r8d, %xmm1, %xmm1
        vpunpcklwd      -88(%rsp), %xmm1, %xmm1
        vpinsrb $1, %esi, %xmm5, %xmm5
        vpinsrb $1, %esi, %xmm2, %xmm2
        vpunpcklwd      %xmm5, %xmm15, %xmm3
        vpunpcklwd      %xmm6, %xmm2, %xmm5
        vpunpcklwd      %xmm11, %xmm2, %xmm2
        vpcmpeqd        %ymm4, %ymm4, %ymm4
        vpunpckldq      %xmm5, %xmm3, %xmm3
        vpunpckldq      %xmm2, %xmm1, %xmm1
        vpaddb  g+32(%rip), %ymm4, %ymm4
        vpunpcklqdq     %xmm1, %xmm3, %xmm1
        vinserti128     $0x1, %xmm1, %ymm0, %ymm0
        vpsubb  %ymm0, %ymm4, %ymm4
        vmovdqa %xmm8, 8(%rsp)
        vmovdqa %ymm4, -24(%rsp)
        vmovdqa -40(%rsp), %xmm4
        vpbroadcastd    12(%rsp), %ymm0
        vmovdqa %xmm4, 24(%rsp)
        vpaddd  8(%rsp), %ymm0, %ymm0
        leave
        .cfi_def_cfa 7, 8
        ret

clang 12 generates

foo:                                    # @foo
        .cfi_startproc
# %bb.0:
        vmovdqa g(%rip), %ymm0
        vpcmpeqd        %ymm1, %ymm1, %ymm1
        vpxor   %ymm1, %ymm0, %ymm1
        vpermq  $68, %ymm1, %ymm1               # ymm1 = ymm1[0,1,0,1]
        vpshufb .LCPI0_0(%rip), %ymm1, %ymm1    # ymm1 = ymm1[0,1,2,0,5,1,0,1,3,2,3,0,4,3,1,2,18,16,20,18,19,17,17,18,19,20,17,17,16,16,21,18]
        vpaddb  %ymm0, %ymm1, %ymm0
        vpbroadcastd    .LCPI0_1(%rip), %ymm1   # ymm1 = [1,1,1,1,1,1,1,1]
        vpermd  %ymm0, %ymm1, %ymm1
        vpaddd  %ymm0, %ymm1, %ymm0
        retq
.Lfunc_end0:
        .size   foo, .Lfunc_end0-foo
        .cfi_endproc

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-07-28 10:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-22 12:41 [Bug target/101579] New: Suboptimal codegen for __builtin_shufflevector hjl.tools at gmail dot com
2021-07-27 10:14 ` [Bug target/101579] " rguenth at gcc dot gnu.org
2021-07-27 10:35 ` jakub at gcc dot gnu.org
2021-07-28  9:01 ` crazylht at gmail dot com
2021-07-28  9:40 ` crazylht at gmail dot com
2021-07-28  9:45 ` crazylht at gmail dot com
2021-07-28  9:49 ` jakub at gcc dot gnu.org
2021-07-28 10:03 ` crazylht at gmail dot com
2021-07-28 10:12 ` crazylht at gmail dot com

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).