public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/111829] New: Redundant register moves inside the loop
@ 2023-10-16  5:54 crazylht at gmail dot com
  2023-10-16  6:03 ` [Bug target/111829] " crazylht at gmail dot com
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: crazylht at gmail dot com @ 2023-10-16  5:54 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111829

            Bug ID: 111829
           Summary: Redundant register moves inside the loop
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: crazylht at gmail dot com
  Target Milestone: ---
            Target: x86_64-*-* i?86-*-*

#include<immintrin.h>
/*
 * Reduced test case for the report.
 *
 * Accumulates _mm_dpbusd_epi32 results into a single vector over a fixed
 * 100000 iterations, stores the final vector through pc, and returns the sum
 * of its four int elements.
 *
 * pa  vector inputs, one __m128i read per iteration
 * b   scalar inputs, one int read per iteration and splatted with
 *     _mm_set1_epi32
 * pc  receives the final accumulator vector
 * n   not used by the body; the trip count is the constant 100000
 */
int
foo (__m128i* __restrict pa, int* b,
 __m128i* __restrict pc, int n)
{
    /* Accumulator starts at all zeros. */
    __m128i vsum = _mm_setzero_si128();
    for (int i = 0; i != 100000; i++)
    {
        /* vsum is both the first operand and the destination, so it is
           carried from one iteration to the next.  This is the statement
           whose generated code contains the reported register copies.  */
        vsum = _mm_dpbusd_epi32 (vsum, pa[i], _mm_set1_epi32 (b[i]));
    }
    *pc = vsum;
    /* Horizontal reduction: view vsum as __v4si and add its four elements. */
    int ssum = 0;
    for (int i = 0; i != 4; i++)
      ssum += ((__v4si)vsum)[i];
    return ssum;
}

gcc -O2 -mavxvnni

foo(long long __vector(2)*, int*, long long __vector(2)*, int):
        leaq    400000(%rsi), %rax
        vpxor   %xmm0, %xmm0, %xmm0
.L2:
        vmovdqa (%rdi), %xmm2
        vmovdqa %xmm0, %xmm1 ---- redundant
        addq    $4, %rsi
        addq    $16, %rdi
        vpbroadcastd    -4(%rsi), %xmm3
        {vex} vpdpbusd  %xmm3, %xmm2, %xmm1
        vmovdqa %xmm1, %xmm0 --- redundant
        cmpq    %rax, %rsi
        jne     .L2
        vmovdqa %xmm1, (%rdx)
        leaq    -24(%rsp), %rax
        leaq    -8(%rsp), %rcx
        xorl    %edx, %edx
.L3:
        vmovdqa %xmm0, -24(%rsp)
        addq    $4, %rax
        addl    -4(%rax), %edx
        cmpq    %rax, %rcx
        jne     .L3
        movl    %edx, %eax
        ret


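It could be better: the two vmovdqa copies between %xmm0 and %xmm1 in the first loop can be dropped by accumulating directly in %xmm0, giving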


foo(long long __vector(2)*, int*, long long __vector(2)*, int):
        leaq    400000(%rsi), %rax
        vpxor   %xmm0, %xmm0, %xmm0
.L2:
        vmovdqa (%rdi), %xmm2

        addq    $4, %rsi
        addq    $16, %rdi
        vpbroadcastd    -4(%rsi), %xmm3
        {vex} vpdpbusd  %xmm3, %xmm2, %xmm0
        cmpq    %rax, %rsi
        jne     .L2
        vmovdqa %xmm0, (%rdx)
        leaq    -24(%rsp), %rax
        leaq    -8(%rsp), %rcx
        xorl    %edx, %edx
.L3:
        vmovdqa %xmm0, -24(%rsp)
        addq    $4, %rax
        addl    -4(%rax), %edx
        cmpq    %rax, %rcx
        jne     .L3
        movl    %edx, %eax
        ret

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-10-16 17:19 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-16  5:54 [Bug target/111829] New: Redundant register moves inside the loop crazylht at gmail dot com
2023-10-16  6:03 ` [Bug target/111829] " crazylht at gmail dot com
2023-10-16  7:27 ` rguenth at gcc dot gnu.org
2023-10-16  8:01 ` crazylht at gmail dot com
2023-10-16  8:03 ` crazylht at gmail dot com
2023-10-16 17:19 ` pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).