public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug middle-end/102977] New: [GCC12 regression] vectorizer failed to generate complex fma.
@ 2021-10-28  1:34 crazylht at gmail dot com
  2021-10-28  4:44 ` [Bug middle-end/102977] [GCC12 regression] vectorizer failed to generate complex fma with SVE pinskia at gcc dot gnu.org
                   ` (11 more replies)
  0 siblings, 12 replies; 13+ messages in thread
From: crazylht at gmail dot com @ 2021-10-28  1:34 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102977

            Bug ID: 102977
           Summary: [GCC12 regression] vectorizer failed to generate
                    complex fma.
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: crazylht at gmail dot com
  Target Milestone: ---
            Target: aarch64-linux-gnu

#include<complex.h>

#include<complex.h>

void
foo (_Complex _Float16* __restrict a, _Complex _Float16* b, _Complex _Float16
*c)
{
    for (int i =0 ; i != 8; i++)
      a[i] += b[i] * c[i];
}


GCC 11.2 generates:

foo:
        mov     x3, 16
        ptrue   p1.b, all
        whilelo p0.h, xzr, x3
        ld1h    z2.h, p0/z, [x1]
        ld1h    z1.h, p0/z, [x2]
        ld1h    z0.h, p0/z, [x0]
        fcmla   z0.h, p1/m, z1.h, z2.h, #0
        fcmla   z0.h, p1/m, z1.h, z2.h, #90
        st1h    z0.h, p0, [x0]
        cntb    x4
        cnth    x5
        add     x0, x0, x4
        add     x1, x1, x4
        add     x2, x2, x4
        whilelo p0.h, x5, x3
        b.none  .L1
        ld1h    z2.h, p0/z, [x1]
        ld1h    z1.h, p0/z, [x2]
        ld1h    z0.h, p0/z, [x0]
        fcmla   z0.h, p1/m, z1.h, z2.h, #0
        fcmla   z0.h, p1/m, z1.h, z2.h, #90
        st1h    z0.h, p0, [x0]
.L1:
        ret


Current trunk generates:

foo:
        ptrue   p1.h, vl8
        ptrue   p0.b, all
        ld2h    {z2.h - z3.h}, p1/z, [x1]
        ld2h    {z0.h - z1.h}, p1/z, [x2]
        ld2h    {z16.h - z17.h}, p1/z, [x0]
        fmul    z6.h, z0.h, z3.h
        movprfx z7, z16
        fmla    z7.h, p0/m, z0.h, z2.h
        fmla    z6.h, p0/m, z1.h, z2.h
        movprfx z4, z7
        fmls    z4.h, p0/m, z1.h, z3.h
        fadd    z5.h, z6.h, z17.h
        st2h    {z4.h - z5.h}, p1, [x0]
        ret


Options: -Ofast -march=armv8.3-a+sve+fp16
Refer to https://godbolt.org/z/4PPKnWvc1

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-10-29 11:51 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-28  1:34 [Bug middle-end/102977] New: [GCC12 regression] vectorizer failed to generate complex fma crazylht at gmail dot com
2021-10-28  4:44 ` [Bug middle-end/102977] [GCC12 regression] vectorizer failed to generate complex fma with SVE pinskia at gcc dot gnu.org
2021-10-28  4:45 ` pinskia at gcc dot gnu.org
2021-10-28  4:47 ` pinskia at gcc dot gnu.org
2021-10-28  4:52 ` [Bug middle-end/102977] [12 Regression] vectorizer failed to use armv8.3-a complex fma pinskia at gcc dot gnu.org
2021-10-28  4:58 ` [Bug tree-optimization/102977] " pinskia at gcc dot gnu.org
2021-10-28  5:12 ` pinskia at gcc dot gnu.org
2021-10-28  6:48 ` crazylht at gmail dot com
2021-10-28  8:25 ` tnfchris at gcc dot gnu.org
2021-10-28  8:52 ` tnfchris at gcc dot gnu.org
2021-10-29 11:49 ` cvs-commit at gcc dot gnu.org
2021-10-29 11:49 ` cvs-commit at gcc dot gnu.org
2021-10-29 11:51 ` tnfchris at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).