[Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

From: "acoplan at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
Date: Thu, 25 Jan 2024 10:53:17 +0000	[thread overview]
Message-ID: <bug-113597-4@http.gcc.gnu.org/bugzilla/> (raw)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

            Bug ID: 113597
           Summary: [14 Regression] aarch64: Significant code quality
                    regression since r14-8346-ga98d5130a6dcff
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: acoplan at gcc dot gnu.org
  Target Milestone: ---

The following testcase shows a significant regression in code quality
since r14-8346-ga98d5130a6dcff2ed4db371e500550134777b8cf on aarch64:

$ cat t.cc
#include <arm_neon.h>
typedef struct {
  float b;
  float c;
} d;
template <uint16_t e> void f(uint16_t g, d *u, d *v) {
  uint16_t j, l = j = e * e;
  float32_t b[j];
  float32_t c[l];
  float32x4_t m[j];
  for (int i = 0; i < j; i++)
    m[i] = vdupq_n_f32(0.F);
  float32x4_t n[l];
  for (int i = 0; i < l; i++)
    n[i] = vdupq_n_f32(0.F);
  for (uint16_t k = 0; k < g; k += 2) {
    float32x4_t o[e];
    for (int i = 0; i < e; i++)
      o[i] = vld1q_f32((float32_t *)&u[k]);
    int idx = 0;
    for (int a = 0; a < e; a++)
      for (int ah = a; ah < e; ah++)
        m[idx] = vfmaq_f32(m[idx], o[a], o[ah]);
    float32x4_t p[e];
    for (int i; i; i++)
      for (int a; a;)
        for (int ah;;)
          vfmsq_f32(n[idx], o[a], p[ah]);
  }
  for (int i = 0; i < j; i++)
    b[i] = vaddvq_f32(m[i]);
  for (int i = 0; i < l; i++)
    c[i] = vaddvq_f32(n[i]);
  constexpr uint16_t q(e * e);
  float32x4_t r[q];
  float32x2_t s;
  r[4] = float32x4_t{b[5] - c[3]};
  for (int i = 0; i < q; i++)
    vst1q_f32((float32_t *)&v[2 * i], r[i]);
  if (e % 2)
    vst1_f32((float32_t *)v, s);
}
void t() {
  d v, u;
  f<4>(0, &u, &v);
}

$ cat cmp.sh
#!/bin/bash
set -e

BEFORE=/work/builds/r14-8345/gcc
AFTER=/work/builds/r14-8346/gcc
SRC=t.cc

$BEFORE/xgcc -B $BEFORE -c -S -o before.s $SRC -Wall -Werror -Ofast -mcpu=neoverse-v2
$AFTER/xgcc -B $AFTER -c -S -o after.s $SRC -Wall -Werror -Ofast -mcpu=neoverse-v2

diff -u before.s after.s

$ ./cmp.sh
--- before.s    2024-01-25 10:35:56.977090552 +0000
+++ after.s     2024-01-25 10:35:57.385086341 +0000
@@ -9,16 +9,47 @@
 _Z1fILt4EEvtP1dS1_:
 .LFB3918:
        .cfi_startproc
-       ands    w0, w0, 65535
+       movi    v31.4s, 0
        sub     sp, sp, #768
        .cfi_def_cfa_offset 768
+       ands    w0, w0, 65535
        mov     w3, 0
+       stp     q31, q31, [sp, 256]
+       stp     q31, q31, [sp, 288]
+       stp     q31, q31, [sp, 320]
+       stp     q31, q31, [sp, 352]
+       stp     q31, q31, [sp, 384]
+       stp     q31, q31, [sp, 416]
+       stp     q31, q31, [sp, 448]
+       stp     q31, q31, [sp, 480]
+       stp     q31, q31, [sp, 512]
+       stp     q31, q31, [sp, 544]
+       stp     q31, q31, [sp, 576]
+       stp     q31, q31, [sp, 608]
+       stp     q31, q31, [sp, 640]
+       stp     q31, q31, [sp, 672]
+       stp     q31, q31, [sp, 704]
+       stp     q31, q31, [sp, 736]
+       movi    v31.4s, 0
        beq     .L3
        .p2align 5,,15
 .L2:
-       add     w1, w3, 2
-       and     w3, w1, 65535
-       cmp     w0, w1, uxth
+       ubfiz   x5, x3, 3, 16
+       add     w4, w3, 2
+       and     w3, w4, 65535
+       ldr     q30, [x1, x5]
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       str     q31, [sp, 256]
+       cmp     w0, w4, uxth
        bhi     .L2
 .L3:
        ldp     q30, q31, [sp]

next             reply	other threads:[~2024-01-25 10:53 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-25 10:53 acoplan at gcc dot gnu.org [this message]
2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
2024-01-25 11:01 ` rguenth at gcc dot gnu.org
2024-01-25 11:05 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:16 ` pinskia at gcc dot gnu.org
2024-01-25 11:27 ` acoplan at gcc dot gnu.org
2024-01-25 11:32 ` acoplan at gcc dot gnu.org
2024-01-25 11:38 ` pinskia at gcc dot gnu.org
2024-01-25 11:40 ` acoplan at gcc dot gnu.org
2024-01-25 11:56 ` rguenth at gcc dot gnu.org
2024-01-25 13:41 ` rguenth at gcc dot gnu.org
2024-01-25 14:03 ` rguenth at gcc dot gnu.org
2024-01-29 13:56 ` rguenth at gcc dot gnu.org
2024-03-07 20:45 ` law at gcc dot gnu.org
2024-05-07  7:44 ` [Bug rtl-optimization/113597] [14/15 " rguenth at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-113597-4@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).