public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
@ 2024-01-25 10:53 acoplan at gcc dot gnu.org
  2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
                   ` (15 more replies)
  0 siblings, 16 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 10:53 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

            Bug ID: 113597
           Summary: [14 Regression] aarch64: Significant code quality
                    regression since r14-8346-ga98d5130a6dcff
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: acoplan at gcc dot gnu.org
  Target Milestone: ---

The following testcase shows a significant regression in code quality
since r14-8346-ga98d5130a6dcff2ed4db371e500550134777b8cf on aarch64:

$ cat t.cc
#include <arm_neon.h>
typedef struct {
  float b;
  float c;
} d;
template <uint16_t e> void f(uint16_t g, d *u, d *v) {
  uint16_t j, l = j = e * e;
  float32_t b[j];
  float32_t c[l];
  float32x4_t m[j];
  for (int i = 0; i < j; i++)
    m[i] = vdupq_n_f32(0.F);
  float32x4_t n[l];
  for (int i = 0; i < l; i++)
    n[i] = vdupq_n_f32(0.F);
  for (uint16_t k = 0; k < g; k += 2) {
    float32x4_t o[e];
    for (int i = 0; i < e; i++)
      o[i] = vld1q_f32((float32_t *)&u[k]);
    int idx = 0;
    for (int a = 0; a < e; a++)
      for (int ah = a; ah < e; ah++)
        m[idx] = vfmaq_f32(m[idx], o[a], o[ah]);
    float32x4_t p[e];
    for (int i; i; i++)
      for (int a; a;)
        for (int ah;;)
          vfmsq_f32(n[idx], o[a], p[ah]);
  }
  for (int i = 0; i < j; i++)
    b[i] = vaddvq_f32(m[i]);
  for (int i = 0; i < l; i++)
    c[i] = vaddvq_f32(n[i]);
  constexpr uint16_t q(e * e);
  float32x4_t r[q];
  float32x2_t s;
  r[4] = float32x4_t{b[5] - c[3]};
  for (int i = 0; i < q; i++)
    vst1q_f32((float32_t *)&v[2 * i], r[i]);
  if (e % 2)
    vst1_f32((float32_t *)v, s);
}
void t() {
  d v, u;
  f<4>(0, &u, &v);
}

$ cat cmp.sh
#!/bin/bash
set -e

BEFORE=/work/builds/r14-8345/gcc
AFTER=/work/builds/r14-8346/gcc
SRC=t.cc

$BEFORE/xgcc -B $BEFORE -c -S -o before.s $SRC -Wall -Werror -Ofast -mcpu=neoverse-v2
$AFTER/xgcc -B $AFTER -c -S -o after.s $SRC -Wall -Werror -Ofast -mcpu=neoverse-v2

diff -u before.s after.s

$ ./cmp.sh
--- before.s    2024-01-25 10:35:56.977090552 +0000
+++ after.s     2024-01-25 10:35:57.385086341 +0000
@@ -9,16 +9,47 @@
 _Z1fILt4EEvtP1dS1_:
 .LFB3918:
        .cfi_startproc
-       ands    w0, w0, 65535
+       movi    v31.4s, 0
        sub     sp, sp, #768
        .cfi_def_cfa_offset 768
+       ands    w0, w0, 65535
        mov     w3, 0
+       stp     q31, q31, [sp, 256]
+       stp     q31, q31, [sp, 288]
+       stp     q31, q31, [sp, 320]
+       stp     q31, q31, [sp, 352]
+       stp     q31, q31, [sp, 384]
+       stp     q31, q31, [sp, 416]
+       stp     q31, q31, [sp, 448]
+       stp     q31, q31, [sp, 480]
+       stp     q31, q31, [sp, 512]
+       stp     q31, q31, [sp, 544]
+       stp     q31, q31, [sp, 576]
+       stp     q31, q31, [sp, 608]
+       stp     q31, q31, [sp, 640]
+       stp     q31, q31, [sp, 672]
+       stp     q31, q31, [sp, 704]
+       stp     q31, q31, [sp, 736]
+       movi    v31.4s, 0
        beq     .L3
        .p2align 5,,15
 .L2:
-       add     w1, w3, 2
-       and     w3, w1, 65535
-       cmp     w0, w1, uxth
+       ubfiz   x5, x3, 3, 16
+       add     w4, w3, 2
+       and     w3, w4, 65535
+       ldr     q30, [x1, x5]
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       fmla    v31.4s, v30.4s, v30.4s
+       str     q31, [sp, 256]
+       cmp     w0, w4, uxth
        bhi     .L2
 .L3:
        ldp     q30, q31, [sp]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
@ 2024-01-25 11:01 ` rguenth at gcc dot gnu.org
  2024-01-25 11:01 ` rguenth at gcc dot gnu.org
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 11:01 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
I will have a look - but can you explain for me what I see?  I suppose the
testcase was reduced from something?

Is the assembly diff complete?  That is, do we really have more fmla or
are they just moved?

+     stp       q31, q31, [sp, 256]

that's a store?  A paired store?  Aka, the sequence fills a stack(?)
region with replications of q31?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
  2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
@ 2024-01-25 11:01 ` rguenth at gcc dot gnu.org
  2024-01-25 11:05 ` acoplan at gcc dot gnu.org
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 11:01 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|---                         |14.0
     Ever confirmed|0                           |1
   Last reconfirmed|                            |2024-01-25
             Status|UNCONFIRMED                 |ASSIGNED
           Assignee|unassigned at gcc dot gnu.org      |rguenth at gcc dot gnu.org

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
  2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
  2024-01-25 11:01 ` rguenth at gcc dot gnu.org
@ 2024-01-25 11:05 ` acoplan at gcc dot gnu.org
  2024-01-25 11:10 ` acoplan at gcc dot gnu.org
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:05 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #2 from Alex Coplan <acoplan at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #1)
> I will have a look - but can you explain for me what I see?  I suppose the
> testcase was reduced from something?

Yeah, the testcase is reduced.

> 
> Is the assembly diff complete?  That is, do we really have more fmla or
> are they just moved?

I think the diff is complete, I can upload the full before/after asm.

> 
> +     stp	q31, q31, [sp, 256] 
> 
> that's a store?  A paired store?  Aka, the sequence fills a stack(?)
> region with replications of q31?

That's right.

I'll also try to take a look at the RTL dumps to see if I can figure out
anything.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (2 preceding siblings ...)
  2024-01-25 11:05 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:10 ` acoplan at gcc dot gnu.org
  2024-01-25 11:10 ` acoplan at gcc dot gnu.org
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:10 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #3 from Alex Coplan <acoplan at gcc dot gnu.org> ---
Created attachment 57210
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57210&action=edit
before.s

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (3 preceding siblings ...)
  2024-01-25 11:10 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:10 ` acoplan at gcc dot gnu.org
  2024-01-25 11:16 ` pinskia at gcc dot gnu.org
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:10 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #4 from Alex Coplan <acoplan at gcc dot gnu.org> ---
Created attachment 57211
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57211&action=edit
after.s

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (4 preceding siblings ...)
  2024-01-25 11:10 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:16 ` pinskia at gcc dot gnu.org
  2024-01-25 11:27 ` acoplan at gcc dot gnu.org
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-01-25 11:16 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #5 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Note I think this testcase has been reduced too much, but maybe that can be
"fixed". The stores to the arguments go past the bounds.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (5 preceding siblings ...)
  2024-01-25 11:16 ` pinskia at gcc dot gnu.org
@ 2024-01-25 11:27 ` acoplan at gcc dot gnu.org
  2024-01-25 11:32 ` acoplan at gcc dot gnu.org
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:27 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #6 from Alex Coplan <acoplan at gcc dot gnu.org> ---
Looking at the dump files, the first difference seems to be in 292r.dse1:

     8: NOTE_INSN_BASIC_BLOCK 2
     2: r116:SI=zero_extend(x0:HI)
       REG_DEAD x0:HI
@@ -178,7 +161,26 @@
     5: NOTE_INSN_FUNCTION_BEG
    10: r119:DI=sfp:DI-0x200
    12: r121:V16QI=const_vector
+   13: [r119:DI]=unspec[r121:V16QI,r121:V16QI] 38
+   14: [r119:DI+0x20]=unspec[r121:V16QI,r121:V16QI] 38
+   15: [r119:DI+0x40]=unspec[r121:V16QI,r121:V16QI] 38
+   16: [r119:DI+0x60]=unspec[r121:V16QI,r121:V16QI] 38
+   17: [r119:DI+0x80]=unspec[r121:V16QI,r121:V16QI] 38
+   18: [r119:DI+0xa0]=unspec[r121:V16QI,r121:V16QI] 38
+   19: [r119:DI+0xc0]=unspec[r121:V16QI,r121:V16QI] 38
+   20: [r119:DI+0xe0]=unspec[r121:V16QI,r121:V16QI] 38
+      REG_DEAD r119:DI
    21: r122:DI=sfp:DI-0x100
+   24: [r122:DI]=unspec[r121:V16QI,r121:V16QI] 38
+   25: [r122:DI+0x20]=unspec[r121:V16QI,r121:V16QI] 38
+   26: [r122:DI+0x40]=unspec[r121:V16QI,r121:V16QI] 38
+   27: [r122:DI+0x60]=unspec[r121:V16QI,r121:V16QI] 38
+   28: [r122:DI+0x80]=unspec[r121:V16QI,r121:V16QI] 38
+   29: [r122:DI+0xa0]=unspec[r121:V16QI,r121:V16QI] 38
+   30: [r122:DI+0xc0]=unspec[r121:V16QI,r121:V16QI] 38
+   31: [r122:DI+0xe0]=unspec[r121:V16QI,r121:V16QI] 38
+      REG_DEAD r122:DI
+      REG_DEAD r121:V16QI
     6: r100:V4SF=const_vector
     7: r106:SI=0
    32: cc:CC=cmp(r116:SI,0)
@@ -254,6 +256,7 @@
    73: r100:V4SF={r147:V4SF*r147:V4SF+r115:V4SF}
       REG_DEAD r147:V4SF
       REG_DEAD r115:V4SF
+   74: [sfp:DI-0x200]=r100:V4SF
    75: r148:SI=r106:SI+0x2
       REG_DEAD r106:SI
    76: r106:SI=zero_extend(r148:SI#0)

(the unspec 38s are store pairs).

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (6 preceding siblings ...)
  2024-01-25 11:27 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:32 ` acoplan at gcc dot gnu.org
  2024-01-25 11:38 ` pinskia at gcc dot gnu.org
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:32 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #7 from Alex Coplan <acoplan at gcc dot gnu.org> ---
I expect the store pairs come from memcpy lowering/expansion in the aarch64
backend, that is the only way we get store pairs so early in the RTL pipeline
IIRC.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (7 preceding siblings ...)
  2024-01-25 11:32 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:38 ` pinskia at gcc dot gnu.org
  2024-01-25 11:40 ` acoplan at gcc dot gnu.org
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-01-25 11:38 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #8 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
(In reply to Alex Coplan from comment #7)
> I expect the store pairs come from memcpy lowering/expansion in the aarch64
> backend, that is the only way we get store pairs so early in the RTL
> pipeline IIRC.

In this case, memset is more likely.  

Either:
for (int i = 0; i < j; i++)
    m[i] = vdupq_n_f32(0.F);
Or
for (int i = 0; i < l; i++)
    n[i] = vdupq_n_f32(0.F);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (8 preceding siblings ...)
  2024-01-25 11:38 ` pinskia at gcc dot gnu.org
@ 2024-01-25 11:40 ` acoplan at gcc dot gnu.org
  2024-01-25 11:56 ` rguenth at gcc dot gnu.org
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:40 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #9 from Alex Coplan <acoplan at gcc dot gnu.org> ---
(In reply to Andrew Pinski from comment #8)
> (In reply to Alex Coplan from comment #7)
> > I expect the store pairs come from memcpy lowering/expansion in the aarch64
> > backend, that is the only way we get store pairs so early in the RTL
> > pipeline IIRC.
> 
> In this case, memset is more likely.

Right, yeah.  I was using "memcpy lowering" to refer to all the
mem{cpy,set,move} expansion we have in the backend.

> 
> Either:
> for (int i = 0; i < j; i++)
>     m[i] = vdupq_n_f32(0.F);
> Or
> for (int i = 0; i < l; i++)
>     n[i] = vdupq_n_f32(0.F);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (9 preceding siblings ...)
  2024-01-25 11:40 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:56 ` rguenth at gcc dot gnu.org
  2024-01-25 13:41 ` rguenth at gcc dot gnu.org
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 11:56 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #10 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 57212
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57212&action=edit
patch for debugging

Btw, I've used the attached to investigate other issues with the change.  It
will show the outcome of base_alias_check and find_base_term in dumps.

One issue is that we're much more dependent on MEM_EXPRs being present.

Before figuring there wouldn't be many important regressions the idea was to
instead of doing find_base_term have a known base value recorded in the
MEM_ATTRs, and as the only important ones should be the special ones for
argument frame and stack-based represent that by an enum (rather than
the other possibility of using ADDRESS).  I'll also note that for spill
slots we get around to use spill_slot_decl and set_mem_attrs_for_spill.

I've not yet convinced myself that the other special bases we have really
form a completely separate memory class.  But if they do then accesses
should do something similar there (but mind scheduling of frame related
instructions ...).

Argument stack slots are one important class, set up by init_alias_analysis.
But those are also backed by regular decls at times (but not always)?

assign_stack_temp "allocated" memory is another class, we're reusing
slots during RTL expansion and they get (even if shared) a specific
alias set.  I don't think we ever release those temps and say re-use
the space for spilling so assigning a different decl to each slot
should eventually work.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (10 preceding siblings ...)
  2024-01-25 11:56 ` rguenth at gcc dot gnu.org
@ 2024-01-25 13:41 ` rguenth at gcc dot gnu.org
  2024-01-25 14:03 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 13:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
In DSE the only difference is

 fbt (0x7ffff51a1a50: (plus:DI (reg/v/f:DI 117 [ u ])
-    (reg:DI 146 [ _44 ]))) == (address 0)
+    (reg:DI 146 [ _44 ]))) == (nil)
 fbt (0x7ffff700b3c0: (reg/f:DI 64 sfp)) == (address:DI -3)
-bac false
+bac true

that's for

(mem:BLK (reg/f:DI 64 sfp) [0  A8])

vs

(mem:V4SF (plus:DI (reg/v/f:DI 117 [ u ])
        (reg:DI 146 [ _44 ])) [0 MEM <__Float32x4_t> [(float * {ref-all})_42]+0
S16 A32])

from

#0  0x0000000002ff3796 in scan_reads (insn_info=0x5e5b680, gen=0x5ec2338, 
    kill=0x5ec2358) at /space/rguenther/src/gcc/gcc/dse.cc:3156
#1  0x0000000002ff39b1 in dse_step3_scan (bb=<basic_block 0x7ffff5160060 (5)>)
    at /space/rguenther/src/gcc/gcc/dse.cc:3238

processing

(insn 62 61 64 5 (set (reg:V4SF 147 [ MEM <__Float32x4_t> [(float *
{ref-all})_42] ])
        (mem:V4SF (plus:DI (reg/v/f:DI 117 [ u ])
                (reg:DI 146 [ _44 ])) [0 MEM <__Float32x4_t> [(float *
{ref-all})_42]+0 S16 A32])) "include/arm_neon.h":12531:36 1274
{*aarch64_simd_movv4sf}
     (expr_list:REG_DEAD (reg:DI 146 [ _44 ])
        (nil)))

in this case we have _44 point to NONLOCAL only.  It got arg_base_value
as base value (from the MEM_EXPR and that points-to set we could
eventually derive this very same base term as well).

But I'll note that (mem:BLK (reg/f:DI 64 sfp) [0  A8]) is artificial,
generated by DSE get_group_info via record_store on

(insn 13 12 14 2 (set (mem/c:V2x16QI (reg/f:DI 119) [0 +0 S32 A128])
        (unspec:V2x16QI [
                (reg:V16QI 121) repeated x2
            ] UNSPEC_STP)) "t.cc":12:10 discrim 1 92 {*store_pair_16}
     (nil))

which is figured to be const_or_frame_p () based.  That notably
lacks a MEM_EXPR (though the bare MEM means only base_alias_check would
ever be able to disambiguate here).

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (11 preceding siblings ...)
  2024-01-25 13:41 ` rguenth at gcc dot gnu.org
@ 2024-01-25 14:03 ` rguenth at gcc dot gnu.org
  2024-01-29 13:56 ` rguenth at gcc dot gnu.org
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 14:03 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

--- Comment #12 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 57214
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57214&action=edit
prototype fix

The attached prototype fixes the testcase for me.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (12 preceding siblings ...)
  2024-01-25 14:03 ` rguenth at gcc dot gnu.org
@ 2024-01-29 13:56 ` rguenth at gcc dot gnu.org
  2024-03-07 20:45 ` law at gcc dot gnu.org
  2024-05-07  7:44 ` [Bug rtl-optimization/113597] [14/15 " rguenth at gcc dot gnu.org
  15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-29 13:56 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
  Attachment #57214|0                           |1
        is obsolete|                            |

--- Comment #13 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 57252
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57252&action=edit
prototype fix

Note when I extended the patch to also cover a PARM_DECL base to extend
coverage I see

FAIL: gcc.dg/torture/pr70421.c   -O1  execution test
FAIL: gcc.dg/torture/pr70421.c   -O2  execution test
FAIL: gcc.dg/torture/pr70421.c   -O3 -g  execution test
FAIL: gcc.dg/torture/pr70421.c   -Os  execution test
FAIL: gcc.dg/torture/pr70421.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  execution test
FAIL: gcc.dg/torture/pr70421.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  execution test

on x86_64.  It seems that arg_base_value isn't the correct thing to use
but it eventually should have been unique_base_value (UNIQUE_BASE_VALUE_ARGP)?
I'm not sure whether all the different unique base values mean we'll not
be able to derive exactly those classes from MEM_EXPRs.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (13 preceding siblings ...)
  2024-01-29 13:56 ` rguenth at gcc dot gnu.org
@ 2024-03-07 20:45 ` law at gcc dot gnu.org
  2024-05-07  7:44 ` [Bug rtl-optimization/113597] [14/15 " rguenth at gcc dot gnu.org
  15 siblings, 0 replies; 17+ messages in thread
From: law at gcc dot gnu.org @ 2024-03-07 20:45 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

Jeffrey A. Law <law at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Priority|P3                          |P2
                 CC|                            |law at gcc dot gnu.org

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/113597] [14/15 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
  2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
                   ` (14 preceding siblings ...)
  2024-03-07 20:45 ` law at gcc dot gnu.org
@ 2024-05-07  7:44 ` rguenth at gcc dot gnu.org
  15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-05-07  7:44 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|14.0                        |14.2

--- Comment #14 from Richard Biener <rguenth at gcc dot gnu.org> ---
GCC 14.1 is being released, retargeting bugs to GCC 14.2.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2024-05-07  7:44 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
2024-01-25 11:01 ` rguenth at gcc dot gnu.org
2024-01-25 11:05 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:16 ` pinskia at gcc dot gnu.org
2024-01-25 11:27 ` acoplan at gcc dot gnu.org
2024-01-25 11:32 ` acoplan at gcc dot gnu.org
2024-01-25 11:38 ` pinskia at gcc dot gnu.org
2024-01-25 11:40 ` acoplan at gcc dot gnu.org
2024-01-25 11:56 ` rguenth at gcc dot gnu.org
2024-01-25 13:41 ` rguenth at gcc dot gnu.org
2024-01-25 14:03 ` rguenth at gcc dot gnu.org
2024-01-29 13:56 ` rguenth at gcc dot gnu.org
2024-03-07 20:45 ` law at gcc dot gnu.org
2024-05-07  7:44 ` [Bug rtl-optimization/113597] [14/15 " rguenth at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).