public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
@ 2024-01-25 10:53 acoplan at gcc dot gnu.org
2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
` (15 more replies)
0 siblings, 16 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 10:53 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
Bug ID: 113597
Summary: [14 Regression] aarch64: Significant code quality
regression since r14-8346-ga98d5130a6dcff
Product: gcc
Version: 14.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: rtl-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: acoplan at gcc dot gnu.org
Target Milestone: ---
The following testcase shows a significant regression in code quality
since r14-8346-ga98d5130a6dcff2ed4db371e500550134777b8cf on aarch64:
$ cat t.cc
#include <arm_neon.h>
typedef struct {
float b;
float c;
} d;
template <uint16_t e> void f(uint16_t g, d *u, d *v) {
uint16_t j, l = j = e * e;
float32_t b[j];
float32_t c[l];
float32x4_t m[j];
for (int i = 0; i < j; i++)
m[i] = vdupq_n_f32(0.F);
float32x4_t n[l];
for (int i = 0; i < l; i++)
n[i] = vdupq_n_f32(0.F);
for (uint16_t k = 0; k < g; k += 2) {
float32x4_t o[e];
for (int i = 0; i < e; i++)
o[i] = vld1q_f32((float32_t *)&u[k]);
int idx = 0;
for (int a = 0; a < e; a++)
for (int ah = a; ah < e; ah++)
m[idx] = vfmaq_f32(m[idx], o[a], o[ah]);
float32x4_t p[e];
for (int i; i; i++)
for (int a; a;)
for (int ah;;)
vfmsq_f32(n[idx], o[a], p[ah]);
}
for (int i = 0; i < j; i++)
b[i] = vaddvq_f32(m[i]);
for (int i = 0; i < l; i++)
c[i] = vaddvq_f32(n[i]);
constexpr uint16_t q(e * e);
float32x4_t r[q];
float32x2_t s;
r[4] = float32x4_t{b[5] - c[3]};
for (int i = 0; i < q; i++)
vst1q_f32((float32_t *)&v[2 * i], r[i]);
if (e % 2)
vst1_f32((float32_t *)v, s);
}
void t() {
d v, u;
f<4>(0, &u, &v);
}
$ cat cmp.sh
#!/bin/bash
set -e
BEFORE=/work/builds/r14-8345/gcc
AFTER=/work/builds/r14-8346/gcc
SRC=t.cc
$BEFORE/xgcc -B $BEFORE -c -S -o before.s $SRC -Wall -Werror -Ofast
-mcpu=neoverse-v2
$AFTER/xgcc -B $AFTER -c -S -o after.s $SRC -Wall -Werror -Ofast
-mcpu=neoverse-v2
diff -u before.s after.s
$ ./cmp.sh
--- before.s 2024-01-25 10:35:56.977090552 +0000
+++ after.s 2024-01-25 10:35:57.385086341 +0000
@@ -9,16 +9,47 @@
_Z1fILt4EEvtP1dS1_:
.LFB3918:
.cfi_startproc
- ands w0, w0, 65535
+ movi v31.4s, 0
sub sp, sp, #768
.cfi_def_cfa_offset 768
+ ands w0, w0, 65535
mov w3, 0
+ stp q31, q31, [sp, 256]
+ stp q31, q31, [sp, 288]
+ stp q31, q31, [sp, 320]
+ stp q31, q31, [sp, 352]
+ stp q31, q31, [sp, 384]
+ stp q31, q31, [sp, 416]
+ stp q31, q31, [sp, 448]
+ stp q31, q31, [sp, 480]
+ stp q31, q31, [sp, 512]
+ stp q31, q31, [sp, 544]
+ stp q31, q31, [sp, 576]
+ stp q31, q31, [sp, 608]
+ stp q31, q31, [sp, 640]
+ stp q31, q31, [sp, 672]
+ stp q31, q31, [sp, 704]
+ stp q31, q31, [sp, 736]
+ movi v31.4s, 0
beq .L3
.p2align 5,,15
.L2:
- add w1, w3, 2
- and w3, w1, 65535
- cmp w0, w1, uxth
+ ubfiz x5, x3, 3, 16
+ add w4, w3, 2
+ and w3, w4, 65535
+ ldr q30, [x1, x5]
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ fmla v31.4s, v30.4s, v30.4s
+ str q31, [sp, 256]
+ cmp w0, w4, uxth
bhi .L2
.L3:
ldp q30, q31, [sp]
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
@ 2024-01-25 11:01 ` rguenth at gcc dot gnu.org
2024-01-25 11:01 ` rguenth at gcc dot gnu.org
` (14 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 11:01 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
I will have a look - but can you explain for me what I see? I suppose the
testcase was reduced from something?
Is the assembly diff complete? That is, do we really have more fmla or
are they just moved?
+ stp q31, q31, [sp, 256]
that's a store? A paired store? Aka, the sequence fills a stack(?)
region with replications of q31?
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
@ 2024-01-25 11:01 ` rguenth at gcc dot gnu.org
2024-01-25 11:05 ` acoplan at gcc dot gnu.org
` (13 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 11:01 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Target Milestone|--- |14.0
Ever confirmed|0 |1
Last reconfirmed| |2024-01-25
Status|UNCONFIRMED |ASSIGNED
Assignee|unassigned at gcc dot gnu.org |rguenth at gcc dot gnu.org
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
2024-01-25 11:01 ` rguenth at gcc dot gnu.org
@ 2024-01-25 11:05 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
` (12 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:05 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #2 from Alex Coplan <acoplan at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #1)
> I will have a look - but can you explain for me what I see? I suppose the
> testcase was reduced from something?
Yeah, the testcase is reduced.
>
> Is the assembly diff complete? That is, do we really have more fmla or
> are they just moved?
I think the diff is complete, I can upload the full before/after asm.
>
> + stp q31, q31, [sp, 256]
>
> that's a store? A paired store? Aka, the sequence fills a stack(?)
> region with replications of q31?
That's right.
I'll also try to take a look at the RTL dumps to see if I can figure
anything out.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (2 preceding siblings ...)
2024-01-25 11:05 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
` (11 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:10 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #3 from Alex Coplan <acoplan at gcc dot gnu.org> ---
Created attachment 57210
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57210&action=edit
before.s
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (3 preceding siblings ...)
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:16 ` pinskia at gcc dot gnu.org
` (10 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:10 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #4 from Alex Coplan <acoplan at gcc dot gnu.org> ---
Created attachment 57211
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57211&action=edit
after.s
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (4 preceding siblings ...)
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:16 ` pinskia at gcc dot gnu.org
2024-01-25 11:27 ` acoplan at gcc dot gnu.org
` (9 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-01-25 11:16 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #5 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Note I think this testcase has been reduced too much, but maybe that can be
"fixed". The stores to the arguments go past the bounds.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (5 preceding siblings ...)
2024-01-25 11:16 ` pinskia at gcc dot gnu.org
@ 2024-01-25 11:27 ` acoplan at gcc dot gnu.org
2024-01-25 11:32 ` acoplan at gcc dot gnu.org
` (8 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:27 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #6 from Alex Coplan <acoplan at gcc dot gnu.org> ---
Looking at the dump files, the first difference seems to be in 292r.dse1:
8: NOTE_INSN_BASIC_BLOCK 2
2: r116:SI=zero_extend(x0:HI)
REG_DEAD x0:HI
@@ -178,7 +161,26 @@
5: NOTE_INSN_FUNCTION_BEG
10: r119:DI=sfp:DI-0x200
12: r121:V16QI=const_vector
+ 13: [r119:DI]=unspec[r121:V16QI,r121:V16QI] 38
+ 14: [r119:DI+0x20]=unspec[r121:V16QI,r121:V16QI] 38
+ 15: [r119:DI+0x40]=unspec[r121:V16QI,r121:V16QI] 38
+ 16: [r119:DI+0x60]=unspec[r121:V16QI,r121:V16QI] 38
+ 17: [r119:DI+0x80]=unspec[r121:V16QI,r121:V16QI] 38
+ 18: [r119:DI+0xa0]=unspec[r121:V16QI,r121:V16QI] 38
+ 19: [r119:DI+0xc0]=unspec[r121:V16QI,r121:V16QI] 38
+ 20: [r119:DI+0xe0]=unspec[r121:V16QI,r121:V16QI] 38
+ REG_DEAD r119:DI
21: r122:DI=sfp:DI-0x100
+ 24: [r122:DI]=unspec[r121:V16QI,r121:V16QI] 38
+ 25: [r122:DI+0x20]=unspec[r121:V16QI,r121:V16QI] 38
+ 26: [r122:DI+0x40]=unspec[r121:V16QI,r121:V16QI] 38
+ 27: [r122:DI+0x60]=unspec[r121:V16QI,r121:V16QI] 38
+ 28: [r122:DI+0x80]=unspec[r121:V16QI,r121:V16QI] 38
+ 29: [r122:DI+0xa0]=unspec[r121:V16QI,r121:V16QI] 38
+ 30: [r122:DI+0xc0]=unspec[r121:V16QI,r121:V16QI] 38
+ 31: [r122:DI+0xe0]=unspec[r121:V16QI,r121:V16QI] 38
+ REG_DEAD r122:DI
+ REG_DEAD r121:V16QI
6: r100:V4SF=const_vector
7: r106:SI=0
32: cc:CC=cmp(r116:SI,0)
@@ -254,6 +256,7 @@
73: r100:V4SF={r147:V4SF*r147:V4SF+r115:V4SF}
REG_DEAD r147:V4SF
REG_DEAD r115:V4SF
+ 74: [sfp:DI-0x200]=r100:V4SF
75: r148:SI=r106:SI+0x2
REG_DEAD r106:SI
76: r106:SI=zero_extend(r148:SI#0)
(the unspec 38s are store pairs).
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (6 preceding siblings ...)
2024-01-25 11:27 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:32 ` acoplan at gcc dot gnu.org
2024-01-25 11:38 ` pinskia at gcc dot gnu.org
` (7 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:32 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #7 from Alex Coplan <acoplan at gcc dot gnu.org> ---
I expect the store pairs come from memcpy lowering/expansion in the aarch64
backend, that is the only way we get store pairs so early in the RTL pipeline
IIRC.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (7 preceding siblings ...)
2024-01-25 11:32 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:38 ` pinskia at gcc dot gnu.org
2024-01-25 11:40 ` acoplan at gcc dot gnu.org
` (6 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-01-25 11:38 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #8 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
(In reply to Alex Coplan from comment #7)
> I expect the store pairs come from memcpy lowering/expansion in the aarch64
> backend, that is the only way we get store pairs so early in the RTL
> pipeline IIRC.
In this case, memset is more likely.
Either:
for (int i = 0; i < j; i++)
m[i] = vdupq_n_f32(0.F);
Or
for (int i = 0; i < l; i++)
n[i] = vdupq_n_f32(0.F);
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (8 preceding siblings ...)
2024-01-25 11:38 ` pinskia at gcc dot gnu.org
@ 2024-01-25 11:40 ` acoplan at gcc dot gnu.org
2024-01-25 11:56 ` rguenth at gcc dot gnu.org
` (5 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: acoplan at gcc dot gnu.org @ 2024-01-25 11:40 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #9 from Alex Coplan <acoplan at gcc dot gnu.org> ---
(In reply to Andrew Pinski from comment #8)
> (In reply to Alex Coplan from comment #7)
> > I expect the store pairs come from memcpy lowering/expansion in the aarch64
> > backend, that is the only way we get store pairs so early in the RTL
> > pipeline IIRC.
>
> In this case, memset is more likely.
Right, yeah. I was using "memcpy lowering" to refer to all the
mem{cpy,set,move} expansion we have in the backend.
>
> Either:
> for (int i = 0; i < j; i++)
> m[i] = vdupq_n_f32(0.F);
> Or
> for (int i = 0; i < l; i++)
> n[i] = vdupq_n_f32(0.F);
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (9 preceding siblings ...)
2024-01-25 11:40 ` acoplan at gcc dot gnu.org
@ 2024-01-25 11:56 ` rguenth at gcc dot gnu.org
2024-01-25 13:41 ` rguenth at gcc dot gnu.org
` (4 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 11:56 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #10 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 57212
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57212&action=edit
patch for debugging
Btw, I've used the attached to investigate other issues with the change. It
will show the outcome of base_alias_check and find_base_term in dumps.
One issue is that we're much more dependent on MEM_EXPRs being present.
Before figuring there wouldn't be many important regressions, the idea was,
instead of doing find_base_term, to have a known base value recorded in the
MEM_ATTRs and, as the only important ones should be the special ones for
argument frame and stack-based, to represent that by an enum (rather than
the other possibility of using ADDRESS). I'll also note that for spill
slots we get around to use spill_slot_decl and set_mem_attrs_for_spill.
I've not yet convinced myself that the other special bases we have really
form a completely separate memory class. But if they do then accesses
should do something similar there (but mind scheduling of frame related
instructions ...).
Argument stack slots are one important class, set up by init_alias_analysis.
But those are also backed by regular decls at times (but not always)?
assign_stack_temp "allocated" memory is another class, we're reusing
slots during RTL expansion and they get (even if shared) a specific
alias set. I don't think we ever release those temps and say re-use
the space for spilling so assigning a different decl to each slot
should eventually work.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (10 preceding siblings ...)
2024-01-25 11:56 ` rguenth at gcc dot gnu.org
@ 2024-01-25 13:41 ` rguenth at gcc dot gnu.org
2024-01-25 14:03 ` rguenth at gcc dot gnu.org
` (3 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 13:41 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
In DSE the only difference is
fbt (0x7ffff51a1a50: (plus:DI (reg/v/f:DI 117 [ u ])
- (reg:DI 146 [ _44 ]))) == (address 0)
+ (reg:DI 146 [ _44 ]))) == (nil)
fbt (0x7ffff700b3c0: (reg/f:DI 64 sfp)) == (address:DI -3)
-bac false
+bac true
that's for
(mem:BLK (reg/f:DI 64 sfp) [0 A8])
vs
(mem:V4SF (plus:DI (reg/v/f:DI 117 [ u ])
(reg:DI 146 [ _44 ])) [0 MEM <__Float32x4_t> [(float * {ref-all})_42]+0
S16 A32])
from
#0 0x0000000002ff3796 in scan_reads (insn_info=0x5e5b680, gen=0x5ec2338,
kill=0x5ec2358) at /space/rguenther/src/gcc/gcc/dse.cc:3156
#1 0x0000000002ff39b1 in dse_step3_scan (bb=<basic_block 0x7ffff5160060 (5)>)
at /space/rguenther/src/gcc/gcc/dse.cc:3238
processing
(insn 62 61 64 5 (set (reg:V4SF 147 [ MEM <__Float32x4_t> [(float *
{ref-all})_42] ])
(mem:V4SF (plus:DI (reg/v/f:DI 117 [ u ])
(reg:DI 146 [ _44 ])) [0 MEM <__Float32x4_t> [(float *
{ref-all})_42]+0 S16 A32])) "include/arm_neon.h":12531:36 1274
{*aarch64_simd_movv4sf}
(expr_list:REG_DEAD (reg:DI 146 [ _44 ])
(nil)))
in this case we have _44 point to NONLOCAL only. It got arg_base_value
as base value (from the MEM_EXPR and that points-to set we could
eventually derive this very same base term as well).
But I'll note that (mem:BLK (reg/f:DI 64 sfp) [0 A8]) is artificial,
generated by DSE get_group_info via record_store on
(insn 13 12 14 2 (set (mem/c:V2x16QI (reg/f:DI 119) [0 +0 S32 A128])
(unspec:V2x16QI [
(reg:V16QI 121) repeated x2
] UNSPEC_STP)) "t.cc":12:10 discrim 1 92 {*store_pair_16}
(nil))
which is figured to be const_or_frame_p () based. That notably
lacks a MEM_EXPR (though the bare MEM means only base_alias_check would
ever be able to disambiguate here).
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (11 preceding siblings ...)
2024-01-25 13:41 ` rguenth at gcc dot gnu.org
@ 2024-01-25 14:03 ` rguenth at gcc dot gnu.org
2024-01-29 13:56 ` rguenth at gcc dot gnu.org
` (2 subsequent siblings)
15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-25 14:03 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
--- Comment #12 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 57214
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57214&action=edit
prototype fix
The attached prototype fixes the testcase for me.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (12 preceding siblings ...)
2024-01-25 14:03 ` rguenth at gcc dot gnu.org
@ 2024-01-29 13:56 ` rguenth at gcc dot gnu.org
2024-03-07 20:45 ` law at gcc dot gnu.org
2024-05-07 7:44 ` [Bug rtl-optimization/113597] [14/15 " rguenth at gcc dot gnu.org
15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-01-29 13:56 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Attachment #57214|0 |1
is obsolete| |
--- Comment #13 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 57252
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57252&action=edit
prototype fix
Note when I extended the patch to also cover a PARM_DECL base to extend
coverage I see
FAIL: gcc.dg/torture/pr70421.c -O1 execution test
FAIL: gcc.dg/torture/pr70421.c -O2 execution test
FAIL: gcc.dg/torture/pr70421.c -O3 -g execution test
FAIL: gcc.dg/torture/pr70421.c -Os execution test
FAIL: gcc.dg/torture/pr70421.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test
FAIL: gcc.dg/torture/pr70421.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test
on x86_64. It seems that arg_base_value isn't the correct thing to use
but it eventually should have been unique_base_value (UNIQUE_BASE_VALUE_ARGP)?
I'm not sure whether all the different unique base values mean we'll not
be able to derive exactly those classes from MEM_EXPRs.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (13 preceding siblings ...)
2024-01-29 13:56 ` rguenth at gcc dot gnu.org
@ 2024-03-07 20:45 ` law at gcc dot gnu.org
2024-05-07 7:44 ` [Bug rtl-optimization/113597] [14/15 " rguenth at gcc dot gnu.org
15 siblings, 0 replies; 17+ messages in thread
From: law at gcc dot gnu.org @ 2024-03-07 20:45 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
Jeffrey A. Law <law at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Priority|P3 |P2
CC| |law at gcc dot gnu.org
^ permalink raw reply [flat|nested] 17+ messages in thread
* [Bug rtl-optimization/113597] [14/15 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
` (14 preceding siblings ...)
2024-03-07 20:45 ` law at gcc dot gnu.org
@ 2024-05-07 7:44 ` rguenth at gcc dot gnu.org
15 siblings, 0 replies; 17+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-05-07 7:44 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113597
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Target Milestone|14.0 |14.2
--- Comment #14 from Richard Biener <rguenth at gcc dot gnu.org> ---
GCC 14.1 is being released, retargeting bugs to GCC 14.2.
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2024-05-07 7:44 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-25 10:53 [Bug rtl-optimization/113597] New: [14 Regression] aarch64: Significant code quality regression since r14-8346-ga98d5130a6dcff acoplan at gcc dot gnu.org
2024-01-25 11:01 ` [Bug rtl-optimization/113597] " rguenth at gcc dot gnu.org
2024-01-25 11:01 ` rguenth at gcc dot gnu.org
2024-01-25 11:05 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:10 ` acoplan at gcc dot gnu.org
2024-01-25 11:16 ` pinskia at gcc dot gnu.org
2024-01-25 11:27 ` acoplan at gcc dot gnu.org
2024-01-25 11:32 ` acoplan at gcc dot gnu.org
2024-01-25 11:38 ` pinskia at gcc dot gnu.org
2024-01-25 11:40 ` acoplan at gcc dot gnu.org
2024-01-25 11:56 ` rguenth at gcc dot gnu.org
2024-01-25 13:41 ` rguenth at gcc dot gnu.org
2024-01-25 14:03 ` rguenth at gcc dot gnu.org
2024-01-29 13:56 ` rguenth at gcc dot gnu.org
2024-03-07 20:45 ` law at gcc dot gnu.org
2024-05-07 7:44 ` [Bug rtl-optimization/113597] [14/15 " rguenth at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).