public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug middle-end/106081] New: missed vectorization
@ 2022-06-24 15:56 hubicka at gcc dot gnu.org
2022-06-24 16:14 ` [Bug middle-end/106081] " hubicka at gcc dot gnu.org
` (20 more replies)
0 siblings, 21 replies; 22+ messages in thread
From: hubicka at gcc dot gnu.org @ 2022-06-24 15:56 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Bug ID: 106081
Summary: missed vectorization
Product: gcc
Version: 13.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: hubicka at gcc dot gnu.org
Target Milestone: ---
This testcase (derived from ImageMagick)
struct pixels
{
short a,b,c,d;
} *pixels;
struct dpixels
{
double a,b,c,d;
};
double
test(double *k)
{
struct dpixels results={};
for (int u=0; u<10000;u++,k--)
{
results.a += *k*pixels[u].a;
results.b += *k*pixels[u].b;
results.c += *k*pixels[u].c;
results.d += *k*pixels[u].d;
}
return results.a+results.b*2+results.c*3+results.d*4;
}
gets vectorized by clang:
test: # @test
.cfi_startproc
# %bb.0:
movq pixels(%rip), %rax
vxorpd %xmm0, %xmm0, %xmm0
xorl %ecx, %ecx
.p2align 4, 0x90
.LBB0_1: # =>This Inner Loop Header: Depth=1
vpmovsxwd (%rax), %xmm1
vbroadcastsd (%rdi,%rcx,8), %ymm2
addq $8, %rax
decq %rcx
vcvtdq2pd %xmm1, %ymm1
vfmadd231pd %ymm2, %ymm1, %ymm0 # ymm0 = (ymm1 * ymm2) + ymm0
cmpq $-10000, %rcx # imm = 0xD8F0
jne .LBB0_1
# %bb.2:
vpermilpd $1, %xmm0, %xmm1 # xmm1 = xmm0[1,0]
vfmadd132sd .LCPI0_0(%rip), %xmm0, %xmm1 # xmm1 = (xmm1 * mem) +
xmm0
vextractf128 $1, %ymm0, %xmm0
vfmadd231sd .LCPI0_1(%rip), %xmm0, %xmm1 # xmm1 = (xmm0 * mem) +
xmm1
vpermilpd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0]
vfmadd132sd .LCPI0_2(%rip), %xmm1, %xmm0 # xmm0 = (xmm0 * mem) +
xmm1
vzeroupper
retq
but not by GCC.
Original loop is:
0.94 : 423cb0: vmovdqu (%rsi,%rdi,8),%xmm5 // morphology.c:2984
: 2983 if ( IsNaN(*k) ) continue;
0.29 : 423cb5: vpermilpd $0x1,(%rcx),%xmm4
: 2982 for (u=0; u < (ssize_t) kernel->width; u++, k--) {
0.46 : 423cbb: add $0x2,%rdi
0.07 : 423cbf: add $0xfffffffffffffff0,%rcx
: 2984 result.red += (*k)*k_pixels[u].red;
0.03 : 423cc3: vpshufb %xmm12,%xmm5,%xmm6
6.81 : 423cc8: vcvtdq2pd %xmm6,%xmm6
13.05 : 423ccc: vfmadd231pd %xmm6,%xmm4,%xmm1
: 2985 result.green += (*k)*k_pixels[u].green;
17.45 : 423cd1: vpshufb %xmm15,%xmm5,%xmm6 // morphology.c:2985
0.33 : 423cd6: vcvtdq2pd %xmm6,%xmm6
0.00 : 423cda: vfmadd231pd %xmm6,%xmm4,%xmm3
: 2986 result.blue += (*k)*k_pixels[u].blue;
15.28 : 423cdf: vpshufb %xmm13,%xmm5,%xmm6 // morphology.c:2986
: 2987 result.opacity += (*k)*k_pixels[u].opacity;
0.00 : 423ce4: vpshufb %xmm8,%xmm5,%xmm5
: 2986 result.blue += (*k)*k_pixels[u].blue;
0.00 : 423ce9: vcvtdq2pd %xmm6,%xmm6
: 2987 result.opacity += (*k)*k_pixels[u].opacity;
0.21 : 423ced: vcvtdq2pd %xmm5,%xmm5
: 2986 result.blue += (*k)*k_pixels[u].blue;
0.97 : 423cf1: vfmadd231pd %xmm6,%xmm4,%xmm0
: 2987 result.opacity += (*k)*k_pixels[u].opacity;
19.16 : 423cf6: vfmadd231pd %xmm5,%xmm4,%xmm2 // morphology.c:2987
: 2982 for (u=0; u < (ssize_t) kernel->width; u++, k--) {
14.51 : 423cfb: cmp %rdi,%rbp // morphology.c:2982
0.00 : 423cfe: jne 423cb0 <MorphologyApply.6136+0x20c0>
Changing short to double makes it vectorized:
.L2:
vmovupd (%rax), %ymm4
vmovupd 64(%rax), %ymm2
subq $-128, %rax
subq $32, %rdx
vunpcklpd -96(%rax), %ymm4, %ymm1
vunpckhpd -96(%rax), %ymm4, %ymm0
vmovupd -64(%rax), %ymm4
vunpckhpd -32(%rax), %ymm2, %ymm2
vunpcklpd -32(%rax), %ymm4, %ymm4
vpermpd $27, 32(%rdx), %ymm3
vpermpd $216, %ymm1, %ymm1
vpermpd $216, %ymm0, %ymm0
vpermpd $216, %ymm2, %ymm2
vpermpd $216, %ymm4, %ymm4
vunpcklpd %ymm2, %ymm0, %ymm10
vunpckhpd %ymm2, %ymm0, %ymm0
vunpckhpd %ymm4, %ymm1, %ymm9
vunpcklpd %ymm4, %ymm1, %ymm1
vpermpd $216, %ymm10, %ymm10
vpermpd $216, %ymm0, %ymm0
vfmadd231pd %ymm3, %ymm10, %ymm6
vfmadd231pd %ymm3, %ymm0, %ymm8
vpermpd $216, %ymm9, %ymm9
vpermpd $216, %ymm1, %ymm1
vfmadd231pd %ymm3, %ymm1, %ymm5
vfmadd231pd %ymm3, %ymm9, %ymm7
cmpq %rax, %rcx
jne .L2
however clang's code looks shorter:
LBB0_1: # =>This Inner Loop Header: Depth=1
vbroadcastsd (%rdi,%rcx,8), %ymm1
vfmadd231pd (%rax), %ymm1, %ymm0 # ymm0 = (ymm1 * mem) + ymm0
addq $32, %rax
decq %rcx
cmpq $-10000, %rcx # imm = 0xD8F0
jne .LBB0_1
It seems we loop-vectorize while clang SLP-vectorizes.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
@ 2022-06-24 16:14 ` hubicka at gcc dot gnu.org
2022-06-26 8:08 ` crazylht at gmail dot com
` (19 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: hubicka at gcc dot gnu.org @ 2022-06-24 16:14 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #1 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
This is an attempt to vectorize by hand, but it seems we do not generate
vpmovsxwd for the vector short->double conversion
struct pixels
{
short a __attribute__ ((vector_size(4*2)));
} *pixels;
struct dpixels
{
double a __attribute__ ((vector_size(8*4)));
};
typedef double v4df __attribute__ ((vector_size (32)));
struct dpixels
test(double *k)
{
struct dpixels results={};
for (int u=0; u<10000;u++,k--)
{
results.a += *k*__builtin_convertvector (pixels[u].a, v4df);
}
return results;
}
clang seems to do right thing here.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
2022-06-24 16:14 ` [Bug middle-end/106081] " hubicka at gcc dot gnu.org
@ 2022-06-26 8:08 ` crazylht at gmail dot com
2022-06-27 9:01 ` marxin at gcc dot gnu.org
` (18 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: crazylht at gmail dot com @ 2022-06-26 8:08 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Hongtao.liu <crazylht at gmail dot com> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |crazylht at gmail dot com
--- Comment #2 from Hongtao.liu <crazylht at gmail dot com> ---
Looks quite similar to PR103999
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
2022-06-24 16:14 ` [Bug middle-end/106081] " hubicka at gcc dot gnu.org
2022-06-26 8:08 ` crazylht at gmail dot com
@ 2022-06-27 9:01 ` marxin at gcc dot gnu.org
2022-06-27 10:38 ` rguenth at gcc dot gnu.org
` (17 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: marxin at gcc dot gnu.org @ 2022-06-27 9:01 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Martin Liška <marxin at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Blocks| |26163
Ever confirmed|0 |1
Last reconfirmed| |2022-06-27
Status|UNCONFIRMED |NEW
CC| |marxin at gcc dot gnu.org
Referenced Bugs:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=26163
[Bug 26163] [meta-bug] missed optimization in SPEC (2k17, 2k and 2k6 and 95)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (2 preceding siblings ...)
2022-06-27 9:01 ` marxin at gcc dot gnu.org
@ 2022-06-27 10:38 ` rguenth at gcc dot gnu.org
2022-06-27 14:26 ` hubicka at gcc dot gnu.org
` (16 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2022-06-27 10:38 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Keywords| |missed-optimization
CC| |rguenth at gcc dot gnu.org
Blocks| |53947
--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
Might be a bit easier case than PR103999 since the accumulation is in 'double'
here. In fact it should already work, but we fail to SLP the reduction
because
t.c:14:24: missed: Build SLP failed: not grouped load _1 = *k_50;
and non-SLP as well:
t.c:14:24: note: ==> examining statement: _1 = *k_50;
t.c:14:24: missed: multiple types with negative step.
t.c:14:24: missed: not falling back to elementwise accesses
t.c:16:30: missed: not vectorized: relevant stmt not supported: _1 = *k_50;
t.c:14:24: missed: bad operation or unsupported loop bound.
changing k-- to k++ lets us vectorize the code but in an awkward way
(SLP still fails on us so we get interleaving).
So the first thing to fix is SLP of non-grouped loads where I think we also
have a bug for.
Referenced Bugs:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53947
[Bug 53947] [meta-bug] vectorizer missed-optimizations
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (3 preceding siblings ...)
2022-06-27 10:38 ` rguenth at gcc dot gnu.org
@ 2022-06-27 14:26 ` hubicka at gcc dot gnu.org
2022-10-24 0:14 ` pinskia at gcc dot gnu.org
` (15 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: hubicka at gcc dot gnu.org @ 2022-06-27 14:26 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #4 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
Thanks! It seems that imagemagick has quite a few loops that involve consuming
shorts and producing doubles. Also it would be nice to do something about
__builtin_convertvector so it does not produce stupid code...
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (4 preceding siblings ...)
2022-06-27 14:26 ` hubicka at gcc dot gnu.org
@ 2022-10-24 0:14 ` pinskia at gcc dot gnu.org
2023-06-21 13:45 ` rguenth at gcc dot gnu.org
` (14 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: pinskia at gcc dot gnu.org @ 2022-10-24 0:14 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Andrew Pinski <pinskia at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Severity|normal |enhancement
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (5 preceding siblings ...)
2022-10-24 0:14 ` pinskia at gcc dot gnu.org
@ 2023-06-21 13:45 ` rguenth at gcc dot gnu.org
2023-06-27 7:49 ` rguenth at gcc dot gnu.org
` (13 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-06-21 13:45 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Depends on| |96208
--- Comment #5 from Richard Biener <rguenth at gcc dot gnu.org> ---
PR96208 is the SLP of non-grouped loads. We now can convert short -> double
and we get with the grouped load hacked and -march=znver3:
.L2:
vmovdqu (%rax), %ymm0
vpermq $27, -24(%rdi), %ymm10
addq $32, %rax
subq $32, %rdi
vpshufb %ymm7, %ymm0, %ymm0
vpermpd $85, %ymm10, %ymm9
vpermpd $170, %ymm10, %ymm8
vpermpd $255, %ymm10, %ymm6
vpmovsxwd %xmm0, %ymm1
vextracti128 $0x1, %ymm0, %xmm0
vbroadcastsd %xmm10, %ymm10
vcvtdq2pd %xmm1, %ymm11
vextracti128 $0x1, %ymm1, %xmm1
vpmovsxwd %xmm0, %ymm0
vcvtdq2pd %xmm1, %ymm1
vfmadd231pd %ymm10, %ymm11, %ymm5
vfmadd231pd %ymm9, %ymm1, %ymm2
vcvtdq2pd %xmm0, %ymm1
vextracti128 $0x1, %ymm0, %xmm0
vcvtdq2pd %xmm0, %ymm0
vfmadd231pd %ymm8, %ymm1, %ymm4
vfmadd231pd %ymm6, %ymm0, %ymm3
cmpq %rax, %rdx
jne .L2
that is, the 'short' data type forces a higher VF to us and the splat
codegen I hacked in is sub-optimal still.
Referenced Bugs:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96208
[Bug 96208] non-grouped load can be SLP vectorized for 2-element vectors case
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (6 preceding siblings ...)
2023-06-21 13:45 ` rguenth at gcc dot gnu.org
@ 2023-06-27 7:49 ` rguenth at gcc dot gnu.org
2023-06-27 8:13 ` rguenth at gcc dot gnu.org
` (12 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-06-27 7:49 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Bug 106081 depends on bug 96208, which changed state.
Bug 96208 Summary: non-grouped load can be SLP vectorized for 2-element vectors case
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96208
What |Removed |Added
----------------------------------------------------------------------------
Status|ASSIGNED |RESOLVED
Resolution|--- |FIXED
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (7 preceding siblings ...)
2023-06-27 7:49 ` rguenth at gcc dot gnu.org
@ 2023-06-27 8:13 ` rguenth at gcc dot gnu.org
2023-06-27 9:10 ` rsandifo at gcc dot gnu.org
` (11 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-06-27 8:13 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |rsandifo at gcc dot gnu.org
--- Comment #6 from Richard Biener <rguenth at gcc dot gnu.org> ---
So what's interesting is that we now get as of r14-2117-gdd86a5a69cbda4 the
following. The odd thing is that we fail to eliminate the load permutation
{ 3 2 1 0 } even though this is a reduction group.
I _suppose_ the reason is the { 0 0 0 0 } load permutation (the "splat")
which we don't "support". In vect_optimize_slp_pass::start_choosing_layouts
there's
if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
{
/* If splitting out a SLP_TREE_LANE_PERMUTATION can make the node
unpermuted, record a layout that reverses this permutation.
We would need more work to cope with loads that are internally
permuted and also have inputs (such as masks for
IFN_MASK_LOADs). */
gcc_assert (partition.layout == 0 && !m_slpg->vertices[node_i].succ);
if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
continue;
which means we'll keep the permute there (well, that's OK - any permute
of the permute will retain it ...). I suspect this prevents the optimization
here. Massaging start_choosing_layouts to allow a splat on element zero
for a non-grouped access breaks things as we try to move that permute.
So I guess this needs a new kind of layout constraint? The permute
can absorb any permute but we cannot "move" it.
Richard?
t.c:14:18: note: === scheduling SLP instances ===
t.c:14:18: note: Vectorizing SLP tree:
t.c:14:18: note: node 0x4304170 (max_nunits=16, refcnt=2) vector(4) double
t.c:14:18: note: op template: _21 = _20 + results$d_60;
t.c:14:18: note: stmt 0 _21 = _20 + results$d_60;
t.c:14:18: note: stmt 1 _17 = _16 + results$c_58;
t.c:14:18: note: stmt 2 _13 = _12 + results$b_56;
t.c:14:18: note: stmt 3 _9 = _8 + results$a_54;
t.c:14:18: note: children 0x43041f8 0x4304418
t.c:14:18: note: node 0x43041f8 (max_nunits=16, refcnt=1) vector(4) double
t.c:14:18: note: op template: _20 = _1 * _19;
t.c:14:18: note: stmt 0 _20 = _1 * _19;
t.c:14:18: note: stmt 1 _16 = _1 * _15;
t.c:14:18: note: stmt 2 _12 = _1 * _11;
t.c:14:18: note: stmt 3 _8 = _1 * _7;
t.c:14:18: note: children 0x4304280 0x4304308
t.c:14:18: note: node 0x4304280 (max_nunits=4, refcnt=1) vector(4) double
t.c:14:18: note: op template: _1 = *k_50;
t.c:14:18: note: stmt 0 _1 = *k_50;
t.c:14:18: note: stmt 1 _1 = *k_50;
t.c:14:18: note: stmt 2 _1 = *k_50;
t.c:14:18: note: stmt 3 _1 = *k_50;
t.c:14:18: note: load permutation { 0 0 0 0 }
t.c:14:18: note: node 0x4304308 (max_nunits=16, refcnt=1) vector(4) double
t.c:14:18: note: op template: _19 = (double) _18;
t.c:14:18: note: stmt 0 _19 = (double) _18;
t.c:14:18: note: stmt 1 _15 = (double) _14;
t.c:14:18: note: stmt 2 _11 = (double) _10;
t.c:14:18: note: stmt 3 _7 = (double) _6;
t.c:14:18: note: children 0x4304390
t.c:14:18: note: node 0x4304390 (max_nunits=16, refcnt=1) vector(16) short
int
t.c:14:18: note: op template: _18 = _5->d;
t.c:14:18: note: stmt 0 _18 = _5->d;
t.c:14:18: note: stmt 1 _14 = _5->c;
t.c:14:18: note: stmt 2 _10 = _5->b;
t.c:14:18: note: stmt 3 _6 = _5->a;
t.c:14:18: note: load permutation { 3 2 1 0 }
t.c:14:18: note: node 0x4304418 (max_nunits=4, refcnt=1) vector(4) double
t.c:14:18: note: op template: results$d_60 = PHI <_21(5), 0.0(6)>
t.c:14:18: note: stmt 0 results$d_60 = PHI <_21(5), 0.0(6)>
t.c:14:18: note: stmt 1 results$c_58 = PHI <_17(5), 0.0(6)>
t.c:14:18: note: stmt 2 results$b_56 = PHI <_13(5), 0.0(6)>
t.c:14:18: note: stmt 3 results$a_54 = PHI <_9(5), 0.0(6)>
t.c:14:18: note: children 0x4304170 (nil)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (8 preceding siblings ...)
2023-06-27 8:13 ` rguenth at gcc dot gnu.org
@ 2023-06-27 9:10 ` rsandifo at gcc dot gnu.org
2023-06-28 12:15 ` hubicka at gcc dot gnu.org
` (10 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rsandifo at gcc dot gnu.org @ 2023-06-27 9:10 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #7 from rsandifo at gcc dot gnu.org <rsandifo at gcc dot gnu.org> ---
I don't think the splat creates a new layout, but instead a
splat should be allowed to change its layout at zero cost.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (9 preceding siblings ...)
2023-06-27 9:10 ` rsandifo at gcc dot gnu.org
@ 2023-06-28 12:15 ` hubicka at gcc dot gnu.org
2023-07-26 9:34 ` rguenth at gcc dot gnu.org
` (9 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: hubicka at gcc dot gnu.org @ 2023-06-28 12:15 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #8 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
Imagemagick improved by 17% on zen3 and 11% on altra
https://lnt.opensuse.org/db_default/v4/SPEC/37550
https://lnt.opensuse.org/db_default/v4/SPEC/37543
which is cool :)
The loop is now optimized as:
.L2:
vmovdqu16 (%rax), %zmm0
vmovupd (%rdx), %zmm2
addq $64, %rax
subq $64, %rdx
vpermpd %zmm2, %zmm15, %zmm9
vpermpd %zmm2, %zmm14, %zmm8
vpermpd %zmm2, %zmm13, %zmm7
vpermpd %zmm2, %zmm11, %zmm2
vpshufb %zmm12, %zmm0, %zmm0
vpmovsxwd %ymm0, %zmm1
vextracti64x4 $0x1, %zmm0, %ymm0
vpmovsxwd %ymm0, %zmm0
vcvtdq2pd %ymm1, %zmm10
vextracti32x8 $0x1, %zmm1, %ymm1
vcvtdq2pd %ymm1, %zmm1
vfmadd231pd %zmm2, %zmm10, %zmm6
vfmadd231pd %zmm9, %zmm1, %zmm3
vcvtdq2pd %ymm0, %zmm1
vextracti32x8 $0x1, %zmm0, %ymm0
vcvtdq2pd %ymm0, %zmm0
vfmadd231pd %zmm8, %zmm1, %zmm5
vfmadd231pd %zmm7, %zmm0, %zmm4
cmpq %rax, %rcx
jne .L2
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (10 preceding siblings ...)
2023-06-28 12:15 ` hubicka at gcc dot gnu.org
@ 2023-07-26 9:34 ` rguenth at gcc dot gnu.org
2023-07-26 9:35 ` rguenth at gcc dot gnu.org
` (8 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-26 9:34 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #9 from Richard Biener <rguenth at gcc dot gnu.org> ---
So I can adjust change_layout_cost in a bit awkward way, but it seems that
internal_node_cost would already work out that a permute can be merged into
an existing permute.
It seems that existing permutes are not recorded in the "layout".
Also vectorizable_slp_permutation_1 doesn't try to elide permutes that
are noop based on knowledge of the layout of 'node', say a permute
{ 1 0 3 2 } of a { _1, _1, _2, _2 } node would be noop. But
change_layout_cost does MAX (count, 1) on its result anyway.
The following elides the unnecessary permutation for this special case
(but not the general case):
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e4430248ab5..e9048a61891 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4389,6 +4389,19 @@ vect_optimize_slp_pass::change_layout_cost (slp_tree
node,
if (from_layout_i == to_layout_i)
return 0;
+ /* When there's a uniform load permutation permutating that in any
+ way is free. */
+ if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
+ {
+ unsigned l = SLP_TREE_LOAD_PERMUTATION (node)[0];
+ unsigned i;
+ for (i = 1; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
+ if (SLP_TREE_LOAD_PERMUTATION (node)[i] != l)
+ break;
+ if (i == SLP_TREE_LOAD_PERMUTATION (node).length ())
+ return 0;
+ }
+
auto_vec<slp_tree, 1> children (1);
children.quick_push (node);
auto_lane_permutation_t perm (SLP_TREE_LANES (node));
I'm not sure this is the correct place to factor in cost savings
materialization would give. Is it? Are explicit VEC_PERM nodes
also still there in the optimization process or are they turned
into sth implicit?
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (11 preceding siblings ...)
2023-07-26 9:34 ` rguenth at gcc dot gnu.org
@ 2023-07-26 9:35 ` rguenth at gcc dot gnu.org
2023-07-26 10:01 ` rsandifo at gcc dot gnu.org
` (7 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-26 9:35 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Assignee|unassigned at gcc dot gnu.org |rguenth at gcc dot gnu.org
Status|NEW |ASSIGNED
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (12 preceding siblings ...)
2023-07-26 9:35 ` rguenth at gcc dot gnu.org
@ 2023-07-26 10:01 ` rsandifo at gcc dot gnu.org
2023-07-26 10:32 ` rguenth at gcc dot gnu.org
` (6 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rsandifo at gcc dot gnu.org @ 2023-07-26 10:01 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #10 from rsandifo at gcc dot gnu.org <rsandifo at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #9)
> So I can adjust change_layout_cost in a bit awkward way, but it seems that
> internal_node_cost would already work out that a permute can be merged into
> an existing permute.
Right.
> It seems that existing permutes are not recorded in the "layout".
They should be if they're bijective, via:
else if (SLP_TREE_CODE (node) == VEC_PERM_EXPR
&& SLP_TREE_CHILDREN (node).length () == 1
&& (child = SLP_TREE_CHILDREN (node)[0])
&& (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (child))
.is_constant (&imin)))
{
/* If the child has the same vector size as this node,
reversing the permutation can make the permutation a no-op.
In other cases it can change a true permutation into a
full-vector extract. */
tmp_perm.reserve (SLP_TREE_LANES (node));
for (unsigned j = 0; j < SLP_TREE_LANES (node); ++j)
tmp_perm.quick_push (SLP_TREE_LANE_PERMUTATION (node)[j].second);
}
> Also vectorizable_slp_permutation_1 doesn't try to elide permutes that
> are noop based on knowledge of the layout of 'node', say a permute
> { 1 0 3 2 } of a { _1, _1, _2, _2 } node would be noop.
To do that in general, I think we'd need to value-number each
element of each node (which sounds doable). But I guess doing
it at leaves would be better than nothing.
> But change_layout_cost does MAX (count, 1) on its result anyway.
At the moment, yes, because having from_layout_i != to_layout_i
for identical layouts would be a consistency failure.
> The following elides the unnecessary permutation for this special case
> (but not the general case):
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index e4430248ab5..e9048a61891 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -4389,6 +4389,19 @@ vect_optimize_slp_pass::change_layout_cost (slp_tree
> node,
> if (from_layout_i == to_layout_i)
> return 0;
>
> + /* When there's a uniform load permutation permutating that in any
> + way is free. */
> + if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
> + {
> + unsigned l = SLP_TREE_LOAD_PERMUTATION (node)[0];
> + unsigned i;
> + for (i = 1; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
> + if (SLP_TREE_LOAD_PERMUTATION (node)[i] != l)
> + break;
> + if (i == SLP_TREE_LOAD_PERMUTATION (node).length ())
> + return 0;
> + }
> +
> auto_vec<slp_tree, 1> children (1);
> children.quick_push (node);
> auto_lane_permutation_t perm (SLP_TREE_LANES (node));
>
> I'm not sure this is the correct place to factor in cost savings
> materialization would give. Is it?
Yeah, I think so. The patch LGTM. I don't know if it's worth
caching the “all the same element” result, but probably not.
> Are explicit VEC_PERM nodes also still there in the optimization
> process or are they turned into sth implicit?
They're still there. The current algorithm inherits the old
restriction that candidate layouts must be bijective, and not
all VEC_PERM_EXPRs are. So some VEC_PERM_EXPRs would have to
be explicit whatever happens. Same for non-bijective load
permutations.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (13 preceding siblings ...)
2023-07-26 10:01 ` rsandifo at gcc dot gnu.org
@ 2023-07-26 10:32 ` rguenth at gcc dot gnu.org
2023-07-26 10:52 ` rguenth at gcc dot gnu.org
` (5 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-26 10:32 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to rsandifo@gcc.gnu.org from comment #10)
> (In reply to Richard Biener from comment #9)
[...]
> > The following elides the unnecessary permutation for this special case
> > (but not the general case):
> >
> > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > index e4430248ab5..e9048a61891 100644
> > --- a/gcc/tree-vect-slp.cc
> > +++ b/gcc/tree-vect-slp.cc
> > @@ -4389,6 +4389,19 @@ vect_optimize_slp_pass::change_layout_cost (slp_tree
> > node,
> > if (from_layout_i == to_layout_i)
> > return 0;
> >
> > + /* When there's a uniform load permutation permutating that in any
> > + way is free. */
> > + if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
> > + {
> > + unsigned l = SLP_TREE_LOAD_PERMUTATION (node)[0];
> > + unsigned i;
> > + for (i = 1; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
> > + if (SLP_TREE_LOAD_PERMUTATION (node)[i] != l)
> > + break;
> > + if (i == SLP_TREE_LOAD_PERMUTATION (node).length ())
> > + return 0;
> > + }
> > +
> > auto_vec<slp_tree, 1> children (1);
> > children.quick_push (node);
> > auto_lane_permutation_t perm (SLP_TREE_LANES (node));
> >
> > I'm not sure this is the correct place to factor in cost savings
> > materialization would give. Is it?
> Yeah, I think so. The patch LGTM. I don't know if it's worth
> caching the “all the same element” result, but probably not.
Yeah, I thought of changing the load_permutation representation to
vec_perm_indices but then as I consider merging lane and load permutes
that would be wrong step.
I've thought of generalizing the above but then change_layout_cost
is always positive but for example when doing { 3, 2, 1, 0 } ontop
an existing load permutation of { 3, 2, 1, 0 } the cost should be
negative?
Anyway, going to test the above patch.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (14 preceding siblings ...)
2023-07-26 10:32 ` rguenth at gcc dot gnu.org
@ 2023-07-26 10:52 ` rguenth at gcc dot gnu.org
2023-07-26 11:00 ` rsandifo at gcc dot gnu.org
` (4 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-26 10:52 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #12 from Richard Biener <rguenth at gcc dot gnu.org> ---
Btw, I see we actually materialize a permute before the splat:
t.c:14:24: note: node 0x5b311c0 (max_nunits=1, refcnt=2) vector(2) double
t.c:14:24: note: op: VEC_PERM_EXPR
t.c:14:24: note: stmt 0 _1 = *k_50;
t.c:14:24: note: stmt 1 _1 = *k_50;
t.c:14:24: note: stmt 2 _1 = *k_50;
t.c:14:24: note: stmt 3 _1 = *k_50;
t.c:14:24: note: lane permutation { 0[3] 0[2] 0[1] 0[0] }
t.c:14:24: note: children 0x5b30fc0
t.c:14:24: note: node 0x5b30fc0 (max_nunits=2, refcnt=1) vector(2) double
t.c:14:24: note: op template: _1 = *k_50;
t.c:14:24: note: stmt 0 _1 = *k_50;
t.c:14:24: note: stmt 1 _1 = *k_50;
t.c:14:24: note: stmt 2 _1 = *k_50;
t.c:14:24: note: stmt 3 _1 = *k_50;
t.c:14:24: note: load permutation { 0 0 0 0 }
this is because vect_optimize_slp_pass::get_result_with_layout doesn't
seem to consider load permutations? It's the value-numbering we perform
that in the end elides the redundant permute.
I have the feeling something doesn't fit together exactly, during
materialize () we apply the chosen layout to the partitions, then
eliminate redundant permutes but we still end up with
get_result_with_layout adding the above permute.
I can add the same logic as I've added to change_layout_cost also to
get_result_with_layout but as said, it feels like I'm missing something ...
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (15 preceding siblings ...)
2023-07-26 10:52 ` rguenth at gcc dot gnu.org
@ 2023-07-26 11:00 ` rsandifo at gcc dot gnu.org
2023-07-26 11:15 ` rsandifo at gcc dot gnu.org
` (3 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rsandifo at gcc dot gnu.org @ 2023-07-26 11:00 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #13 from rsandifo at gcc dot gnu.org <rsandifo at gcc dot gnu.org> ---
(In reply to Richard Biener from comment #12)
> Btw, I see we actually materialize a permute before the splat:
>
> t.c:14:24: note: node 0x5b311c0 (max_nunits=1, refcnt=2) vector(2) double
> t.c:14:24: note: op: VEC_PERM_EXPR
> t.c:14:24: note: stmt 0 _1 = *k_50;
> t.c:14:24: note: stmt 1 _1 = *k_50;
> t.c:14:24: note: stmt 2 _1 = *k_50;
> t.c:14:24: note: stmt 3 _1 = *k_50;
> t.c:14:24: note: lane permutation { 0[3] 0[2] 0[1] 0[0] }
> t.c:14:24: note: children 0x5b30fc0
> t.c:14:24: note: node 0x5b30fc0 (max_nunits=2, refcnt=1) vector(2) double
> t.c:14:24: note: op template: _1 = *k_50;
> t.c:14:24: note: stmt 0 _1 = *k_50;
> t.c:14:24: note: stmt 1 _1 = *k_50;
> t.c:14:24: note: stmt 2 _1 = *k_50;
> t.c:14:24: note: stmt 3 _1 = *k_50;
> t.c:14:24: note: load permutation { 0 0 0 0 }
>
> this is because vect_optimize_slp_pass::get_result_with_layout doesn't
> seem to consider load permutations?
Yeah. That's because, if to_layout_i != from_layout_i, the caller
is asking for a different layout from the one that we picked for
the load. If we wanted to change the load permutation in-place
for the given to_layout_i, we'd need to duplicate the load too,
which didn't seem like a good trade-off.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (16 preceding siblings ...)
2023-07-26 11:00 ` rsandifo at gcc dot gnu.org
@ 2023-07-26 11:15 ` rsandifo at gcc dot gnu.org
2023-07-26 12:15 ` rguenther at suse dot de
` (2 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: rsandifo at gcc dot gnu.org @ 2023-07-26 11:15 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #14 from rsandifo at gcc dot gnu.org <rsandifo at gcc dot gnu.org> ---
FWIW, changing:
if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
continue;
to:
if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
{
partition.layout = -1;
continue;
}
in start_choosing_layouts fixes it for me. We can then choose layout 1 for the
splat.
I think these conditions were carried over from the old code. Without checking
further, I'm not 100% sure what effect relaxing them would have :)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (17 preceding siblings ...)
2023-07-26 11:15 ` rsandifo at gcc dot gnu.org
@ 2023-07-26 12:15 ` rguenther at suse dot de
2023-07-26 13:28 ` cvs-commit at gcc dot gnu.org
2023-07-26 13:28 ` rguenth at gcc dot gnu.org
20 siblings, 0 replies; 22+ messages in thread
From: rguenther at suse dot de @ 2023-07-26 12:15 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #15 from rguenther at suse dot de <rguenther at suse dot de> ---
On Wed, 26 Jul 2023, rsandifo at gcc dot gnu.org wrote:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
>
> --- Comment #14 from rsandifo at gcc dot gnu.org <rsandifo at gcc dot gnu.org> ---
> FWIW, changing:
>
> if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
> continue;
>
> to:
>
> if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
> {
> partition.layout = -1;
> continue;
> }
>
> in start_choosing_layouts fixes it for me. We can then choose layout 1 for the
> splat.
>
> I think these conditions were carried over from the old code. Without checking
> further, I'm not 100% sure what effect relaxing them would have :)
I added the above condition recently when introducing support for splats.
The above change indeed works - I'm going to give it further
testing.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (18 preceding siblings ...)
2023-07-26 12:15 ` rguenther at suse dot de
@ 2023-07-26 13:28 ` cvs-commit at gcc dot gnu.org
2023-07-26 13:28 ` rguenth at gcc dot gnu.org
20 siblings, 0 replies; 22+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2023-07-26 13:28 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
--- Comment #16 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Richard Biener <rguenth@gcc.gnu.org>:
https://gcc.gnu.org/g:5d09fb683a8abce49dc0992f5102aa0189f8f632
commit r14-2790-g5d09fb683a8abce49dc0992f5102aa0189f8f632
Author: Richard Biener <rguenther@suse.de>
Date: Wed Jul 26 13:31:16 2023 +0200
tree-optimization/106081 - elide redundant permute
The following patch makes sure to elide a redundant permute that
can be merged with existing splats represented as load permutations
as we now do for non-grouped SLP loads. This is the last bit
missing to fix this PR where the main fix was already done by
r14-2117-gdd86a5a69cbda4
PR tree-optimization/106081
* tree-vect-slp.cc
(vect_optimize_slp_pass::start_choosing_layouts):
Assign layout -1 to splats.
* gcc.dg/vect/pr106081.c: New testcase.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [Bug middle-end/106081] missed vectorization
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
` (19 preceding siblings ...)
2023-07-26 13:28 ` cvs-commit at gcc dot gnu.org
@ 2023-07-26 13:28 ` rguenth at gcc dot gnu.org
20 siblings, 0 replies; 22+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-26 13:28 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106081
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|ASSIGNED |RESOLVED
Resolution|--- |FIXED
Target Milestone|--- |14.0
--- Comment #17 from Richard Biener <rguenth at gcc dot gnu.org> ---
Fixed now.
^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2023-07-26 13:28 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-24 15:56 [Bug middle-end/106081] New: missed vectorization hubicka at gcc dot gnu.org
2022-06-24 16:14 ` [Bug middle-end/106081] " hubicka at gcc dot gnu.org
2022-06-26 8:08 ` crazylht at gmail dot com
2022-06-27 9:01 ` marxin at gcc dot gnu.org
2022-06-27 10:38 ` rguenth at gcc dot gnu.org
2022-06-27 14:26 ` hubicka at gcc dot gnu.org
2022-10-24 0:14 ` pinskia at gcc dot gnu.org
2023-06-21 13:45 ` rguenth at gcc dot gnu.org
2023-06-27 7:49 ` rguenth at gcc dot gnu.org
2023-06-27 8:13 ` rguenth at gcc dot gnu.org
2023-06-27 9:10 ` rsandifo at gcc dot gnu.org
2023-06-28 12:15 ` hubicka at gcc dot gnu.org
2023-07-26 9:34 ` rguenth at gcc dot gnu.org
2023-07-26 9:35 ` rguenth at gcc dot gnu.org
2023-07-26 10:01 ` rsandifo at gcc dot gnu.org
2023-07-26 10:32 ` rguenth at gcc dot gnu.org
2023-07-26 10:52 ` rguenth at gcc dot gnu.org
2023-07-26 11:00 ` rsandifo at gcc dot gnu.org
2023-07-26 11:15 ` rsandifo at gcc dot gnu.org
2023-07-26 12:15 ` rguenther at suse dot de
2023-07-26 13:28 ` cvs-commit at gcc dot gnu.org
2023-07-26 13:28 ` rguenth at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).