public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/107910] New: Missed optimization of struct members with mixed sizes
@ 2022-11-29 9:31 pionere at freemail dot hu
2022-11-29 15:49 ` [Bug tree-optimization/107910] " pinskia at gcc dot gnu.org
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: pionere at freemail dot hu @ 2022-11-29 9:31 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107910
Bug ID: 107910
Summary: Missed optimization of struct members with mixed sizes
Product: gcc
Version: 12.2.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: pionere at freemail dot hu
Target Milestone: ---
Store-merging generates suboptimal code when copying members of structs that
occupy contiguous memory:
#include <stdint.h>
#pragma pack(push, 1)
/*
 * Test-case layout A: the same as StructB except that the third 32-bit slot
 * is split into four int8_t members (v02_0..v02_3).  Declared inside the
 * surrounding #pragma pack(push, 1) region, so there is no padding: the
 * struct is 48 bytes, the same size as StructB.
 */
typedef struct StructA {
int32_t v00;
int32_t v01;
/* Four single-byte members covering bytes 8..11 (StructB has one int32_t here). */
int8_t v02_0;
int8_t v02_1;
int8_t v02_2;
int8_t v02_3;
int32_t v03;
int32_t v04;
int32_t v05;
int32_t v06;
int32_t v07;
int32_t v08;
int32_t v09;
int32_t v10;
int32_t v11;
} StructA;
/*
 * Test-case layout B (the baseline): twelve int32_t members, 48 bytes.
 * Declared inside the surrounding #pragma pack(push, 1) region.
 */
typedef struct StructB {
int32_t v00;
int32_t v01;
int32_t v02;
int32_t v03;
int32_t v04;
int32_t v05;
int32_t v06;
int32_t v07;
int32_t v08;
int32_t v09;
int32_t v10;
int32_t v11;
} StructB;
#pragma pack(pop)
/*
 * Copies members v00 through v03 (bytes 0..15) of *src into *dest, one
 * member at a time.  Both pointers are __restrict-qualified.
 * Mixed-size counterpart of copyB: the four int8_t stores stand in for
 * copyB's single v02 store.
 */
void copyA(StructA* __restrict dest, const StructA* __restrict src) {
dest->v00 = src->v00;
dest->v01 = src->v01;
dest->v02_0 = src->v02_0;
dest->v02_1 = src->v02_1;
dest->v02_2 = src->v02_2;
dest->v02_3 = src->v02_3;
dest->v03 = src->v03;
}
/*
 * Copies members v00 through v03 (bytes 0..15) of *src into *dest, one
 * member at a time.  Both pointers are __restrict-qualified.
 * Uniform-size baseline for copyA.
 */
void copyB(StructB* __restrict dest, const StructB* __restrict src) {
dest->v00 = src->v00;
dest->v01 = src->v01;
dest->v02 = src->v02;
dest->v03 = src->v03;
}
/*
 * Copies members v00 through v07 (bytes 0..31) of *src into *dest, one
 * member at a time.  Both pointers are __restrict-qualified.
 * Mixed-size counterpart of copyBB.
 */
void copyAA(StructA* __restrict dest, const StructA* __restrict src) {
dest->v00 = src->v00;
dest->v01 = src->v01;
dest->v02_0 = src->v02_0;
dest->v02_1 = src->v02_1;
dest->v02_2 = src->v02_2;
dest->v02_3 = src->v02_3;
dest->v03 = src->v03;
dest->v04 = src->v04;
dest->v05 = src->v05;
dest->v06 = src->v06;
dest->v07 = src->v07;
}
/*
 * Copies members v00 through v07 (bytes 0..31) of *src into *dest, one
 * member at a time.  Both pointers are __restrict-qualified.
 * Uniform-size baseline for copyAA.
 */
void copyBB(StructB* __restrict dest, const StructB* __restrict src) {
dest->v00 = src->v00;
dest->v01 = src->v01;
dest->v02 = src->v02;
dest->v03 = src->v03;
dest->v04 = src->v04;
dest->v05 = src->v05;
dest->v06 = src->v06;
dest->v07 = src->v07;
}
/*
 * Copies every member of *src (v00 through v11, all 48 bytes) into *dest,
 * one member at a time.  Both pointers are __restrict-qualified.
 * Mixed-size counterpart of copyBBB.
 */
void copyAAA(StructA* __restrict dest, const StructA* __restrict src) {
dest->v00 = src->v00;
dest->v01 = src->v01;
dest->v02_0 = src->v02_0;
dest->v02_1 = src->v02_1;
dest->v02_2 = src->v02_2;
dest->v02_3 = src->v02_3;
dest->v03 = src->v03;
dest->v04 = src->v04;
dest->v05 = src->v05;
dest->v06 = src->v06;
dest->v07 = src->v07;
dest->v08 = src->v08;
dest->v09 = src->v09;
dest->v10 = src->v10;
dest->v11 = src->v11;
}
/*
 * Copies every member of *src (v00 through v11, all 48 bytes) into *dest,
 * one member at a time.  Both pointers are __restrict-qualified.
 * Uniform-size baseline for copyAAA.
 */
void copyBBB(StructB* __restrict dest, const StructB* __restrict src) {
dest->v00 = src->v00;
dest->v01 = src->v01;
dest->v02 = src->v02;
dest->v03 = src->v03;
dest->v04 = src->v04;
dest->v05 = src->v05;
dest->v06 = src->v06;
dest->v07 = src->v07;
dest->v08 = src->v08;
dest->v09 = src->v09;
dest->v10 = src->v10;
dest->v11 = src->v11;
}
Each copyA* function should generate the same code as its corresponding copyB* function.
Currently gcc 12 (or trunk) generates the following:
copyA(StructA*, StructA const*):
mov rax, QWORD PTR [rsi]
mov QWORD PTR [rdi], rax
mov rax, QWORD PTR [rsi+8]
mov QWORD PTR [rdi+8], rax
ret
copyB(StructB*, StructB const*):
movdqu xmm0, XMMWORD PTR [rsi]
movups XMMWORD PTR [rdi], xmm0
ret
copyAA(StructA*, StructA const*):
mov rax, QWORD PTR [rsi]
movdqu xmm0, XMMWORD PTR [rsi+12]
mov QWORD PTR [rdi], rax
mov eax, DWORD PTR [rsi+8]
movups XMMWORD PTR [rdi+12], xmm0
mov DWORD PTR [rdi+8], eax
mov eax, DWORD PTR [rsi+28]
mov DWORD PTR [rdi+28], eax
ret
copyBB(StructB*, StructB const*):
movdqu xmm0, XMMWORD PTR [rsi+16]
movdqu xmm1, XMMWORD PTR [rsi]
movups XMMWORD PTR [rdi+16], xmm0
movups XMMWORD PTR [rdi], xmm1
ret
copyAAA(StructA*, StructA const*):
mov rax, QWORD PTR [rsi]
movdqu xmm0, XMMWORD PTR [rsi+12]
movdqu xmm1, XMMWORD PTR [rsi+28]
mov QWORD PTR [rdi], rax
mov eax, DWORD PTR [rsi+8]
movups XMMWORD PTR [rdi+12], xmm0
mov DWORD PTR [rdi+8], eax
mov eax, DWORD PTR [rsi+44]
movups XMMWORD PTR [rdi+28], xmm1
mov DWORD PTR [rdi+44], eax
ret
copyBBB(StructB*, StructB const*):
movdqu xmm1, XMMWORD PTR [rsi+16]
movdqu xmm0, XMMWORD PTR [rsi+32]
movdqu xmm2, XMMWORD PTR [rsi]
movups XMMWORD PTR [rdi+16], xmm1
movups XMMWORD PTR [rdi], xmm2
movups XMMWORD PTR [rdi+32], xmm0
ret
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Bug tree-optimization/107910] Missed optimization of struct members with mixed sizes
2022-11-29 9:31 [Bug tree-optimization/107910] New: Missed optimization of struct members with mixed sizes pionere at freemail dot hu
@ 2022-11-29 15:49 ` pinskia at gcc dot gnu.org
2022-11-30 8:38 ` rguenth at gcc dot gnu.org
2022-11-30 8:38 ` rguenth at gcc dot gnu.org
2 siblings, 0 replies; 4+ messages in thread
From: pinskia at gcc dot gnu.org @ 2022-11-29 15:49 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107910
--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
The problem is an interaction between the SLP vectorizer and store merging.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Bug tree-optimization/107910] Missed optimization of struct members with mixed sizes
2022-11-29 9:31 [Bug tree-optimization/107910] New: Missed optimization of struct members with mixed sizes pionere at freemail dot hu
2022-11-29 15:49 ` [Bug tree-optimization/107910] " pinskia at gcc dot gnu.org
@ 2022-11-30 8:38 ` rguenth at gcc dot gnu.org
2022-11-30 8:38 ` rguenth at gcc dot gnu.org
2 siblings, 0 replies; 4+ messages in thread
From: rguenth at gcc dot gnu.org @ 2022-11-30 8:38 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107910
--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to Andrew Pinski from comment #1)
> The problem is an interaction between the SLP vectorizer and store merging.
Supposedly store-merging doesn't consider splitting loads/stores in order to
re-merge them, as would be needed, for example, when copying
/*
 * Packed struct mixing 32-bit members with a single 64-bit member.
 * Because the struct is packed, v03 sits at byte offset 12 (not 8-byte
 * aligned) and the whole struct is 24 bytes with no padding.
 *
 * The attribute goes directly after the `struct` keyword; GCC does not
 * accept it between the tag name and the opening brace.
 */
struct __attribute__((packed)) StructC {
int32_t v00;
int32_t v01;
int32_t v02;
int64_t v03;
int32_t v04;
};
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Bug tree-optimization/107910] Missed optimization of struct members with mixed sizes
2022-11-29 9:31 [Bug tree-optimization/107910] New: Missed optimization of struct members with mixed sizes pionere at freemail dot hu
2022-11-29 15:49 ` [Bug tree-optimization/107910] " pinskia at gcc dot gnu.org
2022-11-30 8:38 ` rguenth at gcc dot gnu.org
@ 2022-11-30 8:38 ` rguenth at gcc dot gnu.org
2 siblings, 0 replies; 4+ messages in thread
From: rguenth at gcc dot gnu.org @ 2022-11-30 8:38 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107910
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Ever confirmed|0 |1
Last reconfirmed| |2022-11-30
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-11-30 8:38 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-29 9:31 [Bug tree-optimization/107910] New: Missed optimization of struct members with mixed sizes pionere at freemail dot hu
2022-11-29 15:49 ` [Bug tree-optimization/107910] " pinskia at gcc dot gnu.org
2022-11-30 8:38 ` rguenth at gcc dot gnu.org
2022-11-30 8:38 ` rguenth at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).