public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/113392] New: Missed fold of loading 8 consecutive bytes leading to a missed byteswap optimization
@ 2024-01-14 21:37 llvm at rifkin dot dev
2024-01-14 21:44 ` [Bug tree-optimization/113392] " pinskia at gcc dot gnu.org
0 siblings, 1 reply; 2+ messages in thread
From: llvm at rifkin dot dev @ 2024-01-14 21:37 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113392
Bug ID: 113392
Summary: Missed fold of loading 8 consecutive bytes leading to
a missed byteswap optimization
Product: gcc
Version: unknown
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: llvm at rifkin dot dev
Target Milestone: ---
The simple load function
/*
 * Combine the eight bytes data[0]..data[7] into one uint64_t.
 *
 * data[0] ends up in the least-significant byte of the result and data[7]
 * in the most-significant byte, i.e. the bytes are assembled in
 * little-endian order.
 *
 * Every offset here is a compile-time constant. This is the variant the
 * report says is already folded into a single 8-byte load.
 */
uint64_t load64bits(const uint8_t* data) {
uint8_t d0 = data[0];
uint8_t d1 = data[1];
uint8_t d2 = data[2];
uint8_t d3 = data[3];
uint8_t d4 = data[4];
uint8_t d5 = data[5];
uint8_t d6 = data[6];
uint8_t d7 = data[7];
/* The cast binds tighter than <<, so each byte is widened to uint64_t
   before it is shifted into its position. */
return (uint64_t) d0
| (uint64_t) d1 << 8
| (uint64_t) d2 << 16
| (uint64_t) d3 << 24
| (uint64_t) d4 << 32
| (uint64_t) d5 << 40
| (uint64_t) d6 << 48
| (uint64_t) d7 << 56;
}
is correctly optimized to
# Listing quoted in the report for load64bits (Intel syntax).
# The whole function is one 8-byte load: rax = *(uint64_t *)rdi,
# where rdi is used as the `data` pointer and rax carries the result.
load64bits(unsigned char const*):
mov rax, QWORD PTR [rdi]
ret
however,
/*
 * Combine the eight bytes data[index]..data[index + 7] into one uint64_t.
 *
 * Same byte placement as load64bits: the first byte read (data[index])
 * becomes the least-significant byte and the last one (data[index + 7])
 * the most-significant byte.
 *
 * The only difference from load64bits is that the offsets are produced by
 * post-incrementing the by-value parameter `index`. According to the
 * report, this form is NOT folded into a single load by GCC, while Clang
 * does fold it.
 */
uint64_t load64bits2(const uint8_t* data, size_t index) {
/* Each read uses the current index and then advances it by one, so the
   eight reads touch eight consecutive bytes in increasing address order. */
uint8_t d0 = data[index++];
uint8_t d1 = data[index++];
uint8_t d2 = data[index++];
uint8_t d3 = data[index++];
uint8_t d4 = data[index++];
uint8_t d5 = data[index++];
uint8_t d6 = data[index++];
uint8_t d7 = data[index++];
/* Widen each byte to uint64_t first, then shift it into position. */
return (uint64_t) d0
| (uint64_t) d1 << 8
| (uint64_t) d2 << 16
| (uint64_t) d3 << 24
| (uint64_t) d4 << 32
| (uint64_t) d5 << 40
| (uint64_t) d6 << 48
| (uint64_t) d7 << 56;
}
compiles to
# Listing quoted in the report for load64bits2 (Intel syntax).
# Register use visible below: rdi = data, rsi = index on entry,
# rax = accumulated result. Eight separate byte loads are emitted.
load64bits2(unsigned char const*, unsigned long):
# Keep a copy of index in rdx: rsi is reused as a scratch register below.
mov rdx, rsi
# rax = data[index+1] << 8 | data[index+2] << 16
movzx eax, BYTE PTR [rdi+1+rsi]
movzx esi, BYTE PTR [rdi+2+rsi]
sal rax, 8
sal rsi, 16
or rax, rsi
# rax |= data[index]          (lowest byte, no shift)
movzx esi, BYTE PTR [rdi+rdx]
or rax, rsi
# rax |= data[index+3] << 24
movzx esi, BYTE PTR [rdi+3+rdx]
sal rsi, 24
or rax, rsi
# rax |= data[index+4] << 32
movzx esi, BYTE PTR [rdi+4+rdx]
sal rsi, 32
or rax, rsi
# rax |= data[index+5] << 40
movzx esi, BYTE PTR [rdi+5+rdx]
sal rsi, 40
or rax, rsi
# rax |= data[index+6] << 48 | data[index+7] << 56
# (rdx is overwritten by the last load; index is no longer needed)
movzx esi, BYTE PTR [rdi+6+rdx]
movzx edx, BYTE PTR [rdi+7+rdx]
sal rsi, 48
sal rdx, 56
or rax, rsi
or rax, rdx
ret
Clang compiles both to a single mov.
This impacts other operations, such as a simple byteswap
/*
 * Combine the eight bytes data[index]..data[index + 7] into one uint64_t
 * in the opposite byte order to load64bits2.
 *
 * The first byte read (data[index]) becomes the MOST-significant byte of
 * the result and the last one (data[index + 7]) the least-significant
 * byte, i.e. the bytes are assembled in big-endian order.
 *
 * The report uses this as an example of a byteswap that is not recognised
 * because the underlying 8-byte load is not folded.
 */
uint64_t bswap64(const uint8_t* data, size_t index) {
/* Eight consecutive bytes, read in increasing address order via
   post-increment of the by-value `index`. */
uint8_t d0 = data[index++];
uint8_t d1 = data[index++];
uint8_t d2 = data[index++];
uint8_t d3 = data[index++];
uint8_t d4 = data[index++];
uint8_t d5 = data[index++];
uint8_t d6 = data[index++];
uint8_t d7 = data[index++];
/* Reverse placement: d7 stays in the low byte, d0 is shifted to the top.
   Each byte is widened to uint64_t before the shift. */
return (uint64_t) d7
| (uint64_t) d6 << 8
| (uint64_t) d5 << 16
| (uint64_t) d4 << 24
| (uint64_t) d3 << 32
| (uint64_t) d2 << 40
| (uint64_t) d1 << 48
| (uint64_t) d0 << 56;
}
compiling to
# Listing quoted in the report for bswap64 (Intel syntax).
# Register use visible below: rdi = data, rsi = index on entry,
# rax = accumulated result. Same shape as the load64bits2 listing, but
# the byte offsets run from +7 down to +0 as the shift amount grows.
bswap64(unsigned char const*, unsigned long):
# Keep a copy of index in rdx: rsi is reused as a scratch register below.
mov rdx, rsi
# rax = data[index+6] << 8 | data[index+5] << 16
movzx eax, BYTE PTR [rdi+6+rsi]
movzx esi, BYTE PTR [rdi+5+rsi]
sal rax, 8
sal rsi, 16
or rax, rsi
# rax |= data[index+7]        (lowest byte, no shift)
movzx esi, BYTE PTR [rdi+7+rdx]
or rax, rsi
# rax |= data[index+4] << 24
movzx esi, BYTE PTR [rdi+4+rdx]
sal rsi, 24
or rax, rsi
# rax |= data[index+3] << 32
movzx esi, BYTE PTR [rdi+3+rdx]
sal rsi, 32
or rax, rsi
# rax |= data[index+2] << 40
movzx esi, BYTE PTR [rdi+2+rdx]
sal rsi, 40
or rax, rsi
# rax |= data[index+1] << 48 | data[index] << 56
# (rdx is overwritten by the last load; index is no longer needed)
movzx esi, BYTE PTR [rdi+1+rdx]
movzx edx, BYTE PTR [rdi+rdx]
sal rsi, 48
sal rdx, 56
or rax, rsi
or rax, rdx
ret
instead of
# Listing the report gives as the desired code for bswap64 (Intel syntax).
# movbe loads the 8 bytes at data + index (rdi + rsi) and reverses their
# byte order while loading, so the whole function is one instruction.
bswap64(unsigned char const*, unsigned long):
movbe rax, qword ptr [rdi + rsi]
ret
https://godbolt.org/z/bjxq1rEYY
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-01-14 21:44 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-14 21:37 [Bug tree-optimization/113392] New: Missed fold of loading 8 consecutive bytes leading to a missed byteswap optimization llvm at rifkin dot dev
2024-01-14 21:44 ` [Bug tree-optimization/113392] " pinskia at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).