* B^HDEAD code generation (AMD64)
@ 2023-01-09 11:35 Stefan Kanthak
2023-01-09 11:48 ` Thomas Koenig
0 siblings, 1 reply; 6+ messages in thread
From: Stefan Kanthak @ 2023-01-09 11:35 UTC (permalink / raw)
To: gcc
Hi,
compile the following 128-bit GCD routine for the AMD64 processor
with full optimization:
--- gcdti3.c ---
// Stein's algorithm: greatest common divisor
__uint128_t __gcdti3(__uint128_t p, __uint128_t q)
{
unsigned r, s = 0;
__uint128_t t;
if (p == 0)
return q;
if (q == 0)
return p;
if (p == q)
return p;
if (((unsigned long long) p | (unsigned long long) q) == 0)
p >>= 64, q >>= 64, s = 64;
r = __builtin_ctzll((unsigned long long) p | (unsigned long long) q), p >>= r, q >>= r, s += r;
if ((unsigned long long) p == 0)
p >>= 64;
r = __builtin_ctzll(p), p >>= r;
do
{
if ((unsigned long long) q == 0)
q >>= 64;
r = __builtin_ctzll(q), q >>= r;
if (p < q)
t = q, q = p, p = t;
} while (q -= p);
return p << s;
}
--- EOF ---
GCC 12.2: gcc -O3 gcdti3.c
# https://godbolt.org/z/d1Ma9qnsf
__gcdti3(unsigned __int128, unsigned __int128):
mov rax, rsi # OOPS: GCCs plays six rounds of shell game!
mov r8, rdi #
mov rsi, rdi #
mov rdi, rax #
mov rax, rdx #
mov rdx, rcx #
mov rcx, rdi
or rcx, r8
je .L1
mov rcx, r8
mov r8, rdi
xor rcx, rax
xor r8, rdx
or rcx, r8
je .L9
mov rcx, rax
or rcx, rdx
je .L9
mov rcx, rsi
xor r10d, r10d
or rcx, rax
jne .L3
mov rsi, rdi
mov rax, rdx
xor edi, edi
xor edx, edx
mov rcx, rsi
mov r10d, 64
or rcx, rax
.L3:
rep bsf rcx, rcx
xor r8d, r8d # OUCH: BSF and TZCNT return at most 63,
shrd rsi, rdi, cl
shr rdi, cl
test cl, 64 # so this is dead code!
cmovne rsi, rdi #
cmovne rdi, r8 #
shrd rax, rdx, cl
xor r11d, r11d # OUCH: BSF and TZCNT return at most 63,
shr rdx, cl
test cl, 64 # so this is dead code!
cmovne rax, rdx #
cmovne rdx, r11 #
mov r8, rsi
mov r9, rdi
add r10d, ecx
mov rcx, r8
mov rsi, rax
mov rdi, rdx
test r8, r8
je .L14
.L4:
rep bsf rcx, rcx
mov rax, r8
mov rdx, r9
xor r11d, r11d # OUCH: BSF and TZCNT return at most 63,
shr rdx, cl
shrd rax, r9, cl
and ecx, 64 # (there's also no need to modify ECX)
cmovne rax, rdx # so this is dead code!
cmovne rdx, r11 #
.L7:
mov rcx, rsi
test rsi, rsi
jne .L5
mov rsi, rdi
xor edi, edi
mov rcx, rsi
.L5:
rep bsf rcx, rcx
xor r8d, r8d # OUCH: BSF and TZCNT return at most 63,
shrd rsi, rdi, cl
shr rdi, cl
test cl, 64 # so this is dead code,
mov rcx, rdx
cmovne rsi, rdi # and that too!
cmovne rdi, r8 #
cmp rax, rsi
sbb rcx, rdi
jnc .L6
mov r8, rax
mov r9, rdx
mov rax, rsi
mov rdx, rdi
mov rsi, r8
mov rdi, r9
.L6:
sub rsi, rax
sbb rdi, rdx
mov rcx, rdi
or rcx, rsi
jne .L7
mov ecx, r10d
xor esi, esi
shld rdx, rax, cl
sal rax, cl
and ecx, 64 # Oops: there's no need to modify ECX!
cmovne rdx, rax
cmovne rax, rsi
ret
.L9:
mov rax, rsi
mov rdx, rdi
.L1:
ret
.L14:
mov r8, r9
xor r9d, r9d
mov rcx, r8
jmp .L4
20 superfluous instructions of the total 102 instructions!
NOT AMUSED
Stefan Kanthak
PS: <https://skanthak.homepage.t-online.de/gcc.html#case27-amd64-s>
shows properly written assembly code.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: B^HDEAD code generation (AMD64)
2023-01-09 11:35 B^HDEAD code generation (AMD64) Stefan Kanthak
@ 2023-01-09 11:48 ` Thomas Koenig
2023-01-09 12:17 ` LIU Hao
2023-01-10 0:34 ` Stefan Kanthak
0 siblings, 2 replies; 6+ messages in thread
From: Thomas Koenig @ 2023-01-09 11:48 UTC (permalink / raw)
To: Stefan Kanthak, gcc
On 09.01.23 12:35, Stefan Kanthak wrote:
> 20 superfluous instructions of the total 102 instructions!
The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
Feel free to submit these cases there.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: B^HDEAD code generation (AMD64)
2023-01-09 11:48 ` Thomas Koenig
@ 2023-01-09 12:17 ` LIU Hao
2023-01-10 0:34 ` Stefan Kanthak
1 sibling, 0 replies; 6+ messages in thread
From: LIU Hao @ 2023-01-09 12:17 UTC (permalink / raw)
To: Thomas Koenig, Stefan Kanthak, gcc
[-- Attachment #1.1: Type: text/plain, Size: 966 bytes --]
在 2023/1/9 19:48, Thomas Koenig via Gcc 写道:
> On 09.01.23 12:35, Stefan Kanthak wrote:
>> 20 superfluous instructions of the total 102 instructions!
>
> The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
> Feel free to submit these cases there.
Created https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108341, although I'm not sure whether it's
related to this issue.
Given
```
extern int r;
int
bz(int value)
{
r = __builtin_ctz(value);
return value != 0; // always true
}
```
`value` is passed to `__builtin_ctz` and GCC should assume it can't be zero, otherwise the behavior
is undefined. However GCC fails to optimize it and produces:
```
bz:
endbr64
xor eax, eax
rep bsf eax, edi
mov DWORD PTR r[rip], eax
xor eax, eax
test edi, edi
setne al
ret
```
--
Best regards,
LIU Hao
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 840 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: B^HDEAD code generation (AMD64)
2023-01-09 11:48 ` Thomas Koenig
2023-01-09 12:17 ` LIU Hao
@ 2023-01-10 0:34 ` Stefan Kanthak
2023-01-10 3:22 ` Andrew Pinski
2023-01-10 7:12 ` Gabriel Ravier
1 sibling, 2 replies; 6+ messages in thread
From: Stefan Kanthak @ 2023-01-10 0:34 UTC (permalink / raw)
To: gcc, Thomas Koenig
"Thomas Koenig" <tkoenig@netcologne.de> wrote:
> On 09.01.23 12:35, Stefan Kanthak wrote:
>> 20 superfluous instructions of the total 102 instructions!
>
> The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
OUCH: there's NO proper place for bugs at all!
> Feel free to submit these cases there.
I feel free to do whatever I like to do where I do it, for example:
--- bug.cpp ---
int main() {
__uint128_t long long bug = 0;
}
--- EOF ---
See <https://godbolt.org/z/G7f81Mx5Y>
regards
Stefan
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: B^HDEAD code generation (AMD64)
2023-01-10 0:34 ` Stefan Kanthak
@ 2023-01-10 3:22 ` Andrew Pinski
2023-01-10 7:12 ` Gabriel Ravier
1 sibling, 0 replies; 6+ messages in thread
From: Andrew Pinski @ 2023-01-10 3:22 UTC (permalink / raw)
To: Stefan Kanthak; +Cc: gcc, Thomas Koenig
On Mon, Jan 9, 2023 at 4:42 PM Stefan Kanthak <stefan.kanthak@nexgo.de> wrote:
>
> "Thomas Koenig" <tkoenig@netcologne.de> wrote:
>
> > On 09.01.23 12:35, Stefan Kanthak wrote:
> >> 20 superfluous instructions of the total 102 instructions!
> >
> > The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
>
> OUCH: there's NO proper place for bugs at all!
HUH? soon people will ignore emails that are demeaning/ableist like
what you have been recently (saying things like braindead, etc.). And
yes bugzilla is where GCC tracks bug reports.
>
> > Feel free to submit these cases there.
>
> I feel free to do whatever I like to do where I do it, for example:
>
> --- bug.cpp ---
> int main() {
> __uint128_t long long bug = 0;
> }
> --- EOF ---
With -pedantic-errors we get:
<source>: In function 'int main()':
<source>:2:22: error: 'long long' specified with '__int128 unsigned'
[-Wpedantic]
2 | __uint128_t long long bug = 0;
| ^~~~
And also run into https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108099 .
This is a known extension but maybe it is not documented ...
Anyways read that bug report.
>
> See <https://godbolt.org/z/G7f81Mx5Y>
>
> regards
> Stefan
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: B^HDEAD code generation (AMD64)
2023-01-10 0:34 ` Stefan Kanthak
2023-01-10 3:22 ` Andrew Pinski
@ 2023-01-10 7:12 ` Gabriel Ravier
1 sibling, 0 replies; 6+ messages in thread
From: Gabriel Ravier @ 2023-01-10 7:12 UTC (permalink / raw)
To: Stefan Kanthak, gcc
On 1/10/23 01:34, Stefan Kanthak wrote:
> "Thomas Koenig" <tkoenig@netcologne.de> wrote:
>
>> On 09.01.23 12:35, Stefan Kanthak wrote:
>>> 20 superfluous instructions of the total 102 instructions!
>> The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
> OUCH: there's NO proper place for bugs at all!
>
>> Feel free to submit these cases there.
> I feel free to do whatever I like to do where I do it, for example:
>
> --- bug.cpp ---
> int main() {
> __uint128_t long long bug = 0;
> }
> --- EOF ---
>
> See <https://godbolt.org/z/G7f81Mx5Y>
>
> regards
> Stefan
If you're trying to speedrun actually getting banned from this mailing
list, then sure, I guess you can "do whatever I like to do where I do
it", but you might find that more difficult after somebody decides to do
something about it.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-01-10 7:12 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-09 11:35 B^HDEAD code generation (AMD64) Stefan Kanthak
2023-01-09 11:48 ` Thomas Koenig
2023-01-09 12:17 ` LIU Hao
2023-01-10 0:34 ` Stefan Kanthak
2023-01-10 3:22 ` Andrew Pinski
2023-01-10 7:12 ` Gabriel Ravier
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).