From: "Stefan Kanthak" <stefan.kanthak@nexgo.de>
To: <gcc@gnu.org>
Subject: B^HDEAD code generation (AMD64)
Date: Mon, 9 Jan 2023 12:35:24 +0100 [thread overview]
Message-ID: <76C395596DD94CCA94BF821C6100FF26@H270> (raw)
Hi,
compile the following 128-bit GCD routine for the AMD64 processor
with full optimization:
--- gcdti3.c ---
// Stein's algorithm: greatest common divisor
__uint128_t __gcdti3(__uint128_t p, __uint128_t q)
{
unsigned r, s = 0;
__uint128_t t;
if (p == 0)
return q;
if (q == 0)
return p;
if (p == q)
return p;
if (((unsigned long long) p | (unsigned long long) q) == 0)
p >>= 64, q >>= 64, s = 64;
r = __builtin_ctzll((unsigned long long) p | (unsigned long long) q), p >>= r, q >>= r, s += r;
if ((unsigned long long) p == 0)
p >>= 64;
r = __builtin_ctzll(p), p >>= r;
do
{
if ((unsigned long long) q == 0)
q >>= 64;
r = __builtin_ctzll(q), q >>= r;
if (p < q)
t = q, q = p, p = t;
} while (q -= p);
return p << s;
}
--- EOF ---
GCC 12.2: gcc -O3 gcdti3.c
# https://godbolt.org/z/d1Ma9qnsf
__gcdti3(unsigned __int128, unsigned __int128):
mov rax, rsi # OOPS: GCCs plays six rounds of shell game!
mov r8, rdi #
mov rsi, rdi #
mov rdi, rax #
mov rax, rdx #
mov rdx, rcx #
mov rcx, rdi
or rcx, r8
je .L1
mov rcx, r8
mov r8, rdi
xor rcx, rax
xor r8, rdx
or rcx, r8
je .L9
mov rcx, rax
or rcx, rdx
je .L9
mov rcx, rsi
xor r10d, r10d
or rcx, rax
jne .L3
mov rsi, rdi
mov rax, rdx
xor edi, edi
xor edx, edx
mov rcx, rsi
mov r10d, 64
or rcx, rax
.L3:
rep bsf rcx, rcx
xor r8d, r8d # OUCH: BSF and TZCNT return at most 63,
shrd rsi, rdi, cl
shr rdi, cl
test cl, 64 # so this is dead code!
cmovne rsi, rdi #
cmovne rdi, r8 #
shrd rax, rdx, cl
xor r11d, r11d # OUCH: BSF and TZCNT return at most 63,
shr rdx, cl
test cl, 64 # so this is dead code!
cmovne rax, rdx #
cmovne rdx, r11 #
mov r8, rsi
mov r9, rdi
add r10d, ecx
mov rcx, r8
mov rsi, rax
mov rdi, rdx
test r8, r8
je .L14
.L4:
rep bsf rcx, rcx
mov rax, r8
mov rdx, r9
xor r11d, r11d # OUCH: BSF and TZCNT return at most 63,
shr rdx, cl
shrd rax, r9, cl
and ecx, 64 # (there's also no need to modify ECX)
cmovne rax, rdx # so this is dead code!
cmovne rdx, r11 #
.L7:
mov rcx, rsi
test rsi, rsi
jne .L5
mov rsi, rdi
xor edi, edi
mov rcx, rsi
.L5:
rep bsf rcx, rcx
xor r8d, r8d # OUCH: BSF and TZCNT return at most 63,
shrd rsi, rdi, cl
shr rdi, cl
test cl, 64 # so this is dead code,
mov rcx, rdx
cmovne rsi, rdi # and that too!
cmovne rdi, r8 #
cmp rax, rsi
sbb rcx, rdi
jnc .L6
mov r8, rax
mov r9, rdx
mov rax, rsi
mov rdx, rdi
mov rsi, r8
mov rdi, r9
.L6:
sub rsi, rax
sbb rdi, rdx
mov rcx, rdi
or rcx, rsi
jne .L7
mov ecx, r10d
xor esi, esi
shld rdx, rax, cl
sal rax, cl
and ecx, 64 # Oops: there's no need to modify ECX!
cmovne rdx, rax
cmovne rax, rsi
ret
.L9:
mov rax, rsi
mov rdx, rdi
.L1:
ret
.L14:
mov r8, r9
xor r9d, r9d
mov rcx, r8
jmp .L4
20 superfluous instructions of the total 102 instructions!
NOT AMUSED
Stefan Kanthak
PS: <https://skanthak.homepage.t-online.de/gcc.html#case27-amd64-s>
shows properly written assembly code.
next reply other threads:[~2023-01-09 11:38 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-09 11:35 Stefan Kanthak [this message]
2023-01-09 11:48 ` Thomas Koenig
2023-01-09 12:17 ` LIU Hao
2023-01-10 0:34 ` Stefan Kanthak
2023-01-10 3:22 ` Andrew Pinski
2023-01-10 7:12 ` Gabriel Ravier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=76C395596DD94CCA94BF821C6100FF26@H270 \
--to=stefan.kanthak@nexgo.de \
--cc=gcc@gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).