* gcc-trunk generates incorrect code for LEA in asm template
@ 2023-07-14 18:18 Mason
2023-07-15 0:40 ` Mason
0 siblings, 1 reply; 3+ messages in thread
From: Mason @ 2023-07-14 18:18 UTC (permalink / raw)
To: gcc-help; +Cc: Jakub Jelinek, Uros Bizjak
Hello,
For fun, I'm trying to write code propagating carries in bignums.
gcc-trunk generates incorrect code, but I suspect that's
because my asm template is invalid (it's a byzantine syntax)
typedef unsigned long long u64;
/*
 * Add a into acc[0] and b (plus carry) into acc[1], then keep adding the
 * carry into the following word while the carry flag stays set.
 *
 * NOTE(review): the template relies on %[D1] and %[D2] being addressed
 * through the very register that holds %[ACC], so that "lea %[D1], %[ACC]"
 * also advances the base used by "adc $0, %[D2]" on the next pass. Nothing
 * in the constraints ties the "m" operands to the [ACC] register; the
 * compiler may choose any addressing form for them.
 * NOTE(review): when the loop repeats it is meant to reach words beyond
 * acc[2], but only acc[0..2] are declared as memory operands.
 */
void testcase(u64 *acc, u64 a, u64 b)
{
asm("add %[LO], %[D0]\n\t" "adc %[HI], %[D1]\n"
"1:\n\t" "adc $0, %[D2]\n\t" "lea %[D1], %[ACC]\n\t" "jc 1b" :
[D0] "+m" (acc[0]), [D1] "+m" (acc[1]), [D2] "+m" (acc[2]), [ACC] "+r" (acc) :
[LO] "r" (a), [HI] "r" (b) : "cc");
}
/*
 * Call testcase() twice with the same a and b: once at acc, then once
 * more one word higher at acc+1.
 */
void foo(u64 *acc, u64 a, u64 b)
{
testcase(acc+0, a, b);
testcase(acc+1, a, b);
}
$ gcc-trunk -O3 -march=znver1 -S
# Stand-alone testcase(): the three memory operands and ACC all use %rdi,
# so the lea below also moves the base of the 16(%rdi) operand.
testcase:
add %rsi, (%rdi) # acc[0] += a
adc %rdx, 8(%rdi) # acc[1] += b + cf
1:
adc $0, 16(%rdi) # acc[2] += cf
lea 8(%rdi), %rdi # ++acc
jc 1b # loop until cf = 0
ret
# foo() with both testcase() calls inlined.
foo:
leaq 8(%rdi), %rcx # rcx = acc+1
movq %rdi, %rax # rax = acc
add %rsi, (%rax) # acc[0] += a
adc %rdx, 8(%rax) # acc[1] += b + cf
1:
adc $0, 16(%rax) # acc[2] += cf
lea 8(%rax), %rax # ++acc
jc 1b
# So far, so good
# Second inlined copy starts here: ACC is in rax, but the D1/D2 memory
# operands below are expressed relative to rdi (still the original acc).
movq %rcx, %rax # rax = acc+1
add %rsi, (%rax) # acc[1] += a
# Not sure why we switch to rdi instead of rax at this point, but why not...
adc %rdx, 16(%rdi) # acc[2] += b + cf
1:
adc $0, 24(%rdi) # acc[3] += cf
# BROKEN: increments wrong reg, and by 2
# (the result lands in rax = rdi+16; rdi, the base of 24(%rdi) above, never changes)
lea 16(%rdi), %rax
jc 1b
ret
I guess I'm using the wrong constraints?
Regards
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: gcc-trunk generates incorrect code for LEA in asm template
2023-07-14 18:18 gcc-trunk generates incorrect code for LEA in asm template Mason
@ 2023-07-15 0:40 ` Mason
2023-07-15 9:28 ` Andrew Haley
0 siblings, 1 reply; 3+ messages in thread
From: Mason @ 2023-07-15 0:40 UTC (permalink / raw)
To: gcc-help; +Cc: Jakub Jelinek, Uros Bizjak
On 14/07/2023 20:18, Mason wrote:
> For fun, I'm trying to write code propagating carries in bignums.
>
> gcc-trunk generates incorrect code, but I suspect that's
> because my asm template is invalid (it's a byzantine syntax)
>
>
> typedef unsigned long long u64;
>
> void testcase(u64 *acc, u64 a, u64 b)
> {
> asm("add %[LO], %[D0]\n\t" "adc %[HI], %[D1]\n"
> "1:\n\t" "adc $0, %[D2]\n\t" "lea %[D1], %[ACC]\n\t" "jc 1b" :
> [D0] "+m" (acc[0]), [D1] "+m" (acc[1]), [D2] "+m" (acc[2]), [ACC] "+r" (acc) :
> [LO] "r" (a), [HI] "r" (b) : "cc");
> }
>
> void foo(u64 *acc, u64 a, u64 b)
> {
> testcase(acc+0, a, b);
> testcase(acc+1, a, b);
> }
If I tweak the code as below, it works as expected,
except I now have to mark the asm block as volatile.
Why though? (And see below for offset optimization.)
typedef unsigned long long u64;
/*
 * Variant that spells out every memory access as a fixed offset from
 * %[ACC] inside the template: add a at 0(ACC), add b plus carry at 8(ACC),
 * then add the carry at 16(ACC) and advance ACC by 8 while the carry flag
 * stays set.
 *
 * NOTE(review): the asm writes memory through %[ACC], yet the only output
 * is [ACC] itself and there is neither a memory operand nor a "memory"
 * clobber, so the constraints do not tell the compiler that memory is
 * modified. Presumably this is why the statement had to be marked
 * volatile to survive optimization.
 */
void testcase(u64 *acc, u64 a, u64 b)
{
asm volatile("add %[LO], 0(%[ACC])\n\t" "adc %[HI], 8(%[ACC])\n"
"1:\n\t" "adc $0, 16(%[ACC])\n\t" "lea 8(%[ACC]), %[ACC]\n\t" "jc 1b" :
[ACC] "+r" (acc) : [LO] "r" (a), [HI] "r" (b) : "cc");
}
/*
 * Call testcase() twice with the same a and b: once at acc, then once
 * more one word higher at acc+1.
 */
void foo(u64 *acc, u64 a, u64 b)
{
testcase(acc+0, a, b);
testcase(acc+1, a, b);
}
# Stand-alone testcase() from the template with hard-coded offsets;
# ACC is allocated to %rdi.
testcase:
add %rsi, 0(%rdi) # acc[0] += a
adc %rdx, 8(%rdi) # acc[1] += b + cf
1:
adc $0, 16(%rdi) # acc[2] += cf
lea 8(%rdi), %rdi # ++acc
jc 1b # repeat while cf is set
# Everything checks out so far
ret
# foo() with both testcase() calls inlined: the first copy runs on rax,
# the second on rdi.
foo:
movq %rdi, %rax # useless copy. caused by volatile?
add %rsi, 0(%rax) # acc[0] += a
adc %rdx, 8(%rax) # acc[1] += b + cf
1:
adc $0, 16(%rax) # acc[2] += cf
lea 8(%rax), %rax # that works
jc 1b
# sub-optimal add, gcc could have shifted the offset by 8...
# EXCEPT it doesn't *know* about the offsets, because they're
# hard-coded in the template... Back to square 2 then :)
addq $8, %rdi # rdi = acc+1
add %rsi, 0(%rdi) # acc[1] += a
adc %rdx, 8(%rdi) # acc[2] += b + cf
1:
adc $0, 16(%rdi) # acc[3] += cf
lea 8(%rdi), %rdi # advance the base together with ACC
jc 1b
ret
Regards
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: gcc-trunk generates incorrect code for LEA in asm template
2023-07-15 0:40 ` Mason
@ 2023-07-15 9:28 ` Andrew Haley
0 siblings, 0 replies; 3+ messages in thread
From: Andrew Haley @ 2023-07-15 9:28 UTC (permalink / raw)
To: gcc-help
On 7/15/23 01:40, Mason via Gcc-help wrote:
> Why though?
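The asm as written here has no effect that the compiler can see: its only output, acc, is never used afterwards, so without volatile it is treated as dead code and removed. You need a memory clobber.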
--
Andrew Haley (he/him)
Java Platform Lead Engineer
Red Hat UK Ltd. <https://www.redhat.com>
https://keybase.io/andrewhaley
EAC8 43EB D3EF DB98 CC77 2FAD A5CD 6035 332F A671
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-07-15 9:28 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-14 18:18 gcc-trunk generates incorrect code for LEA in asm template Mason
2023-07-15 0:40 ` Mason
2023-07-15 9:28 ` Andrew Haley
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).