* [x86] Tweak testcases for PR82361
@ 2019-09-17 16:34 Richard Sandiford
2019-09-18 6:44 ` Uros Bizjak
0 siblings, 1 reply; 3+ messages in thread
From: Richard Sandiford @ 2019-09-17 16:34 UTC (permalink / raw)
To: gcc-patches; +Cc: hubicka, ubizjak
gcc/testsuite/gcc.target/i386/pr82361-[12].c check whether we
can optimise away a 32-to-64-bit zero extension of a 32-bit
division or modulus result. Currently this fails for the modulus
part of f1 and f2 in pr82361-1.c:
/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
one. */
/* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
pr82361-2.c instead expects no failures:
/* Ditto %edx to %rdx zero extensions. */
/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
But we actually get the same zero-extensions for f1 and f2 in pr82361-2.c.
The reason they don't trigger a failure is that the RA allocates the
asm input for "d" to %rdi rather than %rdx, so we have:
movl %edx, %edi
instead of:
movl %edx, %edx
For the tests to work as expected, I think they have to force "c" and
"d" to be %rax and %rdx respectively. We then see the same failure in
pr82361-2.c as for pr82361-1.c (but doubled, due to the 8-bit division
path).
Tested on x86_64-linux-gnu. OK to install?
Richard
2019-09-17 Richard Sandiford <richard.sandiford@arm.com>
gcc/testsuite/
* gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force
"c" to be in %rax and "d" to be in %rdx.
* gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".
Index: gcc/testsuite/gcc.target/i386/pr82361-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-03-08 18:14:39.040959532 +0000
+++ gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-09-17 17:32:00.930930762 +0100
@@ -11,43 +11,43 @@
void
f1 (unsigned int a, unsigned int b)
{
- unsigned long long c = a / b;
- unsigned long long d = a % b;
+ register unsigned long long c asm ("rax") = a / b;
+ register unsigned long long d asm ("rdx") = a % b;
asm volatile ("" : : "r" (c), "r" (d));
}
void
f2 (int a, int b)
{
- unsigned long long c = (unsigned int) (a / b);
- unsigned long long d = (unsigned int) (a % b);
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
asm volatile ("" : : "r" (c), "r" (d));
}
void
f3 (unsigned int a, unsigned int b)
{
- unsigned long long c = a / b;
+ register unsigned long long c asm ("rax") = a / b;
asm volatile ("" : : "r" (c));
}
void
f4 (int a, int b)
{
- unsigned long long c = (unsigned int) (a / b);
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
asm volatile ("" : : "r" (c));
}
void
f5 (unsigned int a, unsigned int b)
{
- unsigned long long d = a % b;
+ register unsigned long long d asm ("rdx") = a % b;
asm volatile ("" : : "r" (d));
}
void
f6 (int a, int b)
{
- unsigned long long d = (unsigned int) (a % b);
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
asm volatile ("" : : "r" (d));
}
Index: gcc/testsuite/gcc.target/i386/pr82361-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-17 16:34:52.280124553 +0100
+++ gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-17 17:32:00.930930762 +0100
@@ -4,7 +4,8 @@
/* We should be able to optimize all %eax to %rax zero extensions, because
div and idiv instructions with 32-bit operands zero-extend both results. */
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* Ditto %edx to %rdx zero extensions. */
-/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
+/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
+ one. */
+/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
#include "pr82361-1.c"
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [x86] Tweak testcases for PR82361
2019-09-17 16:34 [x86] Tweak testcases for PR82361 Richard Sandiford
@ 2019-09-18 6:44 ` Uros Bizjak
2019-09-18 7:40 ` Richard Sandiford
0 siblings, 1 reply; 3+ messages in thread
From: Uros Bizjak @ 2019-09-18 6:44 UTC (permalink / raw)
To: Richard Sandiford; +Cc: gcc-patches, Jan Hubicka
On Tue, Sep 17, 2019 at 6:34 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> gcc/testsuite/gcc.target/i386/pr82361-[12].c check whether we
> can optimise away a 32-to-64-bit zero extension of a 32-bit
> division or modulus result. Currently this fails for the modulus
> part of f1 and f2 in pr82361-1.c:
>
> /* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
> one. */
> /* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
>
> pr82361-2.c instead expects no failures:
>
> /* Ditto %edx to %rdx zero extensions. */
> /* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
>
> But we actually get the same zero-extensions for f1 and f2 in pr82361-2.c.
> The reason they don't trigger a failure is that the RA allocates the
> asm input for "d" to %rdi rather than %rdx, so we have:
>
> movl %edx, %edi
>
> instead of:
>
> movl %edx, %edx
>
> For the tests to work as expected, I think they have to force "c" and
> "d" to be %rax and %rdx respectively. We then see the same failure in
> pr82361-2.c as for pr82361-1.c (but doubled, due to the 8-bit division
> path).
>
> Tested on x86_64-linux-gnu. OK to install?
>
> Richard
>
>
> 2019-09-17 Richard Sandiford <richard.sandiford@arm.com>
>
> gcc/testsuite/
> * gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force
> "c" to be in %rax and "d" to be in %rdx.
> * gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".
OK, with a comment improvement below.
Thanks,
Uros.
> Index: gcc/testsuite/gcc.target/i386/pr82361-1.c
> ===================================================================
> --- gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-03-08 18:14:39.040959532 +0000
> +++ gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-09-17 17:32:00.930930762 +0100
> @@ -11,43 +11,43 @@
> void
> f1 (unsigned int a, unsigned int b)
> {
> - unsigned long long c = a / b;
> - unsigned long long d = a % b;
> + register unsigned long long c asm ("rax") = a / b;
> + register unsigned long long d asm ("rdx") = a % b;
> asm volatile ("" : : "r" (c), "r" (d));
> }
>
> void
> f2 (int a, int b)
> {
> - unsigned long long c = (unsigned int) (a / b);
> - unsigned long long d = (unsigned int) (a % b);
> + register unsigned long long c asm ("rax") = (unsigned int) (a / b);
> + register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
> asm volatile ("" : : "r" (c), "r" (d));
> }
>
> void
> f3 (unsigned int a, unsigned int b)
> {
> - unsigned long long c = a / b;
> + register unsigned long long c asm ("rax") = a / b;
> asm volatile ("" : : "r" (c));
> }
>
> void
> f4 (int a, int b)
> {
> - unsigned long long c = (unsigned int) (a / b);
> + register unsigned long long c asm ("rax") = (unsigned int) (a / b);
> asm volatile ("" : : "r" (c));
> }
>
> void
> f5 (unsigned int a, unsigned int b)
> {
> - unsigned long long d = a % b;
> + register unsigned long long d asm ("rdx") = a % b;
> asm volatile ("" : : "r" (d));
> }
>
> void
> f6 (int a, int b)
> {
> - unsigned long long d = (unsigned int) (a % b);
> + register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
> asm volatile ("" : : "r" (d));
> }
> Index: gcc/testsuite/gcc.target/i386/pr82361-2.c
> ===================================================================
> --- gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-17 16:34:52.280124553 +0100
> +++ gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-17 17:32:00.930930762 +0100
> @@ -4,7 +4,8 @@
> /* We should be able to optimize all %eax to %rax zero extensions, because
> div and idiv instructions with 32-bit operands zero-extend both results. */
> /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
> -/* Ditto %edx to %rdx zero extensions. */
> -/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
> +/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
> + one. */
Can we please change the comment here and in pr82361-1.c to something like:
/* FIXME: The compiler does not merge zero-extension to the modulo part. */
> +/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
>
> #include "pr82361-1.c"
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [x86] Tweak testcases for PR82361
2019-09-18 6:44 ` Uros Bizjak
@ 2019-09-18 7:40 ` Richard Sandiford
0 siblings, 0 replies; 3+ messages in thread
From: Richard Sandiford @ 2019-09-18 7:40 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches, Jan Hubicka
Uros Bizjak <ubizjak@gmail.com> writes:
> On Tue, Sep 17, 2019 at 6:34 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> gcc/testsuite/gcc.target/i386/pr82361-[12].c check whether we
>> can optimise away a 32-to-64-bit zero extension of a 32-bit
>> division or modulus result. Currently this fails for the modulus
>> part of f1 and f2 in pr82361-1.c:
>>
>> /* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
>> one. */
>> /* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
>>
>> pr82361-2.c instead expects no failures:
>>
>> /* Ditto %edx to %rdx zero extensions. */
>> /* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
>>
>> But we actually get the same zero-extensions for f1 and f2 in pr82361-2.c.
>> The reason they don't trigger a failure is that the RA allocates the
>> asm input for "d" to %rdi rather than %rdx, so we have:
>>
>> movl %edx, %edi
>>
>> instead of:
>>
>> movl %edx, %edx
>>
>> For the tests to work as expected, I think they have to force "c" and
>> "d" to be %rax and %rdx respectively. We then see the same failure in
>> pr82361-2.c as for pr82361-1.c (but doubled, due to the 8-bit division
>> path).
>>
>> Tested on x86_64-linux-gnu. OK to install?
>>
>> Richard
>>
>>
>> 2019-09-17 Richard Sandiford <richard.sandiford@arm.com>
>>
>> gcc/testsuite/
>> * gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force
>> "c" to be in %rax and "d" to be in %rdx.
>> * gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".
>
> OK, with a comment improvement below.
>
> Thanks,
> Uros.
>
>> Index: gcc/testsuite/gcc.target/i386/pr82361-1.c
>> ===================================================================
>> --- gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-03-08 18:14:39.040959532 +0000
>> +++ gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-09-17 17:32:00.930930762 +0100
>> @@ -11,43 +11,43 @@
>> void
>> f1 (unsigned int a, unsigned int b)
>> {
>> - unsigned long long c = a / b;
>> - unsigned long long d = a % b;
>> + register unsigned long long c asm ("rax") = a / b;
>> + register unsigned long long d asm ("rdx") = a % b;
>> asm volatile ("" : : "r" (c), "r" (d));
>> }
>>
>> void
>> f2 (int a, int b)
>> {
>> - unsigned long long c = (unsigned int) (a / b);
>> - unsigned long long d = (unsigned int) (a % b);
>> + register unsigned long long c asm ("rax") = (unsigned int) (a / b);
>> + register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
>> asm volatile ("" : : "r" (c), "r" (d));
>> }
>>
>> void
>> f3 (unsigned int a, unsigned int b)
>> {
>> - unsigned long long c = a / b;
>> + register unsigned long long c asm ("rax") = a / b;
>> asm volatile ("" : : "r" (c));
>> }
>>
>> void
>> f4 (int a, int b)
>> {
>> - unsigned long long c = (unsigned int) (a / b);
>> + register unsigned long long c asm ("rax") = (unsigned int) (a / b);
>> asm volatile ("" : : "r" (c));
>> }
>>
>> void
>> f5 (unsigned int a, unsigned int b)
>> {
>> - unsigned long long d = a % b;
>> + register unsigned long long d asm ("rdx") = a % b;
>> asm volatile ("" : : "r" (d));
>> }
>>
>> void
>> f6 (int a, int b)
>> {
>> - unsigned long long d = (unsigned int) (a % b);
>> + register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
>> asm volatile ("" : : "r" (d));
>> }
>> Index: gcc/testsuite/gcc.target/i386/pr82361-2.c
>> ===================================================================
>> --- gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-17 16:34:52.280124553 +0100
>> +++ gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-17 17:32:00.930930762 +0100
>> @@ -4,7 +4,8 @@
>> /* We should be able to optimize all %eax to %rax zero extensions, because
>> div and idiv instructions with 32-bit operands zero-extend both results. */
>> /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
>> -/* Ditto %edx to %rdx zero extensions. */
>> -/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
>> +/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
>> + one. */
>
> Can we please change the comment here and in pr82361-1.c to something like:
>
> /* FIXME: The compiler does not merge zero-extension to the modulo part. */
Thanks, here's what I applied.
Richard
2019-09-18 Richard Sandiford <richard.sandiford@arm.com>
gcc/testsuite/
* gcc.target/i386/pr82361-1.c (f1, f2, f3, f4, f5, f6): Force
"c" to be in %rax and "d" to be in %rdx.
* gcc.target/i386/pr82361-2.c: Expect 4 instances of "movl\t%edx".
Index: gcc/testsuite/gcc.target/i386/pr82361-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-09-17 18:00:14.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr82361-1.c 2019-09-18 08:37:39.030720198 +0100
@@ -4,50 +4,50 @@
/* We should be able to optimize all %eax to %rax zero extensions, because
div and idiv instructions with 32-bit operands zero-extend both results. */
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
- one. */
+/* FIXME: The compiler does not merge zero-extension to the modulo part
+ of f1 and f2. */
/* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
void
f1 (unsigned int a, unsigned int b)
{
- unsigned long long c = a / b;
- unsigned long long d = a % b;
+ register unsigned long long c asm ("rax") = a / b;
+ register unsigned long long d asm ("rdx") = a % b;
asm volatile ("" : : "r" (c), "r" (d));
}
void
f2 (int a, int b)
{
- unsigned long long c = (unsigned int) (a / b);
- unsigned long long d = (unsigned int) (a % b);
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
asm volatile ("" : : "r" (c), "r" (d));
}
void
f3 (unsigned int a, unsigned int b)
{
- unsigned long long c = a / b;
+ register unsigned long long c asm ("rax") = a / b;
asm volatile ("" : : "r" (c));
}
void
f4 (int a, int b)
{
- unsigned long long c = (unsigned int) (a / b);
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
asm volatile ("" : : "r" (c));
}
void
f5 (unsigned int a, unsigned int b)
{
- unsigned long long d = a % b;
+ register unsigned long long d asm ("rdx") = a % b;
asm volatile ("" : : "r" (d));
}
void
f6 (int a, int b)
{
- unsigned long long d = (unsigned int) (a % b);
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
asm volatile ("" : : "r" (d));
}
Index: gcc/testsuite/gcc.target/i386/pr82361-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-17 18:00:14.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr82361-2.c 2019-09-18 08:37:39.034720166 +0100
@@ -4,7 +4,8 @@
/* We should be able to optimize all %eax to %rax zero extensions, because
div and idiv instructions with 32-bit operands zero-extend both results. */
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* Ditto %edx to %rdx zero extensions. */
-/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
+/* FIXME: The compiler does not merge zero-extension to the modulo part
+ of f1 and f2. */
+/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
#include "pr82361-1.c"
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2019-09-18 7:40 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-17 16:34 [x86] Tweak testcases for PR82361 Richard Sandiford
2019-09-18 6:44 ` Uros Bizjak
2019-09-18 7:40 ` Richard Sandiford
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).