From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
To: Szabolcs Nagy <Szabolcs.Nagy@arm.com>,
Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>,
gcc Patches <gcc-patches@gcc.gnu.org>, nd <nd@arm.com>,
Richard Sandiford <richard.sandiford@arm.com>
Subject: Re: [AArch64] [SVE] PR88837 - Poor vector construction code in VL-specific mode
Date: Fri, 07 Jun 2019 09:46:00 -0000 [thread overview]
Message-ID: <CAAgBjM=UxEQkUPx8PbMY4fpmSsMEFFgM+QYyzk82_inDic0wkA@mail.gmail.com> (raw)
In-Reply-To: <mptftongdmu.fsf@arm.com>
[-- Attachment #1: Type: text/plain, Size: 3016 bytes --]
On Thu, 6 Jun 2019 at 16:54, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Szabolcs Nagy <Szabolcs.Nagy@arm.com> writes:
> > On 03/06/2019 08:26, Prathamesh Kulkarni wrote:
> >> +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c
> >> @@ -0,0 +1,32 @@
> >> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> >> +/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
> >> +
> >> +/* Case 5.2: Interleaved elements and constants. */
> >> +
> >> +#include <stdint.h>
> >> +
> >> +typedef int32_t vnx4si __attribute__((vector_size (32)));
> >> +
> >> +__attribute__((noipa))
> >> +vnx4si foo(int a, int b, int c, int d)
> >> +{
> >> + return (vnx4si) { a, 1, b, 2, c, 3, d, 4 };
> >> +}
> >> +
> >> +/*
> >> +foo:
> >> +.LFB0:
> >> + .cfi_startproc
> >> + ptrue p0.s, vl8
> >> + mov z0.s, w3
> >> + adrp x3, .LANCHOR0
> >> + insr z0.s, w2
> >> + add x3, x3, :lo12:.LANCHOR0
> >> + insr z0.s, w1
> >> + ld1w z1.s, p0/z, [x3]
> >> + insr z0.s, w0
> >> + zip1 z0.s, z0.s, z1.s
> >> + ret
> >> +*/
> >> +
> >> +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tadrp\t(x[0-9]+), \.LANCHOR0\n\tinsr\t\1, w2\n\tadd\t\2, \2, :lo12:\.LANCHOR0\n\tinsr\t\1, w1\n\tld1w\t(z[0-9]+\.s), p[0-9]+/z, \[\2\]\n\tinsr\t\1, w0\n\tzip1\t\1, \1, \3} } } */
> >
> > this fails with tiny model when i'm testing aarch64-none-elf
> >
> > $ make check-c 'RUNTESTFLAGS=--target_board=aarch64-elf-qemu{-mcmodel=tiny} aarch64-sve.exp=init_8.c'
> > ...
> > FAIL: gcc.target/aarch64/sve/init_8.c -march=armv8.2-a+sve scan-assembler \\tmov\\t(z[0-9]+\\.s), w3\\n\\tadrp\\t(x[0-9]+),
> > \\.LANCHOR0\\n\\tinsr\\t\\1, w2\\n\\tadd\\t\\2, \\2, :lo12:\\.LANCHOR0\\n\\tinsr\\t\\1, w1\\n\\tld1w\\t(z[0-9]+\\.s), p[0-9]+/z,
> > \\[\\2\\]\\n\\tinsr\\t\\1, w0\\n\\tzip1\\t\\1, \\1, \\3
> >
> > i think you need conditional scan asm for { target aarch64_small }
> > and { target aarch64_tiny } or just skip the test for tiny,
>
> Maybe we should remove the address calculation and replace the ld1w
> address with \[[^]]*\]. All that really matters for this test is that
> the vector is loaded from memory.
>
> > but even then matching exact register name and instruction scheduling
> > seems fragile.
>
> The only hard-coded register names are the parameters, which are
> guaranteed by the ABI. Testing for those should be fine.
>
> The dg-options pass -fno-schedule-insns, but I guess they should
> also pass -fno-schedule-insns2. Or maybe just use -O instead.
> We can always revisit this later if even that isn't enough to make
> the order stable.
Thanks for the suggestions. Passing -fno-schedule-insns2 does seem to
make the order stable.
For init_1.c to init_4.c there were no intervening instructions, so those
tests are left unchanged. For the remaining tests (init_5.c to init_12.c),
the patch passes -fno-schedule-insns2 and adjusts the dg-final
scan-assembler patterns accordingly. I verified that the tests pass with
-mcmodel=tiny.
OK to commit?
Thanks,
Prathamesh
>
> Richard
[-- Attachment #2: pr88837-tests-1.txt --]
[-- Type: text/plain, Size: 9977 bytes --]
2019-06-07 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
* gcc.target/aarch64/sve/init_5.c: Pass -fno-schedule-insns2.
Update assembly in comments and adjust dg-scan.
* gcc.target/aarch64/sve/init_6.c: Likewise.
* gcc.target/aarch64/sve/init_7.c: Likewise.
* gcc.target/aarch64/sve/init_8.c: Likewise.
* gcc.target/aarch64/sve/init_9.c: Likewise.
* gcc.target/aarch64/sve/init_10.c: Likewise.
* gcc.target/aarch64/sve/init_11.c: Likewise.
* gcc.target/aarch64/sve/init_12.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c
index 9d6e2dfc876..08437e5d8f1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 5.4: Interleaved repeating elements and non-repeating elements. */
@@ -17,13 +17,14 @@ vnx4si foo(int a, int b, int c, int f)
foo:
.LFB0:
.cfi_startproc
- mov z0.s, w2
mov z1.s, w3
+ mov z0.s, w2
insr z0.s, w1
- ptrue p0.s, vl8
insr z0.s, w0
zip1 z0.s, z0.s, z1.s
+ ptrue p0.s, vl8
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tmov\t(z[0-9]+\.s), w2\n.*\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */
+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c
index e50cd54ef13..786765dbfb7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 5.5: Interleaved repeating elements and trailing same elements. */
@@ -18,11 +18,12 @@ foo:
.LFB0:
.cfi_startproc
mov z0.s, w1
- mov z1.s, w2
insr z0.s, w0
- ptrue p0.s, vl8
+ mov z1.s, w2
zip1 z0.s, z0.s, z1.s
+ ptrue p0.s, vl8
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w1\n\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w0\n.*\tzip1\t\1, \1, \2} } } */
+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w1\n\tinsr\t\1, w0\n\tmov\t(z[0-9]+\.s), w2\n\tzip1\t\1, \1, \2} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c
index 21d9e764360..e65b1af475c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 5.5: Interleaved repeating elements and trailing same elements. */
@@ -17,14 +17,15 @@ vnx4si foo(int a, int b, int f)
foo:
.LFB0:
.cfi_startproc
- mov z0.s, w0
mov z1.s, w2
+ mov z0.s, w0
insr z0.s, w1
- ptrue p0.s, vl8
insr z0.s, w1
insr z0.s, w1
zip1 z0.s, z0.s, z1.s
+ ptrue p0.s, vl8
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tmov\t(z[0-9]+\.s), w0\n.*\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tzip1\t\2, \2, \1} } } */
+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tmov\t(z[0-9]+\.s), w0\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tzip1\t\2, \2, \1} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c
index e7fbdd1a2aa..acab6d2d405 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 3: Trailing same element. */
@@ -18,10 +18,11 @@ foo:
.LFB0:
.cfi_startproc
mov z0.s, w2
- ptrue p0.s, vl8
insr z0.s, w1
insr z0.s, w0
+ ptrue p0.s, vl8
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n.*\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */
+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c
index f6f3da5958d..fd6d4b9b85a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 3: Trailing same element. */
@@ -18,11 +18,12 @@ foo:
.LFB0:
.cfi_startproc
mov z0.s, w2
- ptrue p0.s, vl8
insr z0.s, w1
insr z0.s, w0
rev z0.s, z0.s
+ ptrue p0.s, vl8
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n.*\tinsr\t\1, w1\n\tinsr\t\1, w0\n\trev\t\1, \1} } } */
+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0\n\trev\t\1, \1} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c
index e3104a35f13..cf6926d3a73 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 5.1: All elements. */
@@ -18,7 +18,6 @@ foo:
.LFB0:
.cfi_startproc
mov z0.s, w7
- ptrue p0.s, vl8
insr z0.s, w6
insr z0.s, w5
insr z0.s, w4
@@ -26,7 +25,9 @@ foo:
insr z0.s, w2
insr z0.s, w1
insr z0.s, w0
+ ptrue p0.s, vl8
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w7\n.*\tinsr\t\1, w6\n\tinsr\t\1, w5\n\tinsr\t\1, w4\n\tinsr\t\1, w3\n\tinsr\t\1, w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */
+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w7\n\tinsr\t\1, w6\n\tinsr\t\1, w5\n\tinsr\t\1, w4\n\tinsr\t\1, w3\n\tinsr\t\1, w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c
index 7ff3e0849cc..b3ed32e4c82 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 5.2: Interleaved elements and constants. */
@@ -18,15 +18,16 @@ foo:
.LFB0:
.cfi_startproc
ptrue p0.s, vl8
+ adrp x4, .LANCHOR0
+ add x4, x4, :lo12:.LANCHOR0
+ ld1w z1.s, p0/z, [x4]
mov z0.s, w3
- adrp x3, .LANCHOR0
insr z0.s, w2
- add x3, x3, :lo12:.LANCHOR0
insr z0.s, w1
- ld1w z1.s, p0/z, [x3]
insr z0.s, w0
zip1 z0.s, z0.s, z1.s
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tadrp\t(x[0-9]+), \.LANCHOR0\n\tinsr\t\1, w2\n\tadd\t\2, \2, :lo12:\.LANCHOR0\n\tinsr\t\1, w1\n\tld1w\t(z[0-9]+\.s), p[0-9]+/z, \[\2\]\n\tinsr\t\1, w0\n\tzip1\t\1, \1, \3} } } */
+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-9]+/z, \[x[0-9]+\]\n\tmov\t(z[0-9]+\.s), w3\n\tinsr\t\2, w2\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c
index 4d3c59b3bf8..333bd4f2852 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c
@@ -1,5 +1,5 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */
/* Case 5.3: Repeated elements. */
@@ -19,9 +19,10 @@ foo:
.cfi_startproc
mov z0.s, w0
mov z1.s, w1
- ptrue p0.s, vl8
zip1 z0.s, z0.s, z1.s
+ ptrue p0.s, vl8
+ st1w z0.s, p0, [x8]
ret
*/
-/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w0\n\tmov\t(z[0-9]+\.s), w1\n.*\tzip1\t\1, \1, \2} } } */
+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w0\n\tmov\t(z[0-9]+\.s), w1\n\tzip1\t\1, \1, \2} } } */
next prev parent reply other threads:[~2019-06-07 9:46 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-27 10:27 Prathamesh Kulkarni
2019-05-29 12:40 ` Richard Sandiford
2019-05-30 9:37 ` Prathamesh Kulkarni
2019-05-30 10:14 ` Richard Sandiford
2019-05-30 14:52 ` Prathamesh Kulkarni
2019-05-30 16:01 ` Richard Sandiford
2019-06-03 7:26 ` Prathamesh Kulkarni
2019-06-03 9:23 ` Richard Sandiford
2019-06-03 9:52 ` Prathamesh Kulkarni
2019-06-03 10:55 ` Richard Sandiford
2019-06-03 12:15 ` Prathamesh Kulkarni
2019-06-06 11:10 ` Szabolcs Nagy
2019-06-06 11:24 ` Richard Sandiford
2019-06-07 9:46 ` Prathamesh Kulkarni [this message]
2019-06-07 12:56 ` Richard Sandiford
2019-06-07 16:29 ` Prathamesh Kulkarni
2019-06-07 17:17 ` Richard Sandiford
2019-06-08 8:32 ` Prathamesh Kulkarni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAAgBjM=UxEQkUPx8PbMY4fpmSsMEFFgM+QYyzk82_inDic0wkA@mail.gmail.com' \
--to=prathamesh.kulkarni@linaro.org \
--cc=Szabolcs.Nagy@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=nd@arm.com \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).