From: Tamar Christina <tamar.christina@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: nd@arm.com, Richard.Earnshaw@arm.com, Marcus.Shawcroft@arm.com,
Kyrylo.Tkachov@arm.com, richard.sandiford@arm.com
Subject: [PATCH 2/4]AArch64: Add support for sign differing dot-product usdot for NEON and SVE.
Date: Wed, 5 May 2021 18:38:56 +0100 [thread overview]
Message-ID: <20210505173854.GA17884@arm.com> (raw)
In-Reply-To: <patch-14433-tamar@arm.com>
[-- Attachment #1: Type: text/plain, Size: 7936 bytes --]
Hi All,
This adds optabs implementing usdot_prod.
The following testcase:
#define N 480
#define SIGNEDNESS_1 unsigned
#define SIGNEDNESS_2 signed
#define SIGNEDNESS_3 signed
#define SIGNEDNESS_4 unsigned
SIGNEDNESS_1 int __attribute__ ((noipa))
f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
SIGNEDNESS_4 char *restrict b)
{
for (__INTPTR_TYPE__ i = 0; i < N; ++i)
{
int av = a[i];
int bv = b[i];
SIGNEDNESS_2 short mult = av * bv;
res += mult;
}
return res;
}
Generates for NEON
f:
movi v0.4s, 0
mov x3, 0
.p2align 3,,7
.L2:
ldr q1, [x2, x3]
ldr q2, [x1, x3]
usdot v0.4s, v1.16b, v2.16b
add x3, x3, 16
cmp x3, 480
bne .L2
addv s0, v0.4s
fmov w1, s0
add w0, w0, w1
ret
and for SVE
f:
mov x3, 0
cntb x5
mov w4, 480
mov z1.b, #0
whilelo p0.b, wzr, w4
mov z3.b, #0
ptrue p1.b, all
.p2align 3,,7
.L2:
ld1b z2.b, p0/z, [x1, x3]
ld1b z0.b, p0/z, [x2, x3]
add x3, x3, x5
sel z0.b, p0, z0.b, z3.b
whilelo p0.b, w3, w4
usdot z1.s, z0.b, z2.b
b.any .L2
uaddv d0, p1, z1.s
fmov x1, d0
add w0, w0, w1
ret
instead of
f:
movi v0.4s, 0
mov x3, 0
.p2align 3,,7
.L2:
ldr q2, [x1, x3]
ldr q1, [x2, x3]
add x3, x3, 16
sxtl v4.8h, v2.8b
sxtl2 v3.8h, v2.16b
uxtl v2.8h, v1.8b
uxtl2 v1.8h, v1.16b
mul v2.8h, v2.8h, v4.8h
mul v1.8h, v1.8h, v3.8h
saddw v0.4s, v0.4s, v2.4h
saddw2 v0.4s, v0.4s, v2.8h
saddw v0.4s, v0.4s, v1.4h
saddw2 v0.4s, v0.4s, v1.8h
cmp x3, 480
bne .L2
addv s0, v0.4s
fmov w1, s0
add w0, w0, w1
ret
and
f:
mov x3, 0
cnth x5
mov w4, 480
mov z1.b, #0
whilelo p0.h, wzr, w4
ptrue p2.b, all
.p2align 3,,7
.L2:
ld1sb z2.h, p0/z, [x1, x3]
punpklo p1.h, p0.b
ld1b z0.h, p0/z, [x2, x3]
add x3, x3, x5
mul z0.h, p2/m, z0.h, z2.h
sunpklo z2.s, z0.h
sunpkhi z0.s, z0.h
add z1.s, p1/m, z1.s, z2.s
punpkhi p1.h, p0.b
whilelo p0.h, w3, w4
add z1.s, p1/m, z1.s, z0.s
b.any .L2
uaddv d0, p2, z1.s
fmov x1, d0
add w0, w0, w1
ret
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (usdot_prod<vsi2qi>): New.
* config/aarch64/aarch64-sve.md (@aarch64_<sur>dot_prod<vsi2qi>):
Rename to...
(@<sur>dot_prod<vsi2qi>): ...This.
* config/aarch64/aarch64-sve-builtins-base.cc
(svusdot_impl::expand): Use it.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/vusdot-autovec.c: New test.
* gcc.target/aarch64/sve/vusdot-autovec.c: New test.
--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de17126432e2a63a32b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>"
DONE;
})
+;; Auto-vectorizer pattern for usdot
+(define_expand "usdot_prod<vsi2qi>"
+ [(set (match_operand:VS 0 "register_operand")
+ (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
+ (match_operand:<VSI2QI> 2 "register_operand")]
+ UNSPEC_USDOT)
+ (match_operand:VS 3 "register_operand")))]
+ "TARGET_I8MM"
+{
+ emit_insn (
+ gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1],
+ operands[2]));
+ emit_move_insn (operands[0], operands[3]);
+ DONE;
+})
+
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index dfdf0e2fd186389cbddcff51ef52f8778d7fdb24..50adcd5404e97e610485140fdbfe4c8ebbf2f602 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2366,7 +2366,7 @@ public:
Hence we do the same rotation on arguments as svdot_impl does. */
e.rotate_inputs_left (0, 3);
machine_mode mode = e.vector_mode (0);
- insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
+ insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode);
return e.use_exact_insn (icode);
}
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 7db2938bb84e04d066a7b07574e5cf344a3a8fb6..1278f6f12fadf8eec693cd47fd545ff3277f08f1 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6870,7 +6870,7 @@ (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
[(set_attr "movprfx" "*,yes")]
)
-(define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
+(define_insn "@<sur>dot_prod<vsi2qi>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
(plus:VNx4SI_ONLY
(unspec:VNx4SI_ONLY
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f09496dd038410d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+ SIGNEDNESS_4 char *restrict b)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+ SIGNEDNESS_4 char *restrict a)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec3505657bf05320e5fdbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+ SIGNEDNESS_4 char *restrict b)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+ SIGNEDNESS_4 char *restrict a)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
--
[-- Attachment #2: rb14434.patch --]
[-- Type: text/x-diff, Size: 4713 bytes --]
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de17126432e2a63a32b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>"
DONE;
})
+;; Auto-vectorizer pattern for usdot
+(define_expand "usdot_prod<vsi2qi>"
+ [(set (match_operand:VS 0 "register_operand")
+ (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
+ (match_operand:<VSI2QI> 2 "register_operand")]
+ UNSPEC_USDOT)
+ (match_operand:VS 3 "register_operand")))]
+ "TARGET_I8MM"
+{
+ emit_insn (
+ gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1],
+ operands[2]));
+ emit_move_insn (operands[0], operands[3]);
+ DONE;
+})
+
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index dfdf0e2fd186389cbddcff51ef52f8778d7fdb24..50adcd5404e97e610485140fdbfe4c8ebbf2f602 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2366,7 +2366,7 @@ public:
Hence we do the same rotation on arguments as svdot_impl does. */
e.rotate_inputs_left (0, 3);
machine_mode mode = e.vector_mode (0);
- insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
+ insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode);
return e.use_exact_insn (icode);
}
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 7db2938bb84e04d066a7b07574e5cf344a3a8fb6..1278f6f12fadf8eec693cd47fd545ff3277f08f1 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6870,7 +6870,7 @@ (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
[(set_attr "movprfx" "*,yes")]
)
-(define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
+(define_insn "@<sur>dot_prod<vsi2qi>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
(plus:VNx4SI_ONLY
(unspec:VNx4SI_ONLY
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f09496dd038410d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+ SIGNEDNESS_4 char *restrict b)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+ SIGNEDNESS_4 char *restrict a)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
new file mode 100644
index 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec3505657bf05320e5fdbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+ SIGNEDNESS_4 char *restrict b)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+ SIGNEDNESS_4 char *restrict a)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */
next prev parent reply other threads:[~2021-05-05 17:39 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-05 17:38 [PATCH 1/4]middle-end Vect: Add support for dot-product where the sign for the multiplicant changes Tamar Christina
2021-05-05 17:38 ` Tamar Christina [this message]
2021-05-10 16:49 ` [PATCH 2/4]AArch64: Add support for sign differing dot-product usdot for NEON and SVE Richard Sandiford
2021-05-25 14:57 ` Tamar Christina
2021-05-26 8:50 ` Richard Sandiford
2021-05-05 17:39 ` [PATCH 3/4][AArch32]: Add support for sign differing dot-product usdot for NEON Tamar Christina
2021-05-05 17:42 ` FW: " Tamar Christina
[not found] ` <VI1PR08MB5325B832EE3BB6139886C0E9FF259@VI1PR08MB5325.eurprd08.prod.outlook.com>
2021-05-25 15:02 ` Tamar Christina
2021-05-26 10:45 ` Kyrylo Tkachov
2021-05-06 9:23 ` Christophe Lyon
2021-05-06 9:27 ` Tamar Christina
2021-05-05 17:39 ` [PATCH 4/4]middle-end: Add tests middle end generic tests for sign differing dotproduct Tamar Christina
[not found] ` <VI1PR08MB532511701573C18A33AC6291FF259@VI1PR08MB5325.eurprd08.prod.outlook.com>
2021-05-25 15:01 ` FW: " Tamar Christina
[not found] ` <11s2181-8856-30rq-26or-84q8o7qrr2o@fhfr.qr>
2021-05-26 8:48 ` Tamar Christina
2021-06-14 12:08 ` Tamar Christina
2021-05-07 11:45 ` [PATCH 1/4]middle-end Vect: Add support for dot-product where the sign for the multiplicant changes Richard Biener
2021-05-07 12:42 ` Tamar Christina
2021-05-10 11:39 ` Richard Biener
2021-05-10 12:58 ` Tamar Christina
2021-05-10 13:29 ` Richard Biener
2021-05-25 14:57 ` Tamar Christina
2021-05-26 8:56 ` Richard Biener
2021-06-02 9:28 ` Tamar Christina
2021-06-04 10:12 ` Tamar Christina
2021-06-07 10:10 ` Richard Sandiford
2021-06-14 12:06 ` Tamar Christina
2021-06-21 8:11 ` Tamar Christina
2021-06-22 10:56 ` Richard Sandiford
2021-06-22 11:16 ` Richard Sandiford
2021-07-12 9:18 ` Tamar Christina
2021-07-12 9:39 ` Richard Sandiford
2021-07-12 9:56 ` Tamar Christina
2021-07-12 10:25 ` Richard Sandiford
2021-07-12 12:29 ` Tamar Christina
2021-07-12 14:55 ` Richard Sandiford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210505173854.GA17884@arm.com \
--to=tamar.christina@arm.com \
--cc=Kyrylo.Tkachov@arm.com \
--cc=Marcus.Shawcroft@arm.com \
--cc=Richard.Earnshaw@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=nd@arm.com \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).