public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-4402] sve: optimize add reduction patterns
@ 2021-10-14 14:08 Tamar Christina
0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2021-10-14 14:08 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:62b505a4d5fc8916867e25ed86dfb865fca81511
commit r12-4402-g62b505a4d5fc8916867e25ed86dfb865fca81511
Author: Tamar Christina <tamar.christina@arm.com>
Date: Thu Oct 14 15:07:14 2021 +0100
sve: optimize add reduction patterns
The following loop does a conditional reduction using an add:
#include <stdint.h>
int32_t f (int32_t *restrict array, int len, int min)
{
int32_t iSum = 0;
for (int i=0; i<len; i++) {
if (array[i] >= min)
iSum += array[i];
}
return iSum;
}
For this we currently generate:
mov z1.b, #0
mov z2.s, w2
mov z3.d, z1.d
ptrue p2.b, all
ld1w z0.s, p0/z, [x0, x3, lsl 2]
cmpge p1.s, p2/z, z0.s, z2.s
add x3, x3, x4
sel z0.s, p1, z0.s, z3.s
add z1.s, p0/m, z1.s, z0.s
whilelo p0.s, w3, w1
where the SEL is unneeded: it selects between a value and 0, and adding 0
leaves the accumulator unchanged. This can be optimized by doing the
conditional add on p1 instead of p0. After this patch we generate:
mov z2.s, w2
mov z0.b, #0
ptrue p1.b, all
ld1w z1.s, p0/z, [x0, x3, lsl 2]
cmpge p0.s, p0/z, z1.s, z2.s
add x3, x3, x4
add z0.s, p0/m, z0.s, z1.s
whilelo p0.s, w3, w1
and so we drop the SEL and the 0 move.
gcc/ChangeLog:
* match.pd: New rule.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pred-cond-reduc.c: New test.
Diff:
---
gcc/match.pd | 12 ++++++++++++
gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c | 18 ++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/gcc/match.pd b/gcc/match.pd
index c153e9a6e98..038a7981c0e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7156,6 +7156,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& element_precision (type) == element_precision (op_type))
(view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1)))))))
+/* Detect simplification for a conditional reduction where
+
+ a = mask1 ? b : 0
+ c = mask2 ? d + a : d
+
+ is turned into
+
+ c = mask1 && mask2 ? d + b : d. */
+(simplify
+ (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
+ (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
+
/* For pointers @0 and @2 and nonnegative constant offset @1, look for
expressions like:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c
new file mode 100644
index 00000000000..bd53025d3f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+int32_t f (int32_t *restrict array, int len, int min)
+{
+ int32_t iSum = 0;
+
+ for (int i=0; i<len; i++) {
+ if (array[i] >= min)
+ iSum += array[i];
+ }
+ return iSum;
+}
+
+
+/* { dg-final { scan-assembler-not {\tsel\tz[0-9]+\.s, p1, z[0-9]+\.s, z[0-9]+\.s} } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-10-14 14:08 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-14 14:08 [gcc r12-4402] sve: optimize add reduction patterns Tamar Christina
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).