public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] aarch64: Fix LDP/STP handling for Morello
@ 2022-05-05 12:06 Matthew Malcomson
0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2022-05-05 12:06 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:7edecc9ac1e0e6eee2cfaaba3e148def639cc8c8
commit 7edecc9ac1e0e6eee2cfaaba3e148def639cc8c8
Author: Richard Sandiford <richard.sandiford@arm.com>
Date: Wed Mar 30 05:02:43 2022 +0100
aarch64: Fix LDP/STP handling for Morello
There are two main ways of generating a single LDP/STP:
- combine a vec_concat of two elements followed by a store
of the result (query type ADDR_QUERY_LDP_STP_N).
- opportunistically match two individual loads or stores to
nearby locations (query type ADDR_QUERY_LDP_STP).
This wasn't working for purecap (giving a missed optimisation)
because extract_base_offset_in_addr didn't handle POINTER_PLUS.
But the optimisation isn't available for alternative bases,
since there are no LDP/STP forms for those.
As well as matching single LDPs and STPs, we also have code to match
sequences of 4 loads and stores whose addresses aren't directly valid
for LDP/STP. It's then worth paying the cost of doing some extra
address arithmetic in order to produce 2 LDP/STP pairs.
The optimisation didn't handle purecap correctly because the
peepholes reserve a register in DImode:
(define_peephole2
[(match_scratch:DI 8 "r")
...
(match_dup 8)]
etc. We need to coerce the register to the right base mode
before using it.
A later patch will handle LDP/STP for capability registers.
There's an unrelated issue with the vectoriser dropping __capability.
alt-base-pair-2.c works around that by disabling vectorisation.
There should really be 5 zero stores in normal-base-pair-2.c;
a later patch fixes this.
Diff:
---
gcc/config/aarch64/aarch64.c | 24 +++++++++++----
.../gcc.target/aarch64/morello/alt-base-pair-1.c | 21 +++++++++++++
.../gcc.target/aarch64/morello/alt-base-pair-2.c | 24 +++++++++++++++
.../gcc.target/aarch64/morello/alt-base-pair-3.c | 35 ++++++++++++++++++++++
.../aarch64/morello/normal-base-pair-1.c | 21 +++++++++++++
.../aarch64/morello/normal-base-pair-2.c | 29 ++++++++++++++++++
.../aarch64/morello/normal-base-pair-3.c | 35 ++++++++++++++++++++++
7 files changed, 183 insertions(+), 6 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 72ba5f4507f..99a6e4169e1 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9811,6 +9811,9 @@ aarch64_classify_address (struct aarch64_address_info *info,
else
ldr_str_mode = mode;
+ if (alt_base_p && ldp_stp_mode != VOIDmode)
+ return false;
+
bool allow_reg_index_p = (ldp_stp_mode == VOIDmode
&& (known_lt (GET_MODE_SIZE (mode), 16)
|| mode == CADImode
@@ -23304,7 +23307,7 @@ extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
return true;
}
- if (GET_CODE (addr) == PLUS
+ if (any_plus_p (addr)
&& REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
{
*base = XEXP (addr, 0);
@@ -23692,6 +23695,12 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
if (MEM_VOLATILE_P (mem[i]))
return false;
+ /* The address must use a normal rather than an alternative
+ base register. */
+ if (TARGET_CAPABILITY_HYBRID
+ && CAPABILITY_MODE_P (mem_address_mode (mem[i])))
+ return false;
+
/* Check if the addresses are in the form of [base+offset]. */
extract_base_offset_in_addr (mem[i], base + i, offset + i);
if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
@@ -23864,13 +23873,15 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
|| new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
return false;
- replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
+ auto addr_mode = mem_address_mode (mem_1);
+ rtx new_base = gen_rtx_REG (addr_mode, REGNO (operands[8]));
+ replace_equiv_address_nv (mem_1, plus_constant (addr_mode, new_base,
new_off_1), true);
- replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
+ replace_equiv_address_nv (mem_2, plus_constant (addr_mode, new_base,
new_off_1 + msize), true);
- replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
+ replace_equiv_address_nv (mem_3, plus_constant (addr_mode, new_base,
new_off_3), true);
- replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
+ replace_equiv_address_nv (mem_4, plus_constant (addr_mode, new_base,
new_off_3 + msize), true);
if (!aarch64_mem_pair_operand (mem_1, mode)
@@ -23916,7 +23927,8 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
}
/* Emit adjusting instruction. */
- emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
+ emit_insn (gen_rtx_SET (new_base, plus_constant (addr_mode,
+ base, base_off)));
/* Emit ldp/stp instructions. */
t1 = gen_rtx_SET (operands[0], operands[1]);
t2 = gen_rtx_SET (operands[2], operands[3]);
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c
new file mode 100644
index 00000000000..c137b9090f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+void
+fpr (uint32x4_t *__capability res, uint32x2_t v0, uint32x2_t v1)
+{
+ *res = vcombine_u32 (v0, v1);
+}
+
+void
+gpr (uint32x4_t *__capability res)
+{
+ uint32x2_t v0, v1;
+ asm ("" : "=r" (v0), "=r" (v1));
+ *res = vcombine_u32 (v0, v1);
+}
+
+/* { dg-final { scan-assembler-not {\tstp\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
new file mode 100644
index 00000000000..959f0b6fdeb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -mstrict-align -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE) \
+ void \
+ test_##TYPE (TYPE *__capability ptr, TYPE a, TYPE b) \
+ { \
+ ptr[0] = a; \
+ ptr[1] = b; \
+ ptr[2] = (TYPE) { 0 }; \
+ ptr[3] = (TYPE) { 0 }; \
+ }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+TEST_TYPE (uint32x2_t)
+TEST_TYPE (uint32x4_t)
+
+/* { dg-final { scan-assembler-not {\tstp\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c
new file mode 100644
index 00000000000..a900a66b969
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c
@@ -0,0 +1,35 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE) \
+ void \
+ test2_##TYPE (char *__capability vptr, TYPE a, TYPE b) \
+ { \
+ TYPE *__capability ptr = (TYPE *__capability) (vptr + 1); \
+ ptr[0] = a; \
+ ptr[1] = b; \
+ ptr[2] = a; \
+ ptr[3] = b; \
+ } \
+ \
+ void \
+ test3_##TYPE (char *__capability vptr, TYPE a, TYPE b) \
+ { \
+ TYPE *__capability ptr = (TYPE *__capability) (vptr + 1); \
+ ptr[0] = a; \
+ ptr[1] = b; \
+ ptr[2] = a; \
+ ptr[3] = b; \
+ ptr[4] = a; \
+ ptr[5] = b; \
+ }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+
+/* { dg-final { scan-assembler-not {\tstp\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c
new file mode 100644
index 00000000000..bcd693fa62d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } { "" } } */
+
+#include <arm_neon.h>
+
+void
+fpr (uint32x4_t *res, uint32x2_t v0, uint32x2_t v1)
+{
+ *res = vcombine_u32 (v0, v1);
+}
+
+void
+gpr (uint32x4_t *res)
+{
+ uint32x2_t v0, v1;
+ asm ("" : "=r" (v0), "=r" (v1));
+ *res = vcombine_u32 (v0, v1);
+}
+
+/* { dg-final { scan-assembler-times {\tstp\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
new file mode 100644
index 00000000000..1bf9e852bc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -mstrict-align -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE) \
+ void \
+ test_##TYPE (TYPE *ptr, TYPE a, TYPE b) \
+ { \
+ ptr[0] = a; \
+ ptr[1] = b; \
+ ptr[2] = (TYPE) { 0 }; \
+ ptr[3] = (TYPE) { 0 }; \
+ }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+TEST_TYPE (uint32x2_t)
+TEST_TYPE (uint32x4_t)
+
+/* { dg-final { scan-assembler-times {\tstp\tw[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\tx[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\ts[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\td[0-9]+,} 2 } } */
+/* { dg-final { scan-assembler-times {\tstp\tq[0-9]+,} 2 } } */
+/* { dg-final { scan-assembler-times {\tstp\t[wx]zr,} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c
new file mode 100644
index 00000000000..a9bb13dd85d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c
@@ -0,0 +1,35 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE) \
+ void \
+ test2_##TYPE (char *vptr, TYPE a, TYPE b) \
+ { \
+ TYPE *ptr = (TYPE *) (vptr + 1); \
+ ptr[0] = a; \
+ ptr[1] = b; \
+ ptr[2] = a; \
+ ptr[3] = b; \
+ } \
+ \
+ void \
+ test3_##TYPE (char *vptr, TYPE a, TYPE b) \
+ { \
+ TYPE *ptr = (TYPE *) (vptr + 1); \
+ ptr[0] = a; \
+ ptr[1] = b; \
+ ptr[2] = a; \
+ ptr[3] = b; \
+ ptr[4] = a; \
+ ptr[5] = b; \
+ }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+
+/* { dg-final { scan-assembler-times {\tstp\t} 16 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-05-05 12:06 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05 12:06 [gcc(refs/vendors/ARM/heads/morello)] aarch64: Fix LDP/STP handling for Morello Matthew Malcomson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).