public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Matthew Malcomson <matmal01@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/ARM/heads/morello)] aarch64: Fix LDP/STP handling for Morello Date: Thu, 5 May 2022 12:06:21 +0000 (GMT) [thread overview] Message-ID: <20220505120621.27123385DC05@sourceware.org> (raw) https://gcc.gnu.org/g:7edecc9ac1e0e6eee2cfaaba3e148def639cc8c8 commit 7edecc9ac1e0e6eee2cfaaba3e148def639cc8c8 Author: Richard Sandiford <richard.sandiford@arm.com> Date: Wed Mar 30 05:02:43 2022 +0100 aarch64: Fix LDP/STP handling for Morello There are two main ways of generating a single LDP/STP: - combine a vec_concat of two elements followed by a store of the result (query type ADDR_QUERY_LDP_STP_N). - opportunistically match two individual loads or stores to nearby locations (query type ADDR_QUERY_LDP_STP). This wasn't working for purecap (giving a missed optimisation) because extract_base_offset_in_addr didn't handle POINTER_PLUS. But the optimisation isn't available for alternative bases, since there are no LDP/STP forms for those. As well as matching single LDPs and STPs, we also have code to match sequences of 4 loads and stores whose addresses aren't directly valid for LDP/STP. It's then worth paying the cost of doing some extra address arithmetic in order to produce 2 LDP/STP pairs. The optimisation didn't handle purecap correctly because the peepholes reserve a register in DImode: (define_peephole2 [(match_scratch:DI 8 "r") ... (match_dup 8)] etc. We need to coerce the register to the right base mode before using it. A later patch will handle LDP/STP for capability registers. There's an unrelated issue with the vectoriser dropping __capability. alt-base-pair-2.c works around that by disabling vectorisation. There should really be 5 zero stores in normal-base-pair-2.c; a later patch fixes this. 
Diff: --- gcc/config/aarch64/aarch64.c | 24 +++++++++++---- .../gcc.target/aarch64/morello/alt-base-pair-1.c | 21 +++++++++++++ .../gcc.target/aarch64/morello/alt-base-pair-2.c | 24 +++++++++++++++ .../gcc.target/aarch64/morello/alt-base-pair-3.c | 35 ++++++++++++++++++++++ .../aarch64/morello/normal-base-pair-1.c | 21 +++++++++++++ .../aarch64/morello/normal-base-pair-2.c | 29 ++++++++++++++++++ .../aarch64/morello/normal-base-pair-3.c | 35 ++++++++++++++++++++++ 7 files changed, 183 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 72ba5f4507f..99a6e4169e1 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -9811,6 +9811,9 @@ aarch64_classify_address (struct aarch64_address_info *info, else ldr_str_mode = mode; + if (alt_base_p && ldp_stp_mode != VOIDmode) + return false; + bool allow_reg_index_p = (ldp_stp_mode == VOIDmode && (known_lt (GET_MODE_SIZE (mode), 16) || mode == CADImode @@ -23304,7 +23307,7 @@ extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) return true; } - if (GET_CODE (addr) == PLUS + if (any_plus_p (addr) && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1))) { *base = XEXP (addr, 0); @@ -23692,6 +23695,12 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load, if (MEM_VOLATILE_P (mem[i])) return false; + /* The addres must use a normal rather than an alternative + base register. */ + if (TARGET_CAPABILITY_HYBRID + && CAPABILITY_MODE_P (mem_address_mode (mem[i]))) + return false; + /* Check if the addresses are in the form of [base+offset]. 
*/ extract_base_offset_in_addr (mem[i], base + i, offset + i); if (base[i] == NULL_RTX || offset[i] == NULL_RTX) @@ -23864,13 +23873,15 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit) return false; - replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8], + auto addr_mode = mem_address_mode (mem_1); + rtx new_base = gen_rtx_REG (addr_mode, REGNO (operands[8])); + replace_equiv_address_nv (mem_1, plus_constant (addr_mode, new_base, new_off_1), true); - replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8], + replace_equiv_address_nv (mem_2, plus_constant (addr_mode, new_base, new_off_1 + msize), true); - replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8], + replace_equiv_address_nv (mem_3, plus_constant (addr_mode, new_base, new_off_3), true); - replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8], + replace_equiv_address_nv (mem_4, plus_constant (addr_mode, new_base, new_off_3 + msize), true); if (!aarch64_mem_pair_operand (mem_1, mode) @@ -23916,7 +23927,8 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, } /* Emit adjusting instruction. */ - emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off))); + emit_insn (gen_rtx_SET (new_base, plus_constant (addr_mode, + base, base_off))); /* Emit ldp/stp instructions. 
*/ t1 = gen_rtx_SET (operands[0], operands[1]); t2 = gen_rtx_SET (operands[2], operands[3]); diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c new file mode 100644 index 00000000000..c137b9090f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c @@ -0,0 +1,21 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include <arm_neon.h> + +void +fpr (uint32x4_t *__capability res, uint32x2_t v0, uint32x2_t v1) +{ + *res = vcombine_u32 (v0, v1); +} + +void +gpr (uint32x4_t *__capability res) +{ + uint32x2_t v0, v1; + asm ("" : "=r" (v0), "=r" (v1)); + *res = vcombine_u32 (v0, v1); +} + +/* { dg-final { scan-assembler-not {\tstp\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c new file mode 100644 index 00000000000..959f0b6fdeb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c @@ -0,0 +1,24 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -mstrict-align -save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include <arm_neon.h> + +#define TEST_TYPE(TYPE) \ + void \ + test_##TYPE (TYPE *__capability ptr, TYPE a, TYPE b) \ + { \ + ptr[0] = a; \ + ptr[1] = b; \ + ptr[2] = (TYPE) { 0 }; \ + ptr[3] = (TYPE) { 0 }; \ + } + +TEST_TYPE (uint32_t) +TEST_TYPE (uint64_t) +TEST_TYPE (float) +TEST_TYPE (double) +TEST_TYPE (uint32x2_t) +TEST_TYPE (uint32x4_t) + +/* { dg-final { scan-assembler-not {\tstp\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c new file mode 100644 index 00000000000..a900a66b969 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c @@ -0,0 +1,35 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include <arm_neon.h> + +#define TEST_TYPE(TYPE) \ + void \ + test2_##TYPE (char *__capability vptr, TYPE a, TYPE b) \ + { \ + TYPE *__capability ptr = (TYPE *__capability) (vptr + 1); \ + ptr[0] = a; \ + ptr[1] = b; \ + ptr[2] = a; \ + ptr[3] = b; \ + } \ + \ + void \ + test3_##TYPE (char *__capability vptr, TYPE a, TYPE b) \ + { \ + TYPE *__capability ptr = (TYPE *__capability) (vptr + 1); \ + ptr[0] = a; \ + ptr[1] = b; \ + ptr[2] = a; \ + ptr[3] = b; \ + ptr[4] = a; \ + ptr[5] = b; \ + } + +TEST_TYPE (uint32_t) +TEST_TYPE (uint64_t) +TEST_TYPE (float) +TEST_TYPE (double) + +/* { dg-final { scan-assembler-not {\tstp\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c new file mode 100644 index 00000000000..bcd693fa62d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c @@ -0,0 +1,21 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } { "" } } */ + +#include <arm_neon.h> + +void +fpr (uint32x4_t *res, uint32x2_t v0, uint32x2_t v1) +{ + *res = vcombine_u32 (v0, v1); +} + +void +gpr (uint32x4_t *res) +{ + uint32x2_t v0, v1; + asm ("" : "=r" (v0), "=r" (v1)); + *res = vcombine_u32 (v0, v1); +} + +/* { dg-final { scan-assembler-times {\tstp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c new file mode 100644 index 00000000000..1bf9e852bc9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-fpeephole2 
-fno-tree-vectorize -mstrict-align -save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ + +#include <arm_neon.h> + +#define TEST_TYPE(TYPE) \ + void \ + test_##TYPE (TYPE *ptr, TYPE a, TYPE b) \ + { \ + ptr[0] = a; \ + ptr[1] = b; \ + ptr[2] = (TYPE) { 0 }; \ + ptr[3] = (TYPE) { 0 }; \ + } + +TEST_TYPE (uint32_t) +TEST_TYPE (uint64_t) +TEST_TYPE (float) +TEST_TYPE (double) +TEST_TYPE (uint32x2_t) +TEST_TYPE (uint32x4_t) + +/* { dg-final { scan-assembler-times {\tstp\tw[0-9]+,} 1 } } */ +/* { dg-final { scan-assembler-times {\tstp\tx[0-9]+,} 1 } } */ +/* { dg-final { scan-assembler-times {\tstp\ts[0-9]+,} 1 } } */ +/* { dg-final { scan-assembler-times {\tstp\td[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\tstp\tq[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\tstp\t[wx]zr,} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c new file mode 100644 index 00000000000..a9bb13dd85d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c @@ -0,0 +1,35 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -save-temps" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ + +#include <arm_neon.h> + +#define TEST_TYPE(TYPE) \ + void \ + test2_##TYPE (char *vptr, TYPE a, TYPE b) \ + { \ + TYPE *ptr = (TYPE *) (vptr + 1); \ + ptr[0] = a; \ + ptr[1] = b; \ + ptr[2] = a; \ + ptr[3] = b; \ + } \ + \ + void \ + test3_##TYPE (char *vptr, TYPE a, TYPE b) \ + { \ + TYPE *ptr = (TYPE *) (vptr + 1); \ + ptr[0] = a; \ + ptr[1] = b; \ + ptr[2] = a; \ + ptr[3] = b; \ + ptr[4] = a; \ + ptr[5] = b; \ + } + +TEST_TYPE (uint32_t) +TEST_TYPE (uint64_t) +TEST_TYPE (float) +TEST_TYPE (double) + +/* { dg-final { scan-assembler-times {\tstp\t} 16 } } */
reply other threads:[~2022-05-05 12:06 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220505120621.27123385DC05@sourceware.org \ --to=matmal01@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).