From: Richard Sandiford
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-7137] aarch64: Generalise adjacency check for load_pair_lanes
X-Act-Checkin: gcc
X-Git-Author: Richard Sandiford
X-Git-Refname: refs/heads/trunk
X-Git-Oldrev: fabc5d9bceb0aec8db2147eb50ae375c711eea90
X-Git-Newrev: 958448a9441ee54e012c67cfc3cf88083f3d0e4a
Message-Id: <20220209165731.D99AE3858417@sourceware.org>
Date: Wed, 9 Feb 2022 16:57:31 +0000 (GMT)
List-Id: Gcc-cvs mailing list

https://gcc.gnu.org/g:958448a9441ee54e012c67cfc3cf88083f3d0e4a

commit r12-7137-g958448a9441ee54e012c67cfc3cf88083f3d0e4a
Author: Richard Sandiford
Date:   Wed Feb 9 16:57:03 2022 +0000

    aarch64: Generalise adjacency check for load_pair_lanes

    This patch generalises the load_pair_lanes<mode> guard so that it
    uses aarch64_check_consecutive_mems to check for consecutive mems.
    It also allows the pattern to be used for STRICT_ALIGNMENT targets
    if the alignment is high enough.

    The main aim is to avoid an inline test, for the sake of a later
    patch that needs to repeat it.  Reusing aarch64_check_consecutive_mems
    seemed simpler than writing an entirely new function.

    gcc/
            * config/aarch64/aarch64-protos.h (aarch64_mergeable_load_pair_p):
            Declare.
            * config/aarch64/aarch64-simd.md (load_pair_lanes<mode>): Use
            aarch64_mergeable_load_pair_p instead of inline check.
            * config/aarch64/aarch64.cc (aarch64_expand_vector_init): Likewise.
            (aarch64_check_consecutive_mems): Allow the reversed parameter
            to be null.
            (aarch64_mergeable_load_pair_p): New function.
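[Editorial note: the following is a minimal illustration, modelled on the
new vec-init-6.c test in the diff below; "make_pair" is a placeholder
name.  It shows the kind of initialisation the pattern targets: two loads
from consecutive addresses that can be merged into one 128-bit ldr
instead of a 64-bit load followed by an ins.]

    #include <arm_neon.h>

    int64_t s64[2];

    /* s64[0] and s64[1] occupy consecutive addresses, so the vector
       can be built with a single q-register load.  */
    int64x2_t
    make_pair (void)
    {
      return (int64x2_t) { s64[0], s64[1] };
    }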
Diff:
---
 gcc/config/aarch64/aarch64-protos.h           |  1 +
 gcc/config/aarch64/aarch64-simd.md            |  7 +---
 gcc/config/aarch64/aarch64.cc                 | 54 +++++++++++++++++----------
 gcc/testsuite/gcc.target/aarch64/vec-init-6.c | 12 ++++++
 gcc/testsuite/gcc.target/aarch64/vec-init-7.c | 12 ++++++
 5 files changed, 62 insertions(+), 24 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 26368538a55..b75ed35635b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1000,6 +1000,7 @@ void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
 int aarch64_ccmp_mode_to_code (machine_mode mode);
 
 bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
+bool aarch64_mergeable_load_pair_p (machine_mode, rtx, rtx);
 bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
 bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
 void aarch64_swap_ldrstr_operands (rtx *, bool);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 872a3d78269..c5bc2ea658b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4353,11 +4353,8 @@
 	(vec_concat:<VDBL>
 	   (match_operand:VDC 1 "memory_operand" "Utq")
 	   (match_operand:VDC 2 "memory_operand" "m")))]
-  "TARGET_SIMD && !STRICT_ALIGNMENT
-   && rtx_equal_p (XEXP (operands[2], 0),
-		   plus_constant (Pmode,
-				  XEXP (operands[1], 0),
-				  GET_MODE_SIZE (<MODE>mode)))"
+  "TARGET_SIMD
+   && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
   "ldr\\t%q0, %1"
   [(set_attr "type" "neon_load1_1reg_q")]
 )
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 296145e6008..c47543aebf3 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -21063,11 +21063,7 @@ aarch64_expand_vector_init (rtx target, rtx vals)
 	 for store_pair_lanes<mode>.  */
       if (memory_operand (x0, inner_mode)
 	  && memory_operand (x1, inner_mode)
-	  && !STRICT_ALIGNMENT
-	  && rtx_equal_p (XEXP (x1, 0),
-			  plus_constant (Pmode,
-					 XEXP (x0, 0),
-					 GET_MODE_SIZE (inner_mode))))
+	  && aarch64_mergeable_load_pair_p (mode, x0, x1))
 	{
 	  rtx t;
 	  if (inner_mode == DFmode)
@@ -24687,14 +24683,20 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
   return priority;
 }
 
-/* Check if *MEM1 and *MEM2 are consecutive memory references and,
+/* If REVERSED is null, return true if memory reference *MEM2 comes
+   immediately after memory reference *MEM1.  Do not change the references
+   in this case.
+
+   Otherwise, check if *MEM1 and *MEM2 are consecutive memory references and,
    if they are, try to make them use constant offsets from the same base
    register.  Return true on success.  When returning true, set *REVERSED
    to true if *MEM1 comes after *MEM2, false if *MEM1 comes before *MEM2.  */
 static bool
 aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
 {
-  *reversed = false;
+  if (reversed)
+    *reversed = false;
+
   if (GET_RTX_CLASS (GET_CODE (XEXP (*mem1, 0))) == RTX_AUTOINC
       || GET_RTX_CLASS (GET_CODE (XEXP (*mem2, 0))) == RTX_AUTOINC)
     return false;
@@ -24723,7 +24725,7 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
       if (known_eq (UINTVAL (offset1) + size1, UINTVAL (offset2)))
 	return true;
 
-      if (known_eq (UINTVAL (offset2) + size2, UINTVAL (offset1)))
+      if (known_eq (UINTVAL (offset2) + size2, UINTVAL (offset1)) && reversed)
 	{
 	  *reversed = true;
 	  return true;
@@ -24756,22 +24758,25 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
 
       if (known_eq (expr_offset1 + size1, expr_offset2))
 	;
-      else if (known_eq (expr_offset2 + size2, expr_offset1))
+      else if (known_eq (expr_offset2 + size2, expr_offset1) && reversed)
 	*reversed = true;
       else
 	return false;
 
-      if (base2)
+      if (reversed)
 	{
-	  rtx addr1 = plus_constant (Pmode, XEXP (*mem2, 0),
-				     expr_offset1 - expr_offset2);
-	  *mem1 = replace_equiv_address_nv (*mem1, addr1);
-	}
-      else
-	{
-	  rtx addr2 = plus_constant (Pmode, XEXP (*mem1, 0),
-				     expr_offset2 - expr_offset1);
-	  *mem2 = replace_equiv_address_nv (*mem2, addr2);
+	  if (base2)
+	    {
+	      rtx addr1 = plus_constant (Pmode, XEXP (*mem2, 0),
+					 expr_offset1 - expr_offset2);
+	      *mem1 = replace_equiv_address_nv (*mem1, addr1);
+	    }
+	  else
+	    {
+	      rtx addr2 = plus_constant (Pmode, XEXP (*mem1, 0),
+					 expr_offset2 - expr_offset1);
+	      *mem2 = replace_equiv_address_nv (*mem2, addr2);
+	    }
 	}
 
       return true;
@@ -24779,6 +24784,17 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
   return false;
 }
 
+/* Return true if MEM1 and MEM2 can be combined into a single access
+   of mode MODE, with the combined access having the same address as MEM1.  */
+
+bool
+aarch64_mergeable_load_pair_p (machine_mode mode, rtx mem1, rtx mem2)
+{
+  if (STRICT_ALIGNMENT && MEM_ALIGN (mem1) < GET_MODE_ALIGNMENT (mode))
+    return false;
+  return aarch64_check_consecutive_mems (&mem1, &mem2, nullptr);
+}
+
 /* Given OPERANDS of consecutive load/store, check if we can merge them
    into ldp/stp.  LOAD is true if they are load instructions.  MODE
    is the mode of memory operands.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-6.c b/gcc/testsuite/gcc.target/aarch64/vec-init-6.c
new file mode 100644
index 00000000000..96450157498
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-6.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+int64_t s64[2];
+float64_t f64[2];
+
+int64x2_t test_s64() { return (int64x2_t) { s64[0], s64[1] }; }
+float64x2_t test_f64() { return (float64x2_t) { f64[0], f64[1] }; }
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-7.c b/gcc/testsuite/gcc.target/aarch64/vec-init-7.c
new file mode 100644
index 00000000000..795895286db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mstrict-align" } */
+
+#include <arm_neon.h>
+
+int64_t s64[2] __attribute__((aligned(16)));
+float64_t f64[2] __attribute__((aligned(16)));
+
+int64x2_t test_s64() { return (int64x2_t) { s64[0], s64[1] }; }
+float64x2_t test_f64() { return (float64x2_t) { f64[0], f64[1] }; }
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */
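
[Editorial note: a simplified, self-contained sketch of the adjacency
logic above, for illustration only.  consecutive_p is a made-up
stand-in for the constant-offset case of aarch64_check_consecutive_mems
and uses plain integers instead of rtx offsets.]

    #include <stdbool.h>
    #include <stdint.h>

    /* Two accesses from the same base are mergeable in order when the
       second starts exactly where the first ends.  A null REVERSED
       mirrors the new contract: callers that cannot swap the operands
       (such as aarch64_mergeable_load_pair_p) pass NULL and only
       accept the in-order arrangement.  */
    static bool
    consecutive_p (int64_t offset1, int64_t size1,
                   int64_t offset2, int64_t size2, bool *reversed)
    {
      if (reversed)
        *reversed = false;
      if (offset1 + size1 == offset2)
        return true;
      if (offset2 + size2 == offset1 && reversed)
        {
          *reversed = true;
          return true;
        }
      return false;
    }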