public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Richard Sandiford <rsandifo@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-9156] aarch64: Tighten early-ra chain test for wide registers [PR113295] Date: Fri, 23 Feb 2024 14:13:29 +0000 (GMT) [thread overview] Message-ID: <20240223141329.10575385842A@sourceware.org> (raw) https://gcc.gnu.org/g:9f105cfdc1bca6c9224384b3044c4ca5894e1e4c commit r14-9156-g9f105cfdc1bca6c9224384b3044c4ca5894e1e4c Author: Richard Sandiford <richard.sandiford@arm.com> Date: Fri Feb 23 14:12:55 2024 +0000 aarch64: Tighten early-ra chain test for wide registers [PR113295] Most code in early-ra used is_chain_candidate to check whether we should chain two allocnos. This included both tests that matter for correctness and tests for certain heuristics. Once that test passes for one pair of allocnos, we test whether it's safe to chain the containing groups (which might contain multiple allocnos for x2, x3 and x4 modes). This test used an inline test for correctness only, deliberately skipping the heuristics. However, this instance of the test was missing some handling of equivalent allocnos. This patch fixes things by making is_chain_candidate take a strictness parameter: correctness only, or correctness + heuristics. It then makes the group-chaining test use the correctness version rather than trying to replicate it inline. gcc/ PR target/113295 * config/aarch64/aarch64-early-ra.cc (early_ra::test_strictness): New enum. (early_ra::is_chain_candidate): Add a strictness parameter to control whether only correctness matters, or whether both correctness and heuristics should be used. Handle multiple levels of equivalence. (early_ra::find_related_start): Update call accordingly. (early_ra::strided_polarity_pref): Likewise. (early_ra::form_chains): Likewise. (early_ra::try_to_chain_allocnos): Use is_chain_candidate in correctness mode rather than trying to inline the test. gcc/testsuite/ PR target/113295 * gcc.target/aarch64/pr113295-2.c: New test. Diff: --- gcc/config/aarch64/aarch64-early-ra.cc | 48 +++++++++++----------- gcc/testsuite/gcc.target/aarch64/pr113295-2.c | 57 +++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 23 deletions(-) diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc index 58ae5a49913a..9ac9ec1bb0db 100644 --- a/gcc/config/aarch64/aarch64-early-ra.cc +++ b/gcc/config/aarch64/aarch64-early-ra.cc @@ -95,6 +95,10 @@ public: void execute (); private: + // Whether to test only things that are required for correctness, + // or whether to take optimization heuristics into account as well. + enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS }; + static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits"); using operand_mask = uint32_t; @@ -452,7 +456,7 @@ private: template<unsigned int allocno_info::*field> static int cmp_increasing (const void *, const void *); - bool is_chain_candidate (allocno_info *, allocno_info *); + bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness); int rate_chain (allocno_info *, allocno_info *); static int cmp_chain_candidates (const void *, const void *); void chain_allocnos (unsigned int &, unsigned int &); @@ -1588,7 +1592,7 @@ early_ra::find_related_start (allocno_info *dest_allocno, return res; auto *next_allocno = m_allocnos[dest_allocno->copy_dest]; - if (!is_chain_candidate (dest_allocno, next_allocno)) + if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS)) return res; dest_allocno = next_allocno; @@ -2011,7 +2015,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1, if (allocno1->offset + 1 < allocno1->group_size && allocno2->offset + 1 < allocno2->group_size) { - if (is_chain_candidate (allocno1 + 1, allocno2 + 1)) + if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS)) return 1; else return -1; @@ -2019,7 +2023,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1, if (allocno1->offset > 0 && allocno2->offset > 0) { - if (is_chain_candidate (allocno1 - 1, allocno2 - 1)) + if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS)) return 1; else return -1; @@ -2215,38 +2219,37 @@ early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr) } // Return true if we should consider chaining ALLOCNO1 onto the head -// of ALLOCNO2. This is just a local test of the two allocnos; it doesn't -// guarantee that chaining them would give a self-consistent system. +// of ALLOCNO2. STRICTNESS says whether we should take copy-elision +// heuristics into account, or whether we should just consider things +// that matter for correctness. +// +// This is just a local test of the two allocnos; it doesn't guarantee +// that chaining them would give a self-consistent system. bool -early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2) +early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2, + test_strictness strictness) { if (allocno2->is_shared ()) return false; - if (allocno1->is_equiv) + while (allocno1->is_equiv) allocno1 = m_allocnos[allocno1->related_allocno]; if (allocno2->start_point >= allocno1->end_point && !allocno2->is_equiv_to (allocno1->id)) return false; - if (allocno2->is_strong_copy_dest) - { - if (!allocno1->is_strong_copy_src - || allocno1->copy_dest != allocno2->id) - return false; - } - else if (allocno2->is_copy_dest) + if (allocno1->is_earlyclobbered + && allocno1->end_point == allocno2->start_point + 1) + return false; + + if (strictness == ALL_REASONS && allocno2->is_copy_dest) { if (allocno1->copy_dest != allocno2->id) return false; - } - else if (allocno1->is_earlyclobbered) - { - if (allocno1->end_point == allocno2->start_point + 1) + if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src) return false; } - return true; } @@ -2470,8 +2473,7 @@ early_ra::try_to_chain_allocnos (allocno_info *allocno1, auto *head2 = m_allocnos[headi2]; if (head1->chain_next != INVALID_ALLOCNO) return false; - if (!head2->is_equiv_to (head1->id) - && head1->end_point <= head2->start_point) + if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY)) return false; } } @@ -2620,7 +2622,7 @@ early_ra::form_chains () auto *allocno2 = m_sorted_allocnos[sci]; if (allocno2->chain_prev == INVALID_ALLOCNO) { - if (!is_chain_candidate (allocno1, allocno2)) + if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS)) continue; chain_candidate_info candidate; candidate.allocno = allocno2; diff --git a/gcc/testsuite/gcc.target/aarch64/pr113295-2.c b/gcc/testsuite/gcc.target/aarch64/pr113295-2.c new file mode 100644 index 000000000000..6fa29bdfc057 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr113295-2.c @@ -0,0 +1,57 @@ +// { dg-do run } +// { dg-options "-O2" } + +#include <arm_neon.h> + +void __attribute__ ((noinline)) +foo (int8_t **ptr) +{ + int8x16_t v0 = vld1q_s8 (ptr[0]); + int8x16_t v1 = vld1q_s8 (ptr[1]); + int8x16_t v2 = vld1q_s8 (ptr[2]); + int8x16_t v3 = vld1q_s8 (ptr[3]); + int8x16_t v4 = vld1q_s8 (ptr[4]); + + int8x16x4_t res0 = { v0, v1, v2, v3 }; + vst4q_s8 (ptr[5], res0); + + int8x16_t add = vaddq_s8 (v2, v3); + int8x16x3_t res1 = { v1, add, v3 }; + vst3q_s8 (ptr[6], res1); + + int8x16x3_t res2 = { v0, v1, v2 }; + vst3q_s8 (ptr[7], res2); +} + +int8_t arr0[16] = { 1 }; +int8_t arr1[16] = { 2 }; +int8_t arr2[16] = { 4 }; +int8_t arr3[16] = { 8 }; +int8_t arr4[16] = { 16 }; +int8_t arr5[16 * 4]; +int8_t arr6[16 * 3]; +int8_t arr7[16 * 3]; +int8_t *ptr[] = +{ + arr0, + arr1, + arr2, + arr3, + arr4, + arr5, + arr6, + arr7 +}; + +int +main (void) +{ + foo (ptr); + if (arr5[0] != 1 || arr5[1] != 2 || arr5[2] != 4 || arr5[3] != 8) + __builtin_abort (); + if (arr6[0] != 2 || arr6[1] != 12 || arr6[2] != 8) + __builtin_abort (); + if (arr7[0] != 1 || arr7[1] != 2 || arr7[2] != 4) + __builtin_abort (); + return 0; +}
reply other threads:[~2024-02-23 14:13 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240223141329.10575385842A@sourceware.org \ --to=rsandifo@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).