public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-9156] aarch64: Tighten early-ra chain test for wide registers [PR113295]
@ 2024-02-23 14:13 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2024-02-23 14:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9f105cfdc1bca6c9224384b3044c4ca5894e1e4c

commit r14-9156-g9f105cfdc1bca6c9224384b3044c4ca5894e1e4c
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Fri Feb 23 14:12:55 2024 +0000

    aarch64: Tighten early-ra chain test for wide registers [PR113295]
    
    Most code in early-ra used is_chain_candidate to check whether we
    should chain two allocnos.  This included both tests that matter
    for correctness and tests for certain heuristics.
    
    Once that test passes for one pair of allocnos, we test whether
    it's safe to chain the containing groups (which might contain
    multiple allocnos for x2, x3 and x4 modes).  This test used an
    inline test for correctness only, deliberately skipping the
    heuristics.  However, this instance of the test was missing
    some handling of equivalent allocnos.
    
    This patch fixes things by making is_chain_candidate take a
    strictness parameter: correctness only, or correctness + heuristics.
    It then makes the group-chaining test use the correctness version
    rather than trying to replicate it inline.
    
    gcc/
            PR target/113295
            * config/aarch64/aarch64-early-ra.cc
            (early_ra::test_strictness): New enum.
            (early_ra::is_chain_candidate): Add a strictness parameter to
            control whether only correctness matters, or whether both correctness
            and heuristics should be used.  Handle multiple levels of equivalence.
            (early_ra::find_related_start): Update call accordingly.
            (early_ra::strided_polarity_pref): Likewise.
            (early_ra::form_chains): Likewise.
            (early_ra::try_to_chain_allocnos): Use is_chain_candidate in
            correctness mode rather than trying to inline the test.
    
    gcc/testsuite/
            PR target/113295
            * gcc.target/aarch64/pr113295-2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-early-ra.cc        | 48 +++++++++++-----------
 gcc/testsuite/gcc.target/aarch64/pr113295-2.c | 57 +++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 58ae5a49913a..9ac9ec1bb0db 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -95,6 +95,10 @@ public:
   void execute ();
 
 private:
+  // Whether to test only things that are required for correctness,
+  // or whether to take optimization heuristics into account as well.
+  enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
+
   static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
   using operand_mask = uint32_t;
 
@@ -452,7 +456,7 @@ private:
 
   template<unsigned int allocno_info::*field>
   static int cmp_increasing (const void *, const void *);
-  bool is_chain_candidate (allocno_info *, allocno_info *);
+  bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
   int rate_chain (allocno_info *, allocno_info *);
   static int cmp_chain_candidates (const void *, const void *);
   void chain_allocnos (unsigned int &, unsigned int &);
@@ -1588,7 +1592,7 @@ early_ra::find_related_start (allocno_info *dest_allocno,
 	return res;
 
       auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
-      if (!is_chain_candidate (dest_allocno, next_allocno))
+      if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
 	return res;
 
       dest_allocno = next_allocno;
@@ -2011,7 +2015,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
   if (allocno1->offset + 1 < allocno1->group_size
       && allocno2->offset + 1 < allocno2->group_size)
     {
-      if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
+      if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
 	return 1;
       else
 	return -1;
@@ -2019,7 +2023,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
 
   if (allocno1->offset > 0 && allocno2->offset > 0)
     {
-      if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
+      if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
 	return 1;
       else
 	return -1;
@@ -2215,38 +2219,37 @@ early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
 }
 
 // Return true if we should consider chaining ALLOCNO1 onto the head
-// of ALLOCNO2.  This is just a local test of the two allocnos; it doesn't
-// guarantee that chaining them would give a self-consistent system.
+// of ALLOCNO2.  STRICTNESS says whether we should take copy-elision
+// heuristics into account, or whether we should just consider things
+// that matter for correctness.
+//
+// This is just a local test of the two allocnos; it doesn't guarantee
+// that chaining them would give a self-consistent system.
 bool
-early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
+early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
+			      test_strictness strictness)
 {
   if (allocno2->is_shared ())
     return false;
 
-  if (allocno1->is_equiv)
+  while (allocno1->is_equiv)
     allocno1 = m_allocnos[allocno1->related_allocno];
 
   if (allocno2->start_point >= allocno1->end_point
       && !allocno2->is_equiv_to (allocno1->id))
     return false;
 
-  if (allocno2->is_strong_copy_dest)
-    {
-      if (!allocno1->is_strong_copy_src
-	  || allocno1->copy_dest != allocno2->id)
-	return false;
-    }
-  else if (allocno2->is_copy_dest)
+  if (allocno1->is_earlyclobbered
+      && allocno1->end_point == allocno2->start_point + 1)
+    return false;
+
+  if (strictness == ALL_REASONS && allocno2->is_copy_dest)
     {
       if (allocno1->copy_dest != allocno2->id)
 	return false;
-    }
-  else if (allocno1->is_earlyclobbered)
-    {
-      if (allocno1->end_point == allocno2->start_point + 1)
+      if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
 	return false;
     }
-
   return true;
 }
 
@@ -2470,8 +2473,7 @@ early_ra::try_to_chain_allocnos (allocno_info *allocno1,
 	  auto *head2 = m_allocnos[headi2];
 	  if (head1->chain_next != INVALID_ALLOCNO)
 	    return false;
-	  if (!head2->is_equiv_to (head1->id)
-	      && head1->end_point <= head2->start_point)
+	  if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
 	    return false;
 	}
     }
@@ -2620,7 +2622,7 @@ early_ra::form_chains ()
 	  auto *allocno2 = m_sorted_allocnos[sci];
 	  if (allocno2->chain_prev == INVALID_ALLOCNO)
 	    {
-	      if (!is_chain_candidate (allocno1, allocno2))
+	      if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
 		continue;
 	      chain_candidate_info candidate;
 	      candidate.allocno = allocno2;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113295-2.c b/gcc/testsuite/gcc.target/aarch64/pr113295-2.c
new file mode 100644
index 000000000000..6fa29bdfc057
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113295-2.c
@@ -0,0 +1,57 @@
+// { dg-do run }
+// { dg-options "-O2" }
+
+#include <arm_neon.h>
+
+void __attribute__ ((noinline))
+foo (int8_t **ptr)
+{
+  int8x16_t v0 = vld1q_s8 (ptr[0]);
+  int8x16_t v1 = vld1q_s8 (ptr[1]);
+  int8x16_t v2 = vld1q_s8 (ptr[2]);
+  int8x16_t v3 = vld1q_s8 (ptr[3]);
+  int8x16_t v4 = vld1q_s8 (ptr[4]);
+
+  int8x16x4_t res0 = { v0, v1, v2, v3 };
+  vst4q_s8 (ptr[5], res0);
+
+  int8x16_t add = vaddq_s8 (v2, v3);
+  int8x16x3_t res1 = { v1, add, v3 };
+  vst3q_s8 (ptr[6], res1);
+
+  int8x16x3_t res2 = { v0, v1, v2 };
+  vst3q_s8 (ptr[7], res2);
+}
+
+int8_t arr0[16] = { 1 };
+int8_t arr1[16] = { 2 };
+int8_t arr2[16] = { 4 };
+int8_t arr3[16] = { 8 };
+int8_t arr4[16] = { 16 };
+int8_t arr5[16 * 4];
+int8_t arr6[16 * 3];
+int8_t arr7[16 * 3];
+int8_t *ptr[] =
+{
+  arr0,
+  arr1,
+  arr2,
+  arr3,
+  arr4,
+  arr5,
+  arr6,
+  arr7
+};
+
+int
+main (void)
+{
+  foo (ptr);
+  if (arr5[0] != 1 || arr5[1] != 2 || arr5[2] != 4 || arr5[3] != 8)
+    __builtin_abort ();
+  if (arr6[0] != 2 || arr6[1] != 12 || arr6[2] != 8)
+    __builtin_abort ();
+  if (arr7[0] != 1 || arr7[1] != 2 || arr7[2] != 4)
+    __builtin_abort ();
+  return 0;
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-02-23 14:13 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-23 14:13 [gcc r14-9156] aarch64: Tighten early-ra chain test for wide registers [PR113295] Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).