[gcc r12-2758] aarch64: Don't include vec_select high-half in SIMD add cost

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-2758] aarch64: Don't include vec_select high-half in SIMD add cost
@ 2021-08-05 10:53 Jonathan Wright
  0 siblings, 0 replies; only message in thread
From: Jonathan Wright @ 2021-08-05 10:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8cd27a3b25558e5be7f8595fc1c828bc46641671

commit r12-2758-g8cd27a3b25558e5be7f8595fc1c828bc46641671
Author: Jonathan Wright <jonathan.wright@arm.com>
Date:   Wed Jul 28 15:49:29 2021 +0100

    aarch64: Don't include vec_select high-half in SIMD add cost
    
    The Neon add-long/add-widen instructions can select the top or bottom
    half of the operand registers. This selection does not change the
    cost of the underlying instruction and this should be reflected by
    the RTL cost function.
    
    This patch adds RTL tree traversal in the Neon add cost function to
    match vec_select high-half of its operands. This traversal prevents
    the cost of the vec_select from being added into the cost of the
    add - meaning that these instructions can now be emitted in the
    combine pass as they are no longer deemed prohibitively expensive.
    
    gcc/ChangeLog:
    
    2021-07-28  Jonathan Wright  <jonathan.wright@arm.com>
    
            * config/aarch64/aarch64.c: Traverse RTL tree to prevent cost
            of vec_select high-half from being added into Neon add cost.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/vaddX_high_cost.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.c                       | 15 +++++++++
 gcc/testsuite/gcc.target/aarch64/vaddX_high_cost.c | 38 ++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e02cbcbcb38..aa687c57946 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13161,6 +13161,21 @@ cost_minus:
 	op1 = XEXP (x, 1);
 
 cost_plus:
+	if (VECTOR_MODE_P (mode))
+	  {
+	    /* ADDL2 and ADDW2.  */
+	    unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+	    if (vec_flags & VEC_ADVSIMD)
+	      {
+		/* The select-operand-high-half versions of the add instruction
+		   have the same cost as the regular three vector version -
+		   don't add the costs of the select into the costs of the add.
+		   */
+		op0 = aarch64_strip_extend_vec_half (op0);
+		op1 = aarch64_strip_extend_vec_half (op1);
+	      }
+	  }
+
 	if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
 	    || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
 	  {
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddX_high_cost.c b/gcc/testsuite/gcc.target/aarch64/vaddX_high_cost.c
new file mode 100644
index 00000000000..43f28d597a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddX_high_cost.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+
+#define TEST_ADDL(rettype, intype, ts, rs) \
+  rettype test_vaddl_ ## ts (intype a, intype b, intype c) \
+	{ \
+		rettype t0 = vaddl_ ## ts (vget_high_ ## ts (a), \
+					   vget_high_ ## ts (c)); \
+		rettype t1 = vaddl_ ## ts (vget_high_ ## ts (b), \
+					   vget_high_ ## ts (c)); \
+		return vaddq ## _ ## rs (t0, t1); \
+	}
+
+TEST_ADDL (int16x8_t, int8x16_t, s8, s16)
+TEST_ADDL (uint16x8_t, uint8x16_t, u8, u16)
+TEST_ADDL (int32x4_t, int16x8_t, s16, s32)
+TEST_ADDL (uint32x4_t, uint16x8_t, u16, u32)
+TEST_ADDL (int64x2_t, int32x4_t, s32, s64)
+TEST_ADDL (uint64x2_t, uint32x4_t, u32, u64)
+
+#define TEST_ADDW(rettype, intype, intypel, ts, rs) \
+  rettype test_vaddw_ ## ts (intype a, intype b, intypel c) \
+	{ \
+		rettype t0 = vaddw_ ## ts (a, vget_high_ ## ts (c)); \
+		rettype t1 = vaddw_ ## ts (b, vget_high_ ## ts (c)); \
+		return vaddq ## _ ## rs (t0, t1); \
+	}
+
+TEST_ADDW (int16x8_t, int16x8_t, int8x16_t, s8, s16)
+TEST_ADDW (uint16x8_t, uint16x8_t, uint8x16_t, u8, u16)
+TEST_ADDW (int32x4_t, int32x4_t, int16x8_t, s16, s32)
+TEST_ADDW (uint32x4_t, uint32x4_t, uint16x8_t, u16, u32)
+TEST_ADDW (int64x2_t, int64x2_t, int32x4_t, s32, s64)
+TEST_ADDW (uint64x2_t, uint64x2_t, uint32x4_t, u32, u64)
+
+/* { dg-final { scan-assembler-not "dup\\t" } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-08-05 10:53 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-05 10:53 [gcc r12-2758] aarch64: Don't include vec_select high-half in SIMD add cost Jonathan Wright

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).