public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r10-9423] aarch64: Add internal tune flag to minimise VL-based scalar ops
@ 2021-03-08  9:37 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2021-03-08  9:37 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e1da328ec157ded50659d07f044767abea79cda2

commit r10-9423-ge1da328ec157ded50659d07f044767abea79cda2
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Mon Mar 8 09:35:14 2021 +0000

    aarch64: Add internal tune flag to minimise VL-based scalar ops
    
    This is a backport of the cse_sve_vl_constants tuning param to GCC 10.
    
    Bootstrapped and tested on the branch on aarch64-none-linux-gnu.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-tuning-flags.def (cse_sve_vl_constants):
            Define.
            * config/aarch64/aarch64.md (add<mode>3): Force CONST_POLY_INT immediates
            into a register when the above is enabled.
            * config/aarch64/aarch64.c (neoversev1_tunings): Add
            AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS to tune_flags.
            (aarch64_rtx_costs): Use AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/cse_sve_vl_constants_1.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-tuning-flags.def             |  2 ++
 gcc/config/aarch64/aarch64.c                            | 17 ++++++++++++++---
 gcc/config/aarch64/aarch64.md                           |  8 ++++++++
 .../gcc.target/aarch64/sve/cse_sve_vl_constants_1.c     | 12 ++++++++++++
 4 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 52c7f4763f5..7677ec0b0b4 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -49,4 +49,6 @@ AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
 /* Prefer Advanced SIMD over SVE for auto-vectorization.  */
 AARCH64_EXTRA_TUNING_OPTION ("prefer_advsimd_autovec", PREFER_ADVSIMD_AUTOVEC)
 
+AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 46fe6835506..775f79d88ff 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1403,7 +1403,8 @@ static const struct tune_params neoversev1_tunings =
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC),	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC
+   | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS),	/* tune_flags.  */
   &generic_prefetch_tune
 };
 
@@ -12532,8 +12533,18 @@ cost_plus:
 	    *cost += rtx_cost (op0, mode, PLUS, 0, speed);
 
 	    if (speed)
-	      /* ADD (immediate).  */
-	      *cost += extra_cost->alu.arith;
+	      {
+		/* ADD (immediate).  */
+		*cost += extra_cost->alu.arith;
+
+		/* Some tunings prefer to not use the VL-based scalar ops.
+		   Increase the cost of the poly immediate to prevent their
+		   formation.  */
+		if (GET_CODE (op1) == CONST_POLY_INT
+		    && (aarch64_tune_params.extra_tuning_flags
+			& AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS))
+		  *cost += COSTS_N_INSNS (1);
+	      }
 	    return true;
 	  }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7f262e067ce..8f6bbcde904 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1898,6 +1898,14 @@
       && (!REG_P (op1)
 	 || !REGNO_PTR_FRAME_P (REGNO (op1))))
     operands[2] = force_reg (<MODE>mode, operands[2]);
+  /* Some tunings prefer to avoid VL-based operations.
+     Split off the poly immediate here.  The rtx costs hook will reject attempts
+     to combine them back.  */
+  else if (GET_CODE (operands[2]) == CONST_POLY_INT
+	   && can_create_pseudo_p ()
+	   && (aarch64_tune_params.extra_tuning_flags
+	       & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
   /* Expand polynomial additions now if the destination is the stack
      pointer, since we don't want to use that as a temporary.  */
   else if (operands[0] == stack_pointer_rtx
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cse_sve_vl_constants_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cse_sve_vl_constants_1.c
new file mode 100644
index 00000000000..dd04b667ee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cse_sve_vl_constants_1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -moverride=tune=cse_sve_vl_constants" } */
+
+void __attribute__((noinline, noclone))
+vadd (int *dst, int *op1, int *op2, int count)
+{
+  for (int i = 0; i < count; ++i)
+    dst[i] = op1[i] + op2[i];
+}
+
+/* { dg-final { scan-assembler-not {\tincw\tx[0-9]+} } } */
+


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-03-08  9:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-08  9:37 [gcc r10-9423] aarch64: Add internal tune flag to minimise VL-based scalar ops Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).