public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-2691] aarch64: Add gather_load_xNN_cost tuning fields
@ 2021-08-03 12:01 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2021-08-03 12:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:78770e0e5d9fef70679e1db4eb2fb06596fbb2f8

commit r12-2691-g78770e0e5d9fef70679e1db4eb2fb06596fbb2f8
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Tue Aug 3 13:00:45 2021 +0100

    aarch64: Add gather_load_xNN_cost tuning fields
    
    This patch adds tuning fields for the total cost of a gather load
    instruction.  Until now, we've costed them as one scalar load
    per element instead.  Those scalar_load-based values are also
    what the patch uses to fill in the new fields for existing
    cost structures.
    
    gcc/
            * config/aarch64/aarch64-protos.h (sve_vec_cost):
            Add gather_load_x32_cost and gather_load_x64_cost.
            * config/aarch64/aarch64.c (generic_sve_vector_cost)
            (a64fx_sve_vector_cost, neoversev1_sve_vector_cost): Update
            accordingly, using the values given by the scalar_load * number
            of elements calculation that we used previously.
            (aarch64_detect_vector_stmt_subtype): Use the new fields.

Diff:
---
 gcc/config/aarch64/aarch64-protos.h |  9 +++++++++
 gcc/config/aarch64/aarch64.c        | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index fb4ce8e9f84..b91eeeba101 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -259,12 +259,16 @@ struct sve_vec_cost : simd_vec_cost
 			  unsigned int fadda_f16_cost,
 			  unsigned int fadda_f32_cost,
 			  unsigned int fadda_f64_cost,
+			  unsigned int gather_load_x32_cost,
+			  unsigned int gather_load_x64_cost,
 			  unsigned int scatter_store_elt_cost)
     : simd_vec_cost (base),
       clast_cost (clast_cost),
       fadda_f16_cost (fadda_f16_cost),
       fadda_f32_cost (fadda_f32_cost),
       fadda_f64_cost (fadda_f64_cost),
+      gather_load_x32_cost (gather_load_x32_cost),
+      gather_load_x64_cost (gather_load_x64_cost),
       scatter_store_elt_cost (scatter_store_elt_cost)
   {}
 
@@ -279,6 +283,11 @@ struct sve_vec_cost : simd_vec_cost
   const int fadda_f32_cost;
   const int fadda_f64_cost;
 
+  /* The cost of a gather load instruction.  The x32 value is for loads
+     of 32-bit elements and the x64 value is for loads of 64-bit elements.  */
+  const int gather_load_x32_cost;
+  const int gather_load_x64_cost;
+
   /* The per-element cost of a scatter store.  */
   const int scatter_store_elt_cost;
 };
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b14b6f22aec..36f11808916 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -675,6 +675,8 @@ static const sve_vec_cost generic_sve_vector_cost =
   2, /* fadda_f16_cost  */
   2, /* fadda_f32_cost  */
   2, /* fadda_f64_cost  */
+  4, /* gather_load_x32_cost  */
+  2, /* gather_load_x64_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
@@ -744,6 +746,8 @@ static const sve_vec_cost a64fx_sve_vector_cost =
   13, /* fadda_f16_cost  */
   13, /* fadda_f32_cost  */
   13, /* fadda_f64_cost  */
+  64, /* gather_load_x32_cost  */
+  32, /* gather_load_x64_cost  */
   1 /* scatter_store_elt_cost  */
 };
 
@@ -1739,6 +1743,8 @@ static const sve_vec_cost neoversev1_sve_vector_cost =
   19, /* fadda_f16_cost  */
   11, /* fadda_f32_cost  */
   8, /* fadda_f64_cost  */
+  32, /* gather_load_x32_cost  */
+  16, /* gather_load_x64_cost  */
   3 /* scatter_store_elt_cost  */
 };
 
@@ -14958,6 +14964,19 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
       && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
     return simd_costs->store_elt_extra_cost;
 
+  /* Detect SVE gather loads, which are costed as a single scalar_load
+     for each element.  We therefore need to divide the full-instruction
+     cost by the number of elements in the vector.  */
+  if (kind == scalar_load
+      && sve_costs
+      && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+    {
+      unsigned int nunits = vect_nunits_for_cost (vectype);
+      if (GET_MODE_UNIT_BITSIZE (TYPE_MODE (vectype)) == 64)
+	return { sve_costs->gather_load_x64_cost, nunits };
+      return { sve_costs->gather_load_x32_cost, nunits };
+    }
+
   /* Detect cases in which a scalar_store is really storing one element
      in a scatter operation.  */
   if (kind == scalar_store


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-08-03 12:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-03 12:01 [gcc r12-2691] aarch64: Add gather_load_xNN_cost tuning fields Richard Sandiford

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).