public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-7854] aarch64: Add costs for storing one element of a vector
@ 2021-03-26 16:10 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2021-03-26 16:10 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d1ff0847b2df6ad21f77e26e7e516643c5aa7d40

commit r11-7854-gd1ff0847b2df6ad21f77e26e7e516643c5aa7d40
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Fri Mar 26 16:08:31 2021 +0000

    aarch64: Add costs for storing one element of a vector
    
    Storing one element of a vector is costed as a vec_to_scalar
    followed by a scalar_store.  However, vec_to_scalar is also
    used for reductions and for vector-to-GPR moves, which makes
    it difficult to pick one cost for them all.
    
    This patch therefore adds a cost for extracting one element
    of a vector in preparation for storing it out.  The store
    itself is still costed separately.
    
    As with the previous patches, this one only becomes active if
    a CPU selects use_new_vector_costs.  It should therefore have
    a very low impact on other CPUs.
    
    gcc/
            * config/aarch64/aarch64-protos.h
            (simd_vec_cost::store_elt_extra_cost): New member variable.
            * config/aarch64/aarch64.c (generic_advsimd_vector_cost): Update
            accordingly, using the vec_to_scalar cost for the new field.
            (generic_sve_vector_cost, a64fx_advsimd_vector_cost): Likewise.
            (a64fx_sve_vector_cost, qdf24xx_advsimd_vector_cost): Likewise.
            (thunderx_advsimd_vector_cost, tsv110_advsimd_vector_cost): Likewise.
            (cortexa57_advsimd_vector_cost, exynosm1_advsimd_vector_cost)
            (xgene1_advsimd_vector_cost, thunderx2t99_advsimd_vector_cost)
            (thunderx3t110_advsimd_vector_cost): Likewise.
            (aarch64_detect_vector_stmt_subtype): Detect single-element stores.

Diff:
---
 gcc/config/aarch64/aarch64-protos.h |  4 ++++
 gcc/config/aarch64/aarch64.c        | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3d152754981..fabe3df7071 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -224,6 +224,10 @@ struct simd_vec_cost
   const int reduc_f32_cost;
   const int reduc_f64_cost;
 
+  /* Additional cost of storing a single vector element, on top of the
+     normal cost of a scalar store.  */
+  const int store_elt_extra_cost;
+
   /* Cost of a vector-to-scalar operation.  */
   const int vec_to_scalar_cost;
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8fb723dabd2..20bb75bd56c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -601,6 +601,7 @@ static const advsimd_vec_cost generic_advsimd_vector_cost =
   2, /* reduc_f16_cost  */
   2, /* reduc_f32_cost  */
   2, /* reduc_f64_cost  */
+  2, /* store_elt_extra_cost  */
   2, /* vec_to_scalar_cost  */
   1, /* scalar_to_vec_cost  */
   1, /* align_load_cost  */
@@ -626,6 +627,7 @@ static const sve_vec_cost generic_sve_vector_cost =
     2, /* reduc_f16_cost  */
     2, /* reduc_f32_cost  */
     2, /* reduc_f64_cost  */
+    2, /* store_elt_extra_cost  */
     2, /* vec_to_scalar_cost  */
     1, /* scalar_to_vec_cost  */
     1, /* align_load_cost  */
@@ -667,6 +669,7 @@ static const advsimd_vec_cost a64fx_advsimd_vector_cost =
   13, /* reduc_f16_cost  */
   13, /* reduc_f32_cost  */
   13, /* reduc_f64_cost  */
+  13, /* store_elt_extra_cost  */
   13, /* vec_to_scalar_cost  */
   4, /* scalar_to_vec_cost  */
   6, /* align_load_cost  */
@@ -691,6 +694,7 @@ static const sve_vec_cost a64fx_sve_vector_cost =
     13, /* reduc_f16_cost  */
     13, /* reduc_f32_cost  */
     13, /* reduc_f64_cost  */
+    13, /* store_elt_extra_cost  */
     13, /* vec_to_scalar_cost  */
     4, /* scalar_to_vec_cost  */
     6, /* align_load_cost  */
@@ -731,6 +735,7 @@ static const advsimd_vec_cost qdf24xx_advsimd_vector_cost =
   1, /* reduc_f16_cost  */
   1, /* reduc_f32_cost  */
   1, /* reduc_f64_cost  */
+  1, /* store_elt_extra_cost  */
   1, /* vec_to_scalar_cost  */
   1, /* scalar_to_vec_cost  */
   1, /* align_load_cost  */
@@ -768,6 +773,7 @@ static const advsimd_vec_cost thunderx_advsimd_vector_cost =
   2, /* reduc_f16_cost  */
   2, /* reduc_f32_cost  */
   2, /* reduc_f64_cost  */
+  2, /* store_elt_extra_cost  */
   2, /* vec_to_scalar_cost  */
   2, /* scalar_to_vec_cost  */
   3, /* align_load_cost  */
@@ -804,6 +810,7 @@ static const advsimd_vec_cost tsv110_advsimd_vector_cost =
   3, /* reduc_f16_cost  */
   3, /* reduc_f32_cost  */
   3, /* reduc_f64_cost  */
+  3, /* store_elt_extra_cost  */
   3, /* vec_to_scalar_cost  */
   2, /* scalar_to_vec_cost  */
   5, /* align_load_cost  */
@@ -839,6 +846,7 @@ static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
   8, /* reduc_f16_cost  */
   8, /* reduc_f32_cost  */
   8, /* reduc_f64_cost  */
+  8, /* store_elt_extra_cost  */
   8, /* vec_to_scalar_cost  */
   8, /* scalar_to_vec_cost  */
   4, /* align_load_cost  */
@@ -875,6 +883,7 @@ static const advsimd_vec_cost exynosm1_advsimd_vector_cost =
   3, /* reduc_f16_cost  */
   3, /* reduc_f32_cost  */
   3, /* reduc_f64_cost  */
+  3, /* store_elt_extra_cost  */
   3, /* vec_to_scalar_cost  */
   3, /* scalar_to_vec_cost  */
   5, /* align_load_cost  */
@@ -910,6 +919,7 @@ static const advsimd_vec_cost xgene1_advsimd_vector_cost =
   4, /* reduc_f16_cost  */
   4, /* reduc_f32_cost  */
   4, /* reduc_f64_cost  */
+  4, /* store_elt_extra_cost  */
   4, /* vec_to_scalar_cost  */
   4, /* scalar_to_vec_cost  */
   10, /* align_load_cost  */
@@ -946,6 +956,7 @@ static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost =
   6, /* reduc_f16_cost  */
   6, /* reduc_f32_cost  */
   6, /* reduc_f64_cost  */
+  6, /* store_elt_extra_cost  */
   6, /* vec_to_scalar_cost  */
   5, /* scalar_to_vec_cost  */
   4, /* align_load_cost  */
@@ -982,6 +993,7 @@ static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost =
   5, /* reduc_f16_cost  */
   5, /* reduc_f32_cost  */
   5, /* reduc_f64_cost  */
+  5, /* store_elt_extra_cost  */
   5, /* vec_to_scalar_cost  */
   5, /* scalar_to_vec_cost  */
   4, /* align_load_cost  */
@@ -14259,6 +14271,14 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
   if (aarch64_sve_mode_p (TYPE_MODE (vectype)))
     sve_costs = aarch64_tune_params.vec_costs->sve;
 
+  /* Detect cases in which vec_to_scalar is describing the extraction of a
+     vector element in preparation for a scalar store.  The store itself is
+     costed separately.  */
+  if (kind == vec_to_scalar
+      && STMT_VINFO_DATA_REF (stmt_info)
+      && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
+    return simd_costs->store_elt_extra_cost;
+
   /* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
   if (kind == vec_to_scalar
       && where == vect_body


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-03-26 16:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-26 16:10 [gcc r11-7854] aarch64: Add costs for storing one element of a vector Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).