* [PATCH v2 05/07] RISC-V: autovec: Add tuning and target vectorization hooks
@ 2023-03-06 3:16 Michael Collison
2023-03-06 7:55 ` Richard Biener
0 siblings, 1 reply; 2+ messages in thread
From: Michael Collison @ 2023-03-06 3:16 UTC (permalink / raw)
To: gcc-patches
This patch adds support for registering target hooks for basic
autovectorization support as well as basic tuning information for the
vector extension.
gcc/ChangeLog:
2023-03-02 Michael Collison <collison@rivosinc.com>
Juzhe Zhong <juzhe.zhong@rivai.ai>
* config/riscv/riscv-cores.def (RISCV_TUNE):
Add VECTOR_TUNE_INFO parameter.
* common/config/riscv/riscv-common.cc (RISCV_TUNE):
Add VECTOR_TUNE_INFO parameter.
* config/riscv/riscv.cc (riscv_vector_tune_param):
New struct for vector tuning information.
(riscv_tune_info): Add vector_tune_param.
(vector_tune_param): New static variable.
(riscv_vectorization_factor): New variable.
(generic_rvv_insn_scale_table): New struct.
(generic_rvv_stmt_scale_table): New struct.
(generic_rvv_insn_cost_table): New vector insn cost table.
(generic_rvv_stmt_cost_table): New vector statement
cost table.
(generic_rvv_tune_info): New rvv tuning table.
(RISCV_TUNE): Add VECTOR_TUNE_INFO parameter.
(riscv_rtx_costs): Return vector estimate if vector mode.
(riscv_option_override): Set vector_tune_param.
(riscv_option_override): Set riscv_vectorization_factor.
(riscv_estimated_poly_value): Implement
TARGET_ESTIMATED_POLY_VALUE.
(riscv_preferred_simd_mode): Implement
TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
(riscv_autovectorize_vector_modes): Implement
TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.
(riscv_get_mask_mode): Implement
TARGET_VECTORIZE_GET_MASK_MODE.
(riscv_empty_mask_is_expensive): Implement
TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE.
(riscv_builtin_vectorization_cost): Implement
TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.
(riscv_vectorize_create_costs): Implement
TARGET_VECTORIZE_CREATE_COSTS.
(TARGET_ESTIMATED_POLY_VALUE): Register target macro.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Ditto.
(TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Ditto.
(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES): Ditto.
(TARGET_VECTORIZE_GET_MASK_MODE): Ditto.
(TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Ditto.
(TARGET_VECTORIZE_LOOP_LEN_OVERRIDE_MASK): Ditto.
(TARGET_VECTORIZE_CREATE_COSTS): Ditto.
---
gcc/common/config/riscv/riscv-common.cc | 2 +-
gcc/config/riscv/riscv-cores.def | 14 +-
gcc/config/riscv/riscv.cc | 324 +++++++++++++++++++++++-
3 files changed, 328 insertions(+), 12 deletions(-)
diff --git a/gcc/common/config/riscv/riscv-common.cc
b/gcc/common/config/riscv/riscv-common.cc
index ebc1ed7d7e4..6b8d92af986 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -246,7 +246,7 @@ static const riscv_cpu_info riscv_cpu_tables[] =
static const char *riscv_tunes[] =
{
-#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO,
VECTOR_TUNE_INFO) \
TUNE_NAME,
#include "../../../config/riscv/riscv-cores.def"
NULL
diff --git a/gcc/config/riscv/riscv-cores.def
b/gcc/config/riscv/riscv-cores.def
index 2a834cae21d..4feb0366222 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -30,15 +30,15 @@
identifier, reference to riscv.cc. */
#ifndef RISCV_TUNE
-#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO, VECTOR_TUNE_INFO)
#endif
-RISCV_TUNE("rocket", generic, rocket_tune_info)
-RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
-RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
-RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
-RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
-RISCV_TUNE("size", generic, optimize_size_tune_info)
+RISCV_TUNE("rocket", generic, rocket_tune_info, generic_rvv_tune_info)
+RISCV_TUNE("sifive-3-series", generic, rocket_tune_info,
generic_rvv_tune_info)
+RISCV_TUNE("sifive-5-series", generic, rocket_tune_info,
generic_rvv_tune_info)
+RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info,
generic_rvv_tune_info)
+RISCV_TUNE("thead-c906", generic, thead_c906_tune_info,
generic_rvv_tune_info)
+RISCV_TUNE("size", generic, optimize_size_tune_info, generic_rvv_tune_info)
#undef RISCV_TUNE
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index befb9b498b7..44659062070 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -60,6 +60,16 @@ along with GCC; see the file COPYING3. If not see
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "cfgrtl.h"
+#include "sel-sched.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "gimple-expr.h"
+#include "tree-vectorizer.h"
+#include "riscv-vector-cost.h"
/* This file should be included last. */
#include "target-def.h"
@@ -238,6 +248,12 @@ struct riscv_tune_param
bool slow_unaligned_access;
};
+/* Cost tables for vector insns and vectorizer statements. */
+struct riscv_vector_tune_param {
+ const vector_insn_cost_table* rvv_insn_costs_table;
+ const vector_stmt_cost_table* rvv_stmt_costs_table;
+};
+
/* Information about one micro-arch we know about. */
struct riscv_tune_info {
/* This micro-arch canonical name. */
@@ -248,6 +264,9 @@ struct riscv_tune_info {
/* Tuning parameters for this micro-arch. */
const struct riscv_tune_param *tune_param;
+
+ /* Tuning vector parameters for this micro-arch. */
+ const struct riscv_vector_tune_param *vector_tune_param;
};
/* Global variables for machine-dependent things. */
@@ -266,6 +285,9 @@ static int epilogue_cfa_sp_offset;
/* Which tuning parameters to use. */
static const struct riscv_tune_param *tune_param;
+/* Which vector tuning parameters to use. */
+static const struct riscv_vector_tune_param *vector_tune_param;
+
/* Which automaton to use for tuning. */
enum riscv_microarchitecture_type riscv_microarchitecture;
@@ -275,6 +297,9 @@ poly_uint16 riscv_vector_chunks;
/* The number of bytes in a vector chunk. */
unsigned riscv_bytes_per_vector_chunk;
+/* Preferred vectorization factor for the auto-vectorizer. */
+unsigned riscv_vectorization_factor;
+
/* Index R is the smallest register class that contains register R. */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
GR_REGS, GR_REGS, GR_REGS, GR_REGS,
@@ -367,6 +392,47 @@ static const struct riscv_tune_param
optimize_size_tune_info = {
false, /* slow_unaligned_access */
};
+static const vector_insn_scale_table generic_rvv_insn_scale_table = {
+ 4, /* load */
+ 1, /* store */
+ 1, /* alu */
+ 1, /* mult */
+ 1, /* movi */
+ 1, /* dup */
+ 1, /* extract */
+ 1, /* if_then_else */
+};
+
+static const vector_stmt_scale_table generic_rvv_stmt_scale_table = {
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 1, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* vec_int_stmt_cost */
+ 1, /* vec_fp_stmt_cost */
+ 1, /* vec_permute_cost */
+ 1, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* vec_align_load_cost */
+ 1, /* vec_unalign_load_cost */
+ 1, /* vec_unalign_store_cost */
+ 1, /* vec_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1 /* cond_not_taken_branch_cost */
+};
+
+static const vector_insn_cost_table* generic_rvv_insn_cost_table =
+ new vector_insn_cost_table (&generic_rvv_insn_scale_table);
+
+static const vector_stmt_cost_table* generic_rvv_stmt_cost_table =
+ new vector_stmt_cost_table (&generic_rvv_stmt_scale_table);
+
+/* Costs to use when optimizing for riscv vector. */
+static const struct riscv_vector_tune_param generic_rvv_tune_info = {
+ generic_rvv_insn_cost_table,
+ generic_rvv_stmt_cost_table
+};
+
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int,
bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
@@ -403,8 +469,8 @@ static const unsigned gpr_save_reg_order[] = {
/* A table describing all the processors GCC knows about. */
static const struct riscv_tune_info riscv_tune_info_table[] = {
-#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
- { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO,
VECTOR_TUNE_INFO) \
+ { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO, &VECTOR_TUNE_INFO},
#include "riscv-cores.def"
};
@@ -2237,8 +2303,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
outer_code, int opno ATTRIBUTE_UN
Cost Model need to be well analyzed and supported in the future. */
if (riscv_v_ext_vector_mode_p (mode))
{
- *total = COSTS_N_INSNS (1);
- return true;
+ return vector_tune_param->rvv_insn_costs_table->get_cost (x,
mode, total,
+ speed);
}
bool float_mode_p = FLOAT_MODE_P (mode);
@@ -6080,6 +6146,7 @@ riscv_option_override (void)
RISCV_TUNE_STRING_DEFAULT));
riscv_microarchitecture = cpu->microarchitecture;
tune_param = optimize_size ? &optimize_size_tune_info : cpu->tune_param;
+ vector_tune_param = cpu->vector_tune_param;
/* Use -mtune's setting for slow_unaligned_access, even when optimizing
for size. For architectures that trap and emulate unaligned
accesses,
@@ -6199,6 +6266,10 @@ riscv_option_override (void)
/* Convert -march to a chunks count. */
riscv_vector_chunks = riscv_convert_vector_bits ();
+
+ if (TARGET_VECTOR)
+ riscv_vectorization_factor = riscv_vector_lmul;
+
}
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
@@ -6893,6 +6964,220 @@ riscv_dwarf_poly_indeterminate_value (unsigned
int i, unsigned int *factor,
return RISCV_DWARF_VLENB;
}
+/* Implement TARGET_ESTIMATED_POLY_VALUE.
+ Look into the tuning structure for an estimate.
+ KIND specifies the type of requested estimate: min, max or likely.
+ For cores with a known RVV width all three estimates are the same.
+ For generic RVV tuning we want to distinguish the maximum estimate from
+ the minimum and likely ones.
+ The likely estimate is the same as the minimum in that case to give a
+ conservative behavior of auto-vectorizing with RVV when it is a win
+ even for 128-bit RVV.
+ When RVV width information is available VAL.coeffs[1] is multiplied by
+ the number of 128-bit chunks beyond the first 128 bits of the vector. */
+
+static HOST_WIDE_INT
+riscv_estimated_poly_value (poly_int64 val,
+ poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
+{
+ unsigned int width_source = BITS_PER_RISCV_VECTOR.is_constant ()
+ ? (unsigned int) BITS_PER_RISCV_VECTOR.to_constant ()
+ : (unsigned int) RVV_SCALABLE;
+
+ /* If there is no core-specific information then the minimum and likely
+ values are based on 128-bit vectors and the maximum assumes 2048-bit
+ vectors (the factor of 15 below). */
+ if (width_source == RVV_SCALABLE)
+ switch (kind)
+ {
+ case POLY_VALUE_MIN:
+ case POLY_VALUE_LIKELY:
+ return val.coeffs[0];
+
+ case POLY_VALUE_MAX:
+ return val.coeffs[0] + val.coeffs[1] * 15;
+ }
+
+ /* Allow BITS_PER_RISCV_VECTOR to be a bitmask of different VL,
treating the
+ lowest as likely. This could be made more general if future -mtune
+ options need it to be. */
+ if (kind == POLY_VALUE_MAX)
+ width_source = 1 << floor_log2 (width_source);
+ else
+ width_source = least_bit_hwi (width_source);
+
+ /* If the core provides width information, use that. */
+ HOST_WIDE_INT over_128 = width_source - 128;
+ return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
+}
+
+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
+
+static machine_mode
+riscv_preferred_simd_mode (scalar_mode mode)
+{
+ machine_mode vmode =
+ riscv_vector::riscv_vector_preferred_simd_mode (mode,
+ riscv_vectorization_factor);
+ if (VECTOR_MODE_P (vmode))
+ return vmode;
+
+ return word_mode;
+}
+
+/* Implement TARGET_AUTOVECTORIZE_VECTOR_MODES for RVV. */
+static unsigned int
+riscv_autovectorize_vector_modes (vector_modes *modes, bool)
+{
+ if (!TARGET_VECTOR)
+ return 0;
+
+ if (riscv_vectorization_factor == RVV_LMUL1)
+ {
+ modes->safe_push (VNx16QImode);
+ modes->safe_push (VNx8QImode);
+ modes->safe_push (VNx4QImode);
+ modes->safe_push (VNx2QImode);
+ }
+ else if (riscv_vectorization_factor == RVV_LMUL2)
+ {
+ modes->safe_push (VNx32QImode);
+ modes->safe_push (VNx16QImode);
+ modes->safe_push (VNx8QImode);
+ modes->safe_push (VNx4QImode);
+ }
+ else if (riscv_vectorization_factor == RVV_LMUL4)
+ {
+ modes->safe_push (VNx64QImode);
+ modes->safe_push (VNx32QImode);
+ modes->safe_push (VNx16QImode);
+ modes->safe_push (VNx8QImode);
+ }
+ else
+ {
+ modes->safe_push (VNx64QImode);
+ modes->safe_push (VNx32QImode);
+ modes->safe_push (VNx16QImode);
+ }
+
+ return 0;
+}
+
+/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
+
+static opt_machine_mode
+riscv_get_mask_mode (machine_mode mode)
+{
+ machine_mode mask_mode = VOIDmode;
+ if (TARGET_VECTOR
+ && riscv_vector::riscv_vector_get_mask_mode (mode).exists
(&mask_mode))
+ return mask_mode;
+
+ return default_get_mask_mode (mode);
+}
+
+/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
+ it isn't worth branching around empty masked ops (including masked
+ stores). */
+
+static bool
+riscv_empty_mask_is_expensive (unsigned)
+{
+ return false;
+}
+
+/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
+int
+riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements;
+ bool fp = false;
+ rtx x = NULL_RTX;
+ machine_mode mode = VOIDmode;
+
+ if (vectype != NULL)
+ {
+ fp = FLOAT_TYPE_P (vectype);
+ mode = TYPE_MODE (vectype);
+ }
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return fp ?
vector_tune_param->rvv_stmt_costs_table->scalar_fp->cost (x,
+ mode)
+ : vector_tune_param->rvv_stmt_costs_table->scalar_int->cost (x,
+ mode);
+
+ case scalar_load:
+ return vector_tune_param->rvv_stmt_costs_table->scalar_load->cost (x,
+ mode);
+
+ case scalar_store:
+ return
vector_tune_param->rvv_stmt_costs_table->scalar_store->cost (x,
+ mode);
+
+ case vector_stmt:
+ return fp ? vector_tune_param->rvv_stmt_costs_table->vec_fp->cost (x,
+ mode)
+ : vector_tune_param->rvv_stmt_costs_table->vec_int->cost (x,
+ mode);
+
+ case vector_load:
+ return
vector_tune_param->rvv_stmt_costs_table->vec_align_load->cost (x,
+ mode);
+
+ case vector_store:
+ return vector_tune_param->rvv_stmt_costs_table->vec_store->cost
(x, mode);
+
+ case vec_to_scalar:
+ return
vector_tune_param->rvv_stmt_costs_table->vec_to_scalar->cost (x,
+ mode);
+
+ case scalar_to_vec:
+ return
vector_tune_param->rvv_stmt_costs_table->scalar_to_vec->cost (x,
+ mode);
+
+ case unaligned_load:
+ case vector_gather_load:
+ return
vector_tune_param->rvv_stmt_costs_table->vec_unalign_load->cost (x,
+ mode);
+
+ case unaligned_store:
+ case vector_scatter_store:
+ return
vector_tune_param->rvv_stmt_costs_table->vec_unalign_store->cost (
+ x,
+ mode);
+
+ case cond_branch_taken:
+ return
vector_tune_param->rvv_stmt_costs_table->cond_taken_branch->cost (
+ x,
+ mode);
+
+ case cond_branch_not_taken:
+ return vector_tune_param->rvv_stmt_costs_table->cond_not_taken_branch
+ ->cost (x, mode);
+
+ case vec_perm:
+ return vector_tune_param->rvv_stmt_costs_table->vec_permute->cost (x,
+ mode);
+
+ case vec_promote_demote:
+ return fp ? vector_tune_param->rvv_stmt_costs_table->vec_fp->cost (x,
+ mode)
+ : vector_tune_param->rvv_stmt_costs_table->vec_int->cost (x,
+ mode);
+
+ case vec_construct:
+ elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+ return elements / 2 + 1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Return true if a shift-amount matches the trailing cleared bits on
a bitmask. */
@@ -6902,6 +7187,13 @@ riscv_shamt_matches_mask_p (int shamt,
HOST_WIDE_INT mask)
return shamt == ctz_hwi (mask);
}
+/* Implement TARGET_VECTORIZE_CREATE_COSTS. */
+vector_costs *
+riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
+{
+ return new riscv_vector_costs (vinfo, costing_for_scalar);
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -7144,6 +7436,30 @@ riscv_shamt_matches_mask_p (int shamt,
HOST_WIDE_INT mask)
#undef TARGET_VERIFY_TYPE_CONTEXT
#define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context
+#undef TARGET_ESTIMATED_POLY_VALUE
+#define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
riscv_builtin_vectorization_cost
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
riscv_autovectorize_vector_modes
+
+#undef TARGET_VECTORIZE_GET_MASK_MODE
+#define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode
+
+#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
+#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
riscv_empty_mask_is_expensive
+
+#undef TARGET_VECTORIZE_LOOP_LEN_OVERRIDE_MASK
+#define TARGET_VECTORIZE_LOOP_LEN_OVERRIDE_MASK
riscv_loop_len_override_mask
+
+#undef TARGET_VECTORIZE_CREATE_COSTS
+#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
+
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment
--
2.34.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH v2 05/07] RISC-V: autovec: Add tuning and target vectorization hooks
2023-03-06 3:16 [PATCH v2 05/07] RISC-V: autovec: Add tuning and target vectorization hooks Michael Collison
@ 2023-03-06 7:55 ` Richard Biener
0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2023-03-06 7:55 UTC (permalink / raw)
To: Michael Collison; +Cc: gcc-patches
On Mon, Mar 6, 2023 at 4:16 AM Michael Collison <collison@rivosinc.com> wrote:
>
> This patch adds support for registering target hooks for basic
> autovectorization support as well as basic tuning information for the
> vector extension.
Btw, while tuning isn't established and autovect support is still
limited, I would suggest making the costing hooks reject all vectorization,
so that code is vectorized only with -fno-vect-cost-model (that's what the
basic vect.exp testsuite uses).
That allows collaborative development on trunk without surprising
users with unprofitable vectorization.
I agree that loads and stores are the first priority for any autovect
attempts because that is where you learn about the details and get
put on the right track.
Richard.
> gcc/ChangeLog:
>
> 2023-03-02 Michael Collison <collison@rivosinc.com>
> Juzhe Zhong <juzhe.zhong@rivai.ai>
>
> * config/riscv/riscv-cores.def (RISCV_TUNE):
> Add VECTOR_TUNE_INFO parameter and
> * common/config/riscv/riscv-common.cc (RISCV_TUNE):
> Add VECTOR_TUNE_INFO parameter.
> * config/riscv/riscv.cc (riscv_vector_tune_param):
> New struct for vector tuning information.
> (riscv_tune_info): add vector_tune_param.
> (vector_tune_param): New static variable.
> (riscv_vectorization_factor): New variable.
> (generic_rvv_insn_scale_table): New struct.
> (generic_rvv_stmt_scale_table): New struct.
> (generic_rvv_insn_cost_table): New vector insn cost table.
> (generic_rvv_stmt_cost_table): New vector statement
> cost table.
> (generic_rvv_tune_info): New rvv tuning table.
> (RISCV_TUNE): Add VECTOR_TUNE_INFO parameter.
> (riscv_rtx_costs): Return vector estimate if vector mode.
> (riscv_option_override): Set vector_tune_param.
> (riscv_option_override): Set riscv_vectorization_factor.
> (riscv_estimated_poly_value): Implement
> TARGET_ESTIMATED_POLY_VALUE.
> (riscv_preferred_simd_mode): Implement
> TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
> (riscv_autovectorize_vector_modes): Implement
> TARGET_AUTOVECTORIZE_VECTOR_MODES.
> (riscv_get_mask_mode): Implement
> TARGET_VECTORIZE_GET_MASK_MODE.
> (riscv_empty_mask_is_expensive): Implement
> TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE.
> (riscv_builtin_vectorization_cost): Implement
> TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.
> (riscv_vectorize_create_costs): Implement
> TARGET_VECTORIZE_CREATE_COSTS.
> (TARGET_ESTIMATED_POLY_VALUE): Register target macro.
> (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Ditto.
> (TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Ditto.
> (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES): Ditto.
> (TARGET_VECTORIZE_GET_MASK_MODE): Ditto.
> (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Ditto.
> (TARGET_VECTORIZE_LOOP_LEN_OVERRIDE_MASK): Ditto.
> (TARGET_VECTORIZE_CREATE_COSTS): Ditto
>
> ---
> gcc/common/config/riscv/riscv-common.cc | 2 +-
> gcc/config/riscv/riscv-cores.def | 14 +-
> gcc/config/riscv/riscv.cc | 324 +++++++++++++++++++++++-
> 3 files changed, 328 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc
> b/gcc/common/config/riscv/riscv-common.cc
> index ebc1ed7d7e4..6b8d92af986 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -246,7 +246,7 @@ static const riscv_cpu_info riscv_cpu_tables[] =
>
> static const char *riscv_tunes[] =
> {
> -#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
> +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO,
> VECTOR_TUNE_INFO) \
> TUNE_NAME,
> #include "../../../config/riscv/riscv-cores.def"
> NULL
> diff --git a/gcc/config/riscv/riscv-cores.def
> b/gcc/config/riscv/riscv-cores.def
> index 2a834cae21d..4feb0366222 100644
> --- a/gcc/config/riscv/riscv-cores.def
> +++ b/gcc/config/riscv/riscv-cores.def
> @@ -30,15 +30,15 @@
> identifier, reference to riscv.cc. */
>
> #ifndef RISCV_TUNE
> -#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
> +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO, VECTOR_TUNE_INFO)
> #endif
>
> -RISCV_TUNE("rocket", generic, rocket_tune_info)
> -RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
> -RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
> -RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
> -RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
> -RISCV_TUNE("size", generic, optimize_size_tune_info)
> +RISCV_TUNE("rocket", generic, rocket_tune_info, generic_rvv_tune_info)
> +RISCV_TUNE("sifive-3-series", generic, rocket_tune_info,
> generic_rvv_tune_info)
> +RISCV_TUNE("sifive-5-series", generic, rocket_tune_info,
> generic_rvv_tune_info)
> +RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info,
> generic_rvv_tune_info)
> +RISCV_TUNE("thead-c906", generic, thead_c906_tune_info,
> generic_rvv_tune_info)
> +RISCV_TUNE("size", generic, optimize_size_tune_info, generic_rvv_tune_info)
>
> #undef RISCV_TUNE
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index befb9b498b7..44659062070 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -60,6 +60,16 @@ along with GCC; see the file COPYING3. If not see
> #include "opts.h"
> #include "tm-constrs.h"
> #include "rtl-iter.h"
> +#include "gimple.h"
> +#include "cfghooks.h"
> +#include "cfgloop.h"
> +#include "cfgrtl.h"
> +#include "sel-sched.h"
> +#include "fold-const.h"
> +#include "gimple-iterator.h"
> +#include "gimple-expr.h"
> +#include "tree-vectorizer.h"
> +#include "riscv-vector-cost.h"
>
> /* This file should be included last. */
> #include "target-def.h"
> @@ -238,6 +248,12 @@ struct riscv_tune_param
> bool slow_unaligned_access;
> };
>
> +/* Cost for vector insn classes. */
> +struct riscv_vector_tune_param {
> + const vector_insn_cost_table* rvv_insn_costs_table;
> + const vector_stmt_cost_table* rvv_stmt_costs_table;
> +};
> +
> /* Information about one micro-arch we know about. */
> struct riscv_tune_info {
> /* This micro-arch canonical name. */
> @@ -248,6 +264,9 @@ struct riscv_tune_info {
>
> /* Tuning parameters for this micro-arch. */
> const struct riscv_tune_param *tune_param;
> +
> + /* Tuning vector parameters for this micro-arch. */
> + const struct riscv_vector_tune_param *vector_tune_param;
> };
>
> /* Global variables for machine-dependent things. */
> @@ -266,6 +285,9 @@ static int epilogue_cfa_sp_offset;
> /* Which tuning parameters to use. */
> static const struct riscv_tune_param *tune_param;
>
> +/* Which vector tuning parameters to use. */
> +static const struct riscv_vector_tune_param *vector_tune_param;
> +
> /* Which automaton to use for tuning. */
> enum riscv_microarchitecture_type riscv_microarchitecture;
>
> @@ -275,6 +297,9 @@ poly_uint16 riscv_vector_chunks;
> /* The number of bytes in a vector chunk. */
> unsigned riscv_bytes_per_vector_chunk;
>
> +/* Prefer vf for auto-vectorizer. */
> +unsigned riscv_vectorization_factor;
> +
> /* Index R is the smallest register class that contains register R. */
> const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
> GR_REGS, GR_REGS, GR_REGS, GR_REGS,
> @@ -367,6 +392,47 @@ static const struct riscv_tune_param
> optimize_size_tune_info = {
> false, /* slow_unaligned_access */
> };
>
> +static const vector_insn_scale_table generic_rvv_insn_scale_table = {
> + 4, /* load */
> + 1, /* store */
> + 1, /* alu */
> + 1, /* mult */
> + 1, /* movi */
> + 1, /* dup */
> + 1, /* extract */
> + 1, /* if_then_else */
> +};
> +
> +static const vector_stmt_scale_table generic_rvv_stmt_scale_table = {
> + 1, /* scalar_int_stmt_cost */
> + 1, /* scalar_fp_stmt_cost */
> + 1, /* scalar_load_cost */
> + 1, /* scalar_store_cost */
> + 1, /* vec_int_stmt_cost */
> + 1, /* vec_fp_stmt_cost */
> + 1, /* vec_permute_cost */
> + 1, /* vec_to_scalar_cost */
> + 1, /* scalar_to_vec_cost */
> + 1, /* vec_align_load_cost */
> + 1, /* vec_unalign_load_cost */
> + 1, /* vec_unalign_store_cost */
> + 1, /* vec_store_cost */
> + 1, /* cond_taken_branch_cost */
> + 1 /* cond_not_taken_branch_cost */
> +};
> +
> +static const vector_insn_cost_table* generic_rvv_insn_cost_table =
> + new vector_insn_cost_table (&generic_rvv_insn_scale_table);
> +
> +static const vector_stmt_cost_table* generic_rvv_stmt_cost_table =
> + new vector_stmt_cost_table (&generic_rvv_stmt_scale_table);
> +
> +/* Costs to use when optimizing for riscv vector. */
> +static const struct riscv_vector_tune_param generic_rvv_tune_info = {
> + generic_rvv_insn_cost_table,
> + generic_rvv_stmt_cost_table
> +};
> +
> static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int,
> bool *);
> static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
>
> @@ -403,8 +469,8 @@ static const unsigned gpr_save_reg_order[] = {
>
> /* A table describing all the processors GCC knows about. */
> static const struct riscv_tune_info riscv_tune_info_table[] = {
> -#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
> - { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
> +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO,
> VECTOR_TUNE_INFO) \
> + { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO, &VECTOR_TUNE_INFO},
> #include "riscv-cores.def"
> };
>
> @@ -2237,8 +2303,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
> outer_code, int opno ATTRIBUTE_UN
> Cost Model need to be well analyzed and supported in the future. */
> if (riscv_v_ext_vector_mode_p (mode))
> {
> - *total = COSTS_N_INSNS (1);
> - return true;
> + return vector_tune_param->rvv_insn_costs_table->get_cost (x,
> mode, total,
> + speed);
> }
>
> bool float_mode_p = FLOAT_MODE_P (mode);
> @@ -6080,6 +6146,7 @@ riscv_option_override (void)
> RISCV_TUNE_STRING_DEFAULT));
> riscv_microarchitecture = cpu->microarchitecture;
> tune_param = optimize_size ? &optimize_size_tune_info : cpu->tune_param;
> + vector_tune_param = cpu->vector_tune_param;
>
> /* Use -mtune's setting for slow_unaligned_access, even when optimizing
> for size. For architectures that trap and emulate unaligned
> accesses,
> @@ -6199,6 +6266,10 @@ riscv_option_override (void)
>
> /* Convert -march to a chunks count. */
> riscv_vector_chunks = riscv_convert_vector_bits ();
> +
> + if (TARGET_VECTOR)
> + riscv_vectorization_factor = riscv_vector_lmul;
> +
> }
>
> /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
> @@ -6893,6 +6964,220 @@ riscv_dwarf_poly_indeterminate_value (unsigned
> int i, unsigned int *factor,
> return RISCV_DWARF_VLENB;
> }
>
> +/* Implement TARGET_ESTIMATED_POLY_VALUE.
> + Look into the tuning structure for an estimate.
> + KIND specifies the type of requested estimate: min, max or likely.
> + For cores with a known RVV width all three estimates are the same.
> + For generic RVV tuning we want to distinguish the maximum estimate from
> + the minimum and likely ones.
> + The likely estimate is the same as the minimum in that case to give a
> + conservative behavior of auto-vectorizing with RVV when it is a win
> + even for 128-bit RVV.
> + When RVV width information is available VAL.coeffs[1] is multiplied by
> + the number of VQ chunks over the initial Advanced SIMD 128 bits. */
> +
> +static HOST_WIDE_INT
> +riscv_estimated_poly_value (poly_int64 val,
> + poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
> +{
> + unsigned int width_source = BITS_PER_RISCV_VECTOR.is_constant ()
> + ? (unsigned int) BITS_PER_RISCV_VECTOR.to_constant ()
> + : (unsigned int) RVV_SCALABLE;
> +
> + /* If there is no core-specific information then the minimum and likely
> + values are based on 128-bit vectors and the maximum is based on
> + the architectural maximum of 2048 bits. */
> + if (width_source == RVV_SCALABLE)
> + switch (kind)
> + {
> + case POLY_VALUE_MIN:
> + case POLY_VALUE_LIKELY:
> + return val.coeffs[0];
> +
> + case POLY_VALUE_MAX:
> + return val.coeffs[0] + val.coeffs[1] * 15;
> + }
> +
> + /* Allow BITS_PER_RISCV_VECTOR to be a bitmask of different VL,
> treating the
> + lowest as likely. This could be made more general if future -mtune
> + options need it to be. */
> + if (kind == POLY_VALUE_MAX)
> + width_source = 1 << floor_log2 (width_source);
> + else
> + width_source = least_bit_hwi (width_source);
> +
> + /* If the core provides width information, use that. */
> + HOST_WIDE_INT over_128 = width_source - 128;
> + return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
> +}
> +
> +/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
> +
> +static machine_mode
> +riscv_preferred_simd_mode (scalar_mode mode)
> +{
> + machine_mode vmode =
> + riscv_vector::riscv_vector_preferred_simd_mode (mode,
> + riscv_vectorization_factor);
> + if (VECTOR_MODE_P (vmode))
> + return vmode;
> +
> + return word_mode;
> +}
> +
> +/* Implement TARGET_AUTOVECTORIZE_VECTOR_MODES for RVV. */
> +static unsigned int
> +riscv_autovectorize_vector_modes (vector_modes *modes, bool)
> +{
> + if (!TARGET_VECTOR)
> + return 0;
> +
> + if (riscv_vectorization_factor == RVV_LMUL1)
> + {
> + modes->safe_push (VNx16QImode);
> + modes->safe_push (VNx8QImode);
> + modes->safe_push (VNx4QImode);
> + modes->safe_push (VNx2QImode);
> + }
> + else if (riscv_vectorization_factor == RVV_LMUL2)
> + {
> + modes->safe_push (VNx32QImode);
> + modes->safe_push (VNx16QImode);
> + modes->safe_push (VNx8QImode);
> + modes->safe_push (VNx4QImode);
> + }
> + else if (riscv_vectorization_factor == RVV_LMUL4)
> + {
> + modes->safe_push (VNx64QImode);
> + modes->safe_push (VNx32QImode);
> + modes->safe_push (VNx16QImode);
> + modes->safe_push (VNx8QImode);
> + }
> + else
> + {
> + modes->safe_push (VNx64QImode);
> + modes->safe_push (VNx32QImode);
> + modes->safe_push (VNx16QImode);
> + }
> +
> + return 0;
> +}
> +
> +/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
> +
> +static opt_machine_mode
> +riscv_get_mask_mode (machine_mode mode)
> +{
> + machine_mode mask_mode = VOIDmode;
> + if (TARGET_VECTOR
> + && riscv_vector::riscv_vector_get_mask_mode (mode).exists
> (&mask_mode))
> + return mask_mode;
> +
> + return default_get_mask_mode (mode);
> +}
> +
> +/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
> + it isn't worth branching around empty masked ops (including masked
> + stores). */
> +
> +static bool
> +riscv_empty_mask_is_expensive (unsigned)
> +{
> + return false;
> +}
> +
> +/* Implement targetm.vectorize.builtin_vectorization_cost. */
> +int
> +riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
> + tree vectype, int misalign ATTRIBUTE_UNUSED)
> +{
> + unsigned elements;
> + bool fp = false;
> + rtx x = NULL_RTX;
> + machine_mode mode = VOIDmode;
> +
> + if (vectype != NULL)
> + {
> + fp = FLOAT_TYPE_P (vectype);
> + mode = TYPE_MODE (vectype);
> + }
> +
> + switch (type_of_cost)
> + {
> + case scalar_stmt:
> + return fp ? vector_tune_param->rvv_stmt_costs_table->scalar_fp->cost (x,
> + mode)
> + : vector_tune_param->rvv_stmt_costs_table->scalar_int->cost (x,
> + mode);
> +
> + case scalar_load:
> + return vector_tune_param->rvv_stmt_costs_table->scalar_load->cost (x,
> + mode);
> +
> + case scalar_store:
> + return vector_tune_param->rvv_stmt_costs_table->scalar_store->cost (x,
> + mode);
> +
> + case vector_stmt:
> + return fp ? vector_tune_param->rvv_stmt_costs_table->vec_fp->cost (x,
> + mode)
> + : vector_tune_param->rvv_stmt_costs_table->vec_int->cost (x,
> + mode);
> +
> + case vector_load:
> + return vector_tune_param->rvv_stmt_costs_table->vec_align_load->cost (x,
> + mode);
> +
> + case vector_store:
> + return vector_tune_param->rvv_stmt_costs_table->vec_store->cost (x, mode);
> +
> + case vec_to_scalar:
> + return vector_tune_param->rvv_stmt_costs_table->vec_to_scalar->cost (x,
> + mode);
> +
> + case scalar_to_vec:
> + return vector_tune_param->rvv_stmt_costs_table->scalar_to_vec->cost (x,
> + mode);
> +
> + case unaligned_load:
> + case vector_gather_load:
> + return vector_tune_param->rvv_stmt_costs_table->vec_unalign_load->cost (x,
> + mode);
> +
> + case unaligned_store:
> + case vector_scatter_store:
> + return vector_tune_param->rvv_stmt_costs_table->vec_unalign_store->cost (
> + x,
> + mode);
> +
> + case cond_branch_taken:
> + return vector_tune_param->rvv_stmt_costs_table->cond_taken_branch->cost (
> + x,
> + mode);
> +
> + case cond_branch_not_taken:
> + return vector_tune_param->rvv_stmt_costs_table->cond_not_taken_branch
> + ->cost (x, mode);
> +
> + case vec_perm:
> + return vector_tune_param->rvv_stmt_costs_table->vec_permute->cost (x,
> + mode);
> +
> + case vec_promote_demote:
> + return fp ? vector_tune_param->rvv_stmt_costs_table->vec_fp->cost (x,
> + mode)
> + : vector_tune_param->rvv_stmt_costs_table->vec_int->cost (x,
> + mode);
> +
> + case vec_construct:
> + elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
> + return elements / 2 + 1;
> +
> + default:
> + gcc_unreachable ();
> + }
> +}
> +
> /* Return true if a shift-amount matches the trailing cleared bits on
> a bitmask. */
>
> @@ -6902,6 +7187,13 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
> return shamt == ctz_hwi (mask);
> }
>
> +/* Implement TARGET_VECTORIZE_CREATE_COSTS. */
> +vector_costs *
> +riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
> +{
> + return new riscv_vector_costs (vinfo, costing_for_scalar);
> +}
> +
> /* Initialize the GCC target structure. */
> #undef TARGET_ASM_ALIGNED_HI_OP
> #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
> @@ -7144,6 +7436,30 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
> #undef TARGET_VERIFY_TYPE_CONTEXT
> #define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context
>
> +#undef TARGET_ESTIMATED_POLY_VALUE
> +#define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value
> +
> +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
> +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST riscv_builtin_vectorization_cost
> +
> +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
> +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
> +
> +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
> +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES riscv_autovectorize_vector_modes
> +
> +#undef TARGET_VECTORIZE_GET_MASK_MODE
> +#define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode
> +
> +#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
> +#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive
> +
> +#undef TARGET_VECTORIZE_LOOP_LEN_OVERRIDE_MASK
> +#define TARGET_VECTORIZE_LOOP_LEN_OVERRIDE_MASK riscv_loop_len_override_mask
> +
> +#undef TARGET_VECTORIZE_CREATE_COSTS
> +#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
> +
> #undef TARGET_VECTOR_ALIGNMENT
> #define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment
>
> --
> 2.34.1
>
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-03-06 7:55 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-06 3:16 [PATCH v2 05/07] RISC-V: autovec: Add tuning and target vectorization hooks Michael Collison
2023-03-06 7:55 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).