* aarch64: Add scheduling model for Neoverse V1
@ 2023-05-07 22:49 Evandro Menezes
From: Evandro Menezes @ 2023-05-07 22:49 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 260 bytes --]

This patch adds a scheduling model for the Neoverse V1, based on information from the “Arm Neoverse V1 Software Optimization Guide” and on static and dynamic analysis of internal and public benchmarks.  Benchmark results are forthcoming.
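
For reference, each entry in the new description pairs an instruction "type" attribute with a latency and a reservation of the modelled pipelines.  A shortened sketch of the pattern used throughout the attached file (the integer ALU entry, with its type list abbreviated):

  (define_insn_reservation "neoverse_v1_alu" 1
    (and (eq_attr "tune" "neoversev1")
         (eq_attr "type" "alu_imm, alu_sreg, logic_imm, logic_reg"))
    "neov1_i")

Here "neov1_i" stands for any of the two simple or two complex integer pipelines defined at the top of the file.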

-- 
Evandro Menezes

[-- Attachment #2: 0001-aarch64-Add-scheduling-model-for-Neoverse-V1.patch --]
[-- Type: application/octet-stream, Size: 29948 bytes --]

From 3ca533a5fa7ac01bf09c344a32097910caf66734 Mon Sep 17 00:00:00 2001
From: Evandro Menezes <evandro@gcc.gnu.org>
Date: Fri, 28 Apr 2023 21:19:44 -0500
Subject: [PATCH] aarch64: Add scheduling model for Neoverse V1

gcc/ChangeLog:

	* config/aarch64/aarch64-cores.def (neoverse-v1, neoverse-512tvb):
	Use the Neoverse V1 scheduling model.
	* config/aarch64/aarch64.md: Include neoverse-v1.md.
	* config/aarch64/neoverse-v1.md: New file.

Signed-off-by: Evandro Menezes <evandro@gcc.gnu.org>
---
 gcc/config/aarch64/aarch64-cores.def |   4 +-
 gcc/config/aarch64/aarch64.md        |   1 +
 gcc/config/aarch64/neoverse-v1.md    | 714 +++++++++++++++++++++++++++
 3 files changed, 717 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/aarch64/neoverse-v1.md

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index cc842c4e22c..5af5076b834 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -144,8 +144,8 @@ AARCH64_CORE("thunderx3t110",  thunderx3t110,  thunderx3t110, V8_3A,  (CRYPTO, S
 
 /* Arm ('A') cores.  */
 AARCH64_CORE("zeus", zeus, cortexa57, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, neoversev1, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, neoversev1, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
 
 /* Qualcomm ('Q') cores. */
 AARCH64_CORE("saphira",     saphira,    saphira,    V8_4A,  (CRYPTO), saphira,   0x51, 0xC01, -1)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index fa8a65b13fd..2899ed4d6d8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -464,6 +464,7 @@
 (include "../arm/exynos-m1.md")
 (include "falkor.md")
 (include "neoverse-n1.md")
+(include "neoverse-v1.md")
 (include "saphira.md")
 (include "thunderx.md")
 (include "../arm/xgene1.md")
diff --git a/gcc/config/aarch64/neoverse-v1.md b/gcc/config/aarch64/neoverse-v1.md
new file mode 100644
index 00000000000..77216427c29
--- /dev/null
+++ b/gcc/config/aarch64/neoverse-v1.md
@@ -0,0 +1,714 @@
+;; Arm Neoverse V1 pipeline description
+;; (Based on the "Arm Neoverse V1 Software Optimization Guide")
+;;
+;; Copyright (C) 2014-2023 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The Neoverse V1 core is modelled as a multiple-issue pipeline that has
+;; the following functional units.
+
+(define_automaton "neoverse_v1")
+
+;; 1 - Pipelines for integer operations: SX1, SX2.
+
+(define_cpu_unit "neov1_sx1" "neoverse_v1")
+(define_cpu_unit "neov1_sx2" "neoverse_v1")
+
+;; 2 - Pipelines for complex integer operations: MX1, MX2.
+
+(define_cpu_unit "neov1_mx1" "neoverse_v1")
+(define_cpu_unit "neov1_mx2" "neoverse_v1")
+
+;; 3 - Asymmetric pipelines for Neon and FP operations: CX1, CX2, CX3, CX4.
+
+(define_automaton "neoverse_v1_cx")
+
+(define_cpu_unit "neov1_cx1" "neoverse_v1_cx")
+(define_cpu_unit "neov1_cx2" "neoverse_v1_cx")
+(define_cpu_unit "neov1_cx3" "neoverse_v1_cx")
+(define_cpu_unit "neov1_cx4" "neoverse_v1_cx")
+
+;; 4 - Pipelines for branch operations: BX1, BX2.
+
+(define_cpu_unit "neov1_bx1" "neoverse_v1")
+(define_cpu_unit "neov1_bx2" "neoverse_v1")
+
+;; 5 - Pipelines for load and store operations: LS1, LS2, LD1.
+
+(define_automaton "neoverse_v1_ls")
+
+(define_cpu_unit "neov1_ls1" "neoverse_v1_ls")
+(define_cpu_unit "neov1_ls2" "neoverse_v1_ls")
+(define_cpu_unit "neov1_ld1" "neoverse_v1_ls")
+
+;; 6 - Pipelines for data stores: SD1, SD2.
+
+(define_cpu_unit "neov1_sd1" "neoverse_v1_ls")
+(define_cpu_unit "neov1_sd2" "neoverse_v1_ls")
+
+;; Issue groups.
+
+(define_reservation "neov1_b" "(neov1_bx1 | neov1_bx2)")
+(define_reservation "neov1_s" "(neov1_sx1 | neov1_sx2)")
+(define_reservation "neov1_i" "(neov1_sx1 | neov1_sx2 | neov1_mx1 | neov1_mx2)")
+(define_reservation "neov1_m" "(neov1_mx1 | neov1_mx2)")
+(define_reservation "neov1_m0" "neov1_mx1")
+(define_reservation "neov1_l01" "(neov1_ls1 | neov1_ls2)")
+(define_reservation "neov1_l" "(neov1_ls1 | neov1_ls2 | neov1_ld1)")
+(define_reservation "neov1_d" "(neov1_sd1 | neov1_sd2)")
+(define_reservation "neov1_v" "(neov1_cx1 | neov1_cx2 | neov1_cx3 | neov1_cx4)")
+(define_reservation "neov1_v01" "(neov1_cx1 | neov1_cx2)")
+(define_reservation "neov1_v02" "(neov1_cx1 | neov1_cx3)")
+(define_reservation "neov1_v13" "(neov1_cx1 | neov1_cx4)")
+(define_reservation "neov1_v0" "neov1_cx1")
+(define_reservation "neov1_v1" "neov1_cx2")
+
+;; Blocking.
+
+(define_reservation "neov1_all_block" "neov1_sx1 + neov1_sx2
+                                       + neov1_mx1 + neov1_mx2
+                                       + neov1_cx1 + neov1_cx2
+                                       + neov1_cx3 + neov1_cx4
+                                       + neov1_ls1 + neov1_ls2
+                                       + neov1_ld1
+                                       + neov1_sd1 + neov1_sd2")
+
+(define_reservation "neov1_m0_block" "neov1_m0")
+
+(define_reservation "neov1_v02_block" "neov1_v02")
+
+;; Instructions.
+
+;; Block.
+(define_insn_reservation "neoverse_v1_block" 1
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "block"))
+  "neov1_all_block")
+
+;; Branches
+;; No latency as there is no result.
+(define_insn_reservation "neoverse_v1_branch" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "branch"))
+  "neov1_b")
+
+;; Calls
+;; No latency as there is no result.
+(define_insn_reservation "neoverse_v1_call" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "call"))
+  "neov1_b + neov1_s")
+
+;; ALU with no or simple shift.
+;; TODO: there should also be "alus_shift_imm_lsl_1to4".
+(define_insn_reservation "neoverse_v1_alu" 1
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "alu_imm, alu_shift_imm_lsl_1to4, alu_sreg, \
+                        alus_imm, alus_sreg, \
+                        csel, \
+                        logic_imm, logic_reg, logic_shift_imm, \
+                        logics_imm, logics_reg, \
+                        mov_reg, \
+                        shift_reg"))
+  "neov1_i")
+
+;; ALU with extension or complex shift.
+;; TODO: there should also be "alus_shift_imm_other".
+(define_insn_reservation "neoverse_v1_alu_shift" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "alu_ext, \
+                        alu_shift_imm_other, alu_shift_reg, \
+                        alus_shift_imm, alus_shift_reg, \
+                        logic_shift_reg, \
+                        logics_shift_imm, logics_shift_reg, \
+                        crc"))
+  "neov1_m")
+
+;; Miscellaneous ALU.
+;; TODO: model 2-register "extr", "bfi", variable shifts.
+(define_insn_reservation "neoverse_v1_alu_misc" 1
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "adr, rotate_imm, bfm, clz, mov_imm, rbit, rev"))
+  "neov1_i")
+
+;; Integer divide.
+;; Divisions are not pipelined.
+(define_insn_reservation "neoverse_v1_div" 12
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "udiv, sdiv"))
+  "neov1_m0, (neov1_m0_block * 12)")
+
+;; Multiply.
+;; TODO: model the high forms.
+(define_insn_reservation "neoverse_v1_mul" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "mul, smull, umull"))
+  "neov1_m")
+
+;; Multiply accumulate.
+(define_insn_reservation "neoverse_v1_mla" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "mla, smlal, umlal"))
+  "neov1_m0")
+
+;; Integer load.
+(define_insn_reservation "neoverse_v1_ld" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "load_byte, load_4, load_8"))
+  "neov1_l")
+
+(define_insn_reservation "neoverse_v1_ld16" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "load_16"))
+  "neov1_l01 * 2")
+
+;; Integer store.
+(define_insn_reservation "neoverse_v1_st" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "store_4, store_8, store_16"))
+  "neov1_l01, neov1_d")
+
+;; FP arithmetic.
+(define_insn_reservation "neoverse_v1_fp_alu" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_minmaxd, f_minmaxs, \
+                        faddd, fadds, \
+                        fconstd, fconsts, \
+                        ffarithd, ffariths, \
+                        fmov"))
+  "neov1_v")
+
+;; FP compare.
+(define_insn_reservation "neoverse_v1_fp_cmp" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fcmpd, fcmps, fccmpd, fccmps"))
+  "neov1_v0")
+
+(define_insn_reservation "neoverse_v1_fp_csel" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fcsel"))
+  "neov1_v01")
+
+;; FP round.
+(define_insn_reservation "neoverse_v1_fp_rint" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_rintd, f_rints"))
+  "neov1_v02")
+
+;; FP divide & square-root.
+;; Divide & square-root are not pipelined.
+(define_insn_reservation "neoverse_v1_fp_divd" 15
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fdivd"))
+  "neov1_v02, (neov1_v02_block * 7)")
+
+(define_insn_reservation "neoverse_v1_fp_divs" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fdivs"))
+  "neov1_v02, (neov1_v02_block * 4)")
+
+(define_insn_reservation "neoverse_v1_fp_sqrd" 16
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fsqrtd"))
+  "neov1_v02, (neov1_v02_block * 8)")
+
+(define_insn_reservation "neoverse_v1_fp_sqrs" 9
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fsqrts"))
+  "neov1_v02, (neov1_v02_block * 4)")
+
+;; FP multiply.
+(define_insn_reservation "neoverse_v1_fp_mul" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fmuld, fmuls"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_fp_mac" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fmacd, fmacs"))
+  "neov1_v")
+
+;; FP convert.
+(define_insn_reservation "neoverse_v1_fp_cvt" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_cvt"))
+  "neov1_v02")
+
+(define_insn_reservation "neoverse_v1_fp_cvti2f" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_cvti2f"))
+  "neov1_m0")
+
+(define_insn_reservation "neoverse_v1_fp_cvtf2i" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_cvtf2i"))
+  "neov1_v0")
+
+;; FP move.
+(define_insn_reservation "neoverse_v1_fp_mov" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fconstd, fconsts, \
+                        fmov"))
+  "neov1_v")
+
+;; TODO: model the high form.
+(define_insn_reservation "neoverse_v1_fp_movi2f" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_mcr"))
+  "neov1_m0")
+
+(define_insn_reservation "neoverse_v1_fp_movf2i" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_mrc, \
+                        neon_to_gp, neon_to_gp_q"))
+  "neov1_v1")
+
+;; FP load.
+(define_insn_reservation "neoverse_v1_fp_ld" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_loadd, f_loads, \
+                        neon_ldp"))
+  "neov1_l")
+
+(define_insn_reservation "neoverse_v1_fp_ldp_q" 7
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_ldp_q"))
+  "(neov1_l * 2)")
+
+;; FP store.
+(define_insn_reservation "neoverse_v1_fp_st" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_stored, f_stores, \
+                        neon_stp, neon_stp_q"))
+  "neov1_l01, neov1_v01")
+
+;; ASIMD arithmetic.
+(define_insn_reservation "neoverse_v1_asimd_abd_long" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_abd_long"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_alu" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_abd, neon_abd_q, \
+                        neon_abs, neon_abs_q, \
+                        neon_add, neon_add_q, \
+                        neon_add_halve, neon_add_halve_q, \
+                        neon_add_halve_narrow_q, \
+                        neon_add_long, neon_add_widen, \
+                        neon_bsl, neon_bsl_q, \
+                        neon_cls, neon_cls_q, \
+                        neon_compare, neon_compare_q, \
+                        neon_compare_zero, neon_compare_zero_q, \
+                        neon_dup, neon_dup_q, \
+                        neon_ext, neon_ext_q, \
+                        neon_ins, neon_ins_q, \
+                        neon_logic, neon_logic_q, \
+                        neon_minmax, neon_minmax_q, \
+                        neon_move, neon_move_q, \
+                        neon_move_narrow_q, \
+                        neon_neg, neon_neg_q, \
+                        neon_permute, neon_permute_q, \
+                        neon_qabs, neon_qabs_q, \
+                        neon_qadd, neon_qadd_q, \
+                        neon_qneg, neon_qneg_q, \
+                        neon_qsub, neon_qsub_q, \
+                        neon_rbit, neon_rbit_q, \
+                        neon_reduc_add, neon_reduc_add_q, \
+                        neon_rev, neon_rev_q, \
+                        neon_sub, neon_sub_q, \
+                        neon_sub_halve, neon_sub_halve_q, \
+                        neon_sub_halve_narrow_q, \
+                        neon_sub_long, neon_sub_widen"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_arith_acc" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_arith_acc, neon_arith_acc_q, \
+                        neon_shift_acc, neon_shift_acc_q"))
+  "neov1_v13")
+
+;; TODO: model all forms.
+(define_insn_reservation "neoverse_v1_asimd_reduc" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_reduc_add_long, \
+                        neon_reduc_minmax, neon_reduc_minmax_q"))
+  "neov1_v13")
+
+;; ASIMD multiply.
+;; TODO: model BF form.
+(define_insn_reservation "neoverse_v1_asimd_dot" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_dot, neon_dot_q"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_mla" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_mla_b, neon_mla_b_q, neon_mla_b_long, \
+                        neon_mla_h, neon_mla_h_q, neon_mla_h_long, \
+                        neon_mla_h_scalar, neon_mla_h_scalar_q, \
+                        neon_mla_h_scalar_long, \
+                        neon_mla_s, neon_mla_s_long, \
+                        neon_mla_s_scalar, neon_mla_s_scalar_q, \
+                        neon_mla_s_scalar_long"))
+  "neov1_v02")
+
+(define_insn_reservation "neoverse_v1_asimd_mla_q" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_mla_s_q"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_mla_sat" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_sat_mla_b_long, \
+                        neon_sat_mla_h_long, neon_sat_mla_h_scalar_long, \
+                        neon_sat_mla_s_long, neon_sat_mla_s_scalar_long"))
+  "neov1_v02")
+
+(define_insn_reservation "neoverse_v1_asimd_mul" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_b_long, \
+                        neon_mul_h, neon_mul_h_q, neon_mul_h_long, \
+                        neon_mul_s, neon_mul_s_q, neon_mul_s_long, \
+                        neon_sat_mul_b, neon_sat_mul_b_q, neon_sat_mul_b_long, \
+                        neon_sat_mul_h, neon_sat_mul_h_q, neon_sat_mul_h_long, \
+                        neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q, \
+                        neon_sat_mul_h_scalar_long, \
+                        neon_sat_mul_s, neon_sat_mul_s_q, neon_sat_mul_s_long, \
+                        neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q, \
+                        neon_sat_mul_s_scalar_long"))
+  "neov1_v02")
+
+;; ASIMD shift.
+(define_insn_reservation "neoverse_v1_asimd_shift" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_shift_imm, neon_shift_imm_q, \
+                        neon_shift_imm_long, neon_shift_imm_narrow_q, \
+                        neon_shift_reg, neon_shift_reg_q"))
+  "neov1_v13")
+
+(define_insn_reservation "neoverse_v1_asimd_shift_sat" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q, \
+                        neon_sat_shift_imm_narrow_q, \
+                        neon_sat_shift_reg, neon_sat_shift_reg_q"))
+  "neov1_v13")
+
+;; ASIMD FP arithmetic.
+(define_insn_reservation "neoverse_v1_asimd_fp_alu" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fcadd, \
+                        neon_fp_abd_d, neon_fp_abd_d_q, \
+                        neon_fp_abd_s, neon_fp_abd_s_q, \
+                        neon_fp_abs_d, neon_fp_abs_d_q, \
+                        neon_fp_abs_s, neon_fp_abs_s_q, \
+                        neon_fp_addsub_d, neon_fp_addsub_d_q, \
+                        neon_fp_addsub_s, neon_fp_addsub_s_q, \
+                        neon_fp_compare_d, neon_fp_compare_d_q, \
+                        neon_fp_compare_s, neon_fp_compare_s_q, \
+                        neon_fp_minmax_d, neon_fp_minmax_d_q, \
+                        neon_fp_minmax_s, neon_fp_minmax_s_q, \
+                        neon_fp_neg_d, neon_fp_neg_d_q, \
+                        neon_fp_neg_s, neon_fp_neg_s_q, \
+                        neon_fp_reduc_add_d, neon_fp_reduc_add_d_q, \
+                        neon_fp_reduc_add_s, neon_fp_reduc_add_s_q"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_reduc" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_s"))
+  "(neov1_v * 2)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_reduc_q" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_reduc_minmax_d_q, neon_fp_reduc_minmax_s_q"))
+  "(neov1_v * 3)")
+
+;; ASIMD FP convert.
+(define_insn_reservation "neoverse_v1_asimd_cvt" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_cvt_narrow_d_q, \
+                        neon_fp_cvt_widen_s, \
+                        neon_fp_to_int_d, neon_fp_to_int_d_q, \
+                        neon_fp_to_int_s, \
+                        neon_int_to_fp_d, neon_int_to_fp_d_q, \
+                        neon_int_to_fp_s, \
+                        neon_fp_recpe_d, neon_fp_recpe_s, \
+                        neon_fp_recpx_d, neon_fp_recpx_s, \
+                        neon_fp_round_d, neon_fp_round_d_q, \
+                        neon_fp_round_s"))
+  "neov1_v02")
+
+(define_insn_reservation "neoverse_v1_asimd_cvt_q" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_cvt_narrow_s_q, \
+                        neon_fp_cvt_widen_h, \
+                        neon_fp_to_int_s_q, \
+                        neon_int_to_fp_s_q, \
+                        neon_fp_recpe_d_q, neon_fp_recpe_s_q, \
+                        neon_fp_recpx_d_q, neon_fp_recpx_s_q, \
+                        neon_fp_round_s_q"))
+  "(neov1_v02 * 2)")
+
+;; ASIMD FP divide & square-root.
+;; Divisions are not pipelined.
+(define_insn_reservation "neoverse_v1_asimd_fp_divd_q" 15
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_div_d_q"))
+  "neov1_v02, (neov1_v02_block * 7)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_divs" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_div_s"))
+  "neov1_v02, (neov1_v02_block * 3)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_divs_q" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_div_s_q"))
+  "neov1_v02, (neov1_v02_block * 5)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_sqrtd_q" 16
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_sqrt_d_q"))
+  "neov1_v0, (neov1_v02_block * 7)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_sqrts" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_sqrt_s"))
+  "neov1_v0, (neov1_v02_block * 3)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_sqrts_q" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_sqrt_s_q"))
+  "neov1_v0, (neov1_v02_block * 5)")
+
+;; ASIMD FP multiply.
+(define_insn_reservation "neoverse_v1_asimd_fp_mul" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_mul_d, neon_fp_mul_d_q, neon_fp_mul_d_scalar_q, \
+                        neon_fp_mul_s, neon_fp_mul_s_q, neon_fp_mul_s_scalar_q"))
+  "neov1_v")
+
+;; TODO: model the long form.
+(define_insn_reservation "neoverse_v1_asimd_fp_mla" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fcmla, \
+                        neon_fp_mla_d, neon_fp_mla_d_q, neon_fp_mla_d_scalar_q, \
+                        neon_fp_mla_s, neon_fp_mla_s_q, neon_fp_mla_s_scalar_q, \
+                        neon_fp_recps_d, neon_fp_recps_d_q, \
+                        neon_fp_recps_s, neon_fp_recps_s_q"))
+  "neov1_v")
+
+;; ASIMD miscellaneous.
+(define_insn_reservation "neoverse_v1_asimd_gp_fp" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_from_gp, neon_from_gp_q"))
+  "neov1_m0")
+
+;; TODO: model "tbx" fully.
+(define_insn_reservation "neoverse_v1_asimd_tbl" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_tbl1, neon_tbl1_q, \
+                        neon_tbl2, neon_tbl2_q"))
+  "neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_tbl_3" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_tbl3, neon_tbl3_q"))
+  "neov1_v * 2")
+
+(define_insn_reservation "neoverse_v1_asimd_tbl_4" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_tbl4, neon_tbl4_q"))
+  "neov1_v * 3")
+
+;; ASIMD load.
+(define_insn_reservation "neoverse_v1_asimd_ld_a" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q, \
+                        neon_load1_2reg, neon_load1_3reg_q"))
+  "neov1_l")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_b" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q, \
+                        neon_load1_4reg"))
+  "neov1_l * 2")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_c" 7
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_4reg_q"))
+  "neov1_l * 4")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_d" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q, \
+                        neon_load1_one_lane, neon_load1_one_lane_q, \
+                        neon_load2_2reg"))
+  "neov1_l + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_e" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load2_2reg_q"))
+  "(neov1_l * 2) + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_f" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q, \
+                        neon_load2_one_lane, neon_load2_one_lane_q"))
+  "neov1_l + (neov1_v * 2)")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_g" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q, \
+                        neon_load3_all_lanes, neon_load3_all_lanes_q, \
+                        neon_load3_one_lane, neon_load3_one_lane_q"))
+  "(neov1_l * 4) + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_h" 8
+  (and (eq_attr "tune" "neoversev1")
+                        neon_load3_3reg_q, \
+                        neon_load4_4reg, \
+                        neon_load4_all_lanes, neon_load4_all_lanes_q, \
+                        neon_load4_one_lane, neon_load4_one_lane_q"))
+  "(neov1_l * 3) + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_i" 9
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load4_4reg_q"))
+  "(neov1_l * 6) + neov1_v")
+
+;; ASIMD store.
+(define_insn_reservation "neoverse_v1_asimd_st_a" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q, \
+                        neon_store1_2reg"))
+  "neov1_l01 + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_b" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_2reg_q, \
+                        neon_store1_3reg, \
+                        neon_store1_4reg"))
+  "(neov1_l01 * 2) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_c" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_3reg_q"))
+  "(neov1_l01 * 3) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_d" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_4reg_q"))
+  "(neov1_l01 * 4) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_e" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q, \
+                        neon_store2_2reg, \
+                        neon_store2_one_lane, neon_store2_one_lane_q"))
+  "neov1_l01 + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_f" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store2_2reg_q, \
+                        neon_store3_3reg, \
+                        neon_store3_one_lane, neon_store3_one_lane_q, \
+                        neon_store4_one_lane_q"))
+  "(neov1_l01 * 2) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_g" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store3_3reg_q"))
+  "(neov1_l01 * 3) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_h" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store4_4reg, \
+                        neon_store4_one_lane"))
+  "(neov1_l01 * 6) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_i" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store4_4reg_q"))
+  "(neov1_l01 * 8) + neov1_v01")
+
+;; ASIMD crypto.
+;; TODO: model different widths.
+(define_insn_reservation "neoverse_v1_asimd_aese" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_aese"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_aesmc" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_aesmc"))
+  "neov1_v")
+
+;; FIXME: "sha256u1" should be "crypto_sha256_fast".
+(define_insn_reservation "neoverse_v1_asimd_sha_fast" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_sha1_fast, crypto_sha1_xor, \
+                        crypto_sha256_fast, \
+                        crypto_sha512, \
+                        crypto_sha3, \
+                        crypto_sm3"))
+  "neov1_v0")
+
+(define_insn_reservation "neoverse_v1_asimd_sha_slow" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow, \
+                        crypto_sm4"))
+  "neov1_v0")
+
+;; NOTE: "pmull" is sometimes also typed as "neon_mul_{b,h,s}(_scalar)?(_(q|long))?".
+(define_insn_reservation "neoverse_v1_asimd_poly" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_pmull"))
+  "neov1_v")
+
+;; CRC
+(define_insn_reservation "neoverse_v1_crc" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crc"))
+  "neov1_m0")
+
+;; Bypasses.
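+;; A bypass lowers the latency seen between a particular producer and
+;; consumer pair relative to the producer's latency above, e.g. for
+;; back-to-back multiply-accumulates that forward into the accumulator.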
+
+;; Integer multiply.
+(define_bypass 1 "neoverse_v1_mla" "neoverse_v1_mla")
+
+;; FP multiply.
+(define_bypass 1 "neoverse_v1_fp_mul" "neoverse_v1_fp_mac")
+(define_bypass 2 "neoverse_v1_fp_mac" "neoverse_v1_fp_mac")
+
+;; ASIMD arithmetic.
+(define_bypass 1 "neoverse_v1_asimd_arith_acc" "neoverse_v1_asimd_arith_acc")
+
+;; ASIMD multiply.
+(define_bypass 1 "neoverse_v1_asimd_dot" "neoverse_v1_asimd_dot")
+(define_bypass 1 "neoverse_v1_asimd_mla" "neoverse_v1_asimd_mla")
+(define_bypass 1 "neoverse_v1_asimd_mla_q" "neoverse_v1_asimd_mla_q")
+
+;; ASIMD FP multiply.
+(define_bypass 1 "neoverse_v1_asimd_fp_mul" "neoverse_v1_asimd_fp_mla")
+(define_bypass 2 "neoverse_v1_asimd_fp_mla" "neoverse_v1_asimd_fp_mla")
+
+;; CRC
+(define_bypass 1 "neoverse_v1_crc" "neoverse_v1_*")
-- 
2.39.2 (Apple Git-143)

