public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Evandro Menezes <ebahapo@icloud.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Subject: aarch64: Add scheduling model for Neoverse V1
Date: Sun, 7 May 2023 17:49:49 -0500	[thread overview]
Message-ID: <667B07D1-9506-47F0-BF54-733522A2ECB8@icloud.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 260 bytes --]

This patch adds the scheduling model for Neoverse V1, based on the information from the “Arm Neoverse V1 Software Optimization Guide” and on static and dynamic analysis of internal and public benchmarks.  Results are forthcoming. 

-- 
Evandro Menezes

[-- Attachment #2: 0001-aarch64-Add-scheduling-model-for-Neoverse-V1.patch --]
[-- Type: application/octet-stream, Size: 29948 bytes --]

From 3ca533a5fa7ac01bf09c344a32097910caf66734 Mon Sep 17 00:00:00 2001
From: Evandro Menezes <evandro@gcc.gnu.org>
Date: Fri, 28 Apr 2023 21:19:44 -0500
Subject: [PATCH] aarch64: Add scheduling model for Neoverse V1

gcc/ChangeLog:

	* config/aarch64/aarch64-cores.def: Use the Neoverse V1 scheduling model.
	* config/aarch64/aarch64.md: Include `neoverse-v1.md`.
	* config/aarch64/neoverse-v1.md: New file.

Signed-off-by: Evandro Menezes <evandro@gcc.gnu.org>
---
 gcc/config/aarch64/aarch64-cores.def |   4 +-
 gcc/config/aarch64/aarch64.md        |   1 +
 gcc/config/aarch64/neoverse-v1.md    | 714 +++++++++++++++++++++++++++
 3 files changed, 717 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/aarch64/neoverse-v1.md

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index cc842c4e22c..5af5076b834 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -144,8 +144,8 @@ AARCH64_CORE("thunderx3t110",  thunderx3t110,  thunderx3t110, V8_3A,  (CRYPTO, S
 
 /* Arm ('A') cores.  */
 AARCH64_CORE("zeus", zeus, cortexa57, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, neoversev1, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, neoversev1, V8_4A,  (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
 
 /* Qualcomm ('Q') cores. */
 AARCH64_CORE("saphira",     saphira,    saphira,    V8_4A,  (CRYPTO), saphira,   0x51, 0xC01, -1)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index fa8a65b13fd..2899ed4d6d8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -464,6 +464,7 @@
 (include "../arm/exynos-m1.md")
 (include "falkor.md")
 (include "neoverse-n1.md")
+(include "neoverse-v1.md")
 (include "saphira.md")
 (include "thunderx.md")
 (include "../arm/xgene1.md")
diff --git a/gcc/config/aarch64/neoverse-v1.md b/gcc/config/aarch64/neoverse-v1.md
new file mode 100644
index 00000000000..77216427c29
--- /dev/null
+++ b/gcc/config/aarch64/neoverse-v1.md
@@ -0,0 +1,714 @@
+;; Arm Neoverse V1 pipeline description
+;; (Based on the "Arm Neoverse V1 Software Optimization Guide")
+;;
+;; Copyright (C) 2014-2023 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The Neoverse V1 core is modelled as a multiple issue pipeline that has
+;; the following functional units.
+
+(define_automaton "neoverse_v1")
+
+;; 1 - Pipelines for integer operations: SX1, SX2.
+
+(define_cpu_unit "neov1_sx1" "neoverse_v1")
+(define_cpu_unit "neov1_sx2" "neoverse_v1")
+
+;; 2 - Pipelines for complex integer operations: MX1, MX2.
+
+(define_cpu_unit "neov1_mx1" "neoverse_v1")
+(define_cpu_unit "neov1_mx2" "neoverse_v1")
+
+;; 3 - Asymmetric pipelines for Neon and FP operations: CX1, CX2, CX3, CX4 (own automaton).
+
+(define_automaton "neoverse_v1_cx")
+
+(define_cpu_unit "neov1_cx1" "neoverse_v1_cx")
+(define_cpu_unit "neov1_cx2" "neoverse_v1_cx")
+(define_cpu_unit "neov1_cx3" "neoverse_v1_cx")
+(define_cpu_unit "neov1_cx4" "neoverse_v1_cx")
+
+;; 4 - Pipelines for branch operations: BX1, BX2 (in the main automaton).
+
+(define_cpu_unit "neov1_bx1" "neoverse_v1")
+(define_cpu_unit "neov1_bx2" "neoverse_v1")
+
+;; 5 - Pipelines for load and store operations: LS1, LS2, LD1 (own automaton).
+
+(define_automaton "neoverse_v1_ls")
+
+(define_cpu_unit "neov1_ls1" "neoverse_v1_ls")
+(define_cpu_unit "neov1_ls2" "neoverse_v1_ls")
+(define_cpu_unit "neov1_ld1" "neoverse_v1_ls")
+
+;; 6 - Pipelines for data stores: SD1, SD2 (same automaton as the load/store units).
+
+(define_cpu_unit "neov1_sd1" "neoverse_v1_ls")
+(define_cpu_unit "neov1_sd2" "neoverse_v1_ls")
+
+;; Issue groups.  Digits in a group name are zero-based pipe indices (v0 = CX1).
+
+(define_reservation "neov1_b" "(neov1_bx1 | neov1_bx2)")
+(define_reservation "neov1_s" "(neov1_sx1 | neov1_sx2)")
+(define_reservation "neov1_i" "(neov1_sx1 | neov1_sx2 | neov1_mx1 | neov1_mx2)")
+(define_reservation "neov1_m" "(neov1_mx1 | neov1_mx2)")
+(define_reservation "neov1_m0" "neov1_mx1")
+(define_reservation "neov1_l01" "(neov1_ls1 | neov1_ls2)")
+(define_reservation "neov1_l" "(neov1_ls1 | neov1_ls2 | neov1_ld1)")
+(define_reservation "neov1_d" "(neov1_sd1 | neov1_sd2)")
+(define_reservation "neov1_v" "(neov1_cx1 | neov1_cx2 | neov1_cx3 | neov1_cx4)")
+(define_reservation "neov1_v01" "(neov1_cx1 | neov1_cx2)")
+(define_reservation "neov1_v02" "(neov1_cx1 | neov1_cx3)")
+(define_reservation "neov1_v13" "(neov1_cx2 | neov1_cx4)")
+(define_reservation "neov1_v0" "neov1_cx1")
+(define_reservation "neov1_v1" "neov1_cx2")
+
+;; Blocking.  NOTE(review): neov1_all_block leaves out neov1_bx1/neov1_bx2; confirm intended.
+
+(define_reservation "neov1_all_block" "neov1_sx1 + neov1_sx2
+                                       + neov1_mx1 + neov1_mx2
+                                       + neov1_cx1 + neov1_cx2
+                                       + neov1_cx3 + neov1_cx4
+                                       + neov1_ls1 + neov1_ls2
+                                       + neov1_ld1
+                                       + neov1_sd1 + neov1_sd2")
+
+(define_reservation "neov1_m0_block" "neov1_m0")
+
+(define_reservation "neov1_v02_block" "neov1_v02")
+
+;; Instructions.
+
+;; Block.
+(define_insn_reservation "neoverse_v1_block" 1
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "block"))
+  "neov1_all_block")
+
+;; Branches
+;; No latency as there is no result.
+(define_insn_reservation "neoverse_v1_branch" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "branch"))
+  "neov1_b")
+
+;; Calls
+;; No latency as there is no result.
+(define_insn_reservation "neoverse_v1_call" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "call"))
+  "neov1_b + neov1_s")
+
+;; ALU with no or simple shift.
+;; TODO: there should also be "alus_shift_imm_lsl_1to4".
+(define_insn_reservation "neoverse_v1_alu" 1
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "alu_imm, alu_shift_imm_lsl_1to4, alu_sreg, \
+                        alus_imm, alus_sreg, \
+                        csel, \
+                        logic_imm, logic_reg, logic_shift_imm, \
+                        logics_imm, logics_reg, \
+                        mov_reg, \
+                        shift_reg"))
+  "neov1_i")
+
+;; ALU with extension or complex shift ("crc" has its own reservation below).
+;; TODO: there should also be "alus_shift_imm_other".
+(define_insn_reservation "neoverse_v1_alu_shift" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "alu_ext, \
+                        alu_shift_imm_other, alu_shift_reg, \
+                        alus_shift_imm, alus_shift_reg, \
+                        logic_shift_reg, \
+                        logics_shift_imm, \
+                        logics_shift_reg"))
+  "neov1_m")
+
+;; Miscellaneous ALU.
+;; TODO: model 2-register "extr", "bfi", variable shifts.
+(define_insn_reservation "neoverse_v1_alu_misc" 1
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "adr, rotate_imm, bfm, clz, mov_imm, rbit, rev"))
+  "neov1_i")
+
+;; Integer divide.
+;; Divisions are not pipelined.
+(define_insn_reservation "neoverse_v1_div" 12
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "udiv, sdiv"))
+  "neov1_m0, (neov1_m0_block * 12)")
+
+;; Multiply.
+;; TODO: model the high forms.
+(define_insn_reservation "neoverse_v1_mul" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "mul, smull, umull"))
+  "neov1_m")
+
+;; Multiply accumulate.
+(define_insn_reservation "neoverse_v1_mla" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "mla, smlal, umlal"))
+  "neov1_m0")
+
+;; Integer load.
+(define_insn_reservation "neoverse_v1_ld" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "load_byte, load_4, load_8"))
+  "neov1_l")
+
+(define_insn_reservation "neoverse_v1_ld16" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "load_16"))
+  "neov1_l01 * 2")
+
+;; Integer store.
+(define_insn_reservation "neoverse_v1_st" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "store_4, store_8, store_16"))
+  "neov1_l01, neov1_d")
+
+;; FP arithmetic (constants and moves are covered by "neoverse_v1_fp_mov").
+(define_insn_reservation "neoverse_v1_fp_alu" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_minmaxd, \
+                        f_minmaxs, \
+                        faddd, fadds, \
+                        ffarithd, \
+                        ffariths"))
+  "neov1_v")
+
+;; FP compare.
+(define_insn_reservation "neoverse_v1_fp_cmp" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fcmpd, fcmps, fccmpd, fccmps"))
+  "neov1_v0")
+
+(define_insn_reservation "neoverse_v1_fp_csel" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fcsel"))
+  "neov1_v01")
+
+;; FP round.
+(define_insn_reservation "neoverse_v1_fp_rint" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_rintd, f_rints"))
+  "neov1_v02")
+
+;; FP divide & square-root.
+;; Divide & square-root are not pipelined.
+(define_insn_reservation "neoverse_v1_fp_divd" 15
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fdivd"))
+  "neov1_v02, (neov1_v02_block * 7)")
+
+(define_insn_reservation "neoverse_v1_fp_divs" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fdivs"))
+  "neov1_v02, (neov1_v02_block * 4)")
+
+(define_insn_reservation "neoverse_v1_fp_sqrd" 16
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fsqrtd"))
+  "neov1_v02, (neov1_v02_block * 8)")
+
+(define_insn_reservation "neoverse_v1_fp_sqrs" 9
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fsqrts"))
+  "neov1_v02, (neov1_v02_block * 4)")
+
+;; FP multiply.
+(define_insn_reservation "neoverse_v1_fp_mul" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fmuld, fmuls"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_fp_mac" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fmacd, fmacs"))
+  "neov1_v")
+
+;; FP convert.
+(define_insn_reservation "neoverse_v1_fp_cvt" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_cvt"))
+  "neov1_v02")
+
+(define_insn_reservation "neoverse_v1_fp_cvti2f" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_cvti2f"))
+  "neov1_m0")
+
+(define_insn_reservation "neoverse_v1_fp_cvtf2i" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_cvtf2i"))
+  "neov1_v0")
+
+;; FP move.
+(define_insn_reservation "neoverse_v1_fp_mov" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "fconstd, fconsts, \
+                        fmov"))
+  "neov1_v")
+
+;; TODO: model the high form.
+(define_insn_reservation "neoverse_v1_fp_movi2f" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_mcr"))
+  "neov1_m0")
+
+(define_insn_reservation "neoverse_v1_fp_movf2i" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_mrc, \
+                        neon_to_gp, neon_to_gp_q"))
+  "neov1_v1")
+
+;; FP load.
+(define_insn_reservation "neoverse_v1_fp_ld" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_loadd, f_loads, \
+                        neon_ldp"))
+  "neov1_l")
+
+(define_insn_reservation "neoverse_v1_fp_ldp_q" 7
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_ldp_q"))
+  "(neov1_l * 2)")
+
+;; FP store.
+(define_insn_reservation "neoverse_v1_fp_st" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "f_stored, f_stores, \
+                        neon_stp, neon_stp_q"))
+  "neov1_l01, neov1_v01")
+
+;; ASIMD arithmetic.
+(define_insn_reservation "neoverse_v1_asimd_abd_long" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_abd_long"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_alu" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_abd, neon_abd_q, \
+                        neon_abs, neon_abs_q, \
+                        neon_add, neon_add_q, \
+                        neon_add_halve, neon_add_halve_q, \
+                        neon_add_halve_narrow_q, \
+                        neon_add_long, neon_add_widen, \
+                        neon_bsl, neon_bsl_q, \
+                        neon_cls, neon_cls_q, \
+                        neon_compare, neon_compare_q, \
+                        neon_compare_zero, neon_compare_zero_q, \
+                        neon_dup, neon_dup_q, \
+                        neon_ext, neon_ext_q, \
+                        neon_ins, neon_ins_q, \
+                        neon_logic, neon_logic_q, \
+                        neon_minmax, neon_minmax_q, \
+                        neon_move, neon_move_q, \
+                        neon_move_narrow_q, \
+                        neon_neg, neon_neg_q, \
+                        neon_permute, neon_permute_q, \
+                        neon_qabs, neon_qabs_q, \
+                        neon_qadd, neon_qadd_q, \
+                        neon_qneg, neon_qneg_q, \
+                        neon_qsub, neon_qsub_q, \
+                        neon_rbit, neon_rbit_q, \
+                        neon_reduc_add, neon_reduc_add_q, \
+                        neon_rev, neon_rev_q, \
+                        neon_sub, neon_sub_q, \
+                        neon_sub_halve, neon_sub_halve_q, \
+                        neon_sub_halve_narrow_q, \
+                        neon_sub_long, neon_sub_widen"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_arith_acc" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_arith_acc, neon_arith_acc_q, \
+                        neon_shift_acc, neon_shift_acc_q"))
+  "neov1_v13")
+
+;; TODO: model all forms.
+(define_insn_reservation "neoverse_v1_asimd_reduc" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_reduc_add_long, \
+                        neon_reduc_minmax, neon_reduc_minmax_q"))
+  "neov1_v13")
+
+;; ASIMD multiply.
+;; TODO: model BF form.
+(define_insn_reservation "neoverse_v1_asimd_dot" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_dot, neon_dot_q"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_mla" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_mla_b, neon_mla_b_q, neon_mla_b_long, \
+                        neon_mla_h, neon_mla_h_q, neon_mla_h_long, \
+                        neon_mla_h_scalar, neon_mla_h_scalar_q, \
+                        neon_mla_h_scalar_long, \
+                        neon_mla_s, neon_mla_s_long, \
+                        neon_mla_s_scalar, neon_mla_s_scalar_q, \
+                        neon_mla_s_scalar_long"))
+  "neov1_v02")
+
+(define_insn_reservation "neoverse_v1_asimd_mla_q" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_mla_s_q"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_mla_sat" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_sat_mla_b_long, \
+                        neon_sat_mla_h_long, neon_sat_mla_h_scalar_long, \
+                        neon_sat_mla_s_long, neon_sat_mla_s_scalar_long"))
+  "neov1_v02")
+
+
+(define_insn_reservation "neoverse_v1_asimd_mul" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_b_long, \
+                        neon_mul_h, neon_mul_h_q, neon_mul_h_long, \
+                        neon_mul_s, neon_mul_s_q, neon_mul_s_long, \
+                        neon_sat_mul_b, neon_sat_mul_b_q, neon_sat_mul_b_long, \
+                        neon_sat_mul_h, neon_sat_mul_h_q, neon_sat_mul_h_long, \
+                        neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q, \
+                        neon_sat_mul_h_scalar_long, \
+                        neon_sat_mul_s, neon_sat_mul_s_q, neon_sat_mul_s_long, \
+                        neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q, \
+                        neon_sat_mul_s_scalar_long"))
+  "neov1_v02")
+
+;; ASIMD shift.
+(define_insn_reservation "neoverse_v1_asimd_shift" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_shift_imm, neon_shift_imm_q, \
+                        neon_shift_imm_long, neon_shift_imm_narrow_q, \
+                        neon_shift_reg, neon_shift_reg_q"))
+  "neov1_v13")
+
+(define_insn_reservation "neoverse_v1_asimd_shift_sat" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q, \
+                        neon_sat_shift_imm_narrow_q, \
+                        neon_sat_shift_reg, neon_sat_shift_reg_q"))
+  "neov1_v13")
+
+;; ASIMD FP arithmetic.
+(define_insn_reservation "neoverse_v1_asimd_fp_alu" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fcadd, \
+                        neon_fp_abd_d, neon_fp_abd_d_q, \
+                        neon_fp_abd_s, neon_fp_abd_s_q, \
+                        neon_fp_abs_d, neon_fp_abs_d_q, \
+                        neon_fp_abs_s, neon_fp_abs_s_q, \
+                        neon_fp_addsub_d, neon_fp_addsub_d_q, \
+                        neon_fp_addsub_s, neon_fp_addsub_s_q, \
+                        neon_fp_compare_d, neon_fp_compare_d_q, \
+                        neon_fp_compare_s, neon_fp_compare_s_q, \
+                        neon_fp_minmax_d, neon_fp_minmax_d_q, \
+                        neon_fp_minmax_s, neon_fp_minmax_s_q, \
+                        neon_fp_neg_d, neon_fp_neg_d_q, \
+                        neon_fp_neg_s, neon_fp_neg_s_q, \
+                        neon_fp_reduc_add_d, neon_fp_reduc_add_d_q, \
+                        neon_fp_reduc_add_s, neon_fp_reduc_add_s_q"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_reduc" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_s"))
+  "(neov1_v * 2)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_reduc_q" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_reduc_minmax_d_q, neon_fp_reduc_minmax_s_q"))
+  "(neov1_v * 3)")
+
+;; ASIMD FP convert.
+(define_insn_reservation "neoverse_v1_asimd_cvt" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_cvt_narrow_d_q, \
+                        neon_fp_cvt_widen_s, \
+                        neon_fp_to_int_d, neon_fp_to_int_d_q, \
+                        neon_fp_to_int_s, \
+                        neon_int_to_fp_d, neon_int_to_fp_d_q, \
+                        neon_int_to_fp_s, \
+                        neon_fp_recpe_d, neon_fp_recpe_s, \
+                        neon_fp_recpx_d, neon_fp_recpx_s, \
+                        neon_fp_round_d, neon_fp_round_d_q, \
+                        neon_fp_round_s"))
+  "neov1_v02")
+
+(define_insn_reservation "neoverse_v1_asimd_cvt_q" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_cvt_narrow_s_q, \
+                        neon_fp_cvt_widen_h, \
+                        neon_fp_to_int_s_q, \
+                        neon_int_to_fp_s_q, \
+                        neon_fp_recpe_d_q, neon_fp_recpe_s_q, \
+                        neon_fp_recpx_d_q, neon_fp_recpx_s_q, \
+                        neon_fp_round_s_q"))
+  "(neov1_v02 * 2)")
+
+;; ASIMD FP divide & square-root.
+;; Divide & square-root are not pipelined.
+(define_insn_reservation "neoverse_v1_asimd_fp_divd_q" 15
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_div_d_q"))
+  "neov1_v02, (neov1_v02_block * 7)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_divs" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_div_s"))
+  "neov1_v02, (neov1_v02_block * 3)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_divs_q" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_div_s_q"))
+  "neov1_v02, (neov1_v02_block * 5)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_sqrtd_q" 16
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_sqrt_d_q"))
+  "neov1_v0, (neov1_v02_block * 7)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_sqrts" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_sqrt_s"))
+  "neov1_v0, (neov1_v02_block * 3)")
+
+(define_insn_reservation "neoverse_v1_asimd_fp_sqrts_q" 10
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_sqrt_s_q"))
+  "neov1_v0, (neov1_v02_block * 5)")
+
+;; ASIMD FP multiply.
+(define_insn_reservation "neoverse_v1_asimd_fp_mul" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fp_mul_d, neon_fp_mul_d_q, neon_fp_mul_d_scalar_q, \
+                        neon_fp_mul_s, neon_fp_mul_s_q, neon_fp_mul_s_scalar_q"))
+  "neov1_v")
+
+;; TODO: model the long form.
+(define_insn_reservation "neoverse_v1_asimd_fp_mla" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_fcmla, \
+                        neon_fp_mla_d, neon_fp_mla_d_q, neon_fp_mla_d_scalar_q, \
+                        neon_fp_mla_s, neon_fp_mla_s_q, neon_fp_mla_s_scalar_q, \
+                        neon_fp_recps_d, neon_fp_recps_d_q, \
+                        neon_fp_recps_s, neon_fp_recps_s_q"))
+  "neov1_v")
+
+;; ASIMD miscellaneous.
+(define_insn_reservation "neoverse_v1_asimd_gp_fp" 3
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_from_gp, neon_from_gp_q"))
+  "neov1_m0")
+
+;; TODO: model "tbx" fully.
+(define_insn_reservation "neoverse_v1_asimd_tbl" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_tbl1, neon_tbl1_q, \
+                        neon_tbl2, neon_tbl2_q"))
+  "neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_tbl_3" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_tbl3, neon_tbl3_q"))
+  "neov1_v * 2")
+
+(define_insn_reservation "neoverse_v1_asimd_tbl_4" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_tbl4, neon_tbl4_q"))
+  "neov1_v * 3")
+
+;; ASIMD load.  NOTE(review): "neon_load1_3reg" is not modelled; confirm the 3-register forms.
+(define_insn_reservation "neoverse_v1_asimd_ld_a" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q, \
+                        neon_load1_3reg_q"))
+  "neov1_l")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_b" 6
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q, \
+                        neon_load1_4reg"))
+  "neov1_l * 2")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_c" 7
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_4reg_q"))
+  "neov1_l * 4")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_d" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q, \
+                        neon_load1_one_lane, neon_load1_one_lane_q, \
+                        neon_load2_2reg"))
+  "neov1_l + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_e" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load2_2reg_q"))
+  "(neov1_l * 2) + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_f" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q, \
+                        neon_load2_one_lane, neon_load2_one_lane_q"))
+  "neov1_l + (neov1_v * 2)")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_g" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q, \
+                        neon_load3_all_lanes, neon_load3_all_lanes_q, \
+                        neon_load3_one_lane, neon_load3_one_lane_q"))
+  "(neov1_l * 4) + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_h" 8
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load3_3reg_q, \
+                        neon_load4_4reg, \
+                        neon_load4_all_lanes, neon_load4_all_lanes_q, \
+                        neon_load4_one_lane, neon_load4_one_lane_q"))
+  "(neov1_l * 3) + neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_ld_i" 9
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_load4_4reg_q"))
+  "(neov1_l * 6) + neov1_v")
+
+;; ASIMD store.
+(define_insn_reservation "neoverse_v1_asimd_st_a" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q, \
+                        neon_store1_2reg"))
+  "neov1_l01 + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_b" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_2reg_q, \
+                        neon_store1_3reg, \
+                        neon_store1_4reg"))
+  "(neov1_l01 * 2) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_c" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_3reg_q"))
+  "(neov1_l01 * 3) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_d" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_4reg_q"))
+  "(neov1_l01 * 4) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_e" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q, \
+                        neon_store2_2reg, \
+                        neon_store2_one_lane, neon_store2_one_lane_q"))
+  "neov1_l01 + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_f" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store2_2reg_q, \
+                        neon_store3_3reg, \
+                        neon_store3_one_lane, neon_store3_one_lane_q, \
+                        neon_store4_one_lane_q"))
+  "(neov1_l01 * 2) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_g" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store3_3reg_q"))
+  "(neov1_l01 * 3) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_h" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store4_4reg, \
+                        neon_store4_one_lane"))
+  "(neov1_l01 * 6) + neov1_v01")
+
+(define_insn_reservation "neoverse_v1_asimd_st_i" 0
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "neon_store4_4reg_q"))
+  "(neov1_l01 * 8) + neov1_v01")
+
+;; ASIMD crypto.
+;; TODO: model different widths.
+(define_insn_reservation "neoverse_v1_asimd_aese" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_aese"))
+  "neov1_v")
+
+(define_insn_reservation "neoverse_v1_asimd_aesmc" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_aesmc"))
+  "neov1_v")
+
+;; FIXME: "sha256u1" should be "crypto_sha256_fast".
+(define_insn_reservation "neoverse_v1_asimd_sha_fast" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_sha1_fast, crypto_sha1_xor, \
+                        crypto_sha256_fast, \
+                        crypto_sha512, \
+                        crypto_sha3, \
+                        crypto_sm3"))
+  "neov1_v0")
+
+(define_insn_reservation "neoverse_v1_asimd_sha_slow" 4
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow, \
+                        crypto_sm4"))
+  "neov1_v0")
+
+;; NOTE: "pmull" sometimes is also "neon_mul_{b,h,s}(_scalar)?(_(q|long))?"
+(define_insn_reservation "neoverse_v1_asimd_poly" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crypto_pmull"))
+  "neov1_v")
+
+;; CRC
+(define_insn_reservation "neoverse_v1_crc" 2
+  (and (eq_attr "tune" "neoversev1")
+       (eq_attr "type" "crc"))
+  "neov1_m0")
+
+;; Bypasses.  Each gives a shorter latency when the named producer feeds the named consumer.
+
+;; Integer multiply.
+(define_bypass 1 "neoverse_v1_mla" "neoverse_v1_mla")
+
+;; FP multiply.
+(define_bypass 1 "neoverse_v1_fp_mul" "neoverse_v1_fp_mac")
+(define_bypass 2 "neoverse_v1_fp_mac" "neoverse_v1_fp_mac")
+
+;; ASIMD arithmetic.
+(define_bypass 1 "neoverse_v1_asimd_arith_acc" "neoverse_v1_asimd_arith_acc")
+
+;; ASIMD multiply.
+(define_bypass 1 "neoverse_v1_asimd_dot" "neoverse_v1_asimd_dot")
+(define_bypass 1 "neoverse_v1_asimd_mla" "neoverse_v1_asimd_mla")
+(define_bypass 1 "neoverse_v1_asimd_mla_q" "neoverse_v1_asimd_mla_q")
+
+;; ASIMD FP multiply.
+(define_bypass 1 "neoverse_v1_asimd_fp_mul" "neoverse_v1_asimd_fp_mla")
+(define_bypass 2 "neoverse_v1_asimd_fp_mla" "neoverse_v1_asimd_fp_mla")
+
+;; CRC.  NOTE(review): the consumer glob matches every reservation, not only CRC; confirm.
+(define_bypass 1 "neoverse_v1_crc" "neoverse_v1_*")
-- 
2.39.2 (Apple Git-143)


[-- Attachment #3: Type: text/plain, Size: 1 bytes --]



                 reply	other threads:[~2023-05-07 22:50 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=667B07D1-9506-47F0-BF54-733522A2ECB8@icloud.com \
    --to=ebahapo@icloud.com \
    --cc=evandro+gcc-patches@gcc.gnu.org \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).