* Re: add tsv110 pipeline scheduling
@ 2019-01-14 14:03 wuyuan (E)
2019-01-14 15:13 ` Kyrill Tkachov
2019-01-17 23:47 ` James Greenhalgh
0 siblings, 2 replies; 12+ messages in thread
From: wuyuan (E) @ 2019-01-14 14:03 UTC (permalink / raw)
To: Kyrill Tkachov, gcc-patches; +Cc: Zhangyichao (AB), Zhanghaijian (A)
[-- Attachment #1: Type: text/plain, Size: 28357 bytes --]
Hi Kyrill:
The gcc 7.3.0 does not discard the store1 and load1 command; I did not expect the community's latest gcc changes so large .
now I downloaded the latest GCC code, put the patch into GCC source code, the compiler can pass, thank you very much for your work!
Best Regards,
wuyuan
* config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md: New file.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 70b0766..085c40f 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -103,7 +103,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 513aec1..97e0703 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -356,6 +356,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..e33c5cc
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*8")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load_4,load_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store_4,store_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*8)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*8)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
[-- Attachment #2: tsv110_pipeline.patch --]
[-- Type: application/octet-stream, Size: 27075 bytes --]
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 70b0766..085c40f 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -103,7 +103,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 513aec1..97e0703 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -356,6 +356,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..e33c5cc
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*8")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load_4,load_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store_4,store_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*8)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*8)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
2019-01-14 14:03 add tsv110 pipeline scheduling wuyuan (E)
@ 2019-01-14 15:13 ` Kyrill Tkachov
2019-01-17 23:47 ` James Greenhalgh
1 sibling, 0 replies; 12+ messages in thread
From: Kyrill Tkachov @ 2019-01-14 15:13 UTC (permalink / raw)
To: wuyuan (E), gcc-patches
Cc: Zhangyichao (AB), Zhanghaijian (A), Richard Earnshaw (lists),
James Greenhalgh, Marcus Shawcroft
Hi Wuyuan,
On 14/01/19 14:02, wuyuan (E) wrote:
> Hi Kyrill:
> The gcc 7.3.0 does not discard the store1 and load1 command; I did not expect the community's latest gcc changes so large .
> now I downloaded the latest GCC code, put the patch into GCC source code, the compiler can pass, thank you very much for your work!
For the future, please test the patches against the branch you plan to apply them to.
In this case, since you're submitting a trunk patch it needs to be applied and tested on trunk.
This latest version builds on trunk and looks ok to me but you'll need approval from the aarch64 maintainers to commit.
I've cc'ed them for you.
Thanks,
Kyrill
> Best Regards,
> wuyuan
>
>
> * config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md: New file.
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 70b0766..085c40f 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -103,7 +103,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
> AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>
> /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>
> /* ARMv8.4-A Architecture Processors. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 513aec1..97e0703 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -356,6 +356,7 @@
> (include "thunderx.md")
> (include "../arm/xgene1.md")
> (include "thunderx2t99.md")
> +(include "tsv110.md")
>
> ;; -------------------------------------------------------------------
> ;; Jumps and other miscellaneous insns
> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
> new file mode 100644
> index 0000000..e33c5cc
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2018 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it
> +;; under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but
> +;; WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +;; General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3. If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> + "neon_arith_acc, neon_arith_acc_q,
> + neon_arith_basic, neon_arith_complex,
> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> + neon_shift_imm_complex,
> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> + neon_bitops, neon_bitops_q, neon_from_gp,
> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> + unknown"
> + (cond [
> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> + neon_reduc_add_acc_q")
> + (const_string "neon_arith_acc")
> + (eq_attr "type" "neon_arith_acc_q")
> + (const_string "neon_arith_acc_q")
> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> + neon_add_widen, neon_neg, neon_neg_q,\
> + neon_reduc_add, neon_reduc_add_q,\
> + neon_reduc_add_long, neon_sub, neon_sub_q,\
> + neon_sub_long, neon_sub_widen, neon_logic,\
> + neon_logic_q, neon_tst, neon_tst_q,\
> + neon_compare, neon_compare_q,\
> + neon_compare_zero, neon_compare_zero_q,\
> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> + neon_reduc_minmax_q")
> + (const_string "neon_arith_basic")
> + (eq_attr "type" "neon_add_halve_narrow_q,\
> + neon_add_halve, neon_add_halve_q,\
> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> + neon_qneg_q, neon_qsub, neon_qsub_q,\
> + neon_sub_halve_narrow_q")
> + (const_string "neon_arith_complex")
> +
> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> + neon_mul_h_scalar, neon_mul_s_scalar,\
> + neon_sat_mul_b, neon_sat_mul_h,\
> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
> + neon_sat_mul_s_scalar,\
> + neon_mul_b_long, neon_mul_h_long,\
> + neon_mul_s_long,\
> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> + neon_sat_mul_s_scalar_long,\
> + neon_mla_b, neon_mla_h, neon_mla_s,\
> + neon_mla_h_scalar, neon_mla_s_scalar,\
> + neon_mla_b_long, neon_mla_h_long,\
> + neon_mla_s_long,\
> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> + neon_sat_mla_s_scalar_long")
> + (const_string "neon_multiply")
> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> + neon_sat_mul_s_scalar_q,\
> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> + (const_string "neon_multiply_q")
> +
> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> + (const_string "neon_shift_acc")
> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> + neon_shift_imm_narrow_q, neon_shift_imm_long")
> + (const_string "neon_shift_imm_basic")
> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> + neon_sat_shift_imm_narrow_q")
> + (const_string "neon_shift_imm_complex")
> + (eq_attr "type" "neon_shift_reg")
> + (const_string "neon_shift_reg_basic")
> + (eq_attr "type" "neon_shift_reg_q")
> + (const_string "neon_shift_reg_basic_q")
> + (eq_attr "type" "neon_sat_shift_reg")
> + (const_string "neon_shift_reg_complex")
> + (eq_attr "type" "neon_sat_shift_reg_q")
> + (const_string "neon_shift_reg_complex_q")
> +
> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> + neon_fp_abs_s, neon_fp_abs_s_q,\
> + neon_fp_neg_d, neon_fp_neg_d_q,\
> + neon_fp_abs_d, neon_fp_abs_d_q,\
> + neon_fp_minmax_s,neon_fp_minmax_d,\
> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> + (const_string "neon_fp_negabs")
> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> + neon_fp_reduc_add_s, neon_fp_compare_s,\
> + neon_fp_round_s,\
> + neon_fp_addsub_d, neon_fp_abd_d,\
> + neon_fp_reduc_add_d, neon_fp_compare_d,\
> + neon_fp_round_d")
> + (const_string "neon_fp_arith")
> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> + neon_fp_minmax_d_q, neon_fp_round_d_q")
> + (const_string "neon_fp_arith_q")
> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> + neon_fp_reduc_minmax_d_q,\
> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> + (const_string "neon_fp_reductions_q")
> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> + neon_fp_to_int_d, neon_int_to_fp_d")
> + (const_string "neon_fp_cvt_int")
> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> + (const_string "neon_fp_cvt_int_q")
> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> + (const_string "neon_fp_cvt16")
> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> + neon_fp_mul_d")
> + (const_string "neon_fp_mul")
> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> + (const_string "neon_fp_mul_q")
> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> + neon_fp_mla_d")
> + (const_string "neon_fp_mla")
> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> + (const_string "neon_fp_mla_q")
> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> + neon_fp_recpx_s,\
> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
> + neon_fp_recpx_d")
> + (const_string "neon_fp_recpe_rsqrte")
> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> + neon_fp_recpx_s_q,\
> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> + neon_fp_recpx_d_q")
> + (const_string "neon_fp_recpe_rsqrte_q")
> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> + neon_fp_recps_d, neon_fp_rsqrts_d")
> + (const_string "neon_fp_recps_rsqrts")
> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> + (const_string "neon_fp_recps_rsqrts_q")
> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> + neon_rev, neon_permute, neon_rbit,\
> + neon_tbl1, neon_tbl2, neon_zip,\
> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> + neon_move, neon_move_q, neon_move_narrow_q")
> + (const_string "neon_bitops")
> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> + neon_rev_q, neon_permute_q, neon_rbit_q")
> + (const_string "neon_bitops_q")
> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> + (const_string "neon_from_gp")
> + (eq_attr "type" "neon_from_gp_q")
> + (const_string "neon_from_gp_q")
> +
> + (eq_attr "type" "f_loads, f_loadd,\
> + neon_load1_1reg, neon_load1_1reg_q,\
> + neon_load1_2reg, neon_load1_2reg_q")
> + (const_string "neon_load_a")
> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> + neon_load1_4reg, neon_load1_4reg_q")
> + (const_string "neon_load_b")
> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
> + neon_load2_2reg, neon_load2_2reg_q,\
> + neon_load2_all_lanes, neon_load2_all_lanes_q")
> + (const_string "neon_load_c")
> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> + neon_load3_3reg, neon_load3_3reg_q,\
> + neon_load3_one_lane, neon_load3_one_lane_q,\
> + neon_load4_4reg, neon_load4_4reg_q")
> + (const_string "neon_load_d")
> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
> + neon_load4_all_lanes, neon_load4_all_lanes_q")
> + (const_string "neon_load_e")
> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> + (const_string "neon_load_f")
> +
> + (eq_attr "type" "f_stores, f_stored,\
> + neon_store1_1reg")
> + (const_string "neon_store_a")
> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> + (const_string "neon_store_b")
> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> + neon_store3_3reg, neon_store3_3reg_q,\
> + neon_store2_4reg, neon_store2_4reg_q,\
> + neon_store4_4reg, neon_store4_4reg_q,\
> + neon_store2_2reg, neon_store2_2reg_q,\
> + neon_store3_one_lane, neon_store3_one_lane_q,\
> + neon_store4_one_lane, neon_store4_one_lane_q,\
> + neon_store1_4reg, neon_store1_4reg_q,\
> + neon_store1_one_lane, neon_store1_one_lane_q,\
> + neon_store2_one_lane, neon_store2_one_lane_q")
> + (const_string "neon_store_complex")]
> + (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as issues pipeline that has
> +;; the following functional units.
> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110")
> +(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110")
> +(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110")
> +(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2. One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110")
> +(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
> +(define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> + "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> + "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
> +
> +;; 5. Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110")
> +(define_cpu_unit "tsv110_ls2_issue" "tsv110")
> +(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
> +(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> + + tsv110_mdu_issue + tsv110_alu1_issue
> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alu_imm,logic_imm,\
> + alu_sreg,logic_reg,\
> + adc_imm,adc_reg,\
> + adr,bfm,clz,rbit,rev,\
> + shift_imm,shift_reg,\
> + mov_imm,mov_reg,\
> + mvn_imm,mvn_reg,\
> + mrs,multiple,no_insn"))
> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_imm,logics_imm,\
> + alus_sreg,logics_reg,\
> + adcs_imm,adcs_reg"))
> + "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "extend,\
> + alu_shift_imm,alu_shift_reg,\
> + crc,logic_shift_imm,logic_shift_reg,\
> + mov_shift,mvn_shift,\
> + mov_shift_reg,mvn_shift_reg"))
> + "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> + logics_shift_imm,logics_shift_reg"))
> + "tsv110_alu2")
> +
> +;; Multiplies instructions
> +(define_insn_reservation "tsv110_mult" 3
> + (and (eq_attr "tune" "tsv110")
> + (ior (eq_attr "mul32" "yes")
> + (eq_attr "widen_mul64" "yes")))
> + "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "udiv,sdiv"))
> + "tsv110_mdu*8")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "block"))
> + "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slot.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "branch"))
> + "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "load_4,load_8"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "store_4,store_8"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_neon_arith_basic" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_arith_complex" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_mul_d_long"))
> + "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> + neon_shift_reg_complex"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> + neon_shift_reg_complex_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_negabs" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_minmax_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_reductions_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_cvt_int" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul_q" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> + neon_fp_recps_rsqrts"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla_q" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> + neon_fp_recps_rsqrts_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte_q" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops"))
> + "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_dup" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_from_gp,f_mcr"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_mov" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_mcrr"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_from_gp_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_to_gp" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> + "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_lane" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg1" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> + "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg2" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg3" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg4" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld2" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld3" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> + neon_load3_one_lane,neon_load3_one_lane_q,\
> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_reg" 11
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_store_a" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_a"))
> + "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> + "tsv110_neon_store_b" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_b"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the number
> +;; of 64-bit chunks they will store, we don't attempt to model that
> +;; precisely, treat them as blocking execution for two cycles when
> +;; issued.
> +(define_insn_reservation
> + "tsv110_neon_store_complex" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
> + "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fconsts,fconstd,fmov"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvt"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvtf2i"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvti2f"))
> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fcmps,fcmpd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "ffariths,ffarithd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> + neon_fp_div_s_q,neon_fp_div_d_q"))
> + "(tsv110_fsu1*8)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> + "(tsv110_fsu2*8)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> + "tsv110_fsu1")
> +
> +;; We lie with calls. They take up all issue slots, but are otherwise
> +;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "call"))
> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> + "tsv110_alu,tsv110_alu_shift")
> +(define_bypass 2 "tsv110_alu_shift"
> + "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> + "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is
> +;; predicted in which case we pay no penalty, or the branch is
> +;; mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> + "tsv110_call,tsv110_branch")
>
>
>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
2019-01-14 14:03 add tsv110 pipeline scheduling wuyuan (E)
2019-01-14 15:13 ` Kyrill Tkachov
@ 2019-01-17 23:47 ` James Greenhalgh
1 sibling, 0 replies; 12+ messages in thread
From: James Greenhalgh @ 2019-01-17 23:47 UTC (permalink / raw)
To: wuyuan (E)
Cc: Kyrill Tkachov, gcc-patches, Zhangyichao (AB), Zhanghaijian (A), nd
On Mon, Jan 14, 2019 at 08:02:45AM -0600, wuyuan (E) wrote:
> Hi Kyrill:
> The gcc 7.3.0 does not discard the store1 and load1 command; I did not expect the community's latest gcc changes so large .
> now I downloaded the latest GCC code, put the patch into GCC source code, the compiler can pass, thank you very much for your work!
> Best Regards,
> wuyuan
Please check your modeling of Advanced SIMD operations.
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
This model says you will reserve
LS1 for 8 cycles,
OR LS2 for 8 cycles,
OR FSU1 for 8 cycles,
OR FSU2 for 8 cycles.
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
This model says you will reserve
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle.
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
This model says you will reserve:
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle
AND
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle
Which would be a redundant AND.
Is that how you intend to model these operations?
Remember,
If you are looking to model a 'THEN' relationship you can use the ',' operator,
If you are looking to model an 'AND' relationship you can use the '+' operator,
If you are looking to model an 'OR' relationship you can use the '|' operator.
Taking Cortex-A57 as an example:
> (define_insn_reservation
> "cortex_a57_neon_load_d" 11
> (and (eq_attr "tune" "cortexa57")
> (eq_attr "cortex_a57_neon_type" "neon_load_d"))
> "ca57_cx1_issue+ca57_cx2_issue,
> ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")
This model says you will reserve:
CX1_ISSUE AND CX2_ISSUE,
THEN LS_ISSUE AND LS_ISSUE,
THEN LDR for 2 cycles.
Please let me know if you plan to update the model. If I have misunderstood
your intentions, please accept my apologies.
Best Regards,
James Greenhalgh
>
>
> * config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md: New file.
^ permalink raw reply [flat|nested] 12+ messages in thread
* re: add tsv110 pipeline scheduling
@ 2019-03-07 14:25 wuyuan (E)
0 siblings, 0 replies; 12+ messages in thread
From: wuyuan (E) @ 2019-03-07 14:25 UTC (permalink / raw)
To: James Greenhalgh
Cc: Kyrill Tkachov, gcc-patches, Zhangyichao (AB), Zhanghaijian (A),
wufeng (O)
Hi ,James:
The modified patch has been uploaded for ten days. If you have time, I hope to get your comments earlier, thank you very much! Best Regards,
wuyuan
-----邮件原件-----
发件人: wuyuan (E)
发送时间: 2019年3月4日 21:46
收件人: 'James Greenhalgh' <james.greenhalgh@arm.com>
抄送: 'Kyrill Tkachov' <kyrylo.tkachov@foss.arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Zhanghaijian (A) <z.zhanghaijian@huawei.com>; 'nd@arm.com' <nd@arm.com>; wufeng (O) <wufeng9@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>
主题: re: add tsv110 pipeline scheduling
Hi ,James:
Have you seen the patch submitted last week? If the problem with the patch has been fixed, I hope to get into the trunk earlier. look forward to your reply. Thank you.
Best Regards,
wuyuan
-----邮件原件-----
发件人: wuyuan (E)
发送时间: 2019年2月23日 21:28
收件人: 'James Greenhalgh' <james.greenhalgh@arm.com>
抄送: Kyrill Tkachov <kyrylo.tkachov@foss.arm.com>; gcc-patches@gcc.gnu.org; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Zhanghaijian (A) <z.zhanghaijian@huawei.com>; nd@arm.com; wufeng (O) <wufeng9@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com> Re : add tsv110 pipeline scheduling
Hi ,James:
Sorry for not responding to your email in time because of Chinese New Year’s holiday and urgent work. The three questions you mentioned last email are due to my misunderstanding of pipeline.
the first question, These instructions will occupy both the tsv110_ls* and tsv110_fsu* Pipeline at the same time.
rewritten as follows:
(define_insn_reservation
"tsv110_neon_ld4_lane" 9
(and (eq_attr "tune" "tsv110")
(eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
neon_load4_one_lane,neon_load4_one_lane_q"))
"(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
the second question, These instructions will use tsv110_fsu1 Pipeline or tsv110_fsu2 Pipeline.
rewritten as follows:
(define_insn_reservation "tsv110_neon_abd_aba" 4
(and (eq_attr "tune" "tsv110")
(eq_attr "type" "neon_abd,neon_arith_acc"))
"tsv110_fsu1|tsv110_fsu2")
the third question, These instructions will use tsv110_fsu1 Pipeline or tsv110_fsu2 Pipeline.
rewritten as follows:
(define_insn_reservation "tsv110_neon_abd_aba_q" 4
(and (eq_attr "tune" "tsv110")
(eq_attr "type" "neon_arith_acc_q"))
"tsv110_fsu1|tsv110_fsu2")
In addition to the above changes, I asked hardware engineers and colleagues to review my patch and modify some of the errors. The detailed patches are as follows:
* config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md: New file.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index ed56e5e..82d91d6
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -105,7 +105,7 @@ AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_ AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa53, 0x41, 0xd4a, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index b7cd9fc..861f059 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -361,6 +361,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..9d12839
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it ;;
+under the terms of the GNU General Public License as published by ;;
+the Free Software Foundation; either version 3, or (at your option) ;;
+any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but ;;
+WITHOUT ANY WARRANTY; without even the implied warranty of ;;
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
+General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License ;;
+along with GCC; see the file COPYING3. If not see ;;
+<http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has ;; the
+following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
+"tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
+"tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
+"tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
+"tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3:
+BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
+"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
+"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
+"tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
+tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load_4,load_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store_4,store_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "tsv110_fsu1")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "tsv110_fsu1")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type"
+"neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "tsv110_fsu1")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type"
+"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that ;;
+precisely, treat them as blocking execution for two cycles when ;;
+issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_sqrts" 24
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "tsv110_fsu2")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift") (define_bypass 2
+"tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is ;;
+predicted in which case we pay no penalty, or the branch is ;;
+mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
-----邮件原件-----
发件人: James Greenhalgh [mailto:james.greenhalgh@arm.com]
发送时间: 2019年1月18日 7:47
收件人: wuyuan (E) <wuyuan5@huawei.com>
抄送: Kyrill Tkachov <kyrylo.tkachov@foss.arm.com>; gcc-patches@gcc.gnu.org; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Zhanghaijian (A) <z.zhanghaijian@huawei.com>; nd@arm.com
主题: Re: add tsv110 pipeline scheduling
On Mon, Jan 14, 2019 at 08:02:45AM -0600, wuyuan (E) wrote:
> Hi Kyrill:
> The gcc 7.3.0 does not discard the store1 and load1 command; I did not expect the community's latest gcc changes so large .
> now I downloaded the latest GCC code, put the patch into GCC source code, the compiler can pass, thank you very much for your work!
> Best Regards,
> wuyuan
Please check your modeling of Advanced SIMD operations.
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
This model says you will reserve
LS1 for 8 cycles,
OR LS2 for 8 cycles,
OR FSU1 for 8 cycles,
OR FSU2 for 8 cycles.
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
This model says you will reserve
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle.
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
This model says you will reserve:
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle
AND
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle
Which would be a redundant AND.
Is that how you intend to model these operations?
Remember,
If you are looking to model a 'THEN' relationship you can use the ',' operator, If you are looking to model an 'AND' relationship you can use the '+' operator, If you are looking to model an 'OR' relationship you can use the '|' operator.
Taking Cortex-A57 as an example:
> (define_insn_reservation
> "cortex_a57_neon_load_d" 11
> (and (eq_attr "tune" "cortexa57")
> (eq_attr "cortex_a57_neon_type" "neon_load_d"))
> "ca57_cx1_issue+ca57_cx2_issue,
> ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")
This model says you will reserve:
CX1_ISSUE AND CX2_ISSUE,
THEN LS_ISSUE AND LS_ISSUE,
THEN LDR for 2 cycles.
Please let me know if you plan to update the model. If I have misunderstood your intentions, please accept my apologies.
Best Regards,
James Greenhalgh
>
>
> * config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md: New file.
^ permalink raw reply [flat|nested] 12+ messages in thread
* re: add tsv110 pipeline scheduling
@ 2019-03-04 13:46 wuyuan (E)
0 siblings, 0 replies; 12+ messages in thread
From: wuyuan (E) @ 2019-03-04 13:46 UTC (permalink / raw)
To: James Greenhalgh
Cc: Kyrill Tkachov, gcc-patches, Zhangyichao (AB), Zhanghaijian (A),
nd, wufeng (O), Yangfei (Felix)
Hi ,James:
Have you seen the patch submitted last week? If the problem with the patch has been fixed, I hope to get into the trunk earlier. look forward to your reply. Thank you.
Best Regards,
wuyuan
-----邮件原件-----
发件人: wuyuan (E)
发送时间: 2019年2月23日 21:28
收件人: 'James Greenhalgh' <james.greenhalgh@arm.com>
抄送: Kyrill Tkachov <kyrylo.tkachov@foss.arm.com>; gcc-patches@gcc.gnu.org; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Zhanghaijian (A) <z.zhanghaijian@huawei.com>; nd@arm.com; wufeng (O) <wufeng9@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>
Re : add tsv110 pipeline scheduling
Hi ,James:
Sorry for not responding to your email in time because of Chinese New Year’s holiday and urgent work. The three questions you mentioned last email are due to my misunderstanding of pipeline.
the first question, These instructions will occupy both the tsv110_ls* and tsv110_fsu* Pipeline at the same time.
rewritten as follows:
(define_insn_reservation
"tsv110_neon_ld4_lane" 9
(and (eq_attr "tune" "tsv110")
(eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
neon_load4_one_lane,neon_load4_one_lane_q"))
"(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
the second question, These instructions will use tsv110_fsu1 Pipeline or tsv110_fsu2 Pipeline.
rewritten as follows:
(define_insn_reservation "tsv110_neon_abd_aba" 4
(and (eq_attr "tune" "tsv110")
(eq_attr "type" "neon_abd,neon_arith_acc"))
"tsv110_fsu1|tsv110_fsu2")
the third question, These instructions will use tsv110_fsu1 Pipeline or tsv110_fsu2 Pipeline.
rewritten as follows:
(define_insn_reservation "tsv110_neon_abd_aba_q" 4
(and (eq_attr "tune" "tsv110")
(eq_attr "type" "neon_arith_acc_q"))
"tsv110_fsu1|tsv110_fsu2")
In addition to the above changes, I asked hardware engineers and colleagues to review my patch and modify some of the errors. The detailed patches are as follows:
* config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md: New file.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index ed56e5e..82d91d6
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -105,7 +105,7 @@ AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_ AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa53, 0x41, 0xd4a, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index b7cd9fc..861f059 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -361,6 +361,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..9d12839
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it ;;
+under the terms of the GNU General Public License as published by ;;
+the Free Software Foundation; either version 3, or (at your option) ;;
+any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but ;;
+WITHOUT ANY WARRANTY; without even the implied warranty of ;;
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
+General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License ;;
+along with GCC; see the file COPYING3. If not see ;;
+<http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has ;; the
+following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
+"tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
+"tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
+"tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
+"tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3:
+BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
+"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
+"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
+"tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
+tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load_4,load_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store_4,store_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "tsv110_fsu1")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "tsv110_fsu1")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type"
+"neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "tsv110_fsu1")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type"
+"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "tsv110_ls1|tsv110_ls2")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 +
+tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that ;;
+precisely, treat them as blocking execution for two cycles when ;;
+issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_fp_sqrts" 24
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "tsv110_fsu2")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift") (define_bypass 2
+"tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is ;;
+predicted in which case we pay no penalty, or the branch is ;;
+mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
-----邮件原件-----
发件人: James Greenhalgh [mailto:james.greenhalgh@arm.com]
发送时间: 2019年1月18日 7:47
收件人: wuyuan (E) <wuyuan5@huawei.com>
抄送: Kyrill Tkachov <kyrylo.tkachov@foss.arm.com>; gcc-patches@gcc.gnu.org; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Zhanghaijian (A) <z.zhanghaijian@huawei.com>; nd@arm.com
主题: Re: add tsv110 pipeline scheduling
On Mon, Jan 14, 2019 at 08:02:45AM -0600, wuyuan (E) wrote:
> Hi Kyrill:
> The gcc 7.3.0 does not discard the store1 and load1 command; I did not expect the community's latest gcc changes so large .
> now I downloaded the latest GCC code, put the patch into GCC source code, the compiler can pass, thank you very much for your work!
> Best Regards,
> wuyuan
Please check your modeling of Advanced SIMD operations.
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
This model says you will reserve
LS1 for 8 cycles,
OR LS2 for 8 cycles,
OR FSU1 for 8 cycles,
OR FSU2 for 8 cycles.
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
This model says you will reserve
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle.
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
This model says you will reserve:
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle
AND
FSU1 for 1 cycle,
THEN FSU2 for 1 cycle
Which would be a redundant AND.
Is that how you intend to model these operations?
Remember,
If you are looking to model a 'THEN' relationship you can use the ',' operator, If you are looking to model an 'AND' relationship you can use the '+' operator, If you are looking to model an 'OR' relationship you can use the '|' operator.
Taking Cortex-A57 as an example:
> (define_insn_reservation
> "cortex_a57_neon_load_d" 11
> (and (eq_attr "tune" "cortexa57")
> (eq_attr "cortex_a57_neon_type" "neon_load_d"))
> "ca57_cx1_issue+ca57_cx2_issue,
> ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")
This model says you will reserve:
CX1_ISSUE AND CX2_ISSUE,
THEN LS_ISSUE AND LS_ISSUE,
THEN LDR for 2 cycles.
Please let me know if you plan to update the model. If I have misunderstood your intentions, please accept my apologies.
Best Regards,
James Greenhalgh
>
>
> * config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md: New file.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
2019-01-13 9:37 wuyuan (E)
@ 2019-01-14 10:26 ` Kyrill Tkachov
0 siblings, 0 replies; 12+ messages in thread
From: Kyrill Tkachov @ 2019-01-14 10:26 UTC (permalink / raw)
To: wuyuan (E), gcc-patches
Cc: Zhangyichao (AB), Zhanghaijian (A), Zhangshaokun, JIANJIANG CENG
Hi Wuyuan,
On 13/01/19 09:36, wuyuan (E) wrote:
> Hi Kyrill:
> Thank you very much for your to review my patch. I have modified the code accordingly to your opinion.
> first, mul64 was renamed to widen_mul64, and use load_4, load_8 to loading 4 and 8 bytes in the latest version of GCC. besides, I change the reservation durations (the *16 part above) to 8. Test performance with some test cases, the result has improvement (Will these changes improvement performance?).
> now, the tsv110 automaton size is 8641 states. I don't know if the code modification is complete. If there is any need to modify it, please let me know, thank you.
Thanks, that's a much better size.
I wouldn't expect the changes to change performance much, but it is possible.
One comment inline.
>
> 2019-01-11 wuyuan <wuyuan5@huawei.com>
>
> * config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md: New file.
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 70b0766..085c40f 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -103,7 +103,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
> AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>
> /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>
> /* ARMv8.4-A Architecture Processors. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 513aec1..97e0703 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -356,6 +356,7 @@
> (include "thunderx.md")
> (include "../arm/xgene1.md")
> (include "thunderx2t99.md")
> +(include "tsv110.md")
>
> ;; -------------------------------------------------------------------
> ;; Jumps and other miscellaneous insns
> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
> new file mode 100644
> index 0000000..e33c5cc
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2018 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it
> +;; under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but
> +;; WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +;; General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3. If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> + "neon_arith_acc, neon_arith_acc_q,
> + neon_arith_basic, neon_arith_complex,
> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> + neon_shift_imm_complex,
> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> + neon_bitops, neon_bitops_q, neon_from_gp,
> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> + unknown"
> + (cond [
> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> + neon_reduc_add_acc_q")
> + (const_string "neon_arith_acc")
> + (eq_attr "type" "neon_arith_acc_q")
> + (const_string "neon_arith_acc_q")
> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> + neon_add_widen, neon_neg, neon_neg_q,\
> + neon_reduc_add, neon_reduc_add_q,\
> + neon_reduc_add_long, neon_sub, neon_sub_q,\
> + neon_sub_long, neon_sub_widen, neon_logic,\
> + neon_logic_q, neon_tst, neon_tst_q,\
> + neon_compare, neon_compare_q,\
> + neon_compare_zero, neon_compare_zero_q,\
> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> + neon_reduc_minmax_q")
> + (const_string "neon_arith_basic")
> + (eq_attr "type" "neon_add_halve_narrow_q,\
> + neon_add_halve, neon_add_halve_q,\
> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> + neon_qneg_q, neon_qsub, neon_qsub_q,\
> + neon_sub_halve_narrow_q")
> + (const_string "neon_arith_complex")
> +
> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> + neon_mul_h_scalar, neon_mul_s_scalar,\
> + neon_sat_mul_b, neon_sat_mul_h,\
> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
> + neon_sat_mul_s_scalar,\
> + neon_mul_b_long, neon_mul_h_long,\
> + neon_mul_s_long,\
> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> + neon_sat_mul_s_scalar_long,\
> + neon_mla_b, neon_mla_h, neon_mla_s,\
> + neon_mla_h_scalar, neon_mla_s_scalar,\
> + neon_mla_b_long, neon_mla_h_long,\
> + neon_mla_s_long,\
> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> + neon_sat_mla_s_scalar_long")
> + (const_string "neon_multiply")
> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> + neon_sat_mul_s_scalar_q,\
> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> + (const_string "neon_multiply_q")
> +
> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> + (const_string "neon_shift_acc")
> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> + neon_shift_imm_narrow_q, neon_shift_imm_long")
> + (const_string "neon_shift_imm_basic")
> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> + neon_sat_shift_imm_narrow_q")
> + (const_string "neon_shift_imm_complex")
> + (eq_attr "type" "neon_shift_reg")
> + (const_string "neon_shift_reg_basic")
> + (eq_attr "type" "neon_shift_reg_q")
> + (const_string "neon_shift_reg_basic_q")
> + (eq_attr "type" "neon_sat_shift_reg")
> + (const_string "neon_shift_reg_complex")
> + (eq_attr "type" "neon_sat_shift_reg_q")
> + (const_string "neon_shift_reg_complex_q")
> +
> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> + neon_fp_abs_s, neon_fp_abs_s_q,\
> + neon_fp_neg_d, neon_fp_neg_d_q,\
> + neon_fp_abs_d, neon_fp_abs_d_q,\
> + neon_fp_minmax_s,neon_fp_minmax_d,\
> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> + (const_string "neon_fp_negabs")
> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> + neon_fp_reduc_add_s, neon_fp_compare_s,\
> + neon_fp_round_s,\
> + neon_fp_addsub_d, neon_fp_abd_d,\
> + neon_fp_reduc_add_d, neon_fp_compare_d,\
> + neon_fp_round_d")
> + (const_string "neon_fp_arith")
> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> + neon_fp_minmax_d_q, neon_fp_round_d_q")
> + (const_string "neon_fp_arith_q")
> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> + neon_fp_reduc_minmax_d_q,\
> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> + (const_string "neon_fp_reductions_q")
> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> + neon_fp_to_int_d, neon_int_to_fp_d")
> + (const_string "neon_fp_cvt_int")
> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> + (const_string "neon_fp_cvt_int_q")
> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> + (const_string "neon_fp_cvt16")
> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> + neon_fp_mul_d")
> + (const_string "neon_fp_mul")
> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> + (const_string "neon_fp_mul_q")
> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> + neon_fp_mla_d")
> + (const_string "neon_fp_mla")
> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> + (const_string "neon_fp_mla_q")
> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> + neon_fp_recpx_s,\
> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
> + neon_fp_recpx_d")
> + (const_string "neon_fp_recpe_rsqrte")
> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> + neon_fp_recpx_s_q,\
> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> + neon_fp_recpx_d_q")
> + (const_string "neon_fp_recpe_rsqrte_q")
> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> + neon_fp_recps_d, neon_fp_rsqrts_d")
> + (const_string "neon_fp_recps_rsqrts")
> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> + (const_string "neon_fp_recps_rsqrts_q")
> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> + neon_rev, neon_permute, neon_rbit,\
> + neon_tbl1, neon_tbl2, neon_zip,\
> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> + neon_move, neon_move_q, neon_move_narrow_q")
> + (const_string "neon_bitops")
> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> + neon_rev_q, neon_permute_q, neon_rbit_q")
> + (const_string "neon_bitops_q")
> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> + (const_string "neon_from_gp")
> + (eq_attr "type" "neon_from_gp_q")
> + (const_string "neon_from_gp_q")
> +
> + (eq_attr "type" "f_loads, f_loadd,\
> + neon_load1_1reg, neon_load1_1reg_q,\
> + neon_load1_2reg, neon_load1_2reg_q")
> + (const_string "neon_load_a")
> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> + neon_load1_4reg, neon_load1_4reg_q")
> + (const_string "neon_load_b")
> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
> + neon_load2_2reg, neon_load2_2reg_q,\
> + neon_load2_all_lanes, neon_load2_all_lanes_q")
> + (const_string "neon_load_c")
> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> + neon_load3_3reg, neon_load3_3reg_q,\
> + neon_load3_one_lane, neon_load3_one_lane_q,\
> + neon_load4_4reg, neon_load4_4reg_q")
> + (const_string "neon_load_d")
> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
> + neon_load4_all_lanes, neon_load4_all_lanes_q")
> + (const_string "neon_load_e")
> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> + (const_string "neon_load_f")
> +
> + (eq_attr "type" "f_stores, f_stored,\
> + neon_store1_1reg")
> + (const_string "neon_store_a")
> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> + (const_string "neon_store_b")
> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> + neon_store3_3reg, neon_store3_3reg_q,\
> + neon_store2_4reg, neon_store2_4reg_q,\
> + neon_store4_4reg, neon_store4_4reg_q,\
> + neon_store2_2reg, neon_store2_2reg_q,\
> + neon_store3_one_lane, neon_store3_one_lane_q,\
> + neon_store4_one_lane, neon_store4_one_lane_q,\
> + neon_store1_4reg, neon_store1_4reg_q,\
> + neon_store1_one_lane, neon_store1_one_lane_q,\
> + neon_store2_one_lane, neon_store2_one_lane_q")
> + (const_string "neon_store_complex")]
> + (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as issues pipeline that has
> +;; the following functional units.
> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110")
> +(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110")
> +(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110")
> +(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2. One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110")
> +(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
> +(define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> + "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> + "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
> +
> +;; 5. Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110")
> +(define_cpu_unit "tsv110_ls2_issue" "tsv110")
> +(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
> +(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> + + tsv110_mdu_issue + tsv110_alu1_issue
> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alu_imm,logic_imm,\
> + alu_sreg,logic_reg,\
> + adc_imm,adc_reg,\
> + adr,bfm,clz,rbit,rev,\
> + shift_imm,shift_reg,\
> + mov_imm,mov_reg,\
> + mvn_imm,mvn_reg,\
> + mrs,multiple,no_insn"))
> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_imm,logics_imm,\
> + alus_sreg,logics_reg,\
> + adcs_imm,adcs_reg"))
> + "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "extend,\
> + alu_shift_imm,alu_shift_reg,\
> + crc,logic_shift_imm,logic_shift_reg,\
> + mov_shift,mvn_shift,\
> + mov_shift_reg,mvn_shift_reg"))
> + "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> + logics_shift_imm,logics_shift_reg"))
> + "tsv110_alu2")
> +
> +;; Multiplies instructions
> +(define_insn_reservation "tsv110_mult" 3
> + (and (eq_attr "tune" "tsv110")
> + (ior (eq_attr "mul32" "yes")
> + (eq_attr "widen_mul64" "yes")))
> + "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "udiv,sdiv"))
> + "tsv110_mdu*8")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "block"))
> + "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slot.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "branch"))
> + "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "load_4,load_8"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "store1,store2"))
> + "tsv110_ls1|tsv110_ls2")
This still needs to be updated. I think you want "store_4,store_8" here.
This patch doesn't build currently...
Thanks,
Kyrill
> +
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_neon_arith_basic" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_arith_complex" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_mul_d_long"))
> + "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> + neon_shift_reg_complex"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> + neon_shift_reg_complex_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_negabs" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_minmax_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_reductions_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_cvt_int" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul_q" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> + neon_fp_recps_rsqrts"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla_q" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> + neon_fp_recps_rsqrts_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte_q" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops"))
> + "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_dup" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_from_gp,f_mcr"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_mov" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_mcrr"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_from_gp_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_to_gp" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> + "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_lane" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg1" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> + "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg2" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg3" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg4" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld2" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld3" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> + neon_load3_one_lane,neon_load3_one_lane_q,\
> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_reg" 11
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_store_a" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_a"))
> + "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> + "tsv110_neon_store_b" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_b"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the number
> +;; of 64-bit chunks they will store, we don't attempt to model that
> +;; precisely, treat them as blocking execution for two cycles when
> +;; issued.
> +(define_insn_reservation
> + "tsv110_neon_store_complex" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
> + "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fconsts,fconstd,fmov"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvt"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvtf2i"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvti2f"))
> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fcmps,fcmpd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "ffariths,ffarithd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> + neon_fp_div_s_q,neon_fp_div_d_q"))
> + "(tsv110_fsu1*8)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> + "(tsv110_fsu2*8)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> + "tsv110_fsu1")
> +
> +;; We lie with calls. They take up all issue slots, but are otherwise
> +;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "call"))
> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> + "tsv110_alu,tsv110_alu_shift")
> +(define_bypass 2 "tsv110_alu_shift"
> + "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> + "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is
> +;; predicted in which case we pay no penalty, or the branch is
> +;; mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> + "tsv110_call,tsv110_branch")
>
>
>
>
>
>
>
>
>
>
>
>
>
>
> -----é®ä»¶å件-----
> å件人: Kyrill Tkachov [mailto:kyrylo.tkachov@foss.arm.com]
> åéæ¶é´: 2019å¹´1æ8æ¥ 20:15
> æ¶ä»¶äºº: wuyuan (E) <wuyuan5@huawei.com>; Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com>; gcc-patches@gcc.gnu.org; Marcus Shawcroft <Marcus.Shawcroft@arm.com>; James Greenhalgh <James.Greenhalgh@arm.com>; Richard Earnshaw <Richard.Earnshaw@arm.com>
> æé: Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangshaokun <zhangshaokun@hisilicon.com>
> 主é¢: Re: add tsv110 pipeline scheduling
>
> Hi Wuyuan,
>
> Thanks for pinging.
> Some comments inline
>
> On 08/01/19 11:23, wuyuan (E) wrote:
>> Hi , Maintainers
>> I submitted a tsv110 pipeline patch on the 20th of last month , Have you reviewed the patch? look forward to your reply.
>> Best Regards,
>> Wuyuan
>>
>> 2019-1-8 wuyuan <wuyuan5@huawei.com>
>>
> Please use the date format 2019-01-08.
>
> Also, only two spaces between date and your name.
>
>> * config/aarch64/aarch64-cores.def: New CPU.
> This should be
> * config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
>
>> * config/aarch64/aarch64.md : Add "tsv110.md"
>> * config/aarch64/tsv110.md : tsv110.md new file
> This should be:
> * config/aarch64/tsv110.md: New file.
>
>>
>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>> old mode 100644
>> new mode 100755
>> index 20f4924..ea9b7c5
>> --- a/gcc/config/aarch64/aarch64-cores.def
>> +++ b/gcc/config/aarch64/aarch64-cores.def
>> @@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>>
>> /* HiSilicon ('H') cores. */
>> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>>
>> /* ARMv8.4-A Architecture Processors. */
>>
>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
>> --- a/gcc/config/aarch64/aarch64.md
>> +++ b/gcc/config/aarch64/aarch64.md
>> @@ -349,6 +349,7 @@
>> (include "thunderx.md")
>> (include "../arm/xgene1.md")
>> (include "thunderx2t99.md")
>> +(include "tsv110.md")
>>
>> ;; -------------------------------------------------------------------
>> ;; Jumps and other miscellaneous insns
>> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
>> --- /dev/null
>> +++ b/gcc/config/aarch64/tsv110.md
>> @@ -0,0 +1,708 @@
>> +;; tsv110 pipeline description
>> +;; Copyright (C) 2018 Free Software Foundation, Inc.
>> +;;
>> +;; This file is part of GCC.
>> +;;
>> +;; GCC is free software; you can redistribute it and/or modify it ;;
>> +under the terms of the GNU General Public License as published by ;;
>> +the Free Software Foundation; either version 3, or (at your option) ;;
>> +any later version.
>> +;;
>> +;; GCC is distributed in the hope that it will be useful, but ;;
>> +WITHOUT ANY WARRANTY; without even the implied warranty of ;;
>> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
>> +General Public License for more details.
>> +;;
>> +;; You should have received a copy of the GNU General Public License ;;
>> +along with GCC; see the file COPYING3. If not see ;;
>> +<http://www.gnu.org/licenses/>.
>> +
>> +(define_automaton "tsv110")
>> +
>> +(define_attr "tsv110_neon_type"
>> + "neon_arith_acc, neon_arith_acc_q,
>> + neon_arith_basic, neon_arith_complex,
>> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
>> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
>> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
>> + neon_shift_imm_complex,
>> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
>> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
>> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
>> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
>> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
>> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
>> + neon_bitops, neon_bitops_q, neon_from_gp,
>> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
>> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
>> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
>> + unknown"
>> + (cond [
>> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
>> + neon_reduc_add_acc_q")
>> + (const_string "neon_arith_acc")
>> + (eq_attr "type" "neon_arith_acc_q")
>> + (const_string "neon_arith_acc_q")
>> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
>> + neon_add_widen, neon_neg, neon_neg_q,\
>> + neon_reduc_add, neon_reduc_add_q,\
>> + neon_reduc_add_long, neon_sub, neon_sub_q,\
>> + neon_sub_long, neon_sub_widen, neon_logic,\
>> + neon_logic_q, neon_tst, neon_tst_q,\
>> + neon_compare, neon_compare_q,\
>> + neon_compare_zero, neon_compare_zero_q,\
>> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
>> + neon_reduc_minmax_q")
>> + (const_string "neon_arith_basic")
>> + (eq_attr "type" "neon_add_halve_narrow_q,\
>> + neon_add_halve, neon_add_halve_q,\
>> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
>> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
>> + neon_qneg_q, neon_qsub, neon_qsub_q,\
>> + neon_sub_halve_narrow_q")
>> + (const_string "neon_arith_complex")
>> +
>> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
>> + neon_mul_h_scalar, neon_mul_s_scalar,\
>> + neon_sat_mul_b, neon_sat_mul_h,\
>> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
>> + neon_sat_mul_s_scalar,\
>> + neon_mul_b_long, neon_mul_h_long,\
>> + neon_mul_s_long,\
>> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
>> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
>> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
>> + neon_sat_mul_s_scalar_long,\
>> + neon_mla_b, neon_mla_h, neon_mla_s,\
>> + neon_mla_h_scalar, neon_mla_s_scalar,\
>> + neon_mla_b_long, neon_mla_h_long,\
>> + neon_mla_s_long,\
>> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
>> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
>> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
>> + neon_sat_mla_s_scalar_long")
>> + (const_string "neon_multiply")
>> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
>> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
>> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
>> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
>> + neon_sat_mul_s_scalar_q,\
>> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
>> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
>> + (const_string "neon_multiply_q")
>> +
>> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
>> + (const_string "neon_shift_acc")
>> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
>> + neon_shift_imm_narrow_q, neon_shift_imm_long")
>> + (const_string "neon_shift_imm_basic")
>> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
>> + neon_sat_shift_imm_narrow_q")
>> + (const_string "neon_shift_imm_complex")
>> + (eq_attr "type" "neon_shift_reg")
>> + (const_string "neon_shift_reg_basic")
>> + (eq_attr "type" "neon_shift_reg_q")
>> + (const_string "neon_shift_reg_basic_q")
>> + (eq_attr "type" "neon_sat_shift_reg")
>> + (const_string "neon_shift_reg_complex")
>> + (eq_attr "type" "neon_sat_shift_reg_q")
>> + (const_string "neon_shift_reg_complex_q")
>> +
>> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
>> + neon_fp_abs_s, neon_fp_abs_s_q,\
>> + neon_fp_neg_d, neon_fp_neg_d_q,\
>> + neon_fp_abs_d, neon_fp_abs_d_q,\
>> + neon_fp_minmax_s,neon_fp_minmax_d,\
>> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
>> + (const_string "neon_fp_negabs")
>> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
>> + neon_fp_reduc_add_s, neon_fp_compare_s,\
>> + neon_fp_round_s,\
>> + neon_fp_addsub_d, neon_fp_abd_d,\
>> + neon_fp_reduc_add_d, neon_fp_compare_d,\
>> + neon_fp_round_d")
>> + (const_string "neon_fp_arith")
>> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
>> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
>> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
>> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
>> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
>> + neon_fp_minmax_d_q, neon_fp_round_d_q")
>> + (const_string "neon_fp_arith_q")
>> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
>> + neon_fp_reduc_minmax_d_q,\
>> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
>> + (const_string "neon_fp_reductions_q")
>> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
>> + neon_fp_to_int_d, neon_int_to_fp_d")
>> + (const_string "neon_fp_cvt_int")
>> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
>> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
>> + (const_string "neon_fp_cvt_int_q")
>> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
>> + (const_string "neon_fp_cvt16")
>> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
>> + neon_fp_mul_d")
>> + (const_string "neon_fp_mul")
>> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
>> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
>> + (const_string "neon_fp_mul_q")
>> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
>> + neon_fp_mla_d")
>> + (const_string "neon_fp_mla")
>> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
>> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
>> + (const_string "neon_fp_mla_q")
>> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
>> + neon_fp_recpx_s,\
>> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
>> + neon_fp_recpx_d")
>> + (const_string "neon_fp_recpe_rsqrte")
>> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
>> + neon_fp_recpx_s_q,\
>> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
>> + neon_fp_recpx_d_q")
>> + (const_string "neon_fp_recpe_rsqrte_q")
>> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
>> + neon_fp_recps_d, neon_fp_rsqrts_d")
>> + (const_string "neon_fp_recps_rsqrts")
>> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
>> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
>> + (const_string "neon_fp_recps_rsqrts_q")
>> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
>> + neon_rev, neon_permute, neon_rbit,\
>> + neon_tbl1, neon_tbl2, neon_zip,\
>> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
>> + neon_move, neon_move_q, neon_move_narrow_q")
>> + (const_string "neon_bitops")
>> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
>> + neon_rev_q, neon_permute_q, neon_rbit_q")
>> + (const_string "neon_bitops_q")
>> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
>> + (const_string "neon_from_gp")
>> + (eq_attr "type" "neon_from_gp_q")
>> + (const_string "neon_from_gp_q")
>> +
>> + (eq_attr "type" "f_loads, f_loadd,\
>> + neon_load1_1reg, neon_load1_1reg_q,\
>> + neon_load1_2reg, neon_load1_2reg_q")
>> + (const_string "neon_load_a")
>> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
>> + neon_load1_4reg, neon_load1_4reg_q")
>> + (const_string "neon_load_b")
>> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
>> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
>> + neon_load2_2reg, neon_load2_2reg_q,\
>> + neon_load2_all_lanes, neon_load2_all_lanes_q")
>> + (const_string "neon_load_c")
>> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
>> + neon_load3_3reg, neon_load3_3reg_q,\
>> + neon_load3_one_lane, neon_load3_one_lane_q,\
>> + neon_load4_4reg, neon_load4_4reg_q")
>> + (const_string "neon_load_d")
>> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
>> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
>> + neon_load4_all_lanes, neon_load4_all_lanes_q")
>> + (const_string "neon_load_e")
>> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
>> + (const_string "neon_load_f")
>> +
>> + (eq_attr "type" "f_stores, f_stored,\
>> + neon_store1_1reg")
>> + (const_string "neon_store_a")
>> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
>> + (const_string "neon_store_b")
>> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
>> + neon_store3_3reg, neon_store3_3reg_q,\
>> + neon_store2_4reg, neon_store2_4reg_q,\
>> + neon_store4_4reg, neon_store4_4reg_q,\
>> + neon_store2_2reg, neon_store2_2reg_q,\
>> + neon_store3_one_lane, neon_store3_one_lane_q,\
>> + neon_store4_one_lane, neon_store4_one_lane_q,\
>> + neon_store1_4reg, neon_store1_4reg_q,\
>> + neon_store1_one_lane, neon_store1_one_lane_q,\
>> + neon_store2_one_lane, neon_store2_one_lane_q")
>> + (const_string "neon_store_complex")]
>> + (const_string "unknown")))
>> +
>> +;; The tsv110 core is modelled as issues pipeline that has ;; the
>> +following functional units.
>> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
>> +
>> +(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
>> +"tsv110_alu1" "tsv110_alu1_issue")
>> +
>> +(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
>> +"tsv110_alu2" "tsv110_alu2_issue")
>> +
>> +(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
>> +"tsv110_alu3" "tsv110_alu3_issue")
>> +
>> +;; 2. One pipeline for complex integer operations: MDU
>> +
>> +(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
>> +"tsv110_mdu" "tsv110_mdu_issue")
>> +
>> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
>> +(define_automaton "tsv110_fsu")
>> +
>> +(define_cpu_unit "tsv110_fsu1_issue"
>> + "tsv110_fsu")
>> +(define_cpu_unit "tsv110_fsu2_issue"
>> + "tsv110_fsu")
>> +
>> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
>> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
>> +
>> +;; 4. Two pipeline for branch operations but same with alu2 and alu3:
>> +BRU1, BRU2
>> +
>> +;; 5. Two pipelines for load and store operations: LS1, LS2.
>> +
>> +(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
>> +"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
>> +"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
>> +"tsv110_ls2_issue")
>> +
>> +;; Block all issue queues.
>> +
>> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
>> + + tsv110_mdu_issue + tsv110_alu1_issue
>> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
>> +tsv110_ls2_issue")
>> +
>> +;; Simple Execution Unit:
>> +;;
>> +;; Simple ALU without shift
>> +(define_insn_reservation "tsv110_alu" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "alu_imm,logic_imm,\
>> + alu_sreg,logic_reg,\
>> + adc_imm,adc_reg,\
>> + adr,bfm,clz,rbit,rev,\
>> + shift_imm,shift_reg,\
>> + mov_imm,mov_reg,\
>> + mvn_imm,mvn_reg,\
>> + mrs,multiple,no_insn"))
>> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
>> +
>> +(define_insn_reservation "tsv110_alus" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "alus_imm,logics_imm,\
>> + alus_sreg,logics_reg,\
>> + adcs_imm,adcs_reg"))
>> + "tsv110_alu2|tsv110_alu3")
>> +
>> +;; ALU ops with shift
>> +(define_insn_reservation "tsv110_alu_shift" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "extend,\
>> + alu_shift_imm,alu_shift_reg,\
>> + crc,logic_shift_imm,logic_shift_reg,\
>> + mov_shift,mvn_shift,\
>> + mov_shift_reg,mvn_shift_reg"))
>> + "tsv110_mdu")
>> +
>> +(define_insn_reservation "tsv110_alus_shift" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
>> + logics_shift_imm,logics_shift_reg"))
>> + "tsv110_alu2")
>> +
>> +;; Multiplies instructions
>> +(define_insn_reservation "tsv110_mult" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (ior (eq_attr "mul32" "yes")
>> + (eq_attr "mul64" "yes")))
>
> mul64 was renamed to widen_mul64 in r266471. Can you please update your patch, otherwise it won't build.
>
>
>
>> + "tsv110_mdu")
>> +
>> +;; Integer divide
>> +(define_insn_reservation "tsv110_div" 10
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "udiv,sdiv"))
>> + "tsv110_mdu*10")
>> +
>> +;; Block all issue pipes for a cycle
>> +(define_insn_reservation "tsv110_block" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "block"))
>> + "tsv110_block")
>> +
>> +;; Branch execution Unit
>> +;;
>> +;; Branches take two issue slot.
>> +;; No latency as there is no result
>> +(define_insn_reservation "tsv110_branch" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "branch"))
>> + "tsv110_alu1|tsv110_alu2")
>> +
>> +;; Load-store execution Unit
>> +;;
>> +;; Loads of up to two words.
>> +(define_insn_reservation "tsv110_load1" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "load1,load2"))
>> + "tsv110_ls1|tsv110_ls2")
> There are no types called load1, load2. This won't build.
> Did you mean to use load_4, load_8? (for loading 4 and 8 bytes accordingly).
>
>
>> +
>> +;; Stores of up to two words.
>> +(define_insn_reservation "tsv110_store1" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "store1,store2"))
>> + "tsv110_ls1|tsv110_ls2")
>> +
> Similarly, did you mean store_4 and store_8?
>
>> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
>> +
>> +(define_insn_reservation "tsv110_neon_abd_aba" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_abd,neon_arith_acc"))
>> + "tsv110_fsu1,tsv110_fsu2")
>> +
>> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_arith_acc_q"))
>> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_neon_arith_basic" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
>> + "tsv110_fsu1,tsv110_fsu2")
>> +
>> +(define_insn_reservation "tsv110_neon_arith_complex" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
>> + "tsv110_fsu1,tsv110_fsu2")
>> +
>> +;; Integer Multiply Instructions.
>> +;; D-form
>> +(define_insn_reservation "tsv110_neon_multiply" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_multiply"))
>> + "tsv110_fsu1")
>> +
>> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_mul_d_long"))
>> + "tsv110_fsu1")
>> +
>> +;; Q-form
>> +(define_insn_reservation "tsv110_neon_multiply_q" 8
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
>> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
>> +
>> +;; Integer Shift Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_shift_acc" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
>> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
>> + neon_shift_reg_complex"))
>> + "tsv110_fsu1")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_shift_acc_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
>> + neon_shift_reg_complex_q"))
>> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
>> +
>> +;; Floating Point Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_negabs" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_arith" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_arith_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_minmax_q" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_reductions_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_cvt_int" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type"
>> +"neon_fp_cvt_int,neon_fp_cvt_int_q"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mul" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mul_q" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mla" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
>> + neon_fp_recps_rsqrts"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_recpe_rsqrte" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mla_q" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
>> + neon_fp_recps_rsqrts_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_recpe_rsqrte_q" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +;; Miscellaneous Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_bitops" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_bitops"))
>> + "(tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_dup" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_from_gp,f_mcr"))
>> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_mov" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_mcrr"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_bitops_q" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_from_gp_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
>> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_to_gp" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
>> + "(tsv110_fsu1)")
>> +
>> +;; Load Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_lane" 8
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
>> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
>> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg1" 6
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type"
>> +"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
>> + "((tsv110_ls1)|(tsv110_ls2))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg2" 6
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
>> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg3" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
>> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg4" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
>> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld2" 8
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
>> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
>> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
>> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld3" 9
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
>> + neon_load3_one_lane,neon_load3_one_lane_q,\
>> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
>> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld4_lane" 9
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
>> + neon_load4_one_lane,neon_load4_one_lane_q"))
>> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld4_reg" 11
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
>> + neon_load4_one_lane,neon_load4_one_lane_q"))
>> +
>> +"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
> With the above two bugs fixed I am concerned that this automaton is much larger than other automata in config/aarch64.
> This hurts GCC compile time and memory requirements. We've had bug reports in the past where people were not able to build
> GCC on memory-constrained systems due to these issues.
> You can check the size of the generated automata during build time by adding (automata_option "stats") to your .md file.
> With this, the tsv110 automaton size is 38017 states, more than 5x the size of the next largest automaton (cortex_a53_advsimd).
>
> This is usually due to unnecessarily large reservation durations (the *16 part above) on long-running instructions such as divisions (integer and floating-point)
> and ld4 instructions, such as this one. If you use only a maximum of 8 in the reservation duration here, and in the division instructions you get a much
> smaller automaton size (I see 7681 states if I change it to 8 here and in tsv110_div, tsv110_fp_sqrts and tsv110_fp_divs).
>
> Because 8 cycles is such a large scheduling window anyway, it is unlikely that modelling the full 16 cycles will give any benefit in real world code.
> That has been our experience in the past.
>
> So I recommend you modify the model to use only a maximum of 8 in its reservation durations.
>
> Hope this helps,
> Kyrill
>
>> +
>> +;; Store Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_store_a" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_store_a"))
>> + "tsv110_fsu1|tsv110_fsu2")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_store_b" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_store_b"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +;; These block issue for a number of cycles proportional to the number
>> +;; of 64-bit chunks they will store, we don't attempt to model that ;;
>> +precisely, treat them as blocking execution for two cycles when ;;
>> +issued.
>> +(define_insn_reservation
>> + "tsv110_neon_store_complex" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
>> + "tsv110_block*2")
>> +
>> +;; Floating-Point Operations.
>> +
>> +(define_insn_reservation "tsv110_fp_const" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fconsts,fconstd,fmov"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_add_sub" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_mac" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_cvt" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_cvt"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_cvtf2i"))
>> + "(tsv110_fsu1)")
>> +
>> +(define_insn_reservation "tsv110_fp_cvti2f" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_cvti2f"))
>> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
>> +
>> +(define_insn_reservation "tsv110_fp_cmp" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fcmps,fcmpd"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_arith" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "ffariths,ffarithd"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_divs" 12
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
>> + neon_fp_div_s_q,neon_fp_div_d_q"))
>> + "(tsv110_fsu1*12)")
>> +
>> +(define_insn_reservation "tsv110_fp_sqrts" 12
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
>> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
>> + "(tsv110_fsu2*12)")
>> +
>> +(define_insn_reservation "tsv110_crypto_aes" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
>> + "tsv110_fsu1")
>> +
>> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_sha1_fast"))
>> + "(tsv110_fsu1)")
>> +
>> +(define_insn_reservation "tsv110_crypto_complex" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
>> + "tsv110_fsu1")
>> +
>> +;; We lie with calls. They take up all issue slots, but are otherwise
>> +;; not harmful.
>> +(define_insn_reservation "tsv110_call" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "call"))
>> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
>> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
>> +)
>> +
>> +;; Simple execution unit bypasses
>> +(define_bypass 1 "tsv110_alu"
>> + "tsv110_alu,tsv110_alu_shift") (define_bypass 2
>> +"tsv110_alu_shift"
>> + "tsv110_alu,tsv110_alu_shift")
>> +
>> +;; An MLA or a MUL can feed a dependent MLA.
>> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
>> + "tsv110_neon_*mla*")
>> +
>> +;; We don't need to care about control hazards, either the branch is ;;
>> +predicted in which case we pay no penalty, or the branch is ;;
>> +mispredicted in which case instruction scheduling will be unlikely to
>> +;; help.
>> +(define_bypass 1 "tsv110_*"
>> + "tsv110_call,tsv110_branch")
>>
>>
>>
>>
>>
>> -----é®ä»¶å件-----
>> å件人: wuyuan (E)
>> åéæ¶é´: 2019å¹´1æ3æ¥ 10:57
>> æ¶ä»¶äºº: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
>> æé: 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>
>> 主é¢: Re: add tsv110 pipeline scheduling
>>
>> Hi , Maintainers
>> Happy new year!
>> On the 20th of last month, I submitted a tsv110 pipeline patch. I want to know if you have received it. Looking forward to your reply.
>> Best Regards,
>> wuyuan
>>
>>
>>
>>
>> -----é®ä»¶å件-----
>> å件人: wuyuan (E)
>> åéæ¶é´: 2018å¹´12æ20æ¥ 14:06
>> æ¶ä»¶äºº: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
>> æé: Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>; 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>; Zhangshaokun <zhangshaokun@hisilicon.com>
>> 主é¢: Re: add tsv110 pipeline scheduling
>>
>>
>> Hi Ramana,
>> Please ignore the patch in the previous email attachment (the ChangeLog has deleted in this patch..) I have already communicated with Shao Kun, he has fixed the problem of the previous patch. So I resubmitted the tsv110 pipeline patch, please review.
>> The patch as follows :
>>
>>
>>
>> 2018-12-20 wuyuan <wuyuan5@huawei.com>
>>
>> * config/aarch64/aarch64-cores.def: New CPU.
>> * config/aarch64/aarch64.md : Add "tsv110.md"
>> * config/aarch64/tsv110.md : tsv110.md new file
>>
>>
>>
>>
>>
>>
>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>> old mode 100644
>> new mode 100755
>> index 20f4924..ea9b7c5
>> --- a/gcc/config/aarch64/aarch64-cores.def
>> +++ b/gcc/config/aarch64/aarch64-cores.def
>> @@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>>
>> /* HiSilicon ('H') cores. */
>> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>>
>> /* ARMv8.4-A Architecture Processors. */
>>
>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
>> --- a/gcc/config/aarch64/aarch64.md
>> +++ b/gcc/config/aarch64/aarch64.md
>> @@ -349,6 +349,7 @@
>> (include "thunderx.md")
>> (include "../arm/xgene1.md")
>> (include "thunderx2t99.md")
>> +(include "tsv110.md")
>>
>> ;; -------------------------------------------------------------------
>> ;; Jumps and other miscellaneous insns
>> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
>> --- /dev/null
>> +++ b/gcc/config/aarch64/tsv110.md
>> @@ -0,0 +1,708 @@
>> +;; tsv110 pipeline description
>> +;; Copyright (C) 2018 Free Software Foundation, Inc.
>> +;;
>> +;; This file is part of GCC.
>> +;;
>> +;; GCC is free software; you can redistribute it and/or modify it ;;
>> +under the terms of the GNU General Public License as published by ;;
>> +the Free Software Foundation; either version 3, or (at your option) ;;
>> +any later version.
>> +;;
>> +;; GCC is distributed in the hope that it will be useful, but ;;
>> +WITHOUT ANY WARRANTY; without even the implied warranty of ;;
>> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
>> +General Public License for more details.
>> +;;
>> +;; You should have received a copy of the GNU General Public License ;;
>> +along with GCC; see the file COPYING3. If not see ;;
>> +<http://www.gnu.org/licenses/>.
>> +
>> +(define_automaton "tsv110")
>> +
>> +(define_attr "tsv110_neon_type"
>> + "neon_arith_acc, neon_arith_acc_q,
>> + neon_arith_basic, neon_arith_complex,
>> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
>> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
>> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
>> + neon_shift_imm_complex,
>> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
>> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
>> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
>> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
>> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
>> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
>> + neon_bitops, neon_bitops_q, neon_from_gp,
>> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
>> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
>> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
>> + unknown"
>> + (cond [
>> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
>> + neon_reduc_add_acc_q")
>> + (const_string "neon_arith_acc")
>> + (eq_attr "type" "neon_arith_acc_q")
>> + (const_string "neon_arith_acc_q")
>> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
>> + neon_add_widen, neon_neg, neon_neg_q,\
>> + neon_reduc_add, neon_reduc_add_q,\
>> + neon_reduc_add_long, neon_sub, neon_sub_q,\
>> + neon_sub_long, neon_sub_widen, neon_logic,\
>> + neon_logic_q, neon_tst, neon_tst_q,\
>> + neon_compare, neon_compare_q,\
>> + neon_compare_zero, neon_compare_zero_q,\
>> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
>> + neon_reduc_minmax_q")
>> + (const_string "neon_arith_basic")
>> + (eq_attr "type" "neon_add_halve_narrow_q,\
>> + neon_add_halve, neon_add_halve_q,\
>> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
>> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
>> + neon_qneg_q, neon_qsub, neon_qsub_q,\
>> + neon_sub_halve_narrow_q")
>> + (const_string "neon_arith_complex")
>> +
>> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
>> + neon_mul_h_scalar, neon_mul_s_scalar,\
>> + neon_sat_mul_b, neon_sat_mul_h,\
>> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
>> + neon_sat_mul_s_scalar,\
>> + neon_mul_b_long, neon_mul_h_long,\
>> + neon_mul_s_long,\
>> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
>> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
>> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
>> + neon_sat_mul_s_scalar_long,\
>> + neon_mla_b, neon_mla_h, neon_mla_s,\
>> + neon_mla_h_scalar, neon_mla_s_scalar,\
>> + neon_mla_b_long, neon_mla_h_long,\
>> + neon_mla_s_long,\
>> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
>> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
>> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
>> + neon_sat_mla_s_scalar_long")
>> + (const_string "neon_multiply")
>> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
>> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
>> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
>> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
>> + neon_sat_mul_s_scalar_q,\
>> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
>> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
>> + (const_string "neon_multiply_q")
>> +
>> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
>> + (const_string "neon_shift_acc")
>> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
>> + neon_shift_imm_narrow_q, neon_shift_imm_long")
>> + (const_string "neon_shift_imm_basic")
>> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
>> + neon_sat_shift_imm_narrow_q")
>> + (const_string "neon_shift_imm_complex")
>> + (eq_attr "type" "neon_shift_reg")
>> + (const_string "neon_shift_reg_basic")
>> + (eq_attr "type" "neon_shift_reg_q")
>> + (const_string "neon_shift_reg_basic_q")
>> + (eq_attr "type" "neon_sat_shift_reg")
>> + (const_string "neon_shift_reg_complex")
>> + (eq_attr "type" "neon_sat_shift_reg_q")
>> + (const_string "neon_shift_reg_complex_q")
>> +
>> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
>> + neon_fp_abs_s, neon_fp_abs_s_q,\
>> + neon_fp_neg_d, neon_fp_neg_d_q,\
>> + neon_fp_abs_d, neon_fp_abs_d_q,\
>> + neon_fp_minmax_s,neon_fp_minmax_d,\
>> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
>> + (const_string "neon_fp_negabs")
>> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
>> + neon_fp_reduc_add_s, neon_fp_compare_s,\
>> + neon_fp_round_s,\
>> + neon_fp_addsub_d, neon_fp_abd_d,\
>> + neon_fp_reduc_add_d, neon_fp_compare_d,\
>> + neon_fp_round_d")
>> + (const_string "neon_fp_arith")
>> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
>> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
>> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
>> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
>> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
>> + neon_fp_minmax_d_q, neon_fp_round_d_q")
>> + (const_string "neon_fp_arith_q")
>> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
>> + neon_fp_reduc_minmax_d_q,\
>> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
>> + (const_string "neon_fp_reductions_q")
>> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
>> + neon_fp_to_int_d, neon_int_to_fp_d")
>> + (const_string "neon_fp_cvt_int")
>> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
>> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
>> + (const_string "neon_fp_cvt_int_q")
>> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
>> + (const_string "neon_fp_cvt16")
>> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
>> + neon_fp_mul_d")
>> + (const_string "neon_fp_mul")
>> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
>> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
>> + (const_string "neon_fp_mul_q")
>> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
>> + neon_fp_mla_d")
>> + (const_string "neon_fp_mla")
>> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
>> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
>> + (const_string "neon_fp_mla_q")
>> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
>> + neon_fp_recpx_s,\
>> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
>> + neon_fp_recpx_d")
>> + (const_string "neon_fp_recpe_rsqrte")
>> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
>> + neon_fp_recpx_s_q,\
>> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
>> + neon_fp_recpx_d_q")
>> + (const_string "neon_fp_recpe_rsqrte_q")
>> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
>> + neon_fp_recps_d, neon_fp_rsqrts_d")
>> + (const_string "neon_fp_recps_rsqrts")
>> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
>> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
>> + (const_string "neon_fp_recps_rsqrts_q")
>> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
>> + neon_rev, neon_permute, neon_rbit,\
>> + neon_tbl1, neon_tbl2, neon_zip,\
>> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
>> + neon_move, neon_move_q, neon_move_narrow_q")
>> + (const_string "neon_bitops")
>> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
>> + neon_rev_q, neon_permute_q, neon_rbit_q")
>> + (const_string "neon_bitops_q")
>> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
>> + (const_string "neon_from_gp")
>> + (eq_attr "type" "neon_from_gp_q")
>> + (const_string "neon_from_gp_q")
>> +
>> + (eq_attr "type" "f_loads, f_loadd,\
>> + neon_load1_1reg, neon_load1_1reg_q,\
>> + neon_load1_2reg, neon_load1_2reg_q")
>> + (const_string "neon_load_a")
>> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
>> + neon_load1_4reg, neon_load1_4reg_q")
>> + (const_string "neon_load_b")
>> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
>> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
>> + neon_load2_2reg, neon_load2_2reg_q,\
>> + neon_load2_all_lanes, neon_load2_all_lanes_q")
>> + (const_string "neon_load_c")
>> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
>> + neon_load3_3reg, neon_load3_3reg_q,\
>> + neon_load3_one_lane, neon_load3_one_lane_q,\
>> + neon_load4_4reg, neon_load4_4reg_q")
>> + (const_string "neon_load_d")
>> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
>> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
>> + neon_load4_all_lanes, neon_load4_all_lanes_q")
>> + (const_string "neon_load_e")
>> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
>> + (const_string "neon_load_f")
>> +
>> + (eq_attr "type" "f_stores, f_stored,\
>> + neon_store1_1reg")
>> + (const_string "neon_store_a")
>> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
>> + (const_string "neon_store_b")
>> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
>> + neon_store3_3reg, neon_store3_3reg_q,\
>> + neon_store2_4reg, neon_store2_4reg_q,\
>> + neon_store4_4reg, neon_store4_4reg_q,\
>> + neon_store2_2reg, neon_store2_2reg_q,\
>> + neon_store3_one_lane, neon_store3_one_lane_q,\
>> + neon_store4_one_lane, neon_store4_one_lane_q,\
>> + neon_store1_4reg, neon_store1_4reg_q,\
>> + neon_store1_one_lane, neon_store1_one_lane_q,\
>> + neon_store2_one_lane, neon_store2_one_lane_q")
>> + (const_string "neon_store_complex")]
>> + (const_string "unknown")))
>> +
>> +;; The tsv110 core is modelled as issues pipeline that has ;; the
>> +following functional units.
>> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
>> +
>> +(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
>> +"tsv110_alu1" "tsv110_alu1_issue")
>> +
>> +(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
>> +"tsv110_alu2" "tsv110_alu2_issue")
>> +
>> +(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
>> +"tsv110_alu3" "tsv110_alu3_issue")
>> +
>> +;; 2. One pipeline for complex integer operations: MDU
>> +
>> +(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
>> +"tsv110_mdu" "tsv110_mdu_issue")
>> +
>> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
>> +(define_automaton "tsv110_fsu")
>> +
>> +(define_cpu_unit "tsv110_fsu1_issue"
>> + "tsv110_fsu")
>> +(define_cpu_unit "tsv110_fsu2_issue"
>> + "tsv110_fsu")
>> +
>> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
>> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
>> +
>> +;; 4. Two pipeline for branch operations but same with alu2 and alu3:
>> +BRU1, BRU2
>> +
>> +;; 5. Two pipelines for load and store operations: LS1, LS2.
>> +
>> +(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
>> +"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
>> +"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
>> +"tsv110_ls2_issue")
>> +
>> +;; Block all issue queues.
>> +
>> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
>> + + tsv110_mdu_issue + tsv110_alu1_issue
>> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
>> +tsv110_ls2_issue")
>> +
>> +;; Simple Execution Unit:
>> +;;
>> +;; Simple ALU without shift
>> +(define_insn_reservation "tsv110_alu" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "alu_imm,logic_imm,\
>> + alu_sreg,logic_reg,\
>> + adc_imm,adc_reg,\
>> + adr,bfm,clz,rbit,rev,\
>> + shift_imm,shift_reg,\
>> + mov_imm,mov_reg,\
>> + mvn_imm,mvn_reg,\
>> + mrs,multiple,no_insn"))
>> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
>> +
>> +(define_insn_reservation "tsv110_alus" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "alus_imm,logics_imm,\
>> + alus_sreg,logics_reg,\
>> + adcs_imm,adcs_reg"))
>> + "tsv110_alu2|tsv110_alu3")
>> +
>> +;; ALU ops with shift
>> +(define_insn_reservation "tsv110_alu_shift" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "extend,\
>> + alu_shift_imm,alu_shift_reg,\
>> + crc,logic_shift_imm,logic_shift_reg,\
>> + mov_shift,mvn_shift,\
>> + mov_shift_reg,mvn_shift_reg"))
>> + "tsv110_mdu")
>> +
>> +(define_insn_reservation "tsv110_alus_shift" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
>> + logics_shift_imm,logics_shift_reg"))
>> + "tsv110_alu2")
>> +
>> +;; Multiplies instructions
>> +(define_insn_reservation "tsv110_mult" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (ior (eq_attr "mul32" "yes")
>> + (eq_attr "mul64" "yes")))
>> + "tsv110_mdu")
>> +
>> +;; Integer divide
>> +(define_insn_reservation "tsv110_div" 10
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "udiv,sdiv"))
>> + "tsv110_mdu*10")
>> +
>> +;; Block all issue pipes for a cycle
>> +(define_insn_reservation "tsv110_block" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "block"))
>> + "tsv110_block")
>> +
>> +;; Branch execution Unit
>> +;;
>> +;; Branches take two issue slot.
>> +;; No latency as there is no result
>> +(define_insn_reservation "tsv110_branch" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "branch"))
>> + "tsv110_alu1|tsv110_alu2")
>> +
>> +;; Load-store execution Unit
>> +;;
>> +;; Loads of up to two words.
>> +(define_insn_reservation "tsv110_load1" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "load1,load2"))
>> + "tsv110_ls1|tsv110_ls2")
>> +
>> +;; Stores of up to two words.
>> +(define_insn_reservation "tsv110_store1" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "store1,store2"))
>> + "tsv110_ls1|tsv110_ls2")
>> +
>> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
>> +
>> +(define_insn_reservation "tsv110_neon_abd_aba" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_abd,neon_arith_acc"))
>> + "tsv110_fsu1,tsv110_fsu2")
>> +
>> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_arith_acc_q"))
>> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_neon_arith_basic" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
>> + "tsv110_fsu1,tsv110_fsu2")
>> +
>> +(define_insn_reservation "tsv110_neon_arith_complex" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
>> + "tsv110_fsu1,tsv110_fsu2")
>> +
>> +;; Integer Multiply Instructions.
>> +;; D-form
>> +(define_insn_reservation "tsv110_neon_multiply" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_multiply"))
>> + "tsv110_fsu1")
>> +
>> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_mul_d_long"))
>> + "tsv110_fsu1")
>> +
>> +;; Q-form
>> +(define_insn_reservation "tsv110_neon_multiply_q" 8
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
>> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
>> +
>> +;; Integer Shift Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_shift_acc" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
>> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
>> + neon_shift_reg_complex"))
>> + "tsv110_fsu1")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_shift_acc_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
>> + neon_shift_reg_complex_q"))
>> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
>> +
>> +;; Floating Point Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_negabs" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_arith" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_arith_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_minmax_q" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_reductions_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_cvt_int" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type"
>> +"neon_fp_cvt_int,neon_fp_cvt_int_q"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mul" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mul_q" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mla" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
>> + neon_fp_recps_rsqrts"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_recpe_rsqrte" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_mla_q" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
>> + neon_fp_recps_rsqrts_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_fp_recpe_rsqrte_q" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +;; Miscellaneous Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_bitops" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_bitops"))
>> + "(tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_dup" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_from_gp,f_mcr"))
>> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_mov" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_mcrr"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_bitops_q" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
>> + "(tsv110_fsu1+tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_from_gp_q" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
>> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_to_gp" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
>> + "(tsv110_fsu1)")
>> +
>> +;; Load Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_lane" 8
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
>> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
>> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg1" 6
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type"
>> +"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
>> + "((tsv110_ls1)|(tsv110_ls2))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg2" 6
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
>> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg3" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
>> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld1_reg4" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
>> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld2" 8
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
>> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
>> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
>> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld3" 9
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
>> + neon_load3_one_lane,neon_load3_one_lane_q,\
>> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
>> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld4_lane" 9
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
>> + neon_load4_one_lane,neon_load4_one_lane_q"))
>> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_ld4_reg" 11
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
>> + neon_load4_one_lane,neon_load4_one_lane_q"))
>> +
>> +"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
>> +
>> +;; Store Instructions.
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_store_a" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_store_a"))
>> + "tsv110_fsu1|tsv110_fsu2")
>> +
>> +(define_insn_reservation
>> + "tsv110_neon_store_b" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_store_b"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +;; These block issue for a number of cycles proportional to the number
>> +;; of 64-bit chunks they will store, we don't attempt to model that ;;
>> +precisely, treat them as blocking execution for two cycles when ;;
>> +issued.
>> +(define_insn_reservation
>> + "tsv110_neon_store_complex" 0
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
>> + "tsv110_block*2")
>> +
>> +;; Floating-Point Operations.
>> +
>> +(define_insn_reservation "tsv110_fp_const" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fconsts,fconstd,fmov"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_add_sub" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_mac" 7
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_cvt" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_cvt"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_cvtf2i"))
>> + "(tsv110_fsu1)")
>> +
>> +(define_insn_reservation "tsv110_fp_cvti2f" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "f_cvti2f"))
>> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
>> +
>> +(define_insn_reservation "tsv110_fp_cmp" 4
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fcmps,fcmpd"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_arith" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "ffariths,ffarithd"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_fp_divs" 12
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
>> + neon_fp_div_s_q,neon_fp_div_d_q"))
>> + "(tsv110_fsu1*12)")
>> +
>> +(define_insn_reservation "tsv110_fp_sqrts" 12
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
>> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
>> + "(tsv110_fsu2*12)")
>> +
>> +(define_insn_reservation "tsv110_crypto_aes" 3
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
>> + "tsv110_fsu1")
>> +
>> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
>> + "(tsv110_fsu1|tsv110_fsu2)")
>> +
>> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_sha1_fast"))
>> + "(tsv110_fsu1)")
>> +
>> +(define_insn_reservation "tsv110_crypto_complex" 5
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
>> + "tsv110_fsu1")
>> +
>> +;; We lie with calls. They take up all issue slots, but are otherwise
>> +;; not harmful.
>> +(define_insn_reservation "tsv110_call" 1
>> + (and (eq_attr "tune" "tsv110")
>> + (eq_attr "type" "call"))
>> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
>> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
>> +)
>> +
>> +;; Simple execution unit bypasses
>> +(define_bypass 1 "tsv110_alu"
>> + "tsv110_alu,tsv110_alu_shift") (define_bypass 2
>> +"tsv110_alu_shift"
>> + "tsv110_alu,tsv110_alu_shift")
>> +
>> +;; An MLA or a MUL can feed a dependent MLA.
>> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
>> + "tsv110_neon_*mla*")
>> +
>> +;; We don't need to care about control hazards, either the branch is ;;
>> +predicted in which case we pay no penalty, or the branch is ;;
>> +mispredicted in which case instruction scheduling will be unlikely to
>> +;; help.
>> +(define_bypass 1 "tsv110_*"
>> + "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
@ 2019-01-13 9:37 wuyuan (E)
2019-01-14 10:26 ` Kyrill Tkachov
0 siblings, 1 reply; 12+ messages in thread
From: wuyuan (E) @ 2019-01-13 9:37 UTC (permalink / raw)
To: Kyrill Tkachov, gcc-patches
Cc: Zhangyichao (AB), Zhanghaijian (A), Zhangshaokun, JIANJIANG CENG
[-- Attachment #1: Type: text/plain, Size: 97823 bytes --]
Hi Kyrill:
Thank you very much for your to review my patch. I have modified the code accordingly to your opinion.
first, mul64 was renamed to widen_mul64, and use load_4, load_8 to loading 4 and 8 bytes in the latest version of GCC. besides, I change the reservation durations (the *16 part above) to 8. Test performance with some test cases, the result has improvement (Will these changes improvement performance?).
now, the tsv110 automaton size is 8641 states. I don't know if the code modification is complete. If there is any need to modify it, please let me know, thank you.
2019-01-11 wuyuan <wuyuan5@huawei.com>
* config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md: New file.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 70b0766..085c40f 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -103,7 +103,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 513aec1..97e0703 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -356,6 +356,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..e33c5cc
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*8")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load_4,load_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*8)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*8)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
-----邮件原件-----
发件人: Kyrill Tkachov [mailto:kyrylo.tkachov@foss.arm.com]
发送时间: 2019年1月8日 20:15
收件人: wuyuan (E) <wuyuan5@huawei.com>; Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com>; gcc-patches@gcc.gnu.org; Marcus Shawcroft <Marcus.Shawcroft@arm.com>; James Greenhalgh <James.Greenhalgh@arm.com>; Richard Earnshaw <Richard.Earnshaw@arm.com>
抄送: Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangshaokun <zhangshaokun@hisilicon.com>
主题: Re: add tsv110 pipeline scheduling
Hi Wuyuan,
Thanks for pinging.
Some comments inline
On 08/01/19 11:23, wuyuan (E) wrote:
> Hi , Maintainers
> I submitted a tsv110 pipeline patch on the 20th of last month , Have you reviewed the patch? look forward to your reply.
> Best Regards,
> Wuyuan
>
> 2019-1-8 wuyuan <wuyuan5@huawei.com>
>
Please use the date format 2019-01-08.
Also, only two spaces between date and your name.
> * config/aarch64/aarch64-cores.def: New CPU.
This should be
* config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md : tsv110.md new file
This should be:
* config/aarch64/tsv110.md: New file.
>
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> old mode 100644
> new mode 100755
> index 20f4924..ea9b7c5
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>
> /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>
> /* ARMv8.4-A Architecture Processors. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -349,6 +349,7 @@
> (include "thunderx.md")
> (include "../arm/xgene1.md")
> (include "thunderx2t99.md")
> +(include "tsv110.md")
>
> ;; -------------------------------------------------------------------
> ;; Jumps and other miscellaneous insns
> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2018 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it ;;
> +under the terms of the GNU General Public License as published by ;;
> +the Free Software Foundation; either version 3, or (at your option) ;;
> +any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but ;;
> +WITHOUT ANY WARRANTY; without even the implied warranty of ;;
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
> +General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License ;;
> +along with GCC; see the file COPYING3. If not see ;;
> +<http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> + "neon_arith_acc, neon_arith_acc_q,
> + neon_arith_basic, neon_arith_complex,
> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> + neon_shift_imm_complex,
> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> + neon_bitops, neon_bitops_q, neon_from_gp,
> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> + unknown"
> + (cond [
> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> + neon_reduc_add_acc_q")
> + (const_string "neon_arith_acc")
> + (eq_attr "type" "neon_arith_acc_q")
> + (const_string "neon_arith_acc_q")
> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> + neon_add_widen, neon_neg, neon_neg_q,\
> + neon_reduc_add, neon_reduc_add_q,\
> + neon_reduc_add_long, neon_sub, neon_sub_q,\
> + neon_sub_long, neon_sub_widen, neon_logic,\
> + neon_logic_q, neon_tst, neon_tst_q,\
> + neon_compare, neon_compare_q,\
> + neon_compare_zero, neon_compare_zero_q,\
> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> + neon_reduc_minmax_q")
> + (const_string "neon_arith_basic")
> + (eq_attr "type" "neon_add_halve_narrow_q,\
> + neon_add_halve, neon_add_halve_q,\
> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> + neon_qneg_q, neon_qsub, neon_qsub_q,\
> + neon_sub_halve_narrow_q")
> + (const_string "neon_arith_complex")
> +
> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> + neon_mul_h_scalar, neon_mul_s_scalar,\
> + neon_sat_mul_b, neon_sat_mul_h,\
> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
> + neon_sat_mul_s_scalar,\
> + neon_mul_b_long, neon_mul_h_long,\
> + neon_mul_s_long,\
> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> + neon_sat_mul_s_scalar_long,\
> + neon_mla_b, neon_mla_h, neon_mla_s,\
> + neon_mla_h_scalar, neon_mla_s_scalar,\
> + neon_mla_b_long, neon_mla_h_long,\
> + neon_mla_s_long,\
> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> + neon_sat_mla_s_scalar_long")
> + (const_string "neon_multiply")
> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> + neon_sat_mul_s_scalar_q,\
> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> + (const_string "neon_multiply_q")
> +
> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> + (const_string "neon_shift_acc")
> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> + neon_shift_imm_narrow_q, neon_shift_imm_long")
> + (const_string "neon_shift_imm_basic")
> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> + neon_sat_shift_imm_narrow_q")
> + (const_string "neon_shift_imm_complex")
> + (eq_attr "type" "neon_shift_reg")
> + (const_string "neon_shift_reg_basic")
> + (eq_attr "type" "neon_shift_reg_q")
> + (const_string "neon_shift_reg_basic_q")
> + (eq_attr "type" "neon_sat_shift_reg")
> + (const_string "neon_shift_reg_complex")
> + (eq_attr "type" "neon_sat_shift_reg_q")
> + (const_string "neon_shift_reg_complex_q")
> +
> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> + neon_fp_abs_s, neon_fp_abs_s_q,\
> + neon_fp_neg_d, neon_fp_neg_d_q,\
> + neon_fp_abs_d, neon_fp_abs_d_q,\
> + neon_fp_minmax_s,neon_fp_minmax_d,\
> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> + (const_string "neon_fp_negabs")
> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> + neon_fp_reduc_add_s, neon_fp_compare_s,\
> + neon_fp_round_s,\
> + neon_fp_addsub_d, neon_fp_abd_d,\
> + neon_fp_reduc_add_d, neon_fp_compare_d,\
> + neon_fp_round_d")
> + (const_string "neon_fp_arith")
> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> + neon_fp_minmax_d_q, neon_fp_round_d_q")
> + (const_string "neon_fp_arith_q")
> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> + neon_fp_reduc_minmax_d_q,\
> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> + (const_string "neon_fp_reductions_q")
> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> + neon_fp_to_int_d, neon_int_to_fp_d")
> + (const_string "neon_fp_cvt_int")
> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> + (const_string "neon_fp_cvt_int_q")
> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> + (const_string "neon_fp_cvt16")
> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> + neon_fp_mul_d")
> + (const_string "neon_fp_mul")
> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> + (const_string "neon_fp_mul_q")
> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> + neon_fp_mla_d")
> + (const_string "neon_fp_mla")
> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> + (const_string "neon_fp_mla_q")
> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> + neon_fp_recpx_s,\
> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
> + neon_fp_recpx_d")
> + (const_string "neon_fp_recpe_rsqrte")
> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> + neon_fp_recpx_s_q,\
> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> + neon_fp_recpx_d_q")
> + (const_string "neon_fp_recpe_rsqrte_q")
> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> + neon_fp_recps_d, neon_fp_rsqrts_d")
> + (const_string "neon_fp_recps_rsqrts")
> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> + (const_string "neon_fp_recps_rsqrts_q")
> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> + neon_rev, neon_permute, neon_rbit,\
> + neon_tbl1, neon_tbl2, neon_zip,\
> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> + neon_move, neon_move_q, neon_move_narrow_q")
> + (const_string "neon_bitops")
> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> + neon_rev_q, neon_permute_q, neon_rbit_q")
> + (const_string "neon_bitops_q")
> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> + (const_string "neon_from_gp")
> + (eq_attr "type" "neon_from_gp_q")
> + (const_string "neon_from_gp_q")
> +
> + (eq_attr "type" "f_loads, f_loadd,\
> + neon_load1_1reg, neon_load1_1reg_q,\
> + neon_load1_2reg, neon_load1_2reg_q")
> + (const_string "neon_load_a")
> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> + neon_load1_4reg, neon_load1_4reg_q")
> + (const_string "neon_load_b")
> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
> + neon_load2_2reg, neon_load2_2reg_q,\
> + neon_load2_all_lanes, neon_load2_all_lanes_q")
> + (const_string "neon_load_c")
> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> + neon_load3_3reg, neon_load3_3reg_q,\
> + neon_load3_one_lane, neon_load3_one_lane_q,\
> + neon_load4_4reg, neon_load4_4reg_q")
> + (const_string "neon_load_d")
> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
> + neon_load4_all_lanes, neon_load4_all_lanes_q")
> + (const_string "neon_load_e")
> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> + (const_string "neon_load_f")
> +
> + (eq_attr "type" "f_stores, f_stored,\
> + neon_store1_1reg")
> + (const_string "neon_store_a")
> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> + (const_string "neon_store_b")
> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> + neon_store3_3reg, neon_store3_3reg_q,\
> + neon_store2_4reg, neon_store2_4reg_q,\
> + neon_store4_4reg, neon_store4_4reg_q,\
> + neon_store2_2reg, neon_store2_2reg_q,\
> + neon_store3_one_lane, neon_store3_one_lane_q,\
> + neon_store4_one_lane, neon_store4_one_lane_q,\
> + neon_store1_4reg, neon_store1_4reg_q,\
> + neon_store1_one_lane, neon_store1_one_lane_q,\
> + neon_store2_one_lane, neon_store2_one_lane_q")
> + (const_string "neon_store_complex")]
> + (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as issues pipeline that has ;; the
> +following functional units.
> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
> +"tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
> +"tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
> +"tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2. One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
> +"tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
> +(define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> + "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> + "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4. Two pipeline for branch operations but same with alu2 and alu3:
> +BRU1, BRU2
> +
> +;; 5. Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
> +"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
> +"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
> +"tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> + + tsv110_mdu_issue + tsv110_alu1_issue
> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
> +tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alu_imm,logic_imm,\
> + alu_sreg,logic_reg,\
> + adc_imm,adc_reg,\
> + adr,bfm,clz,rbit,rev,\
> + shift_imm,shift_reg,\
> + mov_imm,mov_reg,\
> + mvn_imm,mvn_reg,\
> + mrs,multiple,no_insn"))
> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_imm,logics_imm,\
> + alus_sreg,logics_reg,\
> + adcs_imm,adcs_reg"))
> + "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "extend,\
> + alu_shift_imm,alu_shift_reg,\
> + crc,logic_shift_imm,logic_shift_reg,\
> + mov_shift,mvn_shift,\
> + mov_shift_reg,mvn_shift_reg"))
> + "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> + logics_shift_imm,logics_shift_reg"))
> + "tsv110_alu2")
> +
> +;; Multiplies instructions
> +(define_insn_reservation "tsv110_mult" 3
> + (and (eq_attr "tune" "tsv110")
> + (ior (eq_attr "mul32" "yes")
> + (eq_attr "mul64" "yes")))
mul64 was renamed to widen_mul64 in r266471. Can you please update your patch, otherwise it won't build.
> + "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "udiv,sdiv"))
> + "tsv110_mdu*10")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "block"))
> + "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slot.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "branch"))
> + "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "load1,load2"))
> + "tsv110_ls1|tsv110_ls2")
There are no types called load1, load2. This won't build.
Did you mean to use load_4, load_8? (for loading 4 and 8 bytes accordingly).
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "store1,store2"))
> + "tsv110_ls1|tsv110_ls2")
> +
Similarly, did you mean store_4 and store_8?
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_neon_arith_basic" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_arith_complex" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_mul_d_long"))
> + "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> + neon_shift_reg_complex"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> + neon_shift_reg_complex_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_negabs" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_minmax_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_reductions_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_cvt_int" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type"
> +"neon_fp_cvt_int,neon_fp_cvt_int_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul_q" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> + neon_fp_recps_rsqrts"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla_q" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> + neon_fp_recps_rsqrts_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte_q" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops"))
> + "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_dup" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_from_gp,f_mcr"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_mov" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_mcrr"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_from_gp_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_to_gp" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> + "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_lane" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg1" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type"
> +"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> + "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg2" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg3" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg4" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld2" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld3" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> + neon_load3_one_lane,neon_load3_one_lane_q,\
> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_reg" 11
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> +
> +"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
With the above two bugs fixed I am concerned that this automaton is much larger than other automata in config/aarch64.
This hurts GCC compile time and memory requirements. We've had bug reports in the past where people were not able to build
GCC on memory-constrained systems due to these issues.
You can check the size of the generated automata during build time by adding (automata_option "stats") to your .md file.
With this, the tsv110 automaton size is 38017 states, more than 5x the size of the next largest automaton (cortex_a53_advsimd).
This is usually due to unnecessarily large reservation durations (the *16 part above) on long-running instructions such as divisions (integer and floating-point)
and ld4 instructions, such as this one. If you use only a maximum of 8 in the reservation duration here, and in the division instructions you get a much
smaller automaton size (I see 7681 states if I change it to 8 here and in tsv110_div, tsv110_fp_sqrts and tsv110_fp_divs).
Because 8 cycles is such a large scheduling window anyway, it is unlikely that modelling the full 16 cycles will give any benefit in real world code.
That has been our experience in the past.
So I recommend you modify the model to use only a maximum of 8 in its reservation durations.
Hope this helps,
Kyrill
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_store_a" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_a"))
> + "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> + "tsv110_neon_store_b" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_b"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the number
> +;; of 64-bit chunks they will store, we don't attempt to model that ;;
> +precisely, treat them as blocking execution for two cycles when ;;
> +issued.
> +(define_insn_reservation
> + "tsv110_neon_store_complex" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
> + "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fconsts,fconstd,fmov"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvt"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvtf2i"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvti2f"))
> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fcmps,fcmpd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "ffariths,ffarithd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> + neon_fp_div_s_q,neon_fp_div_d_q"))
> + "(tsv110_fsu1*12)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> + "(tsv110_fsu2*12)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> + "tsv110_fsu1")
> +
> +;; We lie with calls. They take up all issue slots, but are otherwise
> +;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "call"))
> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> + "tsv110_alu,tsv110_alu_shift") (define_bypass 2
> +"tsv110_alu_shift"
> + "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> + "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is ;;
> +predicted in which case we pay no penalty, or the branch is ;;
> +mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> + "tsv110_call,tsv110_branch")
>
>
>
>
>
> -----邮件原件-----
> 发件人: wuyuan (E)
> 发送时间: 2019年1月3日 10:57
> 收件人: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
> 抄送: 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>
> 主题: Re: add tsv110 pipeline scheduling
>
> Hi , Maintainers
> Happy new year!
> On the 20th of last month, I submitted a tsv110 pipeline patch. I want to know if you have received it. Looking forward to your reply.
> Best Regards,
> wuyuan
>
>
>
>
> -----邮件原件-----
> 发件人: wuyuan (E)
> 发送时间: 2018年12月20日 14:06
> 收件人: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
> 抄送: Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>; 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>; Zhangshaokun <zhangshaokun@hisilicon.com>
> 主题: Re: add tsv110 pipeline scheduling
>
>
> Hi Ramana,
> Please ignore the patch in the previous email attachment (the ChangeLog has deleted in this patch..) I have already communicated with Shao Kun, he has fixed the problem of the previous patch. So I resubmitted the tsv110 pipeline patch, please review.
> The patch as follows :
>
>
>
> 2018-12-20 wuyuan <wuyuan5@huawei.com>
>
> * config/aarch64/aarch64-cores.def: New CPU.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md : tsv110.md new file
>
>
>
>
>
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> old mode 100644
> new mode 100755
> index 20f4924..ea9b7c5
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>
> /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>
> /* ARMv8.4-A Architecture Processors. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -349,6 +349,7 @@
> (include "thunderx.md")
> (include "../arm/xgene1.md")
> (include "thunderx2t99.md")
> +(include "tsv110.md")
>
> ;; -------------------------------------------------------------------
> ;; Jumps and other miscellaneous insns
> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2018 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it ;;
> +under the terms of the GNU General Public License as published by ;;
> +the Free Software Foundation; either version 3, or (at your option) ;;
> +any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but ;;
> +WITHOUT ANY WARRANTY; without even the implied warranty of ;;
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
> +General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License ;;
> +along with GCC; see the file COPYING3. If not see ;;
> +<http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> + "neon_arith_acc, neon_arith_acc_q,
> + neon_arith_basic, neon_arith_complex,
> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> + neon_shift_imm_complex,
> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> + neon_bitops, neon_bitops_q, neon_from_gp,
> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> + unknown"
> + (cond [
> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> + neon_reduc_add_acc_q")
> + (const_string "neon_arith_acc")
> + (eq_attr "type" "neon_arith_acc_q")
> + (const_string "neon_arith_acc_q")
> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> + neon_add_widen, neon_neg, neon_neg_q,\
> + neon_reduc_add, neon_reduc_add_q,\
> + neon_reduc_add_long, neon_sub, neon_sub_q,\
> + neon_sub_long, neon_sub_widen, neon_logic,\
> + neon_logic_q, neon_tst, neon_tst_q,\
> + neon_compare, neon_compare_q,\
> + neon_compare_zero, neon_compare_zero_q,\
> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> + neon_reduc_minmax_q")
> + (const_string "neon_arith_basic")
> + (eq_attr "type" "neon_add_halve_narrow_q,\
> + neon_add_halve, neon_add_halve_q,\
> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> + neon_qneg_q, neon_qsub, neon_qsub_q,\
> + neon_sub_halve_narrow_q")
> + (const_string "neon_arith_complex")
> +
> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> + neon_mul_h_scalar, neon_mul_s_scalar,\
> + neon_sat_mul_b, neon_sat_mul_h,\
> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
> + neon_sat_mul_s_scalar,\
> + neon_mul_b_long, neon_mul_h_long,\
> + neon_mul_s_long,\
> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> + neon_sat_mul_s_scalar_long,\
> + neon_mla_b, neon_mla_h, neon_mla_s,\
> + neon_mla_h_scalar, neon_mla_s_scalar,\
> + neon_mla_b_long, neon_mla_h_long,\
> + neon_mla_s_long,\
> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> + neon_sat_mla_s_scalar_long")
> + (const_string "neon_multiply")
> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> + neon_sat_mul_s_scalar_q,\
> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> + (const_string "neon_multiply_q")
> +
> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> + (const_string "neon_shift_acc")
> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> + neon_shift_imm_narrow_q, neon_shift_imm_long")
> + (const_string "neon_shift_imm_basic")
> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> + neon_sat_shift_imm_narrow_q")
> + (const_string "neon_shift_imm_complex")
> + (eq_attr "type" "neon_shift_reg")
> + (const_string "neon_shift_reg_basic")
> + (eq_attr "type" "neon_shift_reg_q")
> + (const_string "neon_shift_reg_basic_q")
> + (eq_attr "type" "neon_sat_shift_reg")
> + (const_string "neon_shift_reg_complex")
> + (eq_attr "type" "neon_sat_shift_reg_q")
> + (const_string "neon_shift_reg_complex_q")
> +
> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> + neon_fp_abs_s, neon_fp_abs_s_q,\
> + neon_fp_neg_d, neon_fp_neg_d_q,\
> + neon_fp_abs_d, neon_fp_abs_d_q,\
> + neon_fp_minmax_s,neon_fp_minmax_d,\
> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> + (const_string "neon_fp_negabs")
> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> + neon_fp_reduc_add_s, neon_fp_compare_s,\
> + neon_fp_round_s,\
> + neon_fp_addsub_d, neon_fp_abd_d,\
> + neon_fp_reduc_add_d, neon_fp_compare_d,\
> + neon_fp_round_d")
> + (const_string "neon_fp_arith")
> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> + neon_fp_minmax_d_q, neon_fp_round_d_q")
> + (const_string "neon_fp_arith_q")
> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> + neon_fp_reduc_minmax_d_q,\
> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> + (const_string "neon_fp_reductions_q")
> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> + neon_fp_to_int_d, neon_int_to_fp_d")
> + (const_string "neon_fp_cvt_int")
> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> + (const_string "neon_fp_cvt_int_q")
> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> + (const_string "neon_fp_cvt16")
> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> + neon_fp_mul_d")
> + (const_string "neon_fp_mul")
> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> + (const_string "neon_fp_mul_q")
> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> + neon_fp_mla_d")
> + (const_string "neon_fp_mla")
> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> + (const_string "neon_fp_mla_q")
> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> + neon_fp_recpx_s,\
> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
> + neon_fp_recpx_d")
> + (const_string "neon_fp_recpe_rsqrte")
> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> + neon_fp_recpx_s_q,\
> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> + neon_fp_recpx_d_q")
> + (const_string "neon_fp_recpe_rsqrte_q")
> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> + neon_fp_recps_d, neon_fp_rsqrts_d")
> + (const_string "neon_fp_recps_rsqrts")
> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> + (const_string "neon_fp_recps_rsqrts_q")
> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> + neon_rev, neon_permute, neon_rbit,\
> + neon_tbl1, neon_tbl2, neon_zip,\
> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> + neon_move, neon_move_q, neon_move_narrow_q")
> + (const_string "neon_bitops")
> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> + neon_rev_q, neon_permute_q, neon_rbit_q")
> + (const_string "neon_bitops_q")
> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> + (const_string "neon_from_gp")
> + (eq_attr "type" "neon_from_gp_q")
> + (const_string "neon_from_gp_q")
> +
> + (eq_attr "type" "f_loads, f_loadd,\
> + neon_load1_1reg, neon_load1_1reg_q,\
> + neon_load1_2reg, neon_load1_2reg_q")
> + (const_string "neon_load_a")
> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> + neon_load1_4reg, neon_load1_4reg_q")
> + (const_string "neon_load_b")
> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
> + neon_load2_2reg, neon_load2_2reg_q,\
> + neon_load2_all_lanes, neon_load2_all_lanes_q")
> + (const_string "neon_load_c")
> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> + neon_load3_3reg, neon_load3_3reg_q,\
> + neon_load3_one_lane, neon_load3_one_lane_q,\
> + neon_load4_4reg, neon_load4_4reg_q")
> + (const_string "neon_load_d")
> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
> + neon_load4_all_lanes, neon_load4_all_lanes_q")
> + (const_string "neon_load_e")
> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> + (const_string "neon_load_f")
> +
> + (eq_attr "type" "f_stores, f_stored,\
> + neon_store1_1reg")
> + (const_string "neon_store_a")
> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> + (const_string "neon_store_b")
> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> + neon_store3_3reg, neon_store3_3reg_q,\
> + neon_store2_4reg, neon_store2_4reg_q,\
> + neon_store4_4reg, neon_store4_4reg_q,\
> + neon_store2_2reg, neon_store2_2reg_q,\
> + neon_store3_one_lane, neon_store3_one_lane_q,\
> + neon_store4_one_lane, neon_store4_one_lane_q,\
> + neon_store1_4reg, neon_store1_4reg_q,\
> + neon_store1_one_lane, neon_store1_one_lane_q,\
> + neon_store2_one_lane, neon_store2_one_lane_q")
> + (const_string "neon_store_complex")]
> + (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as issues pipeline that has ;; the
> +following functional units.
> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
> +"tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
> +"tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
> +"tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2. One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
> +"tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
> +(define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> + "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> + "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4. Two pipeline for branch operations but same with alu2 and alu3:
> +BRU1, BRU2
> +
> +;; 5. Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
> +"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
> +"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
> +"tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> + + tsv110_mdu_issue + tsv110_alu1_issue
> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
> +tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alu_imm,logic_imm,\
> + alu_sreg,logic_reg,\
> + adc_imm,adc_reg,\
> + adr,bfm,clz,rbit,rev,\
> + shift_imm,shift_reg,\
> + mov_imm,mov_reg,\
> + mvn_imm,mvn_reg,\
> + mrs,multiple,no_insn"))
> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_imm,logics_imm,\
> + alus_sreg,logics_reg,\
> + adcs_imm,adcs_reg"))
> + "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "extend,\
> + alu_shift_imm,alu_shift_reg,\
> + crc,logic_shift_imm,logic_shift_reg,\
> + mov_shift,mvn_shift,\
> + mov_shift_reg,mvn_shift_reg"))
> + "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> + logics_shift_imm,logics_shift_reg"))
> + "tsv110_alu2")
> +
> +;; Multiplies instructions
> +(define_insn_reservation "tsv110_mult" 3
> + (and (eq_attr "tune" "tsv110")
> + (ior (eq_attr "mul32" "yes")
> + (eq_attr "mul64" "yes")))
> + "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "udiv,sdiv"))
> + "tsv110_mdu*10")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "block"))
> + "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slot.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "branch"))
> + "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "load1,load2"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "store1,store2"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_neon_arith_basic" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_arith_complex" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_mul_d_long"))
> + "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> + neon_shift_reg_complex"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> + neon_shift_reg_complex_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_negabs" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_minmax_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_reductions_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_cvt_int" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type"
> +"neon_fp_cvt_int,neon_fp_cvt_int_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul_q" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> + neon_fp_recps_rsqrts"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla_q" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> + neon_fp_recps_rsqrts_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte_q" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops"))
> + "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_dup" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_from_gp,f_mcr"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_mov" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_mcrr"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_from_gp_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_to_gp" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> + "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_lane" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg1" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type"
> +"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> + "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg2" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg3" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg4" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld2" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld3" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> + neon_load3_one_lane,neon_load3_one_lane_q,\
> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_reg" 11
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> +
> +"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_store_a" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_a"))
> + "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> + "tsv110_neon_store_b" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_b"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the number
> +;; of 64-bit chunks they will store, we don't attempt to model that ;;
> +precisely, treat them as blocking execution for two cycles when ;;
> +issued.
> +(define_insn_reservation
> + "tsv110_neon_store_complex" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
> + "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fconsts,fconstd,fmov"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvt"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvtf2i"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvti2f"))
> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fcmps,fcmpd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "ffariths,ffarithd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> + neon_fp_div_s_q,neon_fp_div_d_q"))
> + "(tsv110_fsu1*12)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> + "(tsv110_fsu2*12)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> + "tsv110_fsu1")
> +
> +;; We lie with calls. They take up all issue slots, but are otherwise
> +;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "call"))
> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> + "tsv110_alu,tsv110_alu_shift") (define_bypass 2
> +"tsv110_alu_shift"
> + "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> + "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is ;;
> +predicted in which case we pay no penalty, or the branch is ;;
> +mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> + "tsv110_call,tsv110_branch")
[-- Attachment #2: tsv110_pipeline.patch --]
[-- Type: application/octet-stream, Size: 27073 bytes --]
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 70b0766..085c40f 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -103,7 +103,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 513aec1..97e0703 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -356,6 +356,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..e33c5cc
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*8")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load_4,load_8"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*8)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*8)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
2019-01-08 11:23 wuyuan (E)
@ 2019-01-08 12:16 ` Kyrill Tkachov
0 siblings, 0 replies; 12+ messages in thread
From: Kyrill Tkachov @ 2019-01-08 12:16 UTC (permalink / raw)
To: wuyuan (E),
Ramana Radhakrishnan, gcc-patches, Marcus Shawcroft,
James Greenhalgh, Richard Earnshaw
Cc: Zhangyichao (AB), Zhanghaijian (A), Zhangshaokun
Hi Wuyuan,
Thanks for pinging.
Some comments inline
On 08/01/19 11:23, wuyuan (E) wrote:
> Hi , Maintainers
> I submitted a tsv110 pipeline patch on the 20th of last month , Have you reviewed the patch? look forward to your reply.
> Best Regards,
> Wuyuan
>
> 2019-1-8 wuyuan <wuyuan5@huawei.com>
>
Please use the date format 2019-01-08.
Also, only two spaces between date and your name.
> * config/aarch64/aarch64-cores.def: New CPU.
This should be
* config/aarch64/aarch64-cores.def (tsv1100): Change scheduling model.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md : tsv110.md new file
This should be:
* config/aarch64/tsv110.md: New file.
>
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> old mode 100644
> new mode 100755
> index 20f4924..ea9b7c5
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>
> /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>
> /* ARMv8.4-A Architecture Processors. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -349,6 +349,7 @@
> (include "thunderx.md")
> (include "../arm/xgene1.md")
> (include "thunderx2t99.md")
> +(include "tsv110.md")
>
> ;; -------------------------------------------------------------------
> ;; Jumps and other miscellaneous insns
> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2018 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it ;;
> +under the terms of the GNU General Public License as published by ;;
> +the Free Software Foundation; either version 3, or (at your option) ;;
> +any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but ;;
> +WITHOUT ANY WARRANTY; without even the implied warranty of ;;
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
> +General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License ;;
> +along with GCC; see the file COPYING3. If not see ;;
> +<http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> + "neon_arith_acc, neon_arith_acc_q,
> + neon_arith_basic, neon_arith_complex,
> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> + neon_shift_imm_complex,
> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> + neon_bitops, neon_bitops_q, neon_from_gp,
> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> + unknown"
> + (cond [
> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> + neon_reduc_add_acc_q")
> + (const_string "neon_arith_acc")
> + (eq_attr "type" "neon_arith_acc_q")
> + (const_string "neon_arith_acc_q")
> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> + neon_add_widen, neon_neg, neon_neg_q,\
> + neon_reduc_add, neon_reduc_add_q,\
> + neon_reduc_add_long, neon_sub, neon_sub_q,\
> + neon_sub_long, neon_sub_widen, neon_logic,\
> + neon_logic_q, neon_tst, neon_tst_q,\
> + neon_compare, neon_compare_q,\
> + neon_compare_zero, neon_compare_zero_q,\
> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> + neon_reduc_minmax_q")
> + (const_string "neon_arith_basic")
> + (eq_attr "type" "neon_add_halve_narrow_q,\
> + neon_add_halve, neon_add_halve_q,\
> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> + neon_qneg_q, neon_qsub, neon_qsub_q,\
> + neon_sub_halve_narrow_q")
> + (const_string "neon_arith_complex")
> +
> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> + neon_mul_h_scalar, neon_mul_s_scalar,\
> + neon_sat_mul_b, neon_sat_mul_h,\
> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
> + neon_sat_mul_s_scalar,\
> + neon_mul_b_long, neon_mul_h_long,\
> + neon_mul_s_long,\
> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> + neon_sat_mul_s_scalar_long,\
> + neon_mla_b, neon_mla_h, neon_mla_s,\
> + neon_mla_h_scalar, neon_mla_s_scalar,\
> + neon_mla_b_long, neon_mla_h_long,\
> + neon_mla_s_long,\
> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> + neon_sat_mla_s_scalar_long")
> + (const_string "neon_multiply")
> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> + neon_sat_mul_s_scalar_q,\
> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> + (const_string "neon_multiply_q")
> +
> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> + (const_string "neon_shift_acc")
> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> + neon_shift_imm_narrow_q, neon_shift_imm_long")
> + (const_string "neon_shift_imm_basic")
> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> + neon_sat_shift_imm_narrow_q")
> + (const_string "neon_shift_imm_complex")
> + (eq_attr "type" "neon_shift_reg")
> + (const_string "neon_shift_reg_basic")
> + (eq_attr "type" "neon_shift_reg_q")
> + (const_string "neon_shift_reg_basic_q")
> + (eq_attr "type" "neon_sat_shift_reg")
> + (const_string "neon_shift_reg_complex")
> + (eq_attr "type" "neon_sat_shift_reg_q")
> + (const_string "neon_shift_reg_complex_q")
> +
> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> + neon_fp_abs_s, neon_fp_abs_s_q,\
> + neon_fp_neg_d, neon_fp_neg_d_q,\
> + neon_fp_abs_d, neon_fp_abs_d_q,\
> + neon_fp_minmax_s,neon_fp_minmax_d,\
> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> + (const_string "neon_fp_negabs")
> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> + neon_fp_reduc_add_s, neon_fp_compare_s,\
> + neon_fp_round_s,\
> + neon_fp_addsub_d, neon_fp_abd_d,\
> + neon_fp_reduc_add_d, neon_fp_compare_d,\
> + neon_fp_round_d")
> + (const_string "neon_fp_arith")
> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> + neon_fp_minmax_d_q, neon_fp_round_d_q")
> + (const_string "neon_fp_arith_q")
> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> + neon_fp_reduc_minmax_d_q,\
> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> + (const_string "neon_fp_reductions_q")
> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> + neon_fp_to_int_d, neon_int_to_fp_d")
> + (const_string "neon_fp_cvt_int")
> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> + (const_string "neon_fp_cvt_int_q")
> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> + (const_string "neon_fp_cvt16")
> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> + neon_fp_mul_d")
> + (const_string "neon_fp_mul")
> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> + (const_string "neon_fp_mul_q")
> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> + neon_fp_mla_d")
> + (const_string "neon_fp_mla")
> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> + (const_string "neon_fp_mla_q")
> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> + neon_fp_recpx_s,\
> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
> + neon_fp_recpx_d")
> + (const_string "neon_fp_recpe_rsqrte")
> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> + neon_fp_recpx_s_q,\
> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> + neon_fp_recpx_d_q")
> + (const_string "neon_fp_recpe_rsqrte_q")
> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> + neon_fp_recps_d, neon_fp_rsqrts_d")
> + (const_string "neon_fp_recps_rsqrts")
> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> + (const_string "neon_fp_recps_rsqrts_q")
> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> + neon_rev, neon_permute, neon_rbit,\
> + neon_tbl1, neon_tbl2, neon_zip,\
> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> + neon_move, neon_move_q, neon_move_narrow_q")
> + (const_string "neon_bitops")
> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> + neon_rev_q, neon_permute_q, neon_rbit_q")
> + (const_string "neon_bitops_q")
> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> + (const_string "neon_from_gp")
> + (eq_attr "type" "neon_from_gp_q")
> + (const_string "neon_from_gp_q")
> +
> + (eq_attr "type" "f_loads, f_loadd,\
> + neon_load1_1reg, neon_load1_1reg_q,\
> + neon_load1_2reg, neon_load1_2reg_q")
> + (const_string "neon_load_a")
> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> + neon_load1_4reg, neon_load1_4reg_q")
> + (const_string "neon_load_b")
> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
> + neon_load2_2reg, neon_load2_2reg_q,\
> + neon_load2_all_lanes, neon_load2_all_lanes_q")
> + (const_string "neon_load_c")
> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> + neon_load3_3reg, neon_load3_3reg_q,\
> + neon_load3_one_lane, neon_load3_one_lane_q,\
> + neon_load4_4reg, neon_load4_4reg_q")
> + (const_string "neon_load_d")
> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
> + neon_load4_all_lanes, neon_load4_all_lanes_q")
> + (const_string "neon_load_e")
> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> + (const_string "neon_load_f")
> +
> + (eq_attr "type" "f_stores, f_stored,\
> + neon_store1_1reg")
> + (const_string "neon_store_a")
> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> + (const_string "neon_store_b")
> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> + neon_store3_3reg, neon_store3_3reg_q,\
> + neon_store2_4reg, neon_store2_4reg_q,\
> + neon_store4_4reg, neon_store4_4reg_q,\
> + neon_store2_2reg, neon_store2_2reg_q,\
> + neon_store3_one_lane, neon_store3_one_lane_q,\
> + neon_store4_one_lane, neon_store4_one_lane_q,\
> + neon_store1_4reg, neon_store1_4reg_q,\
> + neon_store1_one_lane, neon_store1_one_lane_q,\
> + neon_store2_one_lane, neon_store2_one_lane_q")
> + (const_string "neon_store_complex")]
> + (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as issues pipeline that has ;; the
> +following functional units.
> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
> +"tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
> +"tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
> +"tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2. One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
> +"tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
> +(define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> + "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> + "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4. Two pipeline for branch operations but same with alu2 and alu3:
> +BRU1, BRU2
> +
> +;; 5. Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
> +"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
> +"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
> +"tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> + + tsv110_mdu_issue + tsv110_alu1_issue
> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
> +tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alu_imm,logic_imm,\
> + alu_sreg,logic_reg,\
> + adc_imm,adc_reg,\
> + adr,bfm,clz,rbit,rev,\
> + shift_imm,shift_reg,\
> + mov_imm,mov_reg,\
> + mvn_imm,mvn_reg,\
> + mrs,multiple,no_insn"))
> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_imm,logics_imm,\
> + alus_sreg,logics_reg,\
> + adcs_imm,adcs_reg"))
> + "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "extend,\
> + alu_shift_imm,alu_shift_reg,\
> + crc,logic_shift_imm,logic_shift_reg,\
> + mov_shift,mvn_shift,\
> + mov_shift_reg,mvn_shift_reg"))
> + "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> + logics_shift_imm,logics_shift_reg"))
> + "tsv110_alu2")
> +
> +;; Multiplies instructions
> +(define_insn_reservation "tsv110_mult" 3
> + (and (eq_attr "tune" "tsv110")
> + (ior (eq_attr "mul32" "yes")
> + (eq_attr "mul64" "yes")))
mul64 was renamed to widen_mul64 in r266471. Can you please update your patch, otherwise it won't build.
> + "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "udiv,sdiv"))
> + "tsv110_mdu*10")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "block"))
> + "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slot.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "branch"))
> + "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "load1,load2"))
> + "tsv110_ls1|tsv110_ls2")
There are no types called load1, load2. This won't build.
Did you mean to use load_4, load_8? (for loading 4 and 8 bytes accordingly).
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "store1,store2"))
> + "tsv110_ls1|tsv110_ls2")
> +
Similarly, did you mean store_4 and store_8?
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_neon_arith_basic" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_arith_complex" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_mul_d_long"))
> + "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> + neon_shift_reg_complex"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> + neon_shift_reg_complex_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_negabs" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_minmax_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_reductions_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_cvt_int" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type"
> +"neon_fp_cvt_int,neon_fp_cvt_int_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul_q" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> + neon_fp_recps_rsqrts"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla_q" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> + neon_fp_recps_rsqrts_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte_q" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops"))
> + "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_dup" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_from_gp,f_mcr"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_mov" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_mcrr"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_from_gp_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_to_gp" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> + "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_lane" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg1" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type"
> +"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> + "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg2" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg3" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg4" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld2" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld3" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> + neon_load3_one_lane,neon_load3_one_lane_q,\
> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_reg" 11
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> +
> +"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
With the above two bugs fixed I am concerned that this automaton is much larger than other automata in config/aarch64.
This hurts GCC compile time and memory requirements. We've had bug reports in the past where people were not able to build
GCC on memory-constrained systems due to these issues.
You can check the size of the generated automata during build time by adding (automata_option "stats") to your .md file.
With this, the tsv110 automaton size is 38017 states, more than 5x the size of the next largest automaton (cortex_a53_advsimd).
This is usually due to unnecessarily large reservation durations (the *16 part above) on long-running instructions such as divisions (integer and floating-point)
and ld4 instructions, such as this one. If you use only a maximum of 8 in the reservation duration here, and in the division instructions you get a much
smaller automaton size (I see 7681 states if I change it to 8 here and in tsv110_div, tsv110_fp_sqrts and tsv110_fp_divs).
Because 8 cycles is such a large scheduling window anyway, it is unlikely that modelling the full 16 cycles will give any benefit in real world code.
That has been our experience in the past.
So I recommend you modify the model to use only a maximum of 8 in its reservation durations.
Hope this helps,
Kyrill
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_store_a" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_a"))
> + "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> + "tsv110_neon_store_b" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_b"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the number
> +;; of 64-bit chunks they will store, we don't attempt to model that ;;
> +precisely, treat them as blocking execution for two cycles when ;;
> +issued.
> +(define_insn_reservation
> + "tsv110_neon_store_complex" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
> + "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fconsts,fconstd,fmov"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvt"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvtf2i"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvti2f"))
> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fcmps,fcmpd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "ffariths,ffarithd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> + neon_fp_div_s_q,neon_fp_div_d_q"))
> + "(tsv110_fsu1*12)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> + "(tsv110_fsu2*12)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> + "tsv110_fsu1")
> +
> +;; We lie with calls. They take up all issue slots, but are otherwise
> +;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "call"))
> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> + "tsv110_alu,tsv110_alu_shift") (define_bypass 2
> +"tsv110_alu_shift"
> + "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> + "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is ;;
> +predicted in which case we pay no penalty, or the branch is ;;
> +mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> + "tsv110_call,tsv110_branch")
>
>
>
>
>
> -----é®ä»¶å件-----
> å件人: wuyuan (E)
> åéæ¶é´: 2019å¹´1æ3æ¥ 10:57
> æ¶ä»¶äºº: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
> æé: 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>
> 主é¢: Re: add tsv110 pipeline scheduling
>
> Hi , Maintainers
> Happy new year!
> On the 20th of last month, I submitted a tsv110 pipeline patch. I want to know if you have received it. Looking forward to your reply.
> Best Regards,
> wuyuan
>
>
>
>
> -----é®ä»¶å件-----
> å件人: wuyuan (E)
> åéæ¶é´: 2018å¹´12æ20æ¥ 14:06
> æ¶ä»¶äºº: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
> æé: Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>; 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>; Zhangshaokun <zhangshaokun@hisilicon.com>
> 主é¢: Re: add tsv110 pipeline scheduling
>
>
> Hi Ramana,
> Please ignore the patch in the previous email attachment (the ChangeLog has deleted in this patch..) I have already communicated with Shao Kun, he has fixed the problem of the previous patch. So I resubmitted the tsv110 pipeline patch, please review.
> The patch as follows :
>
>
>
> 2018-12-20 wuyuan <wuyuan5@huawei.com>
>
> * config/aarch64/aarch64-cores.def: New CPU.
> * config/aarch64/aarch64.md : Add "tsv110.md"
> * config/aarch64/tsv110.md : tsv110.md new file
>
>
>
>
>
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> old mode 100644
> new mode 100755
> index 20f4924..ea9b7c5
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>
> /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>
> /* ARMv8.4-A Architecture Processors. */
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -349,6 +349,7 @@
> (include "thunderx.md")
> (include "../arm/xgene1.md")
> (include "thunderx2t99.md")
> +(include "tsv110.md")
>
> ;; -------------------------------------------------------------------
> ;; Jumps and other miscellaneous insns
> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2018 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it ;;
> +under the terms of the GNU General Public License as published by ;;
> +the Free Software Foundation; either version 3, or (at your option) ;;
> +any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but ;;
> +WITHOUT ANY WARRANTY; without even the implied warranty of ;;
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
> +General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License ;;
> +along with GCC; see the file COPYING3. If not see ;;
> +<http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> + "neon_arith_acc, neon_arith_acc_q,
> + neon_arith_basic, neon_arith_complex,
> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> + neon_shift_imm_complex,
> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> + neon_bitops, neon_bitops_q, neon_from_gp,
> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> + unknown"
> + (cond [
> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> + neon_reduc_add_acc_q")
> + (const_string "neon_arith_acc")
> + (eq_attr "type" "neon_arith_acc_q")
> + (const_string "neon_arith_acc_q")
> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> + neon_add_widen, neon_neg, neon_neg_q,\
> + neon_reduc_add, neon_reduc_add_q,\
> + neon_reduc_add_long, neon_sub, neon_sub_q,\
> + neon_sub_long, neon_sub_widen, neon_logic,\
> + neon_logic_q, neon_tst, neon_tst_q,\
> + neon_compare, neon_compare_q,\
> + neon_compare_zero, neon_compare_zero_q,\
> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> + neon_reduc_minmax_q")
> + (const_string "neon_arith_basic")
> + (eq_attr "type" "neon_add_halve_narrow_q,\
> + neon_add_halve, neon_add_halve_q,\
> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> + neon_qneg_q, neon_qsub, neon_qsub_q,\
> + neon_sub_halve_narrow_q")
> + (const_string "neon_arith_complex")
> +
> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> + neon_mul_h_scalar, neon_mul_s_scalar,\
> + neon_sat_mul_b, neon_sat_mul_h,\
> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
> + neon_sat_mul_s_scalar,\
> + neon_mul_b_long, neon_mul_h_long,\
> + neon_mul_s_long,\
> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> + neon_sat_mul_s_scalar_long,\
> + neon_mla_b, neon_mla_h, neon_mla_s,\
> + neon_mla_h_scalar, neon_mla_s_scalar,\
> + neon_mla_b_long, neon_mla_h_long,\
> + neon_mla_s_long,\
> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> + neon_sat_mla_s_scalar_long")
> + (const_string "neon_multiply")
> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> + neon_sat_mul_s_scalar_q,\
> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> + (const_string "neon_multiply_q")
> +
> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> + (const_string "neon_shift_acc")
> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> + neon_shift_imm_narrow_q, neon_shift_imm_long")
> + (const_string "neon_shift_imm_basic")
> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> + neon_sat_shift_imm_narrow_q")
> + (const_string "neon_shift_imm_complex")
> + (eq_attr "type" "neon_shift_reg")
> + (const_string "neon_shift_reg_basic")
> + (eq_attr "type" "neon_shift_reg_q")
> + (const_string "neon_shift_reg_basic_q")
> + (eq_attr "type" "neon_sat_shift_reg")
> + (const_string "neon_shift_reg_complex")
> + (eq_attr "type" "neon_sat_shift_reg_q")
> + (const_string "neon_shift_reg_complex_q")
> +
> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> + neon_fp_abs_s, neon_fp_abs_s_q,\
> + neon_fp_neg_d, neon_fp_neg_d_q,\
> + neon_fp_abs_d, neon_fp_abs_d_q,\
> + neon_fp_minmax_s,neon_fp_minmax_d,\
> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> + (const_string "neon_fp_negabs")
> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> + neon_fp_reduc_add_s, neon_fp_compare_s,\
> + neon_fp_round_s,\
> + neon_fp_addsub_d, neon_fp_abd_d,\
> + neon_fp_reduc_add_d, neon_fp_compare_d,\
> + neon_fp_round_d")
> + (const_string "neon_fp_arith")
> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> + neon_fp_minmax_d_q, neon_fp_round_d_q")
> + (const_string "neon_fp_arith_q")
> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> + neon_fp_reduc_minmax_d_q,\
> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> + (const_string "neon_fp_reductions_q")
> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> + neon_fp_to_int_d, neon_int_to_fp_d")
> + (const_string "neon_fp_cvt_int")
> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> + (const_string "neon_fp_cvt_int_q")
> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> + (const_string "neon_fp_cvt16")
> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> + neon_fp_mul_d")
> + (const_string "neon_fp_mul")
> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> + (const_string "neon_fp_mul_q")
> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> + neon_fp_mla_d")
> + (const_string "neon_fp_mla")
> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> + (const_string "neon_fp_mla_q")
> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> + neon_fp_recpx_s,\
> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
> + neon_fp_recpx_d")
> + (const_string "neon_fp_recpe_rsqrte")
> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> + neon_fp_recpx_s_q,\
> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> + neon_fp_recpx_d_q")
> + (const_string "neon_fp_recpe_rsqrte_q")
> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> + neon_fp_recps_d, neon_fp_rsqrts_d")
> + (const_string "neon_fp_recps_rsqrts")
> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> + (const_string "neon_fp_recps_rsqrts_q")
> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> + neon_rev, neon_permute, neon_rbit,\
> + neon_tbl1, neon_tbl2, neon_zip,\
> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> + neon_move, neon_move_q, neon_move_narrow_q")
> + (const_string "neon_bitops")
> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> + neon_rev_q, neon_permute_q, neon_rbit_q")
> + (const_string "neon_bitops_q")
> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> + (const_string "neon_from_gp")
> + (eq_attr "type" "neon_from_gp_q")
> + (const_string "neon_from_gp_q")
> +
> + (eq_attr "type" "f_loads, f_loadd,\
> + neon_load1_1reg, neon_load1_1reg_q,\
> + neon_load1_2reg, neon_load1_2reg_q")
> + (const_string "neon_load_a")
> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> + neon_load1_4reg, neon_load1_4reg_q")
> + (const_string "neon_load_b")
> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
> + neon_load2_2reg, neon_load2_2reg_q,\
> + neon_load2_all_lanes, neon_load2_all_lanes_q")
> + (const_string "neon_load_c")
> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> + neon_load3_3reg, neon_load3_3reg_q,\
> + neon_load3_one_lane, neon_load3_one_lane_q,\
> + neon_load4_4reg, neon_load4_4reg_q")
> + (const_string "neon_load_d")
> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
> + neon_load4_all_lanes, neon_load4_all_lanes_q")
> + (const_string "neon_load_e")
> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> + (const_string "neon_load_f")
> +
> + (eq_attr "type" "f_stores, f_stored,\
> + neon_store1_1reg")
> + (const_string "neon_store_a")
> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> + (const_string "neon_store_b")
> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> + neon_store3_3reg, neon_store3_3reg_q,\
> + neon_store2_4reg, neon_store2_4reg_q,\
> + neon_store4_4reg, neon_store4_4reg_q,\
> + neon_store2_2reg, neon_store2_2reg_q,\
> + neon_store3_one_lane, neon_store3_one_lane_q,\
> + neon_store4_one_lane, neon_store4_one_lane_q,\
> + neon_store1_4reg, neon_store1_4reg_q,\
> + neon_store1_one_lane, neon_store1_one_lane_q,\
> + neon_store2_one_lane, neon_store2_one_lane_q")
> + (const_string "neon_store_complex")]
> + (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as issues pipeline that has ;; the
> +following functional units.
> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
> +"tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
> +"tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
> +"tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2. One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
> +"tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
> +(define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> + "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> + "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4. Two pipeline for branch operations but same with alu2 and alu3:
> +BRU1, BRU2
> +
> +;; 5. Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
> +"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
> +"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
> +"tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> + + tsv110_mdu_issue + tsv110_alu1_issue
> + + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
> +tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alu_imm,logic_imm,\
> + alu_sreg,logic_reg,\
> + adc_imm,adc_reg,\
> + adr,bfm,clz,rbit,rev,\
> + shift_imm,shift_reg,\
> + mov_imm,mov_reg,\
> + mvn_imm,mvn_reg,\
> + mrs,multiple,no_insn"))
> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_imm,logics_imm,\
> + alus_sreg,logics_reg,\
> + adcs_imm,adcs_reg"))
> + "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "extend,\
> + alu_shift_imm,alu_shift_reg,\
> + crc,logic_shift_imm,logic_shift_reg,\
> + mov_shift,mvn_shift,\
> + mov_shift_reg,mvn_shift_reg"))
> + "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> + logics_shift_imm,logics_shift_reg"))
> + "tsv110_alu2")
> +
> +;; Multiplies instructions
> +(define_insn_reservation "tsv110_mult" 3
> + (and (eq_attr "tune" "tsv110")
> + (ior (eq_attr "mul32" "yes")
> + (eq_attr "mul64" "yes")))
> + "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "udiv,sdiv"))
> + "tsv110_mdu*10")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "block"))
> + "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slot.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "branch"))
> + "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "load1,load2"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "store1,store2"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_neon_arith_basic" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_arith_complex" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_mul_d_long"))
> + "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> + neon_shift_reg_complex"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> + neon_shift_reg_complex_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_negabs" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_minmax_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_reductions_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_cvt_int" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type"
> +"neon_fp_cvt_int,neon_fp_cvt_int_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul_q" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> + neon_fp_recps_rsqrts"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla_q" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> + neon_fp_recps_rsqrts_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte_q" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops"))
> + "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_dup" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_from_gp,f_mcr"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_mov" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_mcrr"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_from_gp_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_to_gp" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> + "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_lane" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg1" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type"
> +"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> + "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg2" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg3" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg4" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld2" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> + neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld3" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> + neon_load3_one_lane,neon_load3_one_lane_q,\
> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_reg" 11
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> +
> +"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_store_a" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_a"))
> + "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> + "tsv110_neon_store_b" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_b"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the number
> +;; of 64-bit chunks they will store, we don't attempt to model that ;;
> +precisely, treat them as blocking execution for two cycles when ;;
> +issued.
> +(define_insn_reservation
> + "tsv110_neon_store_complex" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
> + "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fconsts,fconstd,fmov"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvt"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvtf2i"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvti2f"))
> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fcmps,fcmpd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "ffariths,ffarithd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> + neon_fp_div_s_q,neon_fp_div_d_q"))
> + "(tsv110_fsu1*12)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> + "(tsv110_fsu2*12)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> + "tsv110_fsu1")
> +
> +;; We lie with calls. They take up all issue slots, but are otherwise
> +;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "call"))
> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> + "tsv110_alu,tsv110_alu_shift") (define_bypass 2
> +"tsv110_alu_shift"
> + "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> + "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is ;;
> +predicted in which case we pay no penalty, or the branch is ;;
> +mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> + "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
@ 2019-01-08 11:23 wuyuan (E)
2019-01-08 12:16 ` Kyrill Tkachov
0 siblings, 1 reply; 12+ messages in thread
From: wuyuan (E) @ 2019-01-08 11:23 UTC (permalink / raw)
To: Ramana Radhakrishnan, gcc-patches, marcus.shawcroft,
james.greenhalgh, Richard Earnshaw
Cc: Zhangyichao (AB), Zhanghaijian (A), Zhangshaokun
[-- Attachment #1: Type: text/plain, Size: 59080 bytes --]
Hi , Maintainers
I submitted a tsv110 pipeline patch on the 20th of last month , Have you reviewed the patch? look forward to your reply.
Best Regards,
Wuyuan
2019-1-8 wuyuan <wuyuan5@huawei.com>
* config/aarch64/aarch64-cores.def: New CPU.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md : tsv110.md new file
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it ;;
+under the terms of the GNU General Public License as published by ;;
+the Free Software Foundation; either version 3, or (at your option) ;;
+any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but ;;
+WITHOUT ANY WARRANTY; without even the implied warranty of ;;
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
+General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License ;;
+along with GCC; see the file COPYING3. If not see ;;
+<http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has ;; the
+following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
+"tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
+"tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
+"tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
+"tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3:
+BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
+"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
+"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
+"tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
+tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type"
+"neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type"
+"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+
+"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that ;;
+precisely, treat them as blocking execution for two cycles when ;;
+issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift") (define_bypass 2
+"tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is ;;
+predicted in which case we pay no penalty, or the branch is ;;
+mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
-----邮件原件-----
发件人: wuyuan (E)
发送时间: 2019年1月3日 10:57
收件人: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
抄送: 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>
主题: Re: add tsv110 pipeline scheduling
Hi , Maintainers
Happy new year!
On the 20th of last month, I submitted a tsv110 pipeline patch. I want to know if you have received it. Looking forward to your reply.
Best Regards,
wuyuan
-----邮件原件-----
发件人: wuyuan (E)
发送时间: 2018年12月20日 14:06
收件人: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
抄送: Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>; 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>; Zhangshaokun <zhangshaokun@hisilicon.com>
主题: Re: add tsv110 pipeline scheduling
Hi Ramana,
Please ignore the patch in the previous email attachment (the ChangeLog has deleted in this patch..) I have already communicated with Shao Kun, he has fixed the problem of the previous patch. So I resubmitted the tsv110 pipeline patch, please review.
The patch as follows :
2018-12-20 wuyuan <wuyuan5@huawei.com>
* config/aarch64/aarch64-cores.def: New CPU.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md : tsv110.md new file
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it ;;
+under the terms of the GNU General Public License as published by ;;
+the Free Software Foundation; either version 3, or (at your option) ;;
+any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but ;;
+WITHOUT ANY WARRANTY; without even the implied warranty of ;;
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
+General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License ;;
+along with GCC; see the file COPYING3. If not see ;;
+<http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has ;; the
+following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
+"tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
+"tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
+"tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
+"tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3:
+BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
+"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
+"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
+"tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
+tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type"
+"neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type"
+"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+
+"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that ;;
+precisely, treat them as blocking execution for two cycles when ;;
+issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift") (define_bypass 2
+"tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is ;;
+predicted in which case we pay no penalty, or the branch is ;;
+mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
[-- Attachment #2: tsv110_pipeline.patch --]
[-- Type: application/octet-stream, Size: 27120 bytes --]
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
old mode 100644
new mode 100755
index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
@ 2019-01-03 2:56 wuyuan (E)
0 siblings, 0 replies; 12+ messages in thread
From: wuyuan (E) @ 2019-01-03 2:56 UTC (permalink / raw)
To: Ramana Radhakrishnan, gcc-patches
Cc: nickc, Richard Earnshaw, Kyrylo Tkachov, nd
[-- Attachment #1: Type: text/plain, Size: 29927 bytes --]
Hi , guys
Happy new year!
On the 20th of last month, I submitted a tsv110 pipeline patch. I want to know if you have received it. Looking forward to your reply.
Best Regards,
wuyuan
-----邮件原件-----
发件人: wuyuan (E)
发送时间: 2018年12月20日 14:06
收件人: 'Ramana Radhakrishnan' <Ramana.Radhakrishnan@arm.com>; 'gcc-patches@gcc.gnu.org' <gcc-patches@gcc.gnu.org>
抄送: Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>; 'nickc@redhat.com' <nickc@redhat.com>; 'Richard Earnshaw' <Richard.Earnshaw@arm.com>; 'Kyrylo Tkachov' <Kyrylo.Tkachov@arm.com>; 'nd' <nd@arm.com>; Zhangshaokun <zhangshaokun@hisilicon.com>
主题: Re: add tsv110 pipeline scheduling
Hi Ramana,
Please ignore the patch in the previous email attachment (the ChangeLog has deleted in this patch..) I have already communicated with Shao Kun, he has fixed the problem of the previous patch. So I resubmitted the tsv110 pipeline patch, please review.
The patch as follows :
2018-12-20 wuyuan <wuyuan5@huawei.com>
* config/aarch64/aarch64-cores.def: New CPU.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md : tsv110.md new file
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md old mode 100644 new mode 100755 index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md new file mode 100644 index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it ;;
+under the terms of the GNU General Public License as published by ;;
+the Free Software Foundation; either version 3, or (at your option) ;;
+any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but ;;
+WITHOUT ANY WARRANTY; without even the implied warranty of ;;
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
+General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License ;;
+along with GCC; see the file COPYING3. If not see ;;
+<http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has ;; the
+following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
+"tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
+"tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
+"tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
+"tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3:
+BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
+"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
+"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
+"tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue +
+tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type"
+"neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type"
+"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+
+"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that ;;
+precisely, treat them as blocking execution for two cycles when ;;
+issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift") (define_bypass 2
+"tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is ;;
+predicted in which case we pay no penalty, or the branch is ;;
+mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
[-- Attachment #2: tsv110_pipeline.patch --]
[-- Type: application/octet-stream, Size: 27120 bytes --]
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
old mode 100644
new mode 100755
index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
@ 2018-12-20 6:06 wuyuan (E)
0 siblings, 0 replies; 12+ messages in thread
From: wuyuan (E) @ 2018-12-20 6:06 UTC (permalink / raw)
To: Ramana Radhakrishnan, gcc-patches
Cc: Zhanghaijian (A), Zhangyichao (AB), Yangfei (Felix),
nickc, Richard Earnshaw, Kyrylo Tkachov, nd, Zhangshaokun
[-- Attachment #1: Type: text/plain, Size: 28387 bytes --]
Hi Ramana,
Please ignore the patch in the previous email attachment (the ChangeLog has deleted in this patch..) I have already communicated with Shao Kun, he has fixed the problem of the previous patch. So I resubmitted the tsv110 pipeline patch, please review.
The patch as follows :
2018-12-20 wuyuan <wuyuan5@huawei.com>
* config/aarch64/aarch64-cores.def: New CPU.
* config/aarch64/aarch64.md : Add "tsv110.md"
* config/aarch64/tsv110.md : tsv110.md new file
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
old mode 100644
new mode 100755
index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
[-- Attachment #2: tsv110_pipeline.patch --]
[-- Type: application/octet-stream, Size: 27120 bytes --]
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
old mode 100644
new mode 100755
index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: add tsv110 pipeline scheduling
@ 2018-12-20 2:34 wuyuan (E)
0 siblings, 0 replies; 12+ messages in thread
From: wuyuan (E) @ 2018-12-20 2:34 UTC (permalink / raw)
To: Ramana Radhakrishnan, gcc-patches
Cc: Zhanghaijian (A), Zhangyichao (AB), Yangfei (Felix),
nickc, Richard Earnshaw, Kyrylo Tkachov, nd, Zhangshaokun,
wuyuan (E)
[-- Attachment #1: Type: text/plain, Size: 63183 bytes --]
Hi Ramana,
I have already communicated with Shao Kun, he has fixed the problem of the previous patch. So I resubmitted the tsv 110 pipeline patch, please review.
The patch as follows :
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
old mode 100644
new mode 100755
index b1eed3b..5611dd0
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2018-12-20 wuyuan
+
+ * config/aarch64/aarch64-cores.def: New CPU.
+ * config/aarch64/aarch64.md : Add "tsv110.md"
+ * config/aarch64/tsv110.md : tsv110.md new file
+
2018-12-20 Alan Modra <amodra@gmail.com>
* config/rs6000/sysv4.h (GNU_USER_DYNAMIC_LINKER): Define.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
old mode 100644
new mode 100755
index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
from: Ramana Radhakrishnan [mailto:Ramana.Radhakrishnan@arm.com]
send time: 2018年12月14日 17:34
to: wuyuan (E) <wuyuan5@huawei.com>; gcc-patches@gcc.gnu.org
copy: Zhanghaijian (A) <z.zhanghaijian@huawei.com>; Zhangyichao (AB) <zhangyichao.zhang@huawei.com>; Yangfei (Felix) <felix.yang@huawei.com>; nickc@redhat.com; Richard Earnshaw <Richard.Earnshaw@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; nd <nd@arm.com>
subject: Re: add taishanv110 pipeline scheduling
Hi Wuyuan,
On 06/12/2018 01:31, wuyuan (E) wrote:
> Hi ARM maintainers:
> The taishanv110 core uses generic pipeline scheduling, which restricted the performance of taishanv110 core. By adding the pipeline scheduling of taishanv110 core in GCC,The performance of taishanv110 has been improved.
> The patch as follows, please join.
Who is looking to fix the architectural version of the tsv110 like the LLVM description here https://reviews.llvm.org/D53908 ?
The GCC implementation considers this to be an armv8.4-A part while it really appears to be an armv8.2-A part with some optional extensions based on the link above ?
We are in the run up to the GCC 9 release so it would be good to get this fixed up before that.
regards
Ramana
>
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog old mode 100644 new mode
> 100755 index c4ec556..d6cf1d3
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,9 @@
> +2018-12-05 wuyuan <wuyuan5@huawei.com>
> +
> + * config/aarch64/aarch64-cores.def: New CPU.
> + * config/aarch64/aarch64.md : Add "tsv110.md"
> + * gcc/config/aarch64/tsv110.md : pipeline description
> +
> 2018-11-26 David Malcolm <dmalcolm@redhat.com>
>
> * dump-context.h (dump_context::dump_loc): Convert 1st
> param from diff --git a/gcc/config/aarch64/aarch64-cores.def
> b/gcc/config/aarch64/aarch64-cores.def
> index 74be5db..8e84844 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -99,7 +99,7 @@ AARCH64_CORE("ares", ares, cortexa57, 8_2A,
> AARCH64_FL_FOR_ARCH8_2 | AARCH64_F
> /* ARMv8.4-A Architecture Processors. */
>
> /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110", tsv110, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110", tsv110, tsv110, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
>
> /* Qualcomm ('Q') cores. */
> AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
> diff --git a/gcc/config/aarch64/aarch64.md
> b/gcc/config/aarch64/aarch64.md index 82af4d4..5278d6b 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -348,7 +348,7 @@
> (include "thunderx.md")
> (include "../arm/xgene1.md")
> (include "thunderx2t99.md")
> -
> +(include "tsv110.md")
> ;; -------------------------------------------------------------------
> ;; Jumps and other miscellaneous insns ;;
> -------------------------------------------------------------------
> diff --git a/gcc/config/aarch64/tsv110.md
> b/gcc/config/aarch64/tsv110.md new file mode 100644 index
> 0000000..e912447
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2014-2016 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it ;;
> +under the terms of the GNU General Public License as published by ;;
> +the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but ;;
> +WITHOUT ANY WARRANTY; without even the implied warranty of ;;
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;
> +General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3. If not see ;;
> +<http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> + "neon_arith_acc, neon_arith_acc_q,
> + neon_arith_basic, neon_arith_complex,
> + neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> + neon_shift_imm_complex,
> + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> + neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> + neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> + neon_bitops, neon_bitops_q, neon_from_gp,
> + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> + neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> + unknown"
> + (cond [
> + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> + neon_reduc_add_acc_q")
> + (const_string "neon_arith_acc")
> + (eq_attr "type" "neon_arith_acc_q")
> + (const_string "neon_arith_acc_q")
> + (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> + neon_add_widen, neon_neg, neon_neg_q,\
> + neon_reduc_add, neon_reduc_add_q,\
> + neon_reduc_add_long, neon_sub, neon_sub_q,\
> + neon_sub_long, neon_sub_widen, neon_logic,\
> + neon_logic_q, neon_tst, neon_tst_q,\
> + neon_compare, neon_compare_q,\
> + neon_compare_zero, neon_compare_zero_q,\
> + neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> + neon_reduc_minmax_q")
> + (const_string "neon_arith_basic")
> + (eq_attr "type" "neon_add_halve_narrow_q,\
> + neon_add_halve, neon_add_halve_q,\
> + neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> + neon_qneg_q, neon_qsub, neon_qsub_q,\
> + neon_sub_halve_narrow_q")
> + (const_string "neon_arith_complex")
> +
> + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> + neon_mul_h_scalar, neon_mul_s_scalar,\
> + neon_sat_mul_b, neon_sat_mul_h,\
> + neon_sat_mul_s, neon_sat_mul_h_scalar,\
> + neon_sat_mul_s_scalar,\
> + neon_mul_b_long, neon_mul_h_long,\
> + neon_mul_s_long,\
> + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> + neon_sat_mul_b_long, neon_sat_mul_h_long,\
> + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> + neon_sat_mul_s_scalar_long,\
> + neon_mla_b, neon_mla_h, neon_mla_s,\
> + neon_mla_h_scalar, neon_mla_s_scalar,\
> + neon_mla_b_long, neon_mla_h_long,\
> + neon_mla_s_long,\
> + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> + neon_sat_mla_b_long, neon_sat_mla_h_long,\
> + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> + neon_sat_mla_s_scalar_long")
> + (const_string "neon_multiply")
> + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> + neon_sat_mul_b_q, neon_sat_mul_h_q,\
> + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> + neon_sat_mul_s_scalar_q,\
> + neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> + neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> + (const_string "neon_multiply_q")
> +
> + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> + (const_string "neon_shift_acc")
> + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> + neon_shift_imm_narrow_q, neon_shift_imm_long")
> + (const_string "neon_shift_imm_basic")
> + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> + neon_sat_shift_imm_narrow_q")
> + (const_string "neon_shift_imm_complex")
> + (eq_attr "type" "neon_shift_reg")
> + (const_string "neon_shift_reg_basic")
> + (eq_attr "type" "neon_shift_reg_q")
> + (const_string "neon_shift_reg_basic_q")
> + (eq_attr "type" "neon_sat_shift_reg")
> + (const_string "neon_shift_reg_complex")
> + (eq_attr "type" "neon_sat_shift_reg_q")
> + (const_string "neon_shift_reg_complex_q")
> +
> + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> + neon_fp_abs_s, neon_fp_abs_s_q,\
> + neon_fp_neg_d, neon_fp_neg_d_q,\
> + neon_fp_abs_d, neon_fp_abs_d_q,\
> + neon_fp_minmax_s,neon_fp_minmax_d,\
> + neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> + (const_string "neon_fp_negabs")
> + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> + neon_fp_reduc_add_s, neon_fp_compare_s,\
> + neon_fp_round_s,\
> + neon_fp_addsub_d, neon_fp_abd_d,\
> + neon_fp_reduc_add_d, neon_fp_compare_d,\
> + neon_fp_round_d")
> + (const_string "neon_fp_arith")
> + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> + neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> + neon_fp_minmax_s_q, neon_fp_round_s_q,\
> + neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> + neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> + neon_fp_minmax_d_q, neon_fp_round_d_q")
> + (const_string "neon_fp_arith_q")
> + (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> + neon_fp_reduc_minmax_d_q,\
> + neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> + (const_string "neon_fp_reductions_q")
> + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> + neon_fp_to_int_d, neon_int_to_fp_d")
> + (const_string "neon_fp_cvt_int")
> + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> + neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> + (const_string "neon_fp_cvt_int_q")
> + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> + (const_string "neon_fp_cvt16")
> + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> + neon_fp_mul_d")
> + (const_string "neon_fp_mul")
> + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> + neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> + (const_string "neon_fp_mul_q")
> + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> + neon_fp_mla_d")
> + (const_string "neon_fp_mla")
> + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> + (const_string "neon_fp_mla_q")
> + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> + neon_fp_recpx_s,\
> + neon_fp_recpe_d, neon_fp_rsqrte_d,\
> + neon_fp_recpx_d")
> + (const_string "neon_fp_recpe_rsqrte")
> + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> + neon_fp_recpx_s_q,\
> + neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> + neon_fp_recpx_d_q")
> + (const_string "neon_fp_recpe_rsqrte_q")
> + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> + neon_fp_recps_d, neon_fp_rsqrts_d")
> + (const_string "neon_fp_recps_rsqrts")
> + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> + neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> + (const_string "neon_fp_recps_rsqrts_q")
> + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> + neon_rev, neon_permute, neon_rbit,\
> + neon_tbl1, neon_tbl2, neon_zip,\
> + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> + neon_move, neon_move_q, neon_move_narrow_q")
> + (const_string "neon_bitops")
> + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> + neon_rev_q, neon_permute_q, neon_rbit_q")
> + (const_string "neon_bitops_q")
> + (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> + (const_string "neon_from_gp")
> + (eq_attr "type" "neon_from_gp_q")
> + (const_string "neon_from_gp_q")
> +
> + (eq_attr "type" "f_loads, f_loadd,\
> + neon_load1_1reg, neon_load1_1reg_q,\
> + neon_load1_2reg, neon_load1_2reg_q")
> + (const_string "neon_load_a")
> + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> + neon_load1_4reg, neon_load1_4reg_q")
> + (const_string "neon_load_b")
> + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> + neon_load1_all_lanes, neon_load1_all_lanes_q,\
> + neon_load2_2reg, neon_load2_2reg_q,\
> + neon_load2_all_lanes, neon_load2_all_lanes_q")
> + (const_string "neon_load_c")
> + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> + neon_load3_3reg, neon_load3_3reg_q,\
> + neon_load3_one_lane, neon_load3_one_lane_q,\
> + neon_load4_4reg, neon_load4_4reg_q")
> + (const_string "neon_load_d")
> + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> + neon_load3_all_lanes, neon_load3_all_lanes_q,\
> + neon_load4_all_lanes, neon_load4_all_lanes_q")
> + (const_string "neon_load_e")
> + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> + (const_string "neon_load_f")
> +
> + (eq_attr "type" "f_stores, f_stored,\
> + neon_store1_1reg")
> + (const_string "neon_store_a")
> + (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> + (const_string "neon_store_b")
> + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> + neon_store3_3reg, neon_store3_3reg_q,\
> + neon_store2_4reg, neon_store2_4reg_q,\
> + neon_store4_4reg, neon_store4_4reg_q,\
> + neon_store2_2reg, neon_store2_2reg_q,\
> + neon_store3_one_lane, neon_store3_one_lane_q,\
> + neon_store4_one_lane, neon_store4_one_lane_q,\
> + neon_store1_4reg, neon_store1_4reg_q,\
> + neon_store1_one_lane, neon_store1_one_lane_q,\
> + neon_store2_one_lane, neon_store2_one_lane_q")
> + (const_string "neon_store_complex")]
> + (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as issues pipeline that has ;; the
> +following functional units.
> +;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110") (define_reservation
> +"tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110") (define_reservation
> +"tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110") (define_reservation
> +"tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2. One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110") (define_reservation
> +"tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1,
> +FSU2 (define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> + "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> + "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4. Two pipeline for branch operations but same with alu2 and
> +alu3: BRU1, BRU2
> +
> +;; 5. Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110") (define_cpu_unit
> +"tsv110_ls2_issue" "tsv110") (define_reservation "tsv110_ls1"
> +"tsv110_ls1_issue") (define_reservation "tsv110_ls2"
> +"tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> + + tsv110_mdu_issue + tsv110_alu1_issue
> + + tsv110_alu2_issue +
> +tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alu_imm,logic_imm,\
> + alu_sreg,logic_reg,\
> + adc_imm,adc_reg,\
> + adr,bfm,clz,rbit,rev,\
> + shift_imm,shift_reg,\
> + mov_imm,mov_reg,\
> + mvn_imm,mvn_reg,\
> + mrs,multiple,no_insn"))
> + "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_imm,logics_imm,\
> + alus_sreg,logics_reg,\
> + adcs_imm,adcs_reg"))
> + "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "extend,\
> + alu_shift_imm,alu_shift_reg,\
> + crc,logic_shift_imm,logic_shift_reg,\
> + mov_shift,mvn_shift,\
> + mov_shift_reg,mvn_shift_reg"))
> + "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> + logics_shift_imm,logics_shift_reg"))
> + "tsv110_alu2")
> +
> +;; Multiplies instructions
> +(define_insn_reservation "tsv110_mult" 3
> + (and (eq_attr "tune" "tsv110")
> + (ior (eq_attr "mul32" "yes")
> + (eq_attr "mul64" "yes")))
> + "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "udiv,sdiv"))
> + "tsv110_mdu*10")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "block"))
> + "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slot.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "branch"))
> + "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "load1,load2"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "store1,store2"))
> + "tsv110_ls1|tsv110_ls2")
> +
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation "tsv110_neon_abd_aba" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_abd,neon_arith_acc"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_abd_aba_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_arith_acc_q"))
> + "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_neon_arith_basic" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation "tsv110_neon_arith_complex" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> + "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_mul_d_long"))
> + "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> + neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> + neon_shift_reg_complex"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation
> + "tsv110_neon_shift_acc_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> + neon_shift_reg_complex_q"))
> + "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_negabs" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_arith_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_minmax_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_reductions_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_cvt_int" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type"
> +"neon_fp_cvt_int,neon_fp_cvt_int_q"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mul_q" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> + neon_fp_recps_rsqrts"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_mla_q" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> + neon_fp_recps_rsqrts_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_fp_recpe_rsqrte_q" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops"))
> + "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_dup" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_from_gp,f_mcr"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_mov" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_mcrr"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_bitops_q" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> + "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_from_gp_q" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> + "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> + "tsv110_neon_to_gp" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> + "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_lane" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> + neon_load1_all_lanes,neon_load1_all_lanes_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg1" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type"
> +"f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> + "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg2" 6
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> + "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg3" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld1_reg4" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld2" 8
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> + neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> +
> +neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> + "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld3" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> + neon_load3_one_lane,neon_load3_one_lane_q,\
> + neon_load3_all_lanes,neon_load3_all_lanes_q"))
> + "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_lane" 9
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> + "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> + "tsv110_neon_ld4_reg" 11
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> + neon_load4_one_lane,neon_load4_one_lane_q"))
> +
> +"((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))"
> +)
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> + "tsv110_neon_store_a" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_a"))
> + "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> + "tsv110_neon_store_b" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_b"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the
> +number ;; of 64-bit chunks they will store, we don't attempt to model
> +that ;; precisely, treat them as blocking execution for two cycles
> +when ;; issued.
> +(define_insn_reservation
> + "tsv110_neon_store_complex" 0
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "tsv110_neon_type" "neon_store_complex"))
> + "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fconsts,fconstd,fmov"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvt"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvtf2i"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "f_cvti2f"))
> + "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fcmps,fcmpd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "ffariths,ffarithd"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> + neon_fp_div_s_q,neon_fp_div_d_q"))
> + "(tsv110_fsu1*12)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 24
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> + neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> + "(tsv110_fsu2*24)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_aese,crypto_aesmc"))
> + "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> + "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_fast"))
> + "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> + "tsv110_fsu1")
> +
> +;; We lie with calls. They take up all issue slots, but are
> +otherwise ;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> + (and (eq_attr "tune" "tsv110")
> + (eq_attr "type" "call"))
> + "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> + +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> + "tsv110_alu,tsv110_alu_shift") (define_bypass 2
> +"tsv110_alu_shift"
> + "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> + "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is
> +;; predicted in which case we pay no penalty, or the branch is ;;
> +mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> + "tsv110_call,tsv110_branch")
>
[-- Attachment #2: tsv110_pipeline.patch --]
[-- Type: application/octet-stream, Size: 27552 bytes --]
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
old mode 100644
new mode 100755
index b1eed3b..5611dd0
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2018-12-20 wuyuan
+
+ * config/aarch64/aarch64-cores.def: New CPU.
+ * config/aarch64/aarch64.md : Add "tsv110.md"
+ * config/aarch64/tsv110.md : tsv110.md new file
+
2018-12-20 Alan Modra <amodra@gmail.com>
* config/rs6000/sysv4.h (GNU_USER_DYNAMIC_LINKER): Define.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
old mode 100644
new mode 100755
index 20f4924..ea9b7c5
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -97,7 +97,7 @@ AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2
AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
old mode 100644
new mode 100755
index cf2732e..7f7673a
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -349,6 +349,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
+(include "tsv110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 0000000..758ab95
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+ "neon_arith_acc, neon_arith_acc_q,
+ neon_arith_basic, neon_arith_complex,
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+ neon_shift_imm_complex,
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+ neon_bitops, neon_bitops_q, neon_from_gp,
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+ neon_reduc_add_acc_q")
+ (const_string "neon_arith_acc")
+ (eq_attr "type" "neon_arith_acc_q")
+ (const_string "neon_arith_acc_q")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+ neon_add_widen, neon_neg, neon_neg_q,\
+ neon_reduc_add, neon_reduc_add_q,\
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
+ neon_sub_long, neon_sub_widen, neon_logic,\
+ neon_logic_q, neon_tst, neon_tst_q,\
+ neon_compare, neon_compare_q,\
+ neon_compare_zero, neon_compare_zero_q,\
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+ neon_reduc_minmax_q")
+ (const_string "neon_arith_basic")
+ (eq_attr "type" "neon_add_halve_narrow_q,\
+ neon_add_halve, neon_add_halve_q,\
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
+ neon_sub_halve_narrow_q")
+ (const_string "neon_arith_complex")
+
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+ neon_mul_h_scalar, neon_mul_s_scalar,\
+ neon_sat_mul_b, neon_sat_mul_h,\
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
+ neon_sat_mul_s_scalar,\
+ neon_mul_b_long, neon_mul_h_long,\
+ neon_mul_s_long,\
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,\
+ neon_mla_b, neon_mla_h, neon_mla_s,\
+ neon_mla_h_scalar, neon_mla_s_scalar,\
+ neon_mla_b_long, neon_mla_h_long,\
+ neon_mla_s_long,\
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long")
+ (const_string "neon_multiply")
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar_q,\
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+ (const_string "neon_multiply_q")
+
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+ (const_string "neon_shift_acc")
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
+ (const_string "neon_shift_imm_basic")
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q")
+ (const_string "neon_shift_imm_complex")
+ (eq_attr "type" "neon_shift_reg")
+ (const_string "neon_shift_reg_basic")
+ (eq_attr "type" "neon_shift_reg_q")
+ (const_string "neon_shift_reg_basic_q")
+ (eq_attr "type" "neon_sat_shift_reg")
+ (const_string "neon_shift_reg_complex")
+ (eq_attr "type" "neon_sat_shift_reg_q")
+ (const_string "neon_shift_reg_complex_q")
+
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+ neon_fp_abs_s, neon_fp_abs_s_q,\
+ neon_fp_neg_d, neon_fp_neg_d_q,\
+ neon_fp_abs_d, neon_fp_abs_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+ (const_string "neon_fp_negabs")
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
+ neon_fp_round_s,\
+ neon_fp_addsub_d, neon_fp_abd_d,\
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
+ neon_fp_round_d")
+ (const_string "neon_fp_arith")
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
+ (const_string "neon_fp_arith_q")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+ (const_string "neon_fp_reductions_q")
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+ neon_fp_to_int_d, neon_int_to_fp_d")
+ (const_string "neon_fp_cvt_int")
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+ (const_string "neon_fp_cvt_int_q")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+ (const_string "neon_fp_cvt16")
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+ neon_fp_mul_d")
+ (const_string "neon_fp_mul")
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+ (const_string "neon_fp_mul_q")
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+ neon_fp_mla_d")
+ (const_string "neon_fp_mla")
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+ (const_string "neon_fp_mla_q")
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+ neon_fp_recpx_s,\
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
+ neon_fp_recpx_d")
+ (const_string "neon_fp_recpe_rsqrte")
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+ neon_fp_recpx_s_q,\
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_d_q")
+ (const_string "neon_fp_recpe_rsqrte_q")
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+ neon_fp_recps_d, neon_fp_rsqrts_d")
+ (const_string "neon_fp_recps_rsqrts")
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+ (const_string "neon_fp_recps_rsqrts_q")
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+ neon_rev, neon_permute, neon_rbit,\
+ neon_tbl1, neon_tbl2, neon_zip,\
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+ neon_move, neon_move_q, neon_move_narrow_q")
+ (const_string "neon_bitops")
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+ neon_rev_q, neon_permute_q, neon_rbit_q")
+ (const_string "neon_bitops_q")
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+ (const_string "neon_from_gp")
+ (eq_attr "type" "neon_from_gp_q")
+ (const_string "neon_from_gp_q")
+
+ (eq_attr "type" "f_loads, f_loadd,\
+ neon_load1_1reg, neon_load1_1reg_q,\
+ neon_load1_2reg, neon_load1_2reg_q")
+ (const_string "neon_load_a")
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+ neon_load1_4reg, neon_load1_4reg_q")
+ (const_string "neon_load_b")
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
+ neon_load2_2reg, neon_load2_2reg_q,\
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
+ (const_string "neon_load_c")
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+ neon_load3_3reg, neon_load3_3reg_q,\
+ neon_load3_one_lane, neon_load3_one_lane_q,\
+ neon_load4_4reg, neon_load4_4reg_q")
+ (const_string "neon_load_d")
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
+ (const_string "neon_load_e")
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+ (const_string "neon_load_f")
+
+ (eq_attr "type" "f_stores, f_stored,\
+ neon_store1_1reg")
+ (const_string "neon_store_a")
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+ (const_string "neon_store_b")
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+ neon_store3_3reg, neon_store3_3reg_q,\
+ neon_store2_4reg, neon_store2_4reg_q,\
+ neon_store4_4reg, neon_store4_4reg_q,\
+ neon_store2_2reg, neon_store2_2reg_q,\
+ neon_store3_one_lane, neon_store3_one_lane_q,\
+ neon_store4_one_lane, neon_store4_one_lane_q,\
+ neon_store1_4reg, neon_store1_4reg_q,\
+ neon_store1_one_lane, neon_store1_one_lane_q,\
+ neon_store2_one_lane, neon_store2_one_lane_q")
+ (const_string "neon_store_complex")]
+ (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+ "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+ "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4. Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+ + tsv110_mdu_issue + tsv110_alu1_issue
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,no_insn"))
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "tsv110_alu2|tsv110_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "tsv110_alu_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "tsv110_mdu")
+
+(define_insn_reservation "tsv110_alus_shift" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "tsv110_alu2")
+
+;; Multiplies instructions
+(define_insn_reservation "tsv110_mult" 3
+ (and (eq_attr "tune" "tsv110")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "tsv110_mdu")
+
+;; Integer divide
+(define_insn_reservation "tsv110_div" 10
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "udiv,sdiv"))
+ "tsv110_mdu*10")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "tsv110_block" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "block"))
+ "tsv110_block")
+
+;; Branch execution Unit
+;;
+;; Branches take two issue slot.
+;; No latency as there is no result
+(define_insn_reservation "tsv110_branch" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "branch"))
+ "tsv110_alu1|tsv110_alu2")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "tsv110_load1" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "load1,load2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Stores of up to two words.
+(define_insn_reservation "tsv110_store1" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "store1,store2"))
+ "tsv110_ls1|tsv110_ls2")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation "tsv110_neon_abd_aba" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_neon_arith_basic" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+(define_insn_reservation "tsv110_neon_arith_complex" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
+ "tsv110_fsu1,tsv110_fsu2")
+
+;; Integer Multiply Instructions.
+;; D-form
+(define_insn_reservation "tsv110_neon_multiply" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_mul_d_long"))
+ "tsv110_fsu1")
+
+;; Q-form
+(define_insn_reservation "tsv110_neon_multiply_q" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
+ neon_shift_reg_complex"))
+ "tsv110_fsu1")
+
+(define_insn_reservation
+ "tsv110_neon_shift_acc_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
+ neon_shift_reg_complex_q"))
+ "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_fp_negabs" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_arith_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_minmax_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_reductions_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_cvt_int" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mul_q" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
+ neon_fp_recps_rsqrts"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_mla_q" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
+ neon_fp_recps_rsqrts_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_bitops" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
+ "(tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_dup" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_from_gp,f_mcr"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_mov" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_mcrr"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_bitops_q" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
+ "(tsv110_fsu1+tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_from_gp_q" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
+ "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
+
+(define_insn_reservation
+ "tsv110_neon_to_gp" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "(tsv110_fsu1)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_ld1_lane" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg1" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
+ "((tsv110_ls1)|(tsv110_ls2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg2" 6
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "((tsv110_ls1*2)|(tsv110_ls2*2))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg3" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld1_reg4" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld2" 8
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
+ "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
+
+(define_insn_reservation
+ "tsv110_neon_ld3" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
+ neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_lane" 9
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
+
+(define_insn_reservation
+ "tsv110_neon_ld4_reg" 11
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_one_lane_q"))
+ "((tsv110_ls1*16)|(tsv110_ls2*16)|(tsv110_fsu1*16)|(tsv110_fsu2*16))")
+
+;; Store Instructions.
+
+(define_insn_reservation
+ "tsv110_neon_store_a" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation
+ "tsv110_neon_store_b" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+ "tsv110_neon_store_complex" 0
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
+ "tsv110_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "tsv110_fp_const" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fconsts,fconstd,fmov"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_mac" 7
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvt" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvt"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvtf2i"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_fp_cvti2f" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "f_cvti2f"))
+ "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
+
+(define_insn_reservation "tsv110_fp_cmp" 4
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_arith" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "ffariths,ffarithd"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_fp_divs" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
+ neon_fp_div_s_q,neon_fp_div_d_q"))
+ "(tsv110_fsu1*12)")
+
+(define_insn_reservation "tsv110_fp_sqrts" 12
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
+ "(tsv110_fsu2*12)")
+
+(define_insn_reservation "tsv110_crypto_aes" 3
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "tsv110_fsu1")
+
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
+ "(tsv110_fsu1|tsv110_fsu2)")
+
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "(tsv110_fsu1)")
+
+(define_insn_reservation "tsv110_crypto_complex" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
+ "tsv110_fsu1")
+
+;; We lie with calls. They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "tsv110_call" 1
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "call"))
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "tsv110_alu"
+ "tsv110_alu,tsv110_alu_shift")
+(define_bypass 2 "tsv110_alu_shift"
+ "tsv110_alu,tsv110_alu_shift")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
+ "tsv110_neon_*mla*")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2019-03-07 14:18 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-14 14:03 add tsv110 pipeline scheduling wuyuan (E)
2019-01-14 15:13 ` Kyrill Tkachov
2019-01-17 23:47 ` James Greenhalgh
-- strict thread matches above, loose matches on Subject: below --
2019-03-07 14:25 wuyuan (E)
2019-03-04 13:46 wuyuan (E)
2019-01-13 9:37 wuyuan (E)
2019-01-14 10:26 ` Kyrill Tkachov
2019-01-08 11:23 wuyuan (E)
2019-01-08 12:16 ` Kyrill Tkachov
2019-01-03 2:56 wuyuan (E)
2018-12-20 6:06 wuyuan (E)
2018-12-20 2:34 wuyuan (E)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).