public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Kyrill  Tkachov <kyrylo.tkachov@foss.arm.com>
To: "wuyuan (E)" <wuyuan5@huawei.com>,
	 "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: "Zhangyichao (AB)" <zhangyichao.zhang@huawei.com>,
	 "Zhanghaijian (A)" <z.zhanghaijian@huawei.com>,
	"Richard Earnshaw (lists)" <richard.earnshaw@arm.com>,
	 James Greenhalgh <james.greenhalgh@arm.com>,
	Marcus Shawcroft <marcus.shawcroft@arm.com>
Subject: Re: add tsv110 pipeline scheduling
Date: Mon, 14 Jan 2019 15:13:00 -0000	[thread overview]
Message-ID: <5C3CA709.2020604@foss.arm.com> (raw)
In-Reply-To: <EFB6E1955A2C5C41A00B7DEBDAA1E80C061C5770@dggeml527-mbx.china.huawei.com>

Hi Wuyuan,

On 14/01/19 14:02, wuyuan (E) wrote:
> Hi  Kyrill:
>           GCC 7.3.0 does not discard the store1 and load1 commands; I did not expect the community's latest GCC to have changed so much.
>           I have now downloaded the latest GCC code and applied the patch to the GCC source; it compiles successfully. Thank you very much for your work!


For the future, please test the patches against the branch you plan to apply them to.
In this case, since you're submitting a trunk patch it needs to be applied and tested on trunk.

This latest version builds on trunk and looks ok to me but you'll need approval from the aarch64 maintainers to commit.
I've cc'ed them for you.

Thanks,
Kyrill

> 															    Best Regards,
> 															    wuyuan
>
>
>        * config/aarch64/aarch64-cores.def (tsv110): Change scheduling model.
>        * config/aarch64/aarch64.md: Include "tsv110.md".
>        * config/aarch64/tsv110.md: New file.
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 70b0766..085c40f 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -103,7 +103,7 @@ AARCH64_CORE("cortex-a76",  cortexa76, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2
>   AARCH64_CORE("ares",  ares, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
>   
>   /* HiSilicon ('H') cores. */
> -AARCH64_CORE("tsv110",  tsv110, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
> +AARCH64_CORE("tsv110",  tsv110, tsv110, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
>   
>   /* ARMv8.4-A Architecture Processors.  */
>   
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 513aec1..97e0703 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -356,6 +356,7 @@
>   (include "thunderx.md")
>   (include "../arm/xgene1.md")
>   (include "thunderx2t99.md")
> +(include "tsv110.md")
>   
>   ;; -------------------------------------------------------------------
>   ;; Jumps and other miscellaneous insns
> diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
> new file mode 100644
> index 0000000..e33c5cc
> --- /dev/null
> +++ b/gcc/config/aarch64/tsv110.md
> @@ -0,0 +1,708 @@
> +;; tsv110 pipeline description
> +;; Copyright (C) 2018 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it
> +;; under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but
> +;; WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +;; General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_automaton "tsv110")
> +
> +(define_attr "tsv110_neon_type"
> +  "neon_arith_acc, neon_arith_acc_q,
> +   neon_arith_basic, neon_arith_complex,
> +   neon_reduc_add_acc, neon_multiply, neon_multiply_q,
> +   neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
> +   neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
> +   neon_shift_imm_complex,
> +   neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
> +   neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
> +   neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
> +   neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
> +   neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
> +   neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
> +   neon_bitops, neon_bitops_q, neon_from_gp,
> +   neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
> +   neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
> +   neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
> +   unknown"
> +  (cond [
> +	  (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
> +			   neon_reduc_add_acc_q")
> +	    (const_string "neon_arith_acc")
> +	  (eq_attr "type" "neon_arith_acc_q")
> +	    (const_string "neon_arith_acc_q")
> +	  (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
> +			   neon_add_widen, neon_neg, neon_neg_q,\
> +			   neon_reduc_add, neon_reduc_add_q,\
> +			   neon_reduc_add_long, neon_sub, neon_sub_q,\
> +			   neon_sub_long, neon_sub_widen, neon_logic,\
> +			   neon_logic_q, neon_tst, neon_tst_q,\
> +			   neon_compare, neon_compare_q,\
> +			   neon_compare_zero, neon_compare_zero_q,\
> +			   neon_minmax, neon_minmax_q, neon_reduc_minmax,\
> +			   neon_reduc_minmax_q")
> +	    (const_string "neon_arith_basic")
> +	  (eq_attr "type" "neon_add_halve_narrow_q,\
> +			   neon_add_halve, neon_add_halve_q,\
> +			   neon_sub_halve, neon_sub_halve_q, neon_qabs,\
> +			   neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
> +			   neon_qneg_q, neon_qsub, neon_qsub_q,\
> +			   neon_sub_halve_narrow_q")
> +	    (const_string "neon_arith_complex")
> +
> +	  (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
> +			   neon_mul_h_scalar, neon_mul_s_scalar,\
> +			   neon_sat_mul_b, neon_sat_mul_h,\
> +			   neon_sat_mul_s, neon_sat_mul_h_scalar,\
> +			   neon_sat_mul_s_scalar,\
> +			   neon_mul_b_long, neon_mul_h_long,\
> +			   neon_mul_s_long,\
> +			   neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
> +			   neon_sat_mul_b_long, neon_sat_mul_h_long,\
> +			   neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
> +			   neon_sat_mul_s_scalar_long,\
> +			   neon_mla_b, neon_mla_h, neon_mla_s,\
> +			   neon_mla_h_scalar, neon_mla_s_scalar,\
> +			   neon_mla_b_long, neon_mla_h_long,\
> +			   neon_mla_s_long,\
> +			   neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
> +			   neon_sat_mla_b_long, neon_sat_mla_h_long,\
> +			   neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
> +			   neon_sat_mla_s_scalar_long")
> +	    (const_string "neon_multiply")
> +	  (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
> +			   neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
> +			   neon_sat_mul_b_q, neon_sat_mul_h_q,\
> +			   neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
> +			   neon_sat_mul_s_scalar_q,\
> +			   neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
> +			   neon_mla_h_scalar_q, neon_mla_s_scalar_q")
> +	    (const_string "neon_multiply_q")
> +
> +	  (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
> +	    (const_string "neon_shift_acc")
> +	  (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
> +			   neon_shift_imm_narrow_q, neon_shift_imm_long")
> +	    (const_string "neon_shift_imm_basic")
> +	  (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
> +			   neon_sat_shift_imm_narrow_q")
> +	    (const_string "neon_shift_imm_complex")
> +	  (eq_attr "type" "neon_shift_reg")
> +	    (const_string "neon_shift_reg_basic")
> +	  (eq_attr "type" "neon_shift_reg_q")
> +	    (const_string "neon_shift_reg_basic_q")
> +	  (eq_attr "type" "neon_sat_shift_reg")
> +	    (const_string "neon_shift_reg_complex")
> +	  (eq_attr "type" "neon_sat_shift_reg_q")
> +	    (const_string "neon_shift_reg_complex_q")
> +
> +	  (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
> +			   neon_fp_abs_s, neon_fp_abs_s_q,\
> +			   neon_fp_neg_d, neon_fp_neg_d_q,\
> +			   neon_fp_abs_d, neon_fp_abs_d_q,\
> +			   neon_fp_minmax_s,neon_fp_minmax_d,\
> +			   neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
> +	    (const_string "neon_fp_negabs")
> +	  (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
> +			   neon_fp_reduc_add_s, neon_fp_compare_s,\
> +			   neon_fp_round_s,\
> +			   neon_fp_addsub_d, neon_fp_abd_d,\
> +			   neon_fp_reduc_add_d, neon_fp_compare_d,\
> +			   neon_fp_round_d")
> +	    (const_string "neon_fp_arith")
> +	  (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
> +			   neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
> +			   neon_fp_minmax_s_q, neon_fp_round_s_q,\
> +			   neon_fp_addsub_d_q, neon_fp_abd_d_q,\
> +			   neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
> +			   neon_fp_minmax_d_q, neon_fp_round_d_q")
> +	    (const_string "neon_fp_arith_q")
> +	  (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
> +			   neon_fp_reduc_minmax_d_q,\
> +			   neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
> +	    (const_string "neon_fp_reductions_q")
> +	  (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
> +			   neon_fp_to_int_d, neon_int_to_fp_d")
> +	    (const_string "neon_fp_cvt_int")
> +	  (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
> +			   neon_fp_to_int_d_q, neon_int_to_fp_d_q")
> +	    (const_string "neon_fp_cvt_int_q")
> +	  (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
> +	    (const_string "neon_fp_cvt16")
> +	  (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
> +			   neon_fp_mul_d")
> +	    (const_string "neon_fp_mul")
> +	  (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
> +			   neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
> +	    (const_string "neon_fp_mul_q")
> +	  (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
> +			   neon_fp_mla_d")
> +	    (const_string "neon_fp_mla")
> +	  (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
> +			   neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
> +	    (const_string "neon_fp_mla_q")
> +	  (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
> +			   neon_fp_recpx_s,\
> +			   neon_fp_recpe_d, neon_fp_rsqrte_d,\
> +			   neon_fp_recpx_d")
> +	    (const_string "neon_fp_recpe_rsqrte")
> +	  (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
> +			   neon_fp_recpx_s_q,\
> +			   neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
> +			   neon_fp_recpx_d_q")
> +	    (const_string "neon_fp_recpe_rsqrte_q")
> +	  (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
> +			   neon_fp_recps_d, neon_fp_rsqrts_d")
> +	    (const_string "neon_fp_recps_rsqrts")
> +	  (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
> +			   neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
> +	    (const_string "neon_fp_recps_rsqrts_q")
> +	  (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
> +			   neon_rev, neon_permute, neon_rbit,\
> +			   neon_tbl1, neon_tbl2, neon_zip,\
> +			   neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
> +			   neon_move, neon_move_q, neon_move_narrow_q")
> +	    (const_string "neon_bitops")
> +	  (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
> +			   neon_rev_q, neon_permute_q, neon_rbit_q")
> +	    (const_string "neon_bitops_q")
> +	  (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
> +	    (const_string "neon_from_gp")
> +	  (eq_attr "type" "neon_from_gp_q")
> +	    (const_string "neon_from_gp_q")
> +
> +	  (eq_attr "type" "f_loads, f_loadd,\
> +			   neon_load1_1reg, neon_load1_1reg_q,\
> +			   neon_load1_2reg, neon_load1_2reg_q")
> +	    (const_string "neon_load_a")
> +	  (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
> +			   neon_load1_4reg, neon_load1_4reg_q")
> +	    (const_string "neon_load_b")
> +	  (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
> +			   neon_load1_all_lanes, neon_load1_all_lanes_q,\
> +			   neon_load2_2reg, neon_load2_2reg_q,\
> +			   neon_load2_all_lanes, neon_load2_all_lanes_q")
> +	    (const_string "neon_load_c")
> +	  (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
> +			   neon_load3_3reg, neon_load3_3reg_q,\
> +			   neon_load3_one_lane, neon_load3_one_lane_q,\
> +			   neon_load4_4reg, neon_load4_4reg_q")
> +	    (const_string "neon_load_d")
> +	  (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
> +			   neon_load3_all_lanes, neon_load3_all_lanes_q,\
> +			   neon_load4_all_lanes, neon_load4_all_lanes_q")
> +	    (const_string "neon_load_e")
> +	  (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
> +	    (const_string "neon_load_f")
> +
> +	  (eq_attr "type" "f_stores, f_stored,\
> +			   neon_store1_1reg")
> +	    (const_string "neon_store_a")
> +	  (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
> +	    (const_string "neon_store_b")
> +	  (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
> +			   neon_store3_3reg, neon_store3_3reg_q,\
> +			   neon_store2_4reg, neon_store2_4reg_q,\
> +			   neon_store4_4reg, neon_store4_4reg_q,\
> +			   neon_store2_2reg, neon_store2_2reg_q,\
> +			   neon_store3_one_lane, neon_store3_one_lane_q,\
> +			   neon_store4_one_lane, neon_store4_one_lane_q,\
> +			   neon_store1_4reg, neon_store1_4reg_q,\
> +			   neon_store1_one_lane, neon_store1_one_lane_q,\
> +			   neon_store2_one_lane, neon_store2_one_lane_q")
> +	    (const_string "neon_store_complex")]
> +	  (const_string "unknown")))
> +
> +;; The tsv110 core is modelled as a multiple-issue pipeline that has
> +;; the following functional units.
> +;; 1.  Three pipelines for integer operations: ALU1, ALU2, ALU3
> +
> +(define_cpu_unit "tsv110_alu1_issue" "tsv110")
> +(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
> +
> +(define_cpu_unit "tsv110_alu2_issue" "tsv110")
> +(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
> +
> +(define_cpu_unit "tsv110_alu3_issue" "tsv110")
> +(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
> +
> +;; 2.  One pipeline for complex integer operations: MDU
> +
> +(define_cpu_unit "tsv110_mdu_issue" "tsv110")
> +(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
> +
> +;; 3.  Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
> +(define_automaton "tsv110_fsu")
> +
> +(define_cpu_unit "tsv110_fsu1_issue"
> +		 "tsv110_fsu")
> +(define_cpu_unit "tsv110_fsu2_issue"
> +		 "tsv110_fsu")
> +
> +(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
> +(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
> +
> +;; 4.  Two pipelines for branch operations, shared with ALU2 and ALU3: BRU1, BRU2
> +
> +;; 5.  Two pipelines for load and store operations: LS1, LS2.
> +
> +(define_cpu_unit "tsv110_ls1_issue" "tsv110")
> +(define_cpu_unit "tsv110_ls2_issue" "tsv110")
> +(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
> +(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
> +
> +;; Block all issue queues.
> +
> +(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
> +				  + tsv110_mdu_issue + tsv110_alu1_issue
> +				  + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
> +
> +;; Simple Execution Unit:
> +;;
> +;; Simple ALU without shift
> +(define_insn_reservation "tsv110_alu" 1
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "alu_imm,logic_imm,\
> +			alu_sreg,logic_reg,\
> +			adc_imm,adc_reg,\
> +			adr,bfm,clz,rbit,rev,\
> +			shift_imm,shift_reg,\
> +			mov_imm,mov_reg,\
> +			mvn_imm,mvn_reg,\
> +			mrs,multiple,no_insn"))
> +  "tsv110_alu1|tsv110_alu2|tsv110_alu3")
> +
> +(define_insn_reservation "tsv110_alus" 1
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "alus_imm,logics_imm,\
> +			alus_sreg,logics_reg,\
> +			adcs_imm,adcs_reg"))
> +  "tsv110_alu2|tsv110_alu3")
> +
> +;; ALU ops with shift
> +(define_insn_reservation "tsv110_alu_shift" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "extend,\
> +			alu_shift_imm,alu_shift_reg,\
> +			crc,logic_shift_imm,logic_shift_reg,\
> +			mov_shift,mvn_shift,\
> +			mov_shift_reg,mvn_shift_reg"))
> +  "tsv110_mdu")
> +
> +(define_insn_reservation "tsv110_alus_shift" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
> +			logics_shift_imm,logics_shift_reg"))
> +  "tsv110_alu2")
> +
> +;; Multiply instructions
> +(define_insn_reservation "tsv110_mult" 3
> +  (and (eq_attr "tune" "tsv110")
> +       (ior (eq_attr "mul32" "yes")
> +	    (eq_attr "widen_mul64" "yes")))
> +  "tsv110_mdu")
> +
> +;; Integer divide
> +(define_insn_reservation "tsv110_div" 10
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "udiv,sdiv"))
> +  "tsv110_mdu*8")
> +
> +;; Block all issue pipes for a cycle
> +(define_insn_reservation "tsv110_block" 1
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "block"))
> +  "tsv110_block")
> +
> +;; Branch execution Unit
> +;;
> +;; Branches take two issue slots.
> +;; No latency as there is no result
> +(define_insn_reservation "tsv110_branch" 0
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "branch"))
> +  "tsv110_alu1|tsv110_alu2")
> +
> +;; Load-store execution Unit
> +;;
> +;; Loads of up to two words.
> +(define_insn_reservation "tsv110_load1" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "load_4,load_8"))
> +  "tsv110_ls1|tsv110_ls2")
> +
> +;; Stores of up to two words.
> +(define_insn_reservation "tsv110_store1" 0
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "store_4,store_8"))
> +  "tsv110_ls1|tsv110_ls2")
> +
> +;; Advanced SIMD Unit - Integer Arithmetic Instructions.
> +
> +(define_insn_reservation  "tsv110_neon_abd_aba" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_abd,neon_arith_acc"))
> +  "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation  "tsv110_neon_abd_aba_q" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_arith_acc_q"))
> +  "(tsv110_fsu1,tsv110_fsu2)+(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation  "tsv110_neon_arith_basic" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_arith_basic"))
> +  "tsv110_fsu1,tsv110_fsu2")
> +
> +(define_insn_reservation  "tsv110_neon_arith_complex" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_arith_complex"))
> +  "tsv110_fsu1,tsv110_fsu2")
> +
> +;; Integer Multiply Instructions.
> +;; D-form
> +(define_insn_reservation "tsv110_neon_multiply" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_multiply"))
> +  "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_neon_multiply_dlong" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_mul_d_long"))
> +  "tsv110_fsu1")
> +
> +;; Q-form
> +(define_insn_reservation "tsv110_neon_multiply_q" 8
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_multiply_q"))
> +  "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Integer Shift Instructions.
> +
> +(define_insn_reservation
> +  "tsv110_neon_shift_acc" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_shift_acc,\
> +	   neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
> +	   neon_shift_reg_complex"))
> +  "tsv110_fsu1")
> +
> +(define_insn_reservation
> +  "tsv110_neon_shift_acc_q" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
> +	   neon_shift_reg_complex_q"))
> +  "(tsv110_fsu1,tsv110_fsu1)+(tsv110_fsu1,tsv110_fsu1)")
> +
> +;; Floating Point Instructions.
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_negabs" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_arith" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_arith"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_arith_q" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
> +  "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_minmax_q" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
> +  "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_reductions_q" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_cvt_int" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_mul" 5
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_mul"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_mul_q" 5
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
> +  "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_mla" 7
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_mla,\
> +	   neon_fp_recps_rsqrts"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_recpe_rsqrte" 3
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_mla_q" 7
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
> +	   neon_fp_recps_rsqrts_q"))
> +  "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_fp_recpe_rsqrte_q" 3
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
> +  "(tsv110_fsu1+tsv110_fsu2)")
> +
> +;; Miscellaneous Instructions.
> +
> +(define_insn_reservation
> +  "tsv110_neon_bitops" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_bitops"))
> +  "(tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_dup" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_from_gp,f_mcr"))
> +  "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_mov" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "f_mcrr"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_bitops_q" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_bitops_q"))
> +  "(tsv110_fsu1+tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_from_gp_q" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
> +  "(tsv110_alu1,tsv110_fsu1,tsv110_fsu2)")
> +
> +(define_insn_reservation
> +  "tsv110_neon_to_gp" 3
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
> +  "(tsv110_fsu1)")
> +
> +;; Load Instructions.
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld1_lane" 8
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
> +	   neon_load1_all_lanes,neon_load1_all_lanes_q"))
> +  "((tsv110_ls1*2)|(tsv110_ls2*2)|(tsv110_fsu1*2)|(tsv110_fsu2*2))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld1_reg1" 6
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
> +  "((tsv110_ls1)|(tsv110_ls2))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld1_reg2" 6
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
> +  "((tsv110_ls1*2)|(tsv110_ls2*2))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld1_reg3" 7
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
> +  "((tsv110_ls1*6)|(tsv110_ls2*6))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld1_reg4" 7
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
> +  "((tsv110_ls1*8)|(tsv110_ls2*8))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld2" 8
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
> +	   neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
> +	   neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
> +  "((tsv110_ls1*4)|(tsv110_ls2*4)|(tsv110_fsu1*4)|(tsv110_fsu2*4))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld3" 9
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
> +	   neon_load3_one_lane,neon_load3_one_lane_q,\
> +	   neon_load3_all_lanes,neon_load3_all_lanes_q"))
> +  "((tsv110_ls1*6)|(tsv110_ls2*6)|(tsv110_fsu1*6)|(tsv110_fsu2*6))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld4_lane" 9
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> +	   neon_load4_one_lane,neon_load4_one_lane_q"))
> +  "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +(define_insn_reservation
> +  "tsv110_neon_ld4_reg" 11
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
> +	   neon_load4_one_lane,neon_load4_one_lane_q"))
> +  "((tsv110_ls1*8)|(tsv110_ls2*8)|(tsv110_fsu1*8)|(tsv110_fsu2*8))")
> +
> +;; Store Instructions.
> +
> +(define_insn_reservation
> +  "tsv110_neon_store_a" 0
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_store_a"))
> +  "tsv110_fsu1|tsv110_fsu2")
> +
> +(define_insn_reservation
> +  "tsv110_neon_store_b" 0
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_store_b"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +;; These block issue for a number of cycles proportional to the number
> +;; of 64-bit chunks they will store.  We don't attempt to model that
> +;; precisely; instead we treat them as blocking execution for two
> +;; cycles when issued.
> +(define_insn_reservation
> +  "tsv110_neon_store_complex" 0
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "tsv110_neon_type" "neon_store_complex"))
> +  "tsv110_block*2")
> +
> +;; Floating-Point Operations.
> +
> +(define_insn_reservation "tsv110_fp_const" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "fconsts,fconstd,fmov"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_add_sub" 5
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_mac" 7
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvt" 3
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "f_cvt"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_cvtf2i" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "f_cvtf2i"))
> +  "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_fp_cvti2f" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "f_cvti2f"))
> +  "((tsv110_alu1*3)|(tsv110_fsu1*3)|(tsv110_fsu2*3))")
> +
> +(define_insn_reservation "tsv110_fp_cmp" 4
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "fcmps,fcmpd"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_arith" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "ffariths,ffarithd"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_fp_divs" 12
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
> +	   neon_fp_div_s_q,neon_fp_div_d_q"))
> +  "(tsv110_fsu1*8)")
> +
> +(define_insn_reservation "tsv110_fp_sqrts" 12
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
> +	   neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
> +  "(tsv110_fsu2*8)")
> +
> +(define_insn_reservation "tsv110_crypto_aes" 3
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "crypto_aese,crypto_aesmc"))
> +  "tsv110_fsu1")
> +
> +(define_insn_reservation "tsv110_crypto_sha1_fast" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
> +  "(tsv110_fsu1|tsv110_fsu2)")
> +
> +(define_insn_reservation "tsv110_crypto_sha256_fast" 2
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "crypto_sha1_fast"))
> +  "(tsv110_fsu1)")
> +
> +(define_insn_reservation "tsv110_crypto_complex" 5
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
> +  "tsv110_fsu1")
> +
> +;; We lie with calls.  They take up all issue slots, but are otherwise
> +;; not harmful.
> +(define_insn_reservation "tsv110_call" 1
> +  (and (eq_attr "tune" "tsv110")
> +       (eq_attr "type" "call"))
> +  "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
> +    +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
> +)
> +
> +;; Simple execution unit bypasses
> +(define_bypass 1 "tsv110_alu"
> +	         "tsv110_alu,tsv110_alu_shift")
> +(define_bypass 2 "tsv110_alu_shift"
> +	         "tsv110_alu,tsv110_alu_shift")
> +
> +;; An MLA or a MUL can feed a dependent MLA.
> +(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
> +		 "tsv110_neon_*mla*")
> +
> +;; We don't need to care about control hazards, either the branch is
> +;; predicted in which case we pay no penalty, or the branch is
> +;; mispredicted in which case instruction scheduling will be unlikely to
> +;; help.
> +(define_bypass 1 "tsv110_*"
> +		 "tsv110_call,tsv110_branch")
>
>
>
>

  reply	other threads:[~2019-01-14 15:13 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-14 14:03 wuyuan (E)
2019-01-14 15:13 ` Kyrill Tkachov [this message]
2019-01-17 23:47 ` James Greenhalgh
  -- strict thread matches above, loose matches on Subject: below --
2019-03-07 14:25 wuyuan (E)
2019-03-04 13:46 wuyuan (E)
2019-01-13  9:37 wuyuan (E)
2019-01-14 10:26 ` Kyrill Tkachov
2019-01-08 11:23 wuyuan (E)
2019-01-08 12:16 ` Kyrill Tkachov
2019-01-03  2:56 wuyuan (E)
2018-12-20  6:06 wuyuan (E)
2018-12-20  2:34 wuyuan (E)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5C3CA709.2020604@foss.arm.com \
    --to=kyrylo.tkachov@foss.arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=james.greenhalgh@arm.com \
    --cc=marcus.shawcroft@arm.com \
    --cc=richard.earnshaw@arm.com \
    --cc=wuyuan5@huawei.com \
    --cc=z.zhanghaijian@huawei.com \
    --cc=zhangyichao.zhang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).