Index: gcc/config/mips/loongson.md =================================================================== --- gcc/config/mips/loongson.md (revision 66) +++ gcc/config/mips/loongson.md (working copy) @@ -85,7 +85,7 @@ { return mips_output_move (operands[0], operands[1]); } - [(set_attr "type" "fpstore,fpload,*,mfc,mtc,*,fpstore,mtc") + [(set_attr "type" "fpstore,fpload,fmove,mfc,mtc,move,fpstore,mtc") (set_attr "mode" "")]) ;; Initialization of a vector. @@ -112,7 +112,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "packss\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Pack with unsigned saturation. (define_insn "vec_pack_usat_" @@ -124,6 +124,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "packus\t%0,%1,%2" + [(set_attr "type" "fmul")] ) ;; Addition, treating overflow by wraparound. @@ -136,7 +137,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "padd\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Addition of doubleword integers stored in FP registers. ;; Overflow is treated by wraparound. @@ -149,7 +150,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "paddd\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Addition, treating overflow by signed saturation. (define_insn "ssadd3" @@ -161,7 +162,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "padds\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Addition, treating overflow by unsigned saturation. (define_insn "usadd3" @@ -172,7 +173,8 @@ ) )] "HAVE_LOONGSON_VECTOR_MODES" - "paddus\t%0,%1,%2") + "paddus\t%0,%1,%2" + [(set_attr "type" "fadd")]) ;; Logical AND NOT. (define_insn "loongson_and_not_" @@ -184,7 +186,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pandn\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Average. (define_insn "loongson_average_" @@ -197,7 +199,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pavg\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Equality test. (define_insn "loongson_eq_" @@ -210,7 +212,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pcmpeq\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Greater-than test. 
(define_insn "loongson_gt_" @@ -223,7 +225,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pcmpgt\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Extract halfword. (define_insn "loongson_extract_halfword" @@ -236,7 +238,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pextr\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Insert halfword. (define_insn "loongson_insert_halfword_0" @@ -249,7 +251,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pinsr_0\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) (define_insn "loongson_insert_halfword_1" [(set (match_operand:VH 0 "register_operand" "=f") @@ -260,7 +262,7 @@ ] "HAVE_LOONGSON_VECTOR_MODES" "pinsr_1\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) (define_insn "loongson_insert_halfword_2" [(set (match_operand:VH 0 "register_operand" "=f") @@ -272,7 +274,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pinsr_2\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) (define_insn "loongson_insert_halfword_3" [(set (match_operand:VH 0 "register_operand" "=f") @@ -284,7 +286,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pinsr_3\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) ;; Multiply and add packed integers. (define_insn "loongson_mult_add" @@ -297,7 +299,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmadd\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Maximum of signed halfwords. (define_insn "smax3" @@ -309,7 +311,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmaxs\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Maximum of unsigned bytes. (define_insn "umax3" @@ -321,7 +323,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmaxu\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Minimum of signed halfwords. (define_insn "smin3" @@ -333,7 +335,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmins\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Minimum of unsigned bytes. (define_insn "umin3" @@ -345,7 +347,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pminu\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Move byte mask. 
(define_insn "loongson_move_byte_mask" @@ -357,7 +359,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmovmsk\t%0,%1" -) + [(set_attr "type" "fabs")]) ;; Multiply unsigned integers and store high result. (define_insn "umul3_highpart" @@ -370,7 +372,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmulhu\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Multiply signed integers and store high result. (define_insn "smul3_highpart" @@ -383,7 +385,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmulh\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Multiply signed integers and store low result. (define_insn "loongson_smul_lowpart" @@ -396,7 +398,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmull\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Multiply unsigned word integers. (define_insn "loongson_umul_word" @@ -409,7 +411,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pmulu\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Absolute difference. (define_insn "loongson_pasubub" @@ -422,7 +424,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pasubub\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Sum of unsigned byte integers. (define_insn "reduc_uplus_" @@ -434,7 +436,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "biadd\t%0,%1" -) + [(set_attr "type" "fabs")]) ;; Sum of absolute differences. (define_insn "loongson_psadbh" @@ -447,7 +449,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pasubub\t%0,%1,%2;biadd\t%0,%0" -) + [(set_attr "type" "fadd")]) ;; Shuffle halfwords. (define_insn "loongson_pshufh" @@ -461,7 +463,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "pshufh\t%0,%2,%3" -) + [(set_attr "type" "fmul")]) ;; Shift left logical. (define_insn "loongson_psll" @@ -473,7 +475,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "psll\t%0,%1,%2" -) + [(set_attr "type" "fmul")]) ;; Shift right arithmetic. (define_insn "loongson_psra" @@ -485,7 +487,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "psra\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) ;; Shift right logical. 
(define_insn "loongson_psrl" @@ -497,7 +499,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "psrl\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) ;; Subtraction, treating overflow by wraparound. (define_insn "sub3" @@ -509,7 +511,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "psub\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Subtraction of doubleword integers stored in FP registers. ;; Overflow is treated by wraparound. @@ -522,7 +524,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "psubd\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Subtraction, treating overflow by signed saturation. (define_insn "sssub3" @@ -534,7 +536,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "psubs\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Subtraction, treating overflow by unsigned saturation. (define_insn "ussub3" @@ -546,7 +548,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "psubus\t%0,%1,%2" -) + [(set_attr "type" "fadd")]) ;; Unpack high data. (define_insn "vec_interleave_high" @@ -559,7 +561,7 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "punpckh\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) ;; Unpack low data. (define_insn "vec_interleave_low" @@ -572,4 +574,4 @@ )] "HAVE_LOONGSON_VECTOR_MODES" "punpckl\t%0,%1,%2" -) + [(set_attr "type" "fdiv")]) Index: gcc/config/mips/loongson2ef.md =================================================================== --- gcc/config/mips/loongson2ef.md (revision 0) +++ gcc/config/mips/loongson2ef.md (revision 0) @@ -0,0 +1,486 @@ +;; Pipeline model for ST Microelectronics Loongson-2E/2F cores. + +;; Copyright (C) 2008 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Automaton for integer instructions. +(define_automaton "ls2_alu") + +;; ALU1 and ALU2. +;; We need to query these units to adjust round-robin counter. +(define_query_cpu_unit "ls2_alu1_core,ls2_alu2_core" "ls2_alu") + +;; Pseudo units to help modeling of ALU1/2 round-robin dispatch strategy. +(define_cpu_unit "ls2_alu1_turn,ls2_alu2_turn" "ls2_alu") + +;; Pseudo units to enable/disable ls2_alu[12]_turn units. +;; ls2_alu[12]_turn unit can be subscribed only after ls2_alu[12]_turn_enabled +;; unit is subscribed. +(define_cpu_unit "ls2_alu1_turn_enabled,ls2_alu2_turn_enabled" "ls2_alu") +(presence_set "ls2_alu1_turn" "ls2_alu1_turn_enabled") +(presence_set "ls2_alu2_turn" "ls2_alu2_turn_enabled") + +;; Reservations for ALU1 (ALU2) instructions. +;; Instruction goes to ALU1 (ALU2) and makes next ALU1/2 instruction to +;; be dispatched to ALU2 (ALU1). +(define_reservation "ls2_alu1" + "(ls2_alu1_core+ls2_alu2_turn_enabled)|ls2_alu1_core") +(define_reservation "ls2_alu2" + "(ls2_alu2_core+ls2_alu1_turn_enabled)|ls2_alu2_core") + +;; Reservation for ALU1/2 instructions. +;; Instruction will go to ALU1 iff ls2_alu1_turn_enabled is subscribed and +;; switch the turn to ALU2 by subscribing ls2_alu2_turn_enabled. +;; Or to ALU2 otherwise. +(define_reservation "ls2_alu" + "(ls2_alu1_core+ls2_alu1_turn+ls2_alu2_turn_enabled) + |(ls2_alu1_core+ls2_alu1_turn) + |(ls2_alu2_core+ls2_alu2_turn+ls2_alu1_turn_enabled) + |(ls2_alu2_core+ls2_alu2_turn)") + +;; Automaton for floating-point instructions. +(define_automaton "ls2_falu") + +;; FALU1 and FALU2. +;; We need to query these units to adjust round-robin counter. +(define_query_cpu_unit "ls2_falu1_core,ls2_falu2_core" "ls2_falu") + +;; Pseudo units to help modeling of FALU1/2 round-robin dispatch strategy. 
+(define_cpu_unit "ls2_falu1_turn,ls2_falu2_turn" "ls2_falu") + +;; Pseudo units to enable/disable ls2_falu[12]_turn units. +;; ls2_falu[12]_turn unit can be subscribed only after +;; ls2_falu[12]_turn_enabled unit is subscribed. +(define_cpu_unit "ls2_falu1_turn_enabled,ls2_falu2_turn_enabled" + "ls2_falu") +(presence_set "ls2_falu1_turn" "ls2_falu1_turn_enabled") +(presence_set "ls2_falu2_turn" "ls2_falu2_turn_enabled") + +;; Reservations for FALU1 (FALU2) instructions. +;; Instruction goes to FALU1 (FALU2) and makes next FALU1/2 instruction to +;; be dispatched to FALU2 (FALU1). +(define_reservation "ls2_falu1" + "(ls2_falu1_core+ls2_falu2_turn_enabled)|ls2_falu1_core") +(define_reservation "ls2_falu2" + "(ls2_falu2_core+ls2_falu1_turn_enabled)|ls2_falu2_core") + +;; Reservation for FALU1/2 instructions. +;; Instruction will go to FALU1 iff ls2_falu1_turn_enabled is subscribed and +;; switch the turn to FALU2 by subscribing ls2_falu2_turn_enabled. +;; Or to FALU2 otherwise. +(define_reservation "ls2_falu" + "(ls2_falu1+ls2_falu1_turn+ls2_falu2_turn_enabled) + |(ls2_falu1+ls2_falu1_turn) + |(ls2_falu2+ls2_falu2_turn+ls2_falu1_turn_enabled) + |(ls2_falu2+ls2_falu2_turn)") + +;; The following 4 instructions each subscribe one of +;; ls2_[f]alu{1,2}_turn_enabled units according to this attribute. +;; These instructions are used in mips.c: sched_ls2_dfa_post_advance_cycle. + +(define_attr "ls2_turn_type" "alu1,alu2,falu1,falu2,unknown" + (const_string "unknown")) + +;; Subscribe ls2_alu1_turn_enabled. +(define_insn "ls2_alu1_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_ALU1_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" +{ + gcc_unreachable (); + return ""; +} + [(set_attr "ls2_turn_type" "alu1")]) + +(define_insn_reservation "ls2_alu1_turn_enabled" 0 + (eq_attr "ls2_turn_type" "alu1") + "ls2_alu1_turn_enabled") + +;; Subscribe ls2_alu2_turn_enabled. 
+(define_insn "ls2_alu2_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" +{ + gcc_unreachable (); + return ""; +} + [(set_attr "ls2_turn_type" "alu2")]) + +(define_insn_reservation "ls2_alu2_turn_enabled" 0 + (eq_attr "ls2_turn_type" "alu2") + "ls2_alu2_turn_enabled") + +;; Subscribe ls2_falu1_turn_enabled. +(define_insn "ls2_falu1_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" +{ + gcc_unreachable (); + return ""; +} + [(set_attr "ls2_turn_type" "falu1")]) + +(define_insn_reservation "ls2_falu1_turn_enabled" 0 + (eq_attr "ls2_turn_type" "falu1") + "ls2_falu1_turn_enabled") + +;; Subscribe ls2_falu2_turn_enabled. +(define_insn "ls2_falu2_turn_enabled_insn" + [(unspec [(const_int 0)] UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN)] + "TUNE_LOONGSON_2EF" +{ + gcc_unreachable (); + return ""; +} + [(set_attr "ls2_turn_type" "falu2")]) + +(define_insn_reservation "ls2_falu2_turn_enabled" 0 + (eq_attr "ls2_turn_type" "falu2") + "ls2_falu2_turn_enabled") + +;; Automaton for memory operations. +(define_automaton "ls2_mem") + +;; Memory unit. +(define_query_cpu_unit "ls2_mem" "ls2_mem") + +;; Reservation for integer instructions. +(define_insn_reservation "ls2_alu" 2 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "arith,condmove,const,logical,mfhilo,move, + mthilo,nop,shift,signext,slt")) + "ls2_alu") + +;; Reservation for branch instructions. +(define_insn_reservation "ls2_branch" 2 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "branch,jump,call,trap")) + "ls2_alu1") + +;; Reservation for integer multiplication instructions. +(define_insn_reservation "ls2_imult" 5 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "imul,imul3")) + "ls2_alu2,ls2_alu2_core") + +;; Reservation for integer division / remainder instructions. +;; These instructions use the SRT algorithm and hence take 2-38 cycles. 
+(define_insn_reservation "ls2_idiv" 20 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "idiv")) + "ls2_alu2,ls2_alu2_core*18") + +;; Reservation for memory load instructions. +(define_insn_reservation "ls2_load" 5 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "load,fpload,mfc,mtc")) + "ls2_mem") + +;; Reservation for memory store instructions. +;; With stores we assume they don't alias with dependent loads. +;; Therefore we set the latency to zero. +(define_insn_reservation "ls2_store" 0 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "store,fpstore")) + "ls2_mem") + +;; Reservation for floating-point instructions of latency 3. +(define_insn_reservation "ls2_fp3" 3 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fabs,fneg,fcmp,fmove")) + "ls2_falu1") + +;; Reservation for floating-point instructions of latency 5. +(define_insn_reservation "ls2_fp5" 5 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fcvt")) + "ls2_falu1") + +;; Reservation for floating-point instructions that can go +;; to either of FALU1/2 units. +(define_insn_reservation "ls2_falu" 7 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fadd,fmul,fmadd")) + "ls2_falu") + +;; Reservation for floating-point division / remainder instructions. +;; These instructions use the SRT algorithm and hence take a variable amount +;; of cycles: +;; div.s takes 5-11 cycles +;; div.d takes 5-18 cycles +(define_insn_reservation "ls2_fdiv" 9 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fdiv")) + "ls2_falu2,ls2_falu2_core*7") + +;; Reservation for floating-point sqrt instructions. 
+;; These instructions use the SRT algorithm and hence take a variable amount +;; of cycles: +;; sqrt.s takes 5-17 cycles +;; sqrt.d takes 5-32 cycles +(define_insn_reservation "ls2_fsqrt" 15 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "fsqrt")) + "ls2_falu2,ls2_falu2_core*13") + +;; Two consecutive ALU instructions. +(define_insn_reservation "ls2_multi" 4 + (and (eq_attr "cpu" "loongson_2e,loongson_2f") + (eq_attr "type" "multi")) + "(ls2_alu1,ls2_alu2_core)|(ls2_alu2,ls2_alu1_core)") Index: gcc/config/mips/mips.md =================================================================== --- gcc/config/mips/mips.md (revision 66) +++ gcc/config/mips/mips.md (working copy) @@ -235,6 +235,12 @@ (UNSPEC_LOONGSON_PSHUFH 517) (UNSPEC_LOONGSON_UNPACK_HIGH 518) (UNSPEC_LOONGSON_UNPACK_LOW 519) + + ;; Used in loongson2ef.md + (UNSPEC_LOONGSON_ALU1_TURN_ENABLED_INSN 530) + (UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN 531) + (UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN 532) + (UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN 533) ] ) @@ -437,7 +443,7 @@ ;; Attribute describing the processor. This attribute must match exactly ;; with the processor_type enumeration in mips.h. (define_attr "cpu" - "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000" + "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,m4k,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000,loongson_2e,loongson_2f" (const (symbol_ref "mips_tune"))) ;; The type of hardware hazard associated with this instruction. 
@@ -783,6 +789,7 @@ (include "9000.md") (include "sb1.md") (include "sr71k.md") +(include "loongson2ef.md") (include "generic.md") ;; Index: gcc/config/mips/mips.c =================================================================== --- gcc/config/mips/mips.c (revision 66) +++ gcc/config/mips/mips.c (working copy) @@ -273,6 +273,40 @@ struct mips_frame_info GTY(()) { HOST_WIDE_INT hard_frame_pointer_offset; }; +/* Variables and flags used in scheduler hooks when tuning for + Loongson 2E/2F. */ +struct sched_ls2_def GTY(()) +{ + /* Variables to support Loongson 2E/2F round-robin [F]ALU1/2 dispatch + strategy. */ + + /* If true, then next ALU1/2 instruction will go to ALU1. */ + bool alu1_turn_p; + + /* If true, then next FALU1/2 instruction will go to FALU1. */ + bool falu1_turn_p; + + /* Codes to query if [f]alu{1,2}_core units are subscribed or not. */ + int alu1_core_unit_code; + int alu2_core_unit_code; + int falu1_core_unit_code; + int falu2_core_unit_code; + + /* True if current cycle has a multi instruction. + This flag is used in sched_ls2_dfa_post_advance_cycle. */ + bool cycle_has_multi_p; + + /* Instructions to subscribe ls2_[f]alu{1,2}_turn_enabled units. + These are used in sched_ls2_dfa_post_advance_cycle to initialize + DFA state. + E.g., when alu1_turn_enabled_insn is issued it makes next ALU1/2 + instruction go to ALU1. */ + rtx alu1_turn_enabled_insn; + rtx alu2_turn_enabled_insn; + rtx falu1_turn_enabled_insn; + rtx falu2_turn_enabled_insn; +}; + struct machine_function GTY(()) { /* The register returned by mips16_gp_pseudo_reg; see there for details. */ rtx mips16_gp_pseudo_rtx; @@ -301,8 +335,14 @@ struct machine_function GTY(()) { /* True if we have emitted an instruction to initialize mips16_gp_pseudo_rtx. */ bool initialized_mips16_gp_pseudo_p; + + /* Data used when scheduling for Loongson 2E/2F. */ + struct sched_ls2_def _sched_ls2; }; +/* A convenient shortcut. 
*/ +#define sched_ls2 (cfun->machine->_sched_ls2) + /* Information about a single argument. */ struct mips_arg_info { /* True if the argument is passed in a floating-point register, or @@ -9707,11 +9747,115 @@ mips_issue_rate (void) reach the theoretical max of 4. */ return 3; + case PROCESSOR_LOONGSON_2E: + case PROCESSOR_LOONGSON_2F: + return 4; + default: return 1; } } +/* Implement TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN hook. + Init data used in mips_dfa_post_advance_cycle. */ +static void +mips_init_dfa_post_cycle_insn (void) +{ + if (TUNE_LOONGSON_2EF) + { + start_sequence (); + emit_insn (gen_ls2_alu1_turn_enabled_insn ()); + sched_ls2.alu1_turn_enabled_insn = get_insns (); + end_sequence (); + + start_sequence (); + emit_insn (gen_ls2_alu2_turn_enabled_insn ()); + sched_ls2.alu2_turn_enabled_insn = get_insns (); + end_sequence (); + + start_sequence (); + emit_insn (gen_ls2_falu1_turn_enabled_insn ()); + sched_ls2.falu1_turn_enabled_insn = get_insns (); + end_sequence (); + + start_sequence (); + emit_insn (gen_ls2_falu2_turn_enabled_insn ()); + sched_ls2.falu2_turn_enabled_insn = get_insns (); + end_sequence (); + + sched_ls2.alu1_core_unit_code = get_cpu_unit_code ("ls2_alu1_core"); + sched_ls2.alu2_core_unit_code = get_cpu_unit_code ("ls2_alu2_core"); + sched_ls2.falu1_core_unit_code = get_cpu_unit_code ("ls2_falu1_core"); + sched_ls2.falu2_core_unit_code = get_cpu_unit_code ("ls2_falu2_core"); + } +} + +/* Initialize STATE when scheduling for Loongson 2E/2F. + Support round-robin dispatch scheme by enabling only one of + ALU1/ALU2 and one of FALU1/FALU2 units for ALU1/2 and FALU1/2 instructions + respectively. */ +static void +sched_ls2_dfa_post_advance_cycle (state_t state) +{ + if (cpu_unit_reservation_p (state, sched_ls2.alu1_core_unit_code)) + { + /* Though there are no non-pipelined ALU1 insns, + we can get an instruction of type 'multi' before reload. 
*/ + gcc_assert (sched_ls2.cycle_has_multi_p); + sched_ls2.alu1_turn_p = false; + } + + sched_ls2.cycle_has_multi_p = false; + + if (cpu_unit_reservation_p (state, sched_ls2.alu2_core_unit_code)) + /* We have a non-pipelined alu instruction in the core, + adjust round-robin counter. */ + sched_ls2.alu1_turn_p = true; + + if (sched_ls2.alu1_turn_p) + { + if (state_transition (state, sched_ls2.alu1_turn_enabled_insn) >= 0) + gcc_unreachable (); + } + else + { + if (state_transition (state, sched_ls2.alu2_turn_enabled_insn) >= 0) + gcc_unreachable (); + } + + if (cpu_unit_reservation_p (state, sched_ls2.falu1_core_unit_code)) + { + /* There are no non-pipelined FALU1 insns. */ + gcc_unreachable (); + sched_ls2.falu1_turn_p = false; + } + + if (cpu_unit_reservation_p (state, sched_ls2.falu2_core_unit_code)) + /* We have a non-pipelined falu instruction in the core, + adjust round-robin counter. */ + sched_ls2.falu1_turn_p = true; + + if (sched_ls2.falu1_turn_p) + { + if (state_transition (state, sched_ls2.falu1_turn_enabled_insn) >= 0) + gcc_unreachable (); + } + else + { + if (state_transition (state, sched_ls2.falu2_turn_enabled_insn) >= 0) + gcc_unreachable (); + } +} + +/* Implement TARGET_SCHED_DFA_POST_ADVANCE_CYCLE. + This hook is being called at the start of each cycle. */ +static void +mips_dfa_post_advance_cycle (void) +{ + if (TUNE_LOONGSON_2EF) + sched_ls2_dfa_post_advance_cycle (curr_state); +} + /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should be as wide as the scheduling freedom in the DFA. */ @@ -9722,6 +9866,9 @@ mips_multipass_dfa_lookahead (void) if (TUNE_SB1) return 4; + if (TUNE_LOONGSON_2EF) + return 4; + return 0; } @@ -9982,6 +10129,14 @@ mips_sched_init (FILE *file ATTRIBUTE_UN mips_macc_chains_last_hilo = 0; vr4130_last_insn = 0; mips_74k_agen_init (NULL_RTX); + + if (TUNE_LOONGSON_2EF) + { + /* Branch instructions go to ALU1, therefore basic block is most likely + to start with round-robin counter pointed to ALU2. 
*/ + sched_ls2.alu1_turn_p = false; + sched_ls2.falu1_turn_p = true; + } } /* Implement TARGET_SCHED_REORDER and TARGET_SCHED_REORDER2. */ @@ -10007,6 +10162,33 @@ mips_sched_reorder (FILE *file ATTRIBUTE return mips_issue_rate (); } +/* Update round-robin counters for ALU1/2 and FALU1/2. */ +static void +mips_ls2_variable_issue (void) +{ + if (sched_ls2.alu1_turn_p) + { + if (cpu_unit_reservation_p (curr_state, sched_ls2.alu1_core_unit_code)) + sched_ls2.alu1_turn_p = false; + } + else + { + if (cpu_unit_reservation_p (curr_state, sched_ls2.alu2_core_unit_code)) + sched_ls2.alu1_turn_p = true; + } + + if (sched_ls2.falu1_turn_p) + { + if (cpu_unit_reservation_p (curr_state, sched_ls2.falu1_core_unit_code)) + sched_ls2.falu1_turn_p = false; + } + else + { + if (cpu_unit_reservation_p (curr_state, sched_ls2.falu2_core_unit_code)) + sched_ls2.falu1_turn_p = true; + } +} + /* Implement TARGET_SCHED_VARIABLE_ISSUE. */ static int @@ -10022,6 +10204,21 @@ mips_variable_issue (FILE *file ATTRIBUT vr4130_last_insn = insn; if (TUNE_74K) mips_74k_agen_init (insn); + else if (TUNE_LOONGSON_2EF) + { + mips_ls2_variable_issue (); + + if (recog_memoized (insn) >= 0) + { + sched_ls2.cycle_has_multi_p |= (get_attr_type (insn) + == TYPE_MULTI); + + /* Instructions of type 'multi' should all be split before + second scheduling pass. 
*/ + gcc_assert (!sched_ls2.cycle_has_multi_p + || !reload_completed); + } + } } return more; } @@ -12835,6 +13032,10 @@ mips_expand_vector_init (rtx target, rtx #define TARGET_SCHED_ADJUST_COST mips_adjust_cost #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE mips_issue_rate +#undef TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN +#define TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN mips_init_dfa_post_cycle_insn +#undef TARGET_SCHED_DFA_POST_ADVANCE_CYCLE +#define TARGET_SCHED_DFA_POST_ADVANCE_CYCLE mips_dfa_post_advance_cycle #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ mips_multipass_dfa_lookahead Index: gcc/config/mips/mips.h =================================================================== --- gcc/config/mips/mips.h (revision 66) +++ gcc/config/mips/mips.h (working copy) @@ -265,6 +265,8 @@ enum mips_code_readable_setting { || mips_tune == PROCESSOR_74KF1_1 \ || mips_tune == PROCESSOR_74KF3_2) #define TUNE_20KC (mips_tune == PROCESSOR_20KC) +#define TUNE_LOONGSON_2EF (mips_tune == PROCESSOR_LOONGSON_2E \ + || mips_tune == PROCESSOR_LOONGSON_2F) /* Whether vector modes and intrinsics for ST Microelectronics Loongson-2E/2F processors should be enabled. In o32 pairs of @@ -908,10 +910,12 @@ enum mips_code_readable_setting { && !TARGET_MIPS16) /* Likewise mtc1 and mfc1. */ -#define ISA_HAS_XFER_DELAY (mips_isa <= 3) +#define ISA_HAS_XFER_DELAY (mips_isa <= 3 \ + && !TARGET_LOONGSON_2EF) /* Likewise floating-point comparisons. */ -#define ISA_HAS_FCMP_DELAY (mips_isa <= 3) +#define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \ + && !TARGET_LOONGSON_2EF) /* True if mflo and mfhi can be immediately followed by instructions which write to the HI and LO registers. @@ -928,7 +932,8 @@ enum mips_code_readable_setting { #define ISA_HAS_HILO_INTERLOCKS (ISA_MIPS32 \ || ISA_MIPS32R2 \ || ISA_MIPS64 \ - || TARGET_MIPS5500) + || TARGET_MIPS5500 \ + || TARGET_LOONGSON_2EF) /* ISA includes synci, jr.hb and jalr.hb. 
*/ #define ISA_HAS_SYNCI (ISA_MIPS32R2 && !TARGET_MIPS16) @@ -3229,3 +3234,6 @@ extern const struct mips_cpu_info *mips_ extern const struct mips_rtx_cost_data *mips_cost; extern enum mips_code_readable_setting mips_code_readable; #endif + +/* Enable querying of DFA units. */ +#define CPU_UNITS_QUERY 1