From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1957) id D3D493852770; Mon, 18 Jul 2022 12:45:29 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D3D493852770 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Claudiu Zissulescu To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-1733] arc: Add ARCHS release 310a tune variant. X-Act-Checkin: gcc X-Git-Author: Claudiu Zissulescu X-Git-Refname: refs/heads/master X-Git-Oldrev: 87f46a16ec05beb51439f55a4d3c36d64b95b00f X-Git-Newrev: 7501eec65c60701f72621d04eeb5342bad2fe4fb Message-Id: <20220718124529.D3D493852770@sourceware.org> Date: Mon, 18 Jul 2022 12:45:29 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 18 Jul 2022 12:45:29 -0000 https://gcc.gnu.org/g:7501eec65c60701f72621d04eeb5342bad2fe4fb commit r13-1733-g7501eec65c60701f72621d04eeb5342bad2fe4fb Author: Claudiu Zissulescu Date: Mon Jul 18 15:07:00 2022 +0300 arc: Add ARCHS release 310a tune variant. Add mtune and mcpu options for ARCHS release 310a type CPU. The mtune=release31a is designed to be used as an alternative to the mcpu=hs4x_rel31 option. ARCHS4x release 31a uses DSP instructions which are implemented a bit differently than mpy9. Hence, use the safer mpy2 option. gcc/ * config/arc/arc-arch.h (arc_tune_attr): Add ARC_TUNE_ARCHS4X_REL31A variant. * config/arc/arc.cc (arc_override_options): Tune options for release 310a. (arc_sched_issue_rate): Use correct enum. (arc600_corereg_hazard): Textual change. (arc_hazard): Add release 310a tuning. * config/arc/arc.md (tune): Update and take into consideration new tune option. (tune_dspmpy): Likewise. (tune_store): New attribute. * config/arc/arc.opt (mtune): New tune option. * config/arc/arcHS4x.md (hs4x_brcc0, hs4x_brcc1): New cpu units. 
(hs4x_brcc_op): New instruction reservation. (hs4x_data_store_1_op): Likewise. * config/arc/arc-cpus.def (hs4x_rel31): New cpu variant. * config/arc/arc-tables.opt: Regenerate. * config/arc/t-multilib: Likewise. * doc/invoke.texi (ARC): Update mcpu and tune sections. Signed-off-by: Claudiu Zissulescu Diff: --- gcc/config/arc/arc-arch.h | 3 +- gcc/config/arc/arc-cpus.def | 1 + gcc/config/arc/arc-tables.opt | 3 + gcc/config/arc/arc.cc | 192 ++++++++++++++++++++++++++---------------- gcc/config/arc/arc.md | 32 ++++--- gcc/config/arc/arc.opt | 3 + gcc/config/arc/arcHS4x.md | 17 +++- gcc/config/arc/t-multilib | 4 +- gcc/doc/invoke.texi | 16 ++++ 9 files changed, 181 insertions(+), 90 deletions(-) diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h index 4c728a87453..83b156ee34a 100644 --- a/gcc/config/arc/arc-arch.h +++ b/gcc/config/arc/arc-arch.h @@ -77,7 +77,8 @@ enum arc_tune_attr ARC_TUNE_CORE_3, ARC_TUNE_ARCHS4X, ARC_TUNE_ARCHS4XD, - ARC_TUNE_ARCHS4XD_SLOW + ARC_TUNE_ARCHS4XD_SLOW, + ARC_TUNE_ARCHS4X_REL31A }; /* Extra options for a processor template to hold any CPU specific diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def index baf61db02ed..5668b0fbf19 100644 --- a/gcc/config/arc/arc-cpus.def +++ b/gcc/config/arc/arc-cpus.def @@ -64,6 +64,7 @@ ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, NONE) ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE, NONE) ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4X) ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4XD) +ARC_CPU (hs4x_rel31, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE, ARCHS4X_REL31A) ARC_CPU (arc600, 6xx, FL_BS, NONE, ARC600) ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, NONE, ARC600) diff --git a/gcc/config/arc/arc-tables.opt b/gcc/config/arc/arc-tables.opt index 8cc5135205d..0a0d354db60 100644 --- a/gcc/config/arc/arc-tables.opt +++ b/gcc/config/arc/arc-tables.opt @@ -69,6 +69,9 @@ Enum(processor_type) String(hs4x) 
Value(PROCESSOR_hs4x) EnumValue Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd) +EnumValue +Enum(processor_type) String(hs4x_rel31) Value(PROCESSOR_hs4x_rel31) + EnumValue Enum(processor_type) String(arc600) Value(PROCESSOR_arc600) diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 77730c88e55..064790bf396 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -646,8 +646,8 @@ arc_sched_issue_rate (void) { switch (arc_tune) { - case TUNE_ARCHS4X: - case TUNE_ARCHS4XD: + case ARC_TUNE_ARCHS4X: + case ARC_TUNE_ARCHS4XD: return 3; default: break; @@ -1458,6 +1458,12 @@ arc_override_options (void) if (!OPTION_SET_P (unaligned_access) && TARGET_HS) unaligned_access = 1; + if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A)) + { + TARGET_CODE_DENSITY_FRAME = 0; + flag_delayed_branch = 0; + } + /* These need to be done at start up. It's convenient to do them here. */ arc_init (); } @@ -7817,6 +7823,115 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) return arc_store_addr_hazard_internal_p (producer, consumer); } +/* Return length adjustment for INSN. + For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. 
*/ + +static int +arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) +{ + if (!TARGET_ARC600) + return 0; + if (GET_CODE (PATTERN (pred)) == SEQUENCE) + pred = as_a (PATTERN (pred))->insn (1); + if (GET_CODE (PATTERN (succ)) == SEQUENCE) + succ = as_a (PATTERN (succ))->insn (0); + if (recog_memoized (pred) == CODE_FOR_mulsi_600 + || recog_memoized (pred) == CODE_FOR_umul_600 + || recog_memoized (pred) == CODE_FOR_mac_600 + || recog_memoized (pred) == CODE_FOR_mul64_600 + || recog_memoized (pred) == CODE_FOR_mac64_600 + || recog_memoized (pred) == CODE_FOR_umul64_600 + || recog_memoized (pred) == CODE_FOR_umac64_600) + return 0; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST) + { + const_rtx x = *iter; + switch (GET_CODE (x)) + { + case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + break; + default: + /* This is also fine for PRE/POST_MODIFY, because they + contain a SET. */ + continue; + } + rtx dest = XEXP (x, 0); + /* Check if this sets a an extension register. N.B. we use 61 for the + condition codes, which is definitely not an extension register. */ + if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 + /* Check if the same register is used by the PAT. */ + && (refers_to_regno_p + (REGNO (dest), + REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, + PATTERN (succ), 0))) + return 4; + } + return 0; +} + +/* For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. */ + +int +arc_hazard (rtx_insn *pred, rtx_insn *succ) +{ + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + + if (TARGET_ARC600) + return arc600_corereg_hazard (pred, succ); + + return 0; +} + +/* When compiling for release 310a, insert a nop before any + conditional jump. 
*/ + +static int +arc_check_release31a (rtx_insn *pred, rtx_insn *succ) +{ + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + + if (!JUMP_P (pred) && !single_set (pred)) + return 0; + + if (!JUMP_P (succ) && !single_set (succ)) + return 0; + + if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A)) + switch (get_attr_type (pred)) + { + case TYPE_STORE: + switch (get_attr_type (succ)) + { + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + case TYPE_LOOP_END: + return 1; + default: + break; + } + break; + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + case TYPE_LOOP_END: + if (get_attr_type (succ) == TYPE_STORE) + return 1; + break; + default: + break; + } + + return 0; +} + /* The same functionality as arc_hazard. It is called in machine reorg before any other optimization. Hence, the NOP size is taken into account when doing branch shortening. */ @@ -7830,10 +7945,8 @@ workaround_arc_anomaly (void) for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) { succ0 = next_real_insn (insn); - if (arc_hazard (insn, succ0)) - { - emit_insn_before (gen_nopv (), succ0); - } + if (arc_hazard (insn, succ0) || arc_check_release31a (insn, succ0)) + emit_insn_before (gen_nopv (), succ0); } if (!TARGET_ARC700) @@ -9324,56 +9437,6 @@ disi_highpart (rtx in) return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4); } -/* Return length adjustment for INSN. - For ARC600: - A write to a core reg greater or equal to 32 must not be immediately - followed by a use. Anticipate the length requirement to insert a nop - between PRED and SUCC to prevent a hazard. 
*/ - -static int -arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) -{ - if (!TARGET_ARC600) - return 0; - if (GET_CODE (PATTERN (pred)) == SEQUENCE) - pred = as_a (PATTERN (pred))->insn (1); - if (GET_CODE (PATTERN (succ)) == SEQUENCE) - succ = as_a (PATTERN (succ))->insn (0); - if (recog_memoized (pred) == CODE_FOR_mulsi_600 - || recog_memoized (pred) == CODE_FOR_umul_600 - || recog_memoized (pred) == CODE_FOR_mac_600 - || recog_memoized (pred) == CODE_FOR_mul64_600 - || recog_memoized (pred) == CODE_FOR_mac64_600 - || recog_memoized (pred) == CODE_FOR_umul64_600 - || recog_memoized (pred) == CODE_FOR_umac64_600) - return 0; - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST) - { - const_rtx x = *iter; - switch (GET_CODE (x)) - { - case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: - break; - default: - /* This is also fine for PRE/POST_MODIFY, because they - contain a SET. */ - continue; - } - rtx dest = XEXP (x, 0); - /* Check if this sets an extension register. N.B. we use 61 for the - condition codes, which is definitely not an extension register. */ - if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 - /* Check if the same register is used by the PAT. */ - && (refers_to_regno_p - (REGNO (dest), - REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, - PATTERN (succ), 0))) - return 4; - } - return 0; -} - /* Given a rtx, check if it is an assembly instruction or not. */ static int @@ -9408,23 +9471,6 @@ arc_asm_insn_p (rtx x) return 0; } -/* For ARC600: - A write to a core reg greater or equal to 32 must not be immediately - followed by a use. Anticipate the length requirement to insert a nop - between PRED and SUCC to prevent a hazard. 
*/ - -int -arc_hazard (rtx_insn *pred, rtx_insn *succ) -{ - if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) - return 0; - - if (TARGET_ARC600) - return arc600_corereg_hazard (pred, succ); - - return 0; -} - /* Return length adjustment for INSN. */ int diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 39b358052c1..7170445309f 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -645,22 +645,21 @@ ;; is made that makes conditional execution required. (define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \ -core_3, archs4x, archs4xd, archs4xd_slow" +archs4x, archs4xd" (const - (cond [(symbol_ref "arc_tune == TUNE_ARC600") + (cond [(symbol_ref "arc_tune == ARC_TUNE_ARC600") (const_string "arc600") (symbol_ref "arc_tune == ARC_TUNE_ARC7XX") (const_string "arc7xx") - (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") + (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_STD") (const_string "arc700_4_2_std") - (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") + (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_XMAC") (const_string "arc700_4_2_xmac") - (symbol_ref "arc_tune == ARC_TUNE_CORE_3") - (const_string "core_3") - (symbol_ref "arc_tune == TUNE_ARCHS4X") + (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A")) (const_string "archs4x") - (ior (symbol_ref "arc_tune == TUNE_ARCHS4XD") - (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")) + (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW")) (const_string "archs4xd")] (const_string "none")))) @@ -671,13 +670,22 @@ core_3, archs4x, archs4xd, archs4xd_slow" (define_attr "tune_dspmpy" "none, slow, fast" (const - (cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X") - (symbol_ref "arc_tune == TUNE_ARCHS4XD")) + (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")) (const_string "fast") - (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW") + (symbol_ref 
"arc_tune == ARC_TUNE_ARCHS4XD_SLOW") (const_string "slow")] (const_string "none")))) +(define_attr "tune_store" "none, normal, rel31a" + (const + (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")) + (const_string "normal") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A") + (const_string "rel31a")] + (const_string "none")))) + ;; Move instructions. (define_expand "movqi" [(set (match_operand:QI 0 "move_dest_operand" "") diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index eb85f49b283..0add5a2a21f 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -276,6 +276,9 @@ Enum(arc_tune_attr) String(arc750d) Value(ARC_TUNE_ARC700_4_2_XMAC) EnumValue Enum(arc_tune_attr) String(core3) Value(ARC_TUNE_CORE_3) +EnumValue +Enum(arc_tune_attr) String(release31a) Value(ARC_TUNE_ARCHS4X_REL31A) + mindexed-loads Target Var(TARGET_INDEXED_LOADS) Init(TARGET_INDEXED_LOADS_DEFAULT) Enable the use of indexed loads. diff --git a/gcc/config/arc/arcHS4x.md b/gcc/config/arc/arcHS4x.md index 5136eba2b98..1009833d400 100644 --- a/gcc/config/arc/arcHS4x.md +++ b/gcc/config/arc/arcHS4x.md @@ -27,14 +27,21 @@ (define_cpu_unit "hs4x_mult" "ARCHS4x") (define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x") (define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x") +(define_cpu_unit "hs4x_brcc0, hs4x_brcc1" "ARCHS4x") (define_insn_reservation "hs4x_brj_op" 1 (and (match_test "TARGET_HS") (eq_attr "tune" "archs4x, archs4xd") (eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \ -branch, brcc,brcc_no_delay_slot, sfunc")) +branch, sfunc")) "hs4x_issue0") +(define_insn_reservation "hs4x_brcc_op" 1 + (and (match_test "TARGET_HS") + (eq_attr "tune" "archs4x, archs4xd") + (eq_attr "type" "brcc,brcc_no_delay_slot,loop_end")) + "hs4x_issue0 + hs4x_brcc0 + hs4x_brcc1") + (define_insn_reservation "hs4x_data_load_op" 4 (and (match_test "TARGET_HS") (eq_attr "tune" "archs4x, archs4xd") @@ -43,10 +50,16 @@ branch, 
brcc,brcc_no_delay_slot, sfunc")) (define_insn_reservation "hs4x_data_store_op" 1 (and (match_test "TARGET_HS") - (eq_attr "tune" "archs4x, archs4xd") + (eq_attr "tune_store" "normal") (eq_attr "type" "store")) "hs4x_issue1 + hs4x_ld_st") +(define_insn_reservation "hs4x_data_store_1_op" 2 + (and (match_test "TARGET_HS") + (eq_attr "tune_store" "rel31a") + (eq_attr "type" "store")) + "hs4x_issue1 + hs4x_ld_st + hs4x_brcc0, hs4x_brcc1") + ;; Advanced ALU (define_insn_reservation "hs4x_adv_alue_op" 4 (and (match_test "TARGET_HS") diff --git a/gcc/config/arc/t-multilib b/gcc/config/arc/t-multilib index 8d97ad1deea..921945eba44 100644 --- a/gcc/config/arc/t-multilib +++ b/gcc/config/arc/t-multilib @@ -21,9 +21,9 @@ # along with GCC; see the file COPYING3. If not see # . -MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400 +MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=hs4x_rel31/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400 -MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400 +MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd hs4x_rel31 arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400 # Aliases: 
MULTILIB_MATCHES = mcpu?arc600=mcpu?ARC600 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 84d6f0f9860..94fe57aa4e2 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -20053,6 +20053,15 @@ Compile for ARC HS38 CPU. @item hs38_linux Compile for ARC HS38 CPU with all hardware extensions on. +@item hs4x +Compile for ARC HS4x CPU. + +@item hs4xd +Compile for ARC HS4xD CPU. + +@item hs4x_rel31 +Compile for ARC HS4x CPU release 3.10a. + @item arc600_norm Compile for ARC 600 CPU with @code{norm} instructions enabled. @@ -20662,6 +20671,13 @@ Tune for ARC725D CPU. @item ARC750D Tune for ARC750D CPU. +@item core3 +Tune for ARCv2 core3 type CPU. This option enables usage of +the @code{dbnz} instruction. + +@item release31a +Tune for ARC4x release 3.10a. + @end table @item -mmultcost=@var{num}