* [PATCH] [ARC] Add support for HS4x cpus.
@ 2018-06-13 9:09 Claudiu Zissulescu
2018-07-06 22:22 ` Andrew Burgess
0 siblings, 1 reply; 3+ messages in thread
From: Claudiu Zissulescu @ 2018-06-13 9:09 UTC (permalink / raw)
To: gcc-patches; +Cc: fbedard, andrew.burgess, Claudiu Zissulescu
From: Claudiu Zissulescu <claziss@synopsys.com>
This patch adds support for two new ARC HS CPU variants, hs4x and hs4xd.
Ok to apply?
Claudiu
gcc/
2017-03-10 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-arch.h (arc_tune_attr): Add new tune parameters
for ARCHS4x.
* config/arc/arc-cpus.def (hs4x): New cpu.
(hs4xd): Likewise.
* config/arc/arc-tables.opt: Regenerate.
* config/arc/arc.c (arc_sched_issue_rate): New function.
(TARGET_SCHED_ISSUE_RATE): Define.
(TARGET_SCHED_EXPOSED_PIPELINE): Likewise.
* config/arc/arc.md (attr type): Add fpu_fuse, fpu_sdiv, fpu_ddiv,
fpu_cvt.
(attr tune): Add ARCHS4x tune values.
(attr tune_dspmpy): Define.
(*tst): Correct instruction type.
* config/arc/arcHS.md: Don't use this automaton for ARCHS4x cpus.
* config/arc/arcHS4x.md: New file.
* config/arc/fpu.md: Update instruction type attributes.
* config/arc/t-multilib: Regenerate.
---
gcc/config/arc/arc-arch.h | 5 +-
gcc/config/arc/arc-cpus.def | 8 +-
gcc/config/arc/arc-tables.opt | 6 +
gcc/config/arc/arc.c | 19 +++
gcc/config/arc/arc.md | 24 +++-
gcc/config/arc/arcHS.md | 6 +
gcc/config/arc/arcHS4x.md | 221 ++++++++++++++++++++++++++++++++++
gcc/config/arc/fpu.md | 16 +--
8 files changed, 289 insertions(+), 16 deletions(-)
create mode 100644 gcc/config/arc/arcHS4x.md
diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h
index 64866dd529b..01f95946623 100644
--- a/gcc/config/arc/arc-arch.h
+++ b/gcc/config/arc/arc-arch.h
@@ -73,7 +73,10 @@ enum arc_tune_attr
ARC_TUNE_ARC600,
ARC_TUNE_ARC700_4_2_STD,
ARC_TUNE_ARC700_4_2_XMAC,
- ARC_TUNE_CORE_3
+ ARC_TUNE_CORE_3,
+ ARC_TUNE_ARCHS4X,
+ ARC_TUNE_ARCHS4XD,
+ ARC_TUNE_ARCHS4XD_SLOW
};
/* CPU specific properties. */
diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def
index 1fce81f6933..4aa422f1a39 100644
--- a/gcc/config/arc/arc-cpus.def
+++ b/gcc/config/arc/arc-cpus.def
@@ -59,10 +59,12 @@ ARC_CPU (archs, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE)
ARC_CPU (hs34, hs, FL_MPYOPT_2, NONE)
ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE)
ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE)
+ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, ARCHS4X)
+ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, ARCHS4XD)
-ARC_CPU (arc600, 6xx, FL_BS, ARC600)
-ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, ARC600)
-ARC_CPU (arc600_mul64, 6xx, FL_BS|FL_NORM|FL_MUL64, ARC600)
+ARC_CPU (arc600, 6xx, FL_BS, ARC600)
+ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, ARC600)
+ARC_CPU (arc600_mul64, 6xx, FL_BS|FL_NORM|FL_MUL64, ARC600)
ARC_CPU (arc600_mul32x16, 6xx, FL_BS|FL_NORM|FL_MUL32x16, ARC600)
ARC_CPU (arc601, 6xx, 0, ARC600)
ARC_CPU (arc601_norm, 6xx, FL_NORM, ARC600)
diff --git a/gcc/config/arc/arc-tables.opt b/gcc/config/arc/arc-tables.opt
index 3b17b3de7d5..2afaf5bd83c 100644
--- a/gcc/config/arc/arc-tables.opt
+++ b/gcc/config/arc/arc-tables.opt
@@ -63,6 +63,12 @@ Enum(processor_type) String(hs38) Value(PROCESSOR_hs38)
EnumValue
Enum(processor_type) String(hs38_linux) Value(PROCESSOR_hs38_linux)
+EnumValue
+Enum(processor_type) String(hs4x) Value(PROCESSOR_hs4x)
+
+EnumValue
+Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd)
+
EnumValue
Enum(processor_type) String(arc600) Value(PROCESSOR_arc600)
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 2bedc9af37e..03a2f4223c0 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -483,6 +483,22 @@ arc_autovectorize_vector_sizes (vector_sizes *sizes)
}
}
+
+/* Implements target hook TARGET_SCHED_ISSUE_RATE. */
+static int
+arc_sched_issue_rate (void)
+{
+ switch (arc_tune)
+ {
+ case TUNE_ARCHS4X:
+ case TUNE_ARCHS4XD:
+ return 3;
+ default:
+ break;
+ }
+ return 1;
+}
+
/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */
static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
static rtx arc_delegitimize_address (rtx);
@@ -565,6 +581,9 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE arc_sched_issue_rate
+
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 091f1092bed..5610bab694c 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -82,6 +82,7 @@
(include ("arc700.md"))
(include ("arcEM.md"))
(include ("arcHS.md"))
+(include ("arcHS4x.md"))
;; Predicates
@@ -204,7 +205,7 @@
simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc,
simd_valign, simd_valign_with_acc, simd_vcontrol,
simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem,
- fpu, block"
+ fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block"
(cond [(eq_attr "is_sfunc" "yes")
(cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call")
(match_test "flag_pic") (const_string "sfunc")]
@@ -593,7 +594,8 @@
;; somehow modify them to become inelegible for delay slots if a decision
;; is made that makes conditional execution required.
-(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3"
+(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \
+archs4x, archs4xd, archs4xd_slow"
(const
(cond [(symbol_ref "arc_tune == TUNE_ARC600")
(const_string "arc600")
@@ -602,7 +604,12 @@
(symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
(const_string "arc700_4_2_xmac")
(symbol_ref "arc_tune == ARC_TUNE_CORE_3")
- (const_string "core_3")]
+ (const_string "core_3")
+ (symbol_ref "arc_tune == TUNE_ARCHS4X")
+ (const_string "archs4x")
+ (ior (symbol_ref "arc_tune == TUNE_ARCHS4XD")
+ (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW"))
+ (const_string "archs4xd")]
(const_string "none"))))
(define_attr "tune_arc700" "false,true"
@@ -610,6 +617,15 @@
(const_string "true")
(const_string "false")))
+(define_attr "tune_dspmpy" "none, slow, fast"
+ (const
+ (cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X")
+ (symbol_ref "arc_tune == TUNE_ARCHS4XD"))
+ (const_string "fast")
+ (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")
+ (const_string "slow")]
+ (const_string "none"))))
+
;; Move instructions.
(define_expand "movqi"
[(set (match_operand:QI 0 "move_dest_operand" "")
@@ -960,7 +976,7 @@
}
"
[(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false")
- (set_attr "type" "compare,compare,compare,compare,compare,compare,shift,compare")
+ (set_attr "type" "compare,compare,compare,compare,compare,compare,binary,compare")
(set_attr "length" "*,*,4,4,4,4,4,8")
(set_attr "predicable" "no,yes,no,yes,no,no,no,yes")
(set_attr "cond" "set_zn")])
diff --git a/gcc/config/arc/arcHS.md b/gcc/config/arc/arcHS.md
index 2a8588a02e4..d49b90c4970 100644
--- a/gcc/config/arc/arcHS.md
+++ b/gcc/config/arc/arcHS.md
@@ -25,32 +25,38 @@
(define_insn_reservation "hs_data_load" 4
(and (match_test "TARGET_HS")
+ (eq_attr "tune" "none")
(eq_attr "type" "load"))
"hs_issue+hs_ld_st,hs_ld_st,nothing*2")
(define_insn_reservation "hs_data_store" 1
(and (match_test "TARGET_HS")
+ (eq_attr "tune" "none")
(eq_attr "type" "store"))
"hs_issue+hs_ld_st")
(define_insn_reservation "hs_alu0" 2
(and (match_test "TARGET_HS")
+ (eq_attr "tune" "none")
(eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
"hs_issue+x1,x2")
(define_insn_reservation "hs_alu1" 4
(and (match_test "TARGET_HS")
+ (eq_attr "tune" "none")
(eq_attr "type" "move, cmove, unary, binary, compare, misc"))
"hs_issue+x1, nothing*3")
(define_insn_reservation "hs_divrem" 13
(and (match_test "TARGET_HS")
(match_test "TARGET_DIVREM")
+ (eq_attr "tune" "none")
(eq_attr "type" "div_rem"))
"hs_issue+divrem_hs, (divrem_hs)*12")
(define_insn_reservation "hs_mul" 3
(and (match_test "TARGET_HS")
+ (eq_attr "tune" "none")
(eq_attr "type" "mul16_em, multi, umulti"))
"hs_issue+mul_hs, nothing*3")
diff --git a/gcc/config/arc/arcHS4x.md b/gcc/config/arc/arcHS4x.md
new file mode 100644
index 00000000000..f804b6be694
--- /dev/null
+++ b/gcc/config/arc/arcHS4x.md
@@ -0,0 +1,221 @@
+;; DFA scheduling description of the Synopsys DesignWare ARC HS4x cpu
+;; for GNU C compiler
+;; Copyright (C) 2017 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ARCHS4x")
+
+(define_cpu_unit "hs4x_issue0" "ARCHS4x")
+(define_cpu_unit "hs4x_issue1" "ARCHS4x")
+(define_cpu_unit "hs4x_ld_st" "ARCHS4x")
+(define_cpu_unit "hs4x_divrem" "ARCHS4x")
+(define_cpu_unit "hs4x_mult" "ARCHS4x")
+(define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x")
+(define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x")
+
+(define_insn_reservation "hs4x_brj_op" 1
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \
+branch, brcc,brcc_no_delay_slot, sfunc"))
+ "hs4x_issue0")
+
+(define_insn_reservation "hs4x_data_load_op" 4
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "load"))
+ "hs4x_issue1 + hs4x_ld_st,hs4x_ld_st")
+
+(define_insn_reservation "hs4x_data_store_op" 1
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "store"))
+ "hs4x_issue1 + hs4x_ld_st")
+
+;; Advanced ALU
+(define_insn_reservation "hs4x_adv_alue_op" 4
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
+ "(hs4x_issue0 | hs4x_issue1), hs4x_x1")
+
+(define_insn_reservation "hs4x_adv_alul_op" 6
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4xd")
+ (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
+ "(hs4x_issue0 | hs4x_issue1), nothing*2, hs4x_x2")
+
+;; Basic ALU
+(define_insn_reservation "hs4x_basic_alue_op" 1
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
+ "(hs4x_issue0 | hs4x_issue1) + hs4x_y1")
+
+(define_insn_reservation "hs4x_basic_alul_op" 4
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
+ "(hs4x_issue0 | hs4x_issue1), nothing*2, hs4x_y2")
+
+(define_insn_reservation "hs4x_divrem_op" 13
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "div_rem"))
+ "hs4x_issue0 + hs4x_divrem, (hs4x_divrem)*12")
+
+;; Multiplier reservation for the fast DSPMPY configuration.
+(define_insn_reservation "hs4x_mul_fast_op" 7
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune_dspmpy" "fast")
+ (eq_attr "type" "mul16_em, multi, umulti"))
+ "hs4x_issue0 + hs4x_mult")
+
+(define_insn_reservation "hs4x_mul_slow_op" 8
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune_dspmpy" "slow")
+ (eq_attr "type" "mul16_em, multi, umulti"))
+ "hs4x_issue0 + hs4x_mult")
+
+;; FPU unit
+(define_insn_reservation "hs4x_fpu_op" 8
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "fpu"))
+ "hs4x_issue0")
+
+;; FPU FUSE unit
+(define_insn_reservation "hs4x_fpu_fuse_op" 12
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "fpu_fuse"))
+ "hs4x_issue0")
+
+;; FPU SP SQRT/DIV unit
+(define_insn_reservation "hs4x_fpu_sdiv_op" 20
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "fpu_sdiv"))
+ "hs4x_issue0")
+
+;; FPU DP SQRT/DIV unit
+(define_insn_reservation "hs4x_fpu_ddiv_op" 34
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "fpu_ddiv"))
+ "hs4x_issue0")
+
+;; FPU CVT unit
+(define_insn_reservation "hs4x_fpu_cvt_op" 5
+ (and (match_test "TARGET_HS")
+ (eq_attr "tune" "archs4x, archs4xd")
+ (eq_attr "type" "fpu_cvt"))
+ "hs4x_issue0")
+
+;; BYPASS Advanced ALU ->
+(define_bypass 1 "hs4x_adv_alue_op" "hs4x_divrem_op")
+(define_bypass 1 "hs4x_adv_alue_op" "hs4x_mul_*op")
+(define_bypass 2 "hs4x_adv_alue_op" "hs4x_adv_alue_op")
+(define_bypass 1 "hs4x_adv_alue_op" "hs4x_basic_alue_op")
+(define_bypass 1 "hs4x_adv_alue_op" "hs4x_basic_alul_op")
+(define_bypass 1 "hs4x_adv_alue_op" "hs4x_data_load_op")
+(define_bypass 0 "hs4x_adv_alue_op" "hs4x_data_store_op" "store_data_bypass_p")
+(define_bypass 2 "hs4x_adv_alue_op" "hs4x_data_store_op")
+(define_bypass 1 "hs4x_adv_alue_op" "hs4x_fpu_*op")
+
+(define_bypass 2 "hs4x_adv_alul_op" "hs4x_basic_alul_op")
+(define_bypass 2 "hs4x_adv_alul_op" "hs4x_adv_alul_op")
+(define_bypass 2 "hs4x_adv_alul_op" "hs4x_mul_*op")
+(define_bypass 0 "hs4x_adv_alul_op" "hs4x_data_store_op" "store_data_bypass_p")
+(define_bypass 4 "hs4x_adv_alul_op" "hs4x_divrem_op")
+(define_bypass 5 "hs4x_adv_alul_op" "hs4x_fpu_*op")
+
+;; BYPASS Basic ALU ->
+(define_bypass 0 "hs4x_basic_alue_op" "hs4x_data_store_op" "store_data_bypass_p")
+
+(define_bypass 1 "hs4x_basic_alul_op" "hs4x_basic_alul_op")
+(define_bypass 1 "hs4x_basic_alul_op" "hs4x_adv_alul_op")
+(define_bypass 0 "hs4x_basic_alul_op" "hs4x_data_store_op" "store_data_bypass_p")
+(define_bypass 1 "hs4x_basic_alul_op" "hs4x_mul_*op")
+(define_bypass 3 "hs4x_basic_alul_op" "hs4x_divrem_op")
+(define_bypass 3 "hs4x_basic_alul_op" "hs4x_fpu_*op")
+
+;; BYPASS LD ->
+(define_bypass 1 "hs4x_data_load_op" "hs4x_basic_alul_op")
+(define_bypass 1 "hs4x_data_load_op" "hs4x_adv_alul_op")
+(define_bypass 3 "hs4x_data_load_op" "hs4x_divrem_op")
+(define_bypass 3 "hs4x_data_load_op" "hs4x_data_load_op")
+(define_bypass 3 "hs4x_data_load_op" "hs4x_mul_*op")
+(define_bypass 0 "hs4x_data_load_op" "hs4x_data_store_op" "store_data_bypass_p")
+(define_bypass 3 "hs4x_data_load_op" "hs4x_fpu_*op")
+
+;; BYPASS FAST MPY ->
+(define_bypass 4 "hs4x_mul_fast_op" "hs4x_basic_alul_op")
+(define_bypass 4 "hs4x_mul_fast_op" "hs4x_adv_alul_op")
+(define_bypass 4 "hs4x_mul_fast_op" "hs4x_mul_fast_op")
+(define_bypass 6 "hs4x_mul_fast_op" "hs4x_divrem_op")
+(define_bypass 0 "hs4x_mul_fast_op" "hs4x_data_store_op" "store_data_bypass_p")
+(define_bypass 6 "hs4x_mul_fast_op" "hs4x_fpu_*op")
+
+;; BYPASS SLOW MPY ->
+(define_bypass 5 "hs4x_mul_slow_op" "hs4x_basic_alul_op")
+(define_bypass 5 "hs4x_mul_slow_op" "hs4x_adv_alul_op")
+(define_bypass 5 "hs4x_mul_slow_op" "hs4x_mul_slow_op")
+(define_bypass 7 "hs4x_mul_slow_op" "hs4x_divrem_op")
+(define_bypass 0 "hs4x_mul_slow_op" "hs4x_data_store_op" "store_data_bypass_p")
+(define_bypass 7 "hs4x_mul_slow_op" "hs4x_fpu_*op")
+
+;;BYPASS FPU ->
+(define_bypass 5 "hs4x_fpu_op" "hs4x_basic_alul_op")
+(define_bypass 5 "hs4x_fpu_op" "hs4x_adv_alul_op")
+(define_bypass 5 "hs4x_fpu_op" "hs4x_mul_*op")
+(define_bypass 7 "hs4x_fpu_op" "hs4x_divrem_op")
+(define_bypass 5 "hs4x_fpu_op" "hs4x_fpu_*op")
+(define_bypass 0 "hs4x_fpu_op" "hs4x_data_store_op" "store_data_bypass_p")
+
+;;BYPASS FPU FUSE ->
+(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_basic_alul_op")
+(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_adv_alul_op")
+(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_mul_*op")
+(define_bypass 11 "hs4x_fpu_fuse_op" "hs4x_divrem_op")
+(define_bypass 11 "hs4x_fpu_fuse_op" "hs4x_fpu_*op")
+(define_bypass 0 "hs4x_fpu_fuse_op" "hs4x_data_store_op" "store_data_bypass_p")
+
+;;BYPASS FPU SP DIV ->
+(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_basic_alul_op")
+(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_adv_alul_op")
+(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_mul_*op")
+(define_bypass 19 "hs4x_fpu_sdiv_op" "hs4x_divrem_op")
+(define_bypass 19 "hs4x_fpu_sdiv_op" "hs4x_fpu_*op")
+(define_bypass 0 "hs4x_fpu_sdiv_op" "hs4x_data_store_op" "store_data_bypass_p")
+
+;;BYPASS FPU DP DIV ->
+(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_basic_alul_op")
+(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_adv_alul_op")
+(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_mul_*op")
+(define_bypass 34 "hs4x_fpu_ddiv_op" "hs4x_divrem_op")
+(define_bypass 34 "hs4x_fpu_ddiv_op" "hs4x_fpu_*op")
+(define_bypass 0 "hs4x_fpu_ddiv_op" "hs4x_data_store_op" "store_data_bypass_p")
+
+;;BYPASS FPU CVT ->
+(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_basic_alul_op")
+(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_adv_alul_op")
+(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_mul_*op")
+(define_bypass 4 "hs4x_fpu_cvt_op" "hs4x_divrem_op")
+(define_bypass 4 "hs4x_fpu_cvt_op" "hs4x_fpu_*op")
+(define_bypass 0 "hs4x_fpu_cvt_op" "hs4x_data_store_op" "store_data_bypass_p")
diff --git a/gcc/config/arc/fpu.md b/gcc/config/arc/fpu.md
index 5c56f76c679..de876cac0c1 100644
--- a/gcc/config/arc/fpu.md
+++ b/gcc/config/arc/fpu.md
@@ -92,7 +92,7 @@
(set_attr "predicable" "yes,no,yes,no,no")
(set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
(set_attr "iscompact" "false")
- (set_attr "type" "fpu")])
+ (set_attr "type" "fpu_fuse")])
(define_insn "fnmasf4_fpu"
[(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r")
@@ -107,7 +107,7 @@
(set_attr "predicable" "yes,no,yes,no,no")
(set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
(set_attr "iscompact" "false")
- (set_attr "type" "fpu")])
+ (set_attr "type" "fpu_fuse")])
(define_expand "fmadf4"
[(match_operand:DF 0 "even_register_operand" "")
@@ -177,7 +177,7 @@
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")
(set_attr "iscompact" "false")
- (set_attr "type" "fpu")])
+ (set_attr "type" "fpu_fuse")])
(define_insn "fnmadf4_fpu"
[(set (match_operand:DF 0 "even_register_operand" "=r,r")
@@ -190,7 +190,7 @@
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")
(set_attr "iscompact" "false")
- (set_attr "type" "fpu")])
+ (set_attr "type" "fpu_fuse")])
;; Division
(define_insn "*divsf3_fpu"
@@ -203,7 +203,7 @@
"fsdiv%? %0,%1,%2"
[(set_attr "length" "4,4,8,8,8")
(set_attr "iscompact" "false")
- (set_attr "type" "fpu")
+ (set_attr "type" "fpu_sdiv")
(set_attr "predicable" "yes,no,yes,no,no")
(set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
])
@@ -221,7 +221,7 @@
"TARGET_FP_SP_SQRT"
"fssqrt %0,%1"
[(set_attr "length" "4,8")
- (set_attr "type" "fpu")])
+ (set_attr "type" "fpu_sdiv")])
;; Comparison
(define_insn "*cmpsf_fpu"
@@ -306,7 +306,7 @@
"fddiv%? %0,%1,%2"
[(set_attr "length" "4,4")
(set_attr "iscompact" "false")
- (set_attr "type" "fpu")
+ (set_attr "type" "fpu_ddiv")
(set_attr "predicable" "yes,no")
(set_attr "cond" "canuse,nocond")
])
@@ -318,7 +318,7 @@
"TARGET_FP_DP_SQRT"
"fdsqrt %0,%1"
[(set_attr "length" "4")
- (set_attr "type" "fpu")])
+ (set_attr "type" "fpu_ddiv")])
;; Comparison
(define_insn "*cmpdf_fpu"
--
2.17.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] [ARC] Add support for HS4x cpus.
2018-06-13 9:09 [PATCH] [ARC] Add support for HS4x cpus Claudiu Zissulescu
@ 2018-07-06 22:22 ` Andrew Burgess
2018-07-16 10:14 ` Claudiu Zissulescu
0 siblings, 1 reply; 3+ messages in thread
From: Andrew Burgess @ 2018-07-06 22:22 UTC (permalink / raw)
To: Claudiu Zissulescu; +Cc: gcc-patches, fbedard, Claudiu Zissulescu
* Claudiu Zissulescu <claziss@gmail.com> [2018-06-13 12:09:18 +0300]:
> From: Claudiu Zissulescu <claziss@synopsys.com>
>
> This patch adds support for two ARCHS variations.
>
> Ok to apply?
> Claudiu
Sorry for the delay, this looks fine.
Thanks,
Andrew
>
> gcc/
> 2017-03-10 Claudiu Zissulescu <claziss@synopsys.com>
>
> * config/arc/arc-arch.h (arc_tune_attr): Add new tune parameters
> for ARCHS4x.
> * config/arc/arc-cpus.def (hs4x): New cpu.
> (hs4xd): Likewise.
> * config/arc/arc-tables.opt: Regenerate.
> * config/arc/arc.c (arc_sched_issue_rate): New function.
> (TARGET_SCHED_ISSUE_RATE): Define.
> (TARGET_SCHED_EXPOSED_PIPELINE): Likewise.
> * config/arc/arc.md (attr type): Add fpu_fuse, fpu_sdiv, fpu_ddiv,
> fpu_cvt.
> (attr tune): Add ARCHS4x tune values.
> (attr tune_dspmpy): Define.
> (*tst): Correct instruction type.
> * config/arc/arcHS.md: Don't use this automaton for ARCHS4x cpus.
> * config/arc/arcHS4x.md: New file.
> * config/arc/fpu.md: Update instruction type attributes.
> * config/arc/t-multilib: Regenerate.
> ---
> gcc/config/arc/arc-arch.h | 5 +-
> gcc/config/arc/arc-cpus.def | 8 +-
> gcc/config/arc/arc-tables.opt | 6 +
> gcc/config/arc/arc.c | 19 +++
> gcc/config/arc/arc.md | 24 +++-
> gcc/config/arc/arcHS.md | 6 +
> gcc/config/arc/arcHS4x.md | 221 ++++++++++++++++++++++++++++++++++
> gcc/config/arc/fpu.md | 16 +--
> 8 files changed, 289 insertions(+), 16 deletions(-)
> create mode 100644 gcc/config/arc/arcHS4x.md
>
> diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h
> index 64866dd529b..01f95946623 100644
> --- a/gcc/config/arc/arc-arch.h
> +++ b/gcc/config/arc/arc-arch.h
> @@ -73,7 +73,10 @@ enum arc_tune_attr
> ARC_TUNE_ARC600,
> ARC_TUNE_ARC700_4_2_STD,
> ARC_TUNE_ARC700_4_2_XMAC,
> - ARC_TUNE_CORE_3
> + ARC_TUNE_CORE_3,
> + ARC_TUNE_ARCHS4X,
> + ARC_TUNE_ARCHS4XD,
> + ARC_TUNE_ARCHS4XD_SLOW
> };
>
> /* CPU specific properties. */
> diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def
> index 1fce81f6933..4aa422f1a39 100644
> --- a/gcc/config/arc/arc-cpus.def
> +++ b/gcc/config/arc/arc-cpus.def
> @@ -59,10 +59,12 @@ ARC_CPU (archs, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE)
> ARC_CPU (hs34, hs, FL_MPYOPT_2, NONE)
> ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE)
> ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE)
> +ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, ARCHS4X)
> +ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, ARCHS4XD)
>
> -ARC_CPU (arc600, 6xx, FL_BS, ARC600)
> -ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, ARC600)
> -ARC_CPU (arc600_mul64, 6xx, FL_BS|FL_NORM|FL_MUL64, ARC600)
> +ARC_CPU (arc600, 6xx, FL_BS, ARC600)
> +ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, ARC600)
> +ARC_CPU (arc600_mul64, 6xx, FL_BS|FL_NORM|FL_MUL64, ARC600)
> ARC_CPU (arc600_mul32x16, 6xx, FL_BS|FL_NORM|FL_MUL32x16, ARC600)
> ARC_CPU (arc601, 6xx, 0, ARC600)
> ARC_CPU (arc601_norm, 6xx, FL_NORM, ARC600)
> diff --git a/gcc/config/arc/arc-tables.opt b/gcc/config/arc/arc-tables.opt
> index 3b17b3de7d5..2afaf5bd83c 100644
> --- a/gcc/config/arc/arc-tables.opt
> +++ b/gcc/config/arc/arc-tables.opt
> @@ -63,6 +63,12 @@ Enum(processor_type) String(hs38) Value(PROCESSOR_hs38)
> EnumValue
> Enum(processor_type) String(hs38_linux) Value(PROCESSOR_hs38_linux)
>
> +EnumValue
> +Enum(processor_type) String(hs4x) Value(PROCESSOR_hs4x)
> +
> +EnumValue
> +Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd)
> +
> EnumValue
> Enum(processor_type) String(arc600) Value(PROCESSOR_arc600)
>
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 2bedc9af37e..03a2f4223c0 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -483,6 +483,22 @@ arc_autovectorize_vector_sizes (vector_sizes *sizes)
> }
> }
>
> +
> +/* Implements target hook TARGET_SCHED_ISSUE_RATE. */
> +static int
> +arc_sched_issue_rate (void)
> +{
> + switch (arc_tune)
> + {
> + case TUNE_ARCHS4X:
> + case TUNE_ARCHS4XD:
> + return 3;
> + default:
> + break;
> + }
> + return 1;
> +}
> +
> /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */
> static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
> static rtx arc_delegitimize_address (rtx);
> @@ -565,6 +581,9 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
> #undef TARGET_SCHED_ADJUST_PRIORITY
> #define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
>
> +#undef TARGET_SCHED_ISSUE_RATE
> +#define TARGET_SCHED_ISSUE_RATE arc_sched_issue_rate
> +
> #undef TARGET_VECTOR_MODE_SUPPORTED_P
> #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
>
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index 091f1092bed..5610bab694c 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -82,6 +82,7 @@
> (include ("arc700.md"))
> (include ("arcEM.md"))
> (include ("arcHS.md"))
> +(include ("arcHS4x.md"))
>
> ;; Predicates
>
> @@ -204,7 +205,7 @@
> simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc,
> simd_valign, simd_valign_with_acc, simd_vcontrol,
> simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem,
> - fpu, block"
> + fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block"
> (cond [(eq_attr "is_sfunc" "yes")
> (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call")
> (match_test "flag_pic") (const_string "sfunc")]
> @@ -593,7 +594,8 @@
> ;; somehow modify them to become inelegible for delay slots if a decision
> ;; is made that makes conditional execution required.
>
> -(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3"
> +(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \
> +archs4x, archs4xd, archs4xd_slow"
> (const
> (cond [(symbol_ref "arc_tune == TUNE_ARC600")
> (const_string "arc600")
> @@ -602,7 +604,12 @@
> (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
> (const_string "arc700_4_2_xmac")
> (symbol_ref "arc_tune == ARC_TUNE_CORE_3")
> - (const_string "core_3")]
> + (const_string "core_3")
> + (symbol_ref "arc_tune == TUNE_ARCHS4X")
> + (const_string "archs4x")
> + (ior (symbol_ref "arc_tune == TUNE_ARCHS4XD")
> + (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW"))
> + (const_string "archs4xd")]
> (const_string "none"))))
>
> (define_attr "tune_arc700" "false,true"
> @@ -610,6 +617,15 @@
> (const_string "true")
> (const_string "false")))
>
> +(define_attr "tune_dspmpy" "none, slow, fast"
> + (const
> + (cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X")
> + (symbol_ref "arc_tune == TUNE_ARCHS4XD"))
> + (const_string "fast")
> + (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")
> + (const_string "slow")]
> + (const_string "none"))))
> +
> ;; Move instructions.
> (define_expand "movqi"
> [(set (match_operand:QI 0 "move_dest_operand" "")
> @@ -960,7 +976,7 @@
> }
> "
> [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false")
> - (set_attr "type" "compare,compare,compare,compare,compare,compare,shift,compare")
> + (set_attr "type" "compare,compare,compare,compare,compare,compare,binary,compare")
> (set_attr "length" "*,*,4,4,4,4,4,8")
> (set_attr "predicable" "no,yes,no,yes,no,no,no,yes")
> (set_attr "cond" "set_zn")])
> diff --git a/gcc/config/arc/arcHS.md b/gcc/config/arc/arcHS.md
> index 2a8588a02e4..d49b90c4970 100644
> --- a/gcc/config/arc/arcHS.md
> +++ b/gcc/config/arc/arcHS.md
> @@ -25,32 +25,38 @@
>
> (define_insn_reservation "hs_data_load" 4
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "load"))
> "hs_issue+hs_ld_st,hs_ld_st,nothing*2")
>
> (define_insn_reservation "hs_data_store" 1
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "store"))
> "hs_issue+hs_ld_st")
>
> (define_insn_reservation "hs_alu0" 2
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
> "hs_issue+x1,x2")
>
> (define_insn_reservation "hs_alu1" 4
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
> "hs_issue+x1, nothing*3")
>
> (define_insn_reservation "hs_divrem" 13
> (and (match_test "TARGET_HS")
> (match_test "TARGET_DIVREM")
> + (eq_attr "tune" "none")
> (eq_attr "type" "div_rem"))
> "hs_issue+divrem_hs, (divrem_hs)*12")
>
> (define_insn_reservation "hs_mul" 3
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "mul16_em, multi, umulti"))
> "hs_issue+mul_hs, nothing*3")
>
> diff --git a/gcc/config/arc/arcHS4x.md b/gcc/config/arc/arcHS4x.md
> new file mode 100644
> index 00000000000..f804b6be694
> --- /dev/null
> +++ b/gcc/config/arc/arcHS4x.md
> @@ -0,0 +1,221 @@
> +;; DFA scheduling description of the Synopsys DesignWare ARC HS4x cpu
> +;; for GNU C compiler
> +;; Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +;; GNU General Public License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3. If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_automaton "ARCHS4x")
> +
> +(define_cpu_unit "hs4x_issue0" "ARCHS4x")
> +(define_cpu_unit "hs4x_issue1" "ARCHS4x")
> +(define_cpu_unit "hs4x_ld_st" "ARCHS4x")
> +(define_cpu_unit "hs4x_divrem" "ARCHS4x")
> +(define_cpu_unit "hs4x_mult" "ARCHS4x")
> +(define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x")
> +(define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x")
> +
> +(define_insn_reservation "hs4x_brj_op" 1
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \
> +branch, brcc,brcc_no_delay_slot, sfunc"))
> + "hs4x_issue0")
> +
> +(define_insn_reservation "hs4x_data_load_op" 4
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "load"))
> + "hs4x_issue1 + hs4x_ld_st,hs4x_ld_st")
> +
> +(define_insn_reservation "hs4x_data_store_op" 1
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "store"))
> + "hs4x_issue1 + hs4x_ld_st")
> +
> +;; Advanced ALU
> +(define_insn_reservation "hs4x_adv_alue_op" 4
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
> + "(hs4x_issue0 | hs4x_issue1), hs4x_x1")
> +
> +(define_insn_reservation "hs4x_adv_alul_op" 6
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4xd")
> + (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
> + "(hs4x_issue0 | hs4x_issue1), nothing*2, hs4x_x2")
> +
> +;; Basic ALU
> +(define_insn_reservation "hs4x_basic_alue_op" 1
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
> + "(hs4x_issue0 | hs4x_issue1) + hs4x_y1")
> +
> +(define_insn_reservation "hs4x_basic_alul_op" 4
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
> + "(hs4x_issue0 | hs4x_issue1), nothing*2, hs4x_y2")
> +
> +(define_insn_reservation "hs4x_divrem_op" 13
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "div_rem"))
> + "hs4x_issue0 + hs4x_divrem, (hs4x_divrem)*12")
> +
> +;;Consider the DSPMPY fast here
> +(define_insn_reservation "hs4x_mul_fast_op" 7
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune_dspmpy" "fast")
> + (eq_attr "type" "mul16_em, multi, umulti"))
> + "hs4x_issue0 + hs4x_mult")
> +
> +(define_insn_reservation "hs4x_mul_slow_op" 8
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune_dspmpy" "slow")
> + (eq_attr "type" "mul16_em, multi, umulti"))
> + "hs4x_issue0 + hs4x_mult")
> +
> +;; FPU unit
> +(define_insn_reservation "hs4x_fpu_op" 8
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu"))
> + "hs4x_issue0")
> +
> +;; FPU FUSE unit
> +(define_insn_reservation "hs4x_fpu_fuse_op" 12
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_fuse"))
> + "hs4x_issue0")
> +
> +;; FPU SP SQRT/DIV unit
> +(define_insn_reservation "hs4x_fpu_sdiv_op" 20
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_sdiv"))
> + "hs4x_issue0")
> +
> +;; FPU DP SQRT/DIV unit
> +(define_insn_reservation "hs4x_fpu_ddiv_op" 34
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_ddiv"))
> + "hs4x_issue0")
> +
> +;; FPU CVT unit
> +(define_insn_reservation "hs4x_fpu_cvt_op" 5
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_cvt"))
> + "hs4x_issue0")
> +
> +;; BYPASS Advanced ALU ->
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_divrem_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_mul_*op")
> +(define_bypass 2 "hs4x_adv_alue_op" "hs4x_adv_alue_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_basic_alue_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_data_load_op")
> +(define_bypass 0 "hs4x_adv_alue_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 2 "hs4x_adv_alue_op" "hs4x_data_store_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_fpu_*op")
> +
> +(define_bypass 2 "hs4x_adv_alul_op" "hs4x_basic_alul_op")
> +(define_bypass 2 "hs4x_adv_alul_op" "hs4x_adv_alul_op")
> +(define_bypass 2 "hs4x_adv_alul_op" "hs4x_mul_*op")
> +(define_bypass 0 "hs4x_adv_alul_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 4 "hs4x_adv_alul_op" "hs4x_divrem_op")
> +(define_bypass 5 "hs4x_adv_alul_op" "hs4x_fpu_*op")
> +
> +;; BYPASS Basic ALU ->
> +(define_bypass 0 "hs4x_basic_alue_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +(define_bypass 1 "hs4x_basic_alul_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_basic_alul_op" "hs4x_adv_alul_op")
> +(define_bypass 0 "hs4x_basic_alul_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 1 "hs4x_basic_alul_op" "hs4x_mul_*op")
> +(define_bypass 3 "hs4x_basic_alul_op" "hs4x_divrem_op")
> +(define_bypass 3 "hs4x_basic_alul_op" "hs4x_fpu_*op")
> +
> +;; BYPASS LD ->
> +(define_bypass 1 "hs4x_data_load_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_data_load_op" "hs4x_adv_alul_op")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_divrem_op")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_data_load_op")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_mul_*op")
> +(define_bypass 0 "hs4x_data_load_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_fpu_*op")
> +
> +;; BYPASS FAST MPY ->
> +(define_bypass 4 "hs4x_mul_fast_op" "hs4x_basic_alul_op")
> +(define_bypass 4 "hs4x_mul_fast_op" "hs4x_adv_alul_op")
> +(define_bypass 4 "hs4x_mul_fast_op" "hs4x_mul_fast_op")
> +(define_bypass 6 "hs4x_mul_fast_op" "hs4x_divrem_op")
> +(define_bypass 0 "hs4x_mul_fast_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 6 "hs4x_mul_fast_op" "hs4x_fpu_*op")
> +
> +;; BYPASS SLOW MPY ->
> +(define_bypass 5 "hs4x_mul_slow_op" "hs4x_basic_alul_op")
> +(define_bypass 5 "hs4x_mul_slow_op" "hs4x_adv_alul_op")
> +(define_bypass 5 "hs4x_mul_slow_op" "hs4x_mul_slow_op")
> +(define_bypass 7 "hs4x_mul_slow_op" "hs4x_divrem_op")
> +(define_bypass 0 "hs4x_mul_slow_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 7 "hs4x_mul_slow_op" "hs4x_fpu_*op")
> +
> +;;BYPASS FPU ->
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_basic_alul_op")
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_adv_alul_op")
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_mul_*op")
> +(define_bypass 7 "hs4x_fpu_op" "hs4x_divrem_op")
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU FUSE ->
> +(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_basic_alul_op")
> +(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_adv_alul_op")
> +(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_mul_*op")
> +(define_bypass 11 "hs4x_fpu_fuse_op" "hs4x_divrem_op")
> +(define_bypass 11 "hs4x_fpu_fuse_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_fuse_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU SP DIV ->
> +(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_basic_alul_op")
> +(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_adv_alul_op")
> +(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_mul_*op")
> +(define_bypass 19 "hs4x_fpu_sdiv_op" "hs4x_divrem_op")
> +(define_bypass 19 "hs4x_fpu_sdiv_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_sdiv_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU DP DIV ->
> +(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_basic_alul_op")
> +(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_adv_alul_op")
> +(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_mul_*op")
> +(define_bypass 34 "hs4x_fpu_ddiv_op" "hs4x_divrem_op")
> +(define_bypass 34 "hs4x_fpu_ddiv_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_ddiv_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU CVT ->
> +(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_adv_alul_op")
> +(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_mul_*op")
> +(define_bypass 4 "hs4x_fpu_cvt_op" "hs4x_divrem_op")
> +(define_bypass 4 "hs4x_fpu_cvt_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_cvt_op" "hs4x_data_store_op" "store_data_bypass_p")
> diff --git a/gcc/config/arc/fpu.md b/gcc/config/arc/fpu.md
> index 5c56f76c679..de876cac0c1 100644
> --- a/gcc/config/arc/fpu.md
> +++ b/gcc/config/arc/fpu.md
> @@ -92,7 +92,7 @@
> (set_attr "predicable" "yes,no,yes,no,no")
> (set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> (define_insn "fnmasf4_fpu"
> [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r")
> @@ -107,7 +107,7 @@
> (set_attr "predicable" "yes,no,yes,no,no")
> (set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> (define_expand "fmadf4"
> [(match_operand:DF 0 "even_register_operand" "")
> @@ -177,7 +177,7 @@
> (set_attr "predicable" "yes,no")
> (set_attr "cond" "canuse,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> (define_insn "fnmadf4_fpu"
> [(set (match_operand:DF 0 "even_register_operand" "=r,r")
> @@ -190,7 +190,7 @@
> (set_attr "predicable" "yes,no")
> (set_attr "cond" "canuse,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> ;; Division
> (define_insn "*divsf3_fpu"
> @@ -203,7 +203,7 @@
> "fsdiv%? %0,%1,%2"
> [(set_attr "length" "4,4,8,8,8")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")
> + (set_attr "type" "fpu_sdiv")
> (set_attr "predicable" "yes,no,yes,no,no")
> (set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
> ])
> @@ -221,7 +221,7 @@
> "TARGET_FP_SP_SQRT"
> "fssqrt %0,%1"
> [(set_attr "length" "4,8")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_sdiv")])
>
> ;; Comparison
> (define_insn "*cmpsf_fpu"
> @@ -306,7 +306,7 @@
> "fddiv%? %0,%1,%2"
> [(set_attr "length" "4,4")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")
> + (set_attr "type" "fpu_ddiv")
> (set_attr "predicable" "yes,no")
> (set_attr "cond" "canuse,nocond")
> ])
> @@ -318,7 +318,7 @@
> "TARGET_FP_DP_SQRT"
> "fdsqrt %0,%1"
> [(set_attr "length" "4")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_ddiv")])
>
> ;; Comparison
> (define_insn "*cmpdf_fpu"
> --
> 2.17.0
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* RE: [PATCH] [ARC] Add support for HS4x cpus.
2018-07-06 22:22 ` Andrew Burgess
@ 2018-07-16 10:14 ` Claudiu Zissulescu
0 siblings, 0 replies; 3+ messages in thread
From: Claudiu Zissulescu @ 2018-07-16 10:14 UTC (permalink / raw)
To: Andrew Burgess; +Cc: gcc-patches, Francois.Bedard
Committed,
Claudiu
________________________________________
From: Andrew Burgess [andrew.burgess@embecosm.com]
Sent: Saturday, July 07, 2018 12:21 AM
To: Claudiu Zissulescu
Cc: gcc-patches@gcc.gnu.org; Francois.Bedard@synopsys.com; Claudiu Zissulescu
Subject: Re: [PATCH] [ARC] Add support for HS4x cpus.
* Claudiu Zissulescu <claziss@gmail.com> [2018-06-13 12:09:18 +0300]:
> From: Claudiu Zissulescu <claziss@synopsys.com>
>
> This patch adds support for two ARCHS variations.
>
> Ok to apply?
> Claudiu
Sorry for the delay, this looks fine.
Thanks,
Andrew
>
> gcc/
> 2017-03-10 Claudiu Zissulescu <claziss@synopsys.com>
>
> * config/arc/arc-arch.h (arc_tune_attr): Add new tune parameters
> for ARCHS4x.
> * config/arc/arc-cpus.def (hs4x): New cpu.
> (hs4xd): Likewise.
> * config/arc/arc-tables.opt: Regenerate.
> * config/arc/arc.c (arc_sched_issue_rate): New function.
> (TARGET_SCHED_ISSUE_RATE): Define.
> (TARGET_SCHED_EXPOSED_PIPELINE): Likewise.
> * config/arc/arc.md (attr type): Add fpu_fuse, fpu_sdiv, fpu_ddiv,
> fpu_cvt.
> (attr tune): Add ARCHS4x tune values.
> (attr tune_dspmpy): Define.
> (*tst): Correct instruction type.
> * config/arc/arcHS.md: Don't use this automaton for ARCHS4x cpus.
> * config/arc/arcHS4x.md: New file.
> * config/arc/fpu.md: Update instruction type attributes.
> * config/arc/t-multilib: Regenerate.
> ---
> gcc/config/arc/arc-arch.h | 5 +-
> gcc/config/arc/arc-cpus.def | 8 +-
> gcc/config/arc/arc-tables.opt | 6 +
> gcc/config/arc/arc.c | 19 +++
> gcc/config/arc/arc.md | 24 +++-
> gcc/config/arc/arcHS.md | 6 +
> gcc/config/arc/arcHS4x.md | 221 ++++++++++++++++++++++++++++++++++
> gcc/config/arc/fpu.md | 16 +--
> 8 files changed, 289 insertions(+), 16 deletions(-)
> create mode 100644 gcc/config/arc/arcHS4x.md
>
> diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h
> index 64866dd529b..01f95946623 100644
> --- a/gcc/config/arc/arc-arch.h
> +++ b/gcc/config/arc/arc-arch.h
> @@ -73,7 +73,10 @@ enum arc_tune_attr
> ARC_TUNE_ARC600,
> ARC_TUNE_ARC700_4_2_STD,
> ARC_TUNE_ARC700_4_2_XMAC,
> - ARC_TUNE_CORE_3
> + ARC_TUNE_CORE_3,
> + ARC_TUNE_ARCHS4X,
> + ARC_TUNE_ARCHS4XD,
> + ARC_TUNE_ARCHS4XD_SLOW
> };
>
> /* CPU specific properties. */
> diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def
> index 1fce81f6933..4aa422f1a39 100644
> --- a/gcc/config/arc/arc-cpus.def
> +++ b/gcc/config/arc/arc-cpus.def
> @@ -59,10 +59,12 @@ ARC_CPU (archs, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE)
> ARC_CPU (hs34, hs, FL_MPYOPT_2, NONE)
> ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE)
> ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE)
> +ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, ARCHS4X)
> +ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, ARCHS4XD)
>
> -ARC_CPU (arc600, 6xx, FL_BS, ARC600)
> -ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, ARC600)
> -ARC_CPU (arc600_mul64, 6xx, FL_BS|FL_NORM|FL_MUL64, ARC600)
> +ARC_CPU (arc600, 6xx, FL_BS, ARC600)
> +ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, ARC600)
> +ARC_CPU (arc600_mul64, 6xx, FL_BS|FL_NORM|FL_MUL64, ARC600)
> ARC_CPU (arc600_mul32x16, 6xx, FL_BS|FL_NORM|FL_MUL32x16, ARC600)
> ARC_CPU (arc601, 6xx, 0, ARC600)
> ARC_CPU (arc601_norm, 6xx, FL_NORM, ARC600)
> diff --git a/gcc/config/arc/arc-tables.opt b/gcc/config/arc/arc-tables.opt
> index 3b17b3de7d5..2afaf5bd83c 100644
> --- a/gcc/config/arc/arc-tables.opt
> +++ b/gcc/config/arc/arc-tables.opt
> @@ -63,6 +63,12 @@ Enum(processor_type) String(hs38) Value(PROCESSOR_hs38)
> EnumValue
> Enum(processor_type) String(hs38_linux) Value(PROCESSOR_hs38_linux)
>
> +EnumValue
> +Enum(processor_type) String(hs4x) Value(PROCESSOR_hs4x)
> +
> +EnumValue
> +Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd)
> +
> EnumValue
> Enum(processor_type) String(arc600) Value(PROCESSOR_arc600)
>
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index 2bedc9af37e..03a2f4223c0 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -483,6 +483,22 @@ arc_autovectorize_vector_sizes (vector_sizes *sizes)
> }
> }
>
> +
> +/* Implements target hook TARGET_SCHED_ISSUE_RATE. */
> +static int
> +arc_sched_issue_rate (void)
> +{
> + switch (arc_tune)
> + {
> + case TUNE_ARCHS4X:
> + case TUNE_ARCHS4XD:
> + return 3;
> + default:
> + break;
> + }
> + return 1;
> +}
> +
> /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */
> static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
> static rtx arc_delegitimize_address (rtx);
> @@ -565,6 +581,9 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
> #undef TARGET_SCHED_ADJUST_PRIORITY
> #define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
>
> +#undef TARGET_SCHED_ISSUE_RATE
> +#define TARGET_SCHED_ISSUE_RATE arc_sched_issue_rate
> +
> #undef TARGET_VECTOR_MODE_SUPPORTED_P
> #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
>
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index 091f1092bed..5610bab694c 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -82,6 +82,7 @@
> (include ("arc700.md"))
> (include ("arcEM.md"))
> (include ("arcHS.md"))
> +(include ("arcHS4x.md"))
>
> ;; Predicates
>
> @@ -204,7 +205,7 @@
> simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc,
> simd_valign, simd_valign_with_acc, simd_vcontrol,
> simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem,
> - fpu, block"
> + fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block"
> (cond [(eq_attr "is_sfunc" "yes")
> (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call")
> (match_test "flag_pic") (const_string "sfunc")]
> @@ -593,7 +594,8 @@
> ;; somehow modify them to become inelegible for delay slots if a decision
> ;; is made that makes conditional execution required.
>
> -(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3"
> +(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \
> +archs4x, archs4xd, archs4xd_slow"
> (const
> (cond [(symbol_ref "arc_tune == TUNE_ARC600")
> (const_string "arc600")
> @@ -602,7 +604,12 @@
> (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
> (const_string "arc700_4_2_xmac")
> (symbol_ref "arc_tune == ARC_TUNE_CORE_3")
> - (const_string "core_3")]
> + (const_string "core_3")
> + (symbol_ref "arc_tune == TUNE_ARCHS4X")
> + (const_string "archs4x")
> + (ior (symbol_ref "arc_tune == TUNE_ARCHS4XD")
> + (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW"))
> + (const_string "archs4xd")]
> (const_string "none"))))
>
> (define_attr "tune_arc700" "false,true"
> @@ -610,6 +617,15 @@
> (const_string "true")
> (const_string "false")))
>
> +(define_attr "tune_dspmpy" "none, slow, fast"
> + (const
> + (cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X")
> + (symbol_ref "arc_tune == TUNE_ARCHS4XD"))
> + (const_string "fast")
> + (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")
> + (const_string "slow")]
> + (const_string "none"))))
> +
> ;; Move instructions.
> (define_expand "movqi"
> [(set (match_operand:QI 0 "move_dest_operand" "")
> @@ -960,7 +976,7 @@
> }
> "
> [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false")
> - (set_attr "type" "compare,compare,compare,compare,compare,compare,shift,compare")
> + (set_attr "type" "compare,compare,compare,compare,compare,compare,binary,compare")
> (set_attr "length" "*,*,4,4,4,4,4,8")
> (set_attr "predicable" "no,yes,no,yes,no,no,no,yes")
> (set_attr "cond" "set_zn")])
> diff --git a/gcc/config/arc/arcHS.md b/gcc/config/arc/arcHS.md
> index 2a8588a02e4..d49b90c4970 100644
> --- a/gcc/config/arc/arcHS.md
> +++ b/gcc/config/arc/arcHS.md
> @@ -25,32 +25,38 @@
>
> (define_insn_reservation "hs_data_load" 4
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "load"))
> "hs_issue+hs_ld_st,hs_ld_st,nothing*2")
>
> (define_insn_reservation "hs_data_store" 1
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "store"))
> "hs_issue+hs_ld_st")
>
> (define_insn_reservation "hs_alu0" 2
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
> "hs_issue+x1,x2")
>
> (define_insn_reservation "hs_alu1" 4
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
> "hs_issue+x1, nothing*3")
>
> (define_insn_reservation "hs_divrem" 13
> (and (match_test "TARGET_HS")
> (match_test "TARGET_DIVREM")
> + (eq_attr "tune" "none")
> (eq_attr "type" "div_rem"))
> "hs_issue+divrem_hs, (divrem_hs)*12")
>
> (define_insn_reservation "hs_mul" 3
> (and (match_test "TARGET_HS")
> + (eq_attr "tune" "none")
> (eq_attr "type" "mul16_em, multi, umulti"))
> "hs_issue+mul_hs, nothing*3")
>
> diff --git a/gcc/config/arc/arcHS4x.md b/gcc/config/arc/arcHS4x.md
> new file mode 100644
> index 00000000000..f804b6be694
> --- /dev/null
> +++ b/gcc/config/arc/arcHS4x.md
> @@ -0,0 +1,221 @@
> +;; DFA scheduling description of the Synopsys DesignWare ARC HS4x cpu
> +;; for GNU C compiler
> +;; Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +;; This file is part of GCC.
> +
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +;; GNU General Public License for more details.
> +
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3. If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +(define_automaton "ARCHS4x")
> +
> +(define_cpu_unit "hs4x_issue0" "ARCHS4x")
> +(define_cpu_unit "hs4x_issue1" "ARCHS4x")
> +(define_cpu_unit "hs4x_ld_st" "ARCHS4x")
> +(define_cpu_unit "hs4x_divrem" "ARCHS4x")
> +(define_cpu_unit "hs4x_mult" "ARCHS4x")
> +(define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x")
> +(define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x")
> +
> +(define_insn_reservation "hs4x_brj_op" 1
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \
> +branch, brcc,brcc_no_delay_slot, sfunc"))
> + "hs4x_issue0")
> +
> +(define_insn_reservation "hs4x_data_load_op" 4
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "load"))
> + "hs4x_issue1 + hs4x_ld_st,hs4x_ld_st")
> +
> +(define_insn_reservation "hs4x_data_store_op" 1
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "store"))
> + "hs4x_issue1 + hs4x_ld_st")
> +
> +;; Advanced ALU
> +(define_insn_reservation "hs4x_adv_alue_op" 4
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
> + "(hs4x_issue0 | hs4x_issue1), hs4x_x1")
> +
> +(define_insn_reservation "hs4x_adv_alul_op" 6
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4xd")
> + (eq_attr "type" "cc_arith, two_cycle_core, shift, lr, sr"))
> + "(hs4x_issue0 | hs4x_issue1), nothing*2, hs4x_x2")
> +
> +;; Basic ALU
> +(define_insn_reservation "hs4x_basic_alue_op" 1
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
> + "(hs4x_issue0 | hs4x_issue1) + hs4x_y1")
> +
> +(define_insn_reservation "hs4x_basic_alul_op" 4
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "move, cmove, unary, binary, compare, misc"))
> + "(hs4x_issue0 | hs4x_issue1), nothing*2, hs4x_y2")
> +
> +(define_insn_reservation "hs4x_divrem_op" 13
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "div_rem"))
> + "hs4x_issue0 + hs4x_divrem, (hs4x_divrem)*12")
> +
> +;;Consider the DSPMPY fast here
> +(define_insn_reservation "hs4x_mul_fast_op" 7
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune_dspmpy" "fast")
> + (eq_attr "type" "mul16_em, multi, umulti"))
> + "hs4x_issue0 + hs4x_mult")
> +
> +(define_insn_reservation "hs4x_mul_slow_op" 8
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune_dspmpy" "slow")
> + (eq_attr "type" "mul16_em, multi, umulti"))
> + "hs4x_issue0 + hs4x_mult")
> +
> +;; FPU unit
> +(define_insn_reservation "hs4x_fpu_op" 8
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu"))
> + "hs4x_issue0")
> +
> +;; FPU FUSE unit
> +(define_insn_reservation "hs4x_fpu_fuse_op" 12
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_fuse"))
> + "hs4x_issue0")
> +
> +;; FPU SP SQRT/DIV unit
> +(define_insn_reservation "hs4x_fpu_sdiv_op" 20
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_sdiv"))
> + "hs4x_issue0")
> +
> +;; FPU DP SQRT/DIV unit
> +(define_insn_reservation "hs4x_fpu_ddiv_op" 34
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_ddiv"))
> + "hs4x_issue0")
> +
> +;; FPU CVT unit
> +(define_insn_reservation "hs4x_fpu_cvt_op" 5
> + (and (match_test "TARGET_HS")
> + (eq_attr "tune" "archs4x, archs4xd")
> + (eq_attr "type" "fpu_cvt"))
> + "hs4x_issue0")
> +
> +;; BYPASS Advanced ALU ->
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_divrem_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_mul_*op")
> +(define_bypass 2 "hs4x_adv_alue_op" "hs4x_adv_alue_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_basic_alue_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_data_load_op")
> +(define_bypass 0 "hs4x_adv_alue_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 2 "hs4x_adv_alue_op" "hs4x_data_store_op")
> +(define_bypass 1 "hs4x_adv_alue_op" "hs4x_fpu_*op")
> +
> +(define_bypass 2 "hs4x_adv_alul_op" "hs4x_basic_alul_op")
> +(define_bypass 2 "hs4x_adv_alul_op" "hs4x_adv_alul_op")
> +(define_bypass 2 "hs4x_adv_alul_op" "hs4x_mul_*op")
> +(define_bypass 0 "hs4x_adv_alul_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 4 "hs4x_adv_alul_op" "hs4x_divrem_op")
> +(define_bypass 5 "hs4x_adv_alul_op" "hs4x_fpu_*op")
> +
> +;; BYPASS Basic ALU ->
> +(define_bypass 0 "hs4x_basic_alue_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +(define_bypass 1 "hs4x_basic_alul_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_basic_alul_op" "hs4x_adv_alul_op")
> +(define_bypass 0 "hs4x_basic_alul_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 1 "hs4x_basic_alul_op" "hs4x_mul_*op")
> +(define_bypass 3 "hs4x_basic_alul_op" "hs4x_divrem_op")
> +(define_bypass 3 "hs4x_basic_alul_op" "hs4x_fpu_*op")
> +
> +;; BYPASS LD ->
> +(define_bypass 1 "hs4x_data_load_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_data_load_op" "hs4x_adv_alul_op")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_divrem_op")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_data_load_op")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_mul_*op")
> +(define_bypass 0 "hs4x_data_load_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 3 "hs4x_data_load_op" "hs4x_fpu_*op")
> +
> +;; BYPASS FAST MPY ->
> +(define_bypass 4 "hs4x_mul_fast_op" "hs4x_basic_alul_op")
> +(define_bypass 4 "hs4x_mul_fast_op" "hs4x_adv_alul_op")
> +(define_bypass 4 "hs4x_mul_fast_op" "hs4x_mul_fast_op")
> +(define_bypass 6 "hs4x_mul_fast_op" "hs4x_divrem_op")
> +(define_bypass 0 "hs4x_mul_fast_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 6 "hs4x_mul_fast_op" "hs4x_fpu_*op")
> +
> +;; BYPASS SLOW MPY ->
> +(define_bypass 5 "hs4x_mul_slow_op" "hs4x_basic_alul_op")
> +(define_bypass 5 "hs4x_mul_slow_op" "hs4x_adv_alul_op")
> +(define_bypass 5 "hs4x_mul_slow_op" "hs4x_mul_slow_op")
> +(define_bypass 7 "hs4x_mul_slow_op" "hs4x_divrem_op")
> +(define_bypass 0 "hs4x_mul_slow_op" "hs4x_data_store_op" "store_data_bypass_p")
> +(define_bypass 7 "hs4x_mul_slow_op" "hs4x_fpu_*op")
> +
> +;;BYPASS FPU ->
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_basic_alul_op")
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_adv_alul_op")
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_mul_*op")
> +(define_bypass 7 "hs4x_fpu_op" "hs4x_divrem_op")
> +(define_bypass 5 "hs4x_fpu_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU FUSE ->
> +(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_basic_alul_op")
> +(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_adv_alul_op")
> +(define_bypass 9 "hs4x_fpu_fuse_op" "hs4x_mul_*op")
> +(define_bypass 11 "hs4x_fpu_fuse_op" "hs4x_divrem_op")
> +(define_bypass 11 "hs4x_fpu_fuse_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_fuse_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU SP DIV ->
> +(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_basic_alul_op")
> +(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_adv_alul_op")
> +(define_bypass 16 "hs4x_fpu_sdiv_op" "hs4x_mul_*op")
> +(define_bypass 19 "hs4x_fpu_sdiv_op" "hs4x_divrem_op")
> +(define_bypass 19 "hs4x_fpu_sdiv_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_sdiv_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU DP DIV ->
> +(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_basic_alul_op")
> +(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_adv_alul_op")
> +(define_bypass 31 "hs4x_fpu_ddiv_op" "hs4x_mul_*op")
> +(define_bypass 34 "hs4x_fpu_ddiv_op" "hs4x_divrem_op")
> +(define_bypass 34 "hs4x_fpu_ddiv_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_ddiv_op" "hs4x_data_store_op" "store_data_bypass_p")
> +
> +;;BYPASS FPU CVT ->
> +(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_basic_alul_op")
> +(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_adv_alul_op")
> +(define_bypass 1 "hs4x_fpu_cvt_op" "hs4x_mul_*op")
> +(define_bypass 4 "hs4x_fpu_cvt_op" "hs4x_divrem_op")
> +(define_bypass 4 "hs4x_fpu_cvt_op" "hs4x_fpu_*op")
> +(define_bypass 0 "hs4x_fpu_cvt_op" "hs4x_data_store_op" "store_data_bypass_p")
> diff --git a/gcc/config/arc/fpu.md b/gcc/config/arc/fpu.md
> index 5c56f76c679..de876cac0c1 100644
> --- a/gcc/config/arc/fpu.md
> +++ b/gcc/config/arc/fpu.md
> @@ -92,7 +92,7 @@
> (set_attr "predicable" "yes,no,yes,no,no")
> (set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> (define_insn "fnmasf4_fpu"
> [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r")
> @@ -107,7 +107,7 @@
> (set_attr "predicable" "yes,no,yes,no,no")
> (set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> (define_expand "fmadf4"
> [(match_operand:DF 0 "even_register_operand" "")
> @@ -177,7 +177,7 @@
> (set_attr "predicable" "yes,no")
> (set_attr "cond" "canuse,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> (define_insn "fnmadf4_fpu"
> [(set (match_operand:DF 0 "even_register_operand" "=r,r")
> @@ -190,7 +190,7 @@
> (set_attr "predicable" "yes,no")
> (set_attr "cond" "canuse,nocond")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_fuse")])
>
> ;; Division
> (define_insn "*divsf3_fpu"
> @@ -203,7 +203,7 @@
> "fsdiv%? %0,%1,%2"
> [(set_attr "length" "4,4,8,8,8")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")
> + (set_attr "type" "fpu_sdiv")
> (set_attr "predicable" "yes,no,yes,no,no")
> (set_attr "cond" "canuse,nocond,canuse_limm,nocond,nocond")
> ])
> @@ -221,7 +221,7 @@
> "TARGET_FP_SP_SQRT"
> "fssqrt %0,%1"
> [(set_attr "length" "4,8")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_sdiv")])
>
> ;; Comparison
> (define_insn "*cmpsf_fpu"
> @@ -306,7 +306,7 @@
> "fddiv%? %0,%1,%2"
> [(set_attr "length" "4,4")
> (set_attr "iscompact" "false")
> - (set_attr "type" "fpu")
> + (set_attr "type" "fpu_ddiv")
> (set_attr "predicable" "yes,no")
> (set_attr "cond" "canuse,nocond")
> ])
> @@ -318,7 +318,7 @@
> "TARGET_FP_DP_SQRT"
> "fdsqrt %0,%1"
> [(set_attr "length" "4")
> - (set_attr "type" "fpu")])
> + (set_attr "type" "fpu_ddiv")])
>
> ;; Comparison
> (define_insn "*cmpdf_fpu"
> --
> 2.17.0
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-07-16 10:14 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-13 9:09 [PATCH] [ARC] Add support for HS4x cpus Claudiu Zissulescu
2018-07-06 22:22 ` Andrew Burgess
2018-07-16 10:14 ` Claudiu Zissulescu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).