public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-4956] i386: correct division modeling in lujiazui.md
@ 2023-01-02 16:39 Alexander Monakov
  0 siblings, 0 replies; only message in thread
From: Alexander Monakov @ 2023-01-02 16:39 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:ec1db9017939bb8289c9bd63aace66c0f3957ecd

commit r13-4956-gec1db9017939bb8289c9bd63aace66c0f3957ecd
Author: Alexander Monakov <amonakov@ispras.ru>
Date:   Fri Dec 9 20:47:55 2022 +0300

    i386: correct division modeling in lujiazui.md
    
    Model the divider in Lujiazui processors as a separate automaton to
    significantly reduce the overall model size. This should also result
    in improved accuracy, as pipe 0 should be able to accept new
    instructions while the divider is occupied.
    
    It is unclear why integer divisions are modeled as if pipes 0-3 are all
    occupied. I've opted to keep a single-cycle reservation of all four
    pipes together, so GCC should continue trying to pack instructions
    around a division accordingly.
    
    Currently, the top three symbols in insn-automata.o are:
    
    106102 r lujiazui_core_check
    106102 r lujiazui_core_transitions
    196123 r lujiazui_core_min_issue_delay
    
    This patch shrinks all lujiazui tables to:
    
    3 r lujiazui_decoder_min_issue_delay
    20 r lujiazui_decoder_transitions
    32 r lujiazui_agu_min_issue_delay
    126 r lujiazui_agu_transitions
    304 r lujiazui_div_base
    352 r lujiazui_div_check
    352 r lujiazui_div_transitions
    1152 r lujiazui_core_min_issue_delay
    1592 r lujiazui_agu_translate
    1592 r lujiazui_core_translate
    1592 r lujiazui_decoder_translate
    1592 r lujiazui_div_translate
    3952 r lujiazui_div_min_issue_delay
    9216 r lujiazui_core_transitions
    
    This continues the work on reducing i386 insn-automata.o size started
    with similar fixes for division and multiplication instructions in
    znver.md.
    
    gcc/ChangeLog:
    
            PR target/87832
            * config/i386/lujiazui.md (lujiazui_div): New automaton.
            (lua_div): New unit.
            (lua_idiv_qi): Correct unit in the reservation.
            (lua_idiv_qi_load): Ditto.
            (lua_idiv_hi): Ditto.
            (lua_idiv_hi_load): Ditto.
            (lua_idiv_si): Ditto.
            (lua_idiv_si_load): Ditto.
            (lua_idiv_di): Ditto.
            (lua_idiv_di_load): Ditto.
            (lua_fdiv_SF): Ditto.
            (lua_fdiv_SF_load): Ditto.
            (lua_fdiv_DF): Ditto.
            (lua_fdiv_DF_load): Ditto.
            (lua_fdiv_XF): Ditto.
            (lua_fdiv_XF_load): Ditto.
            (lua_ssediv_SF): Ditto.
            (lua_ssediv_load_SF): Ditto.
            (lua_ssediv_V4SF): Ditto.
            (lua_ssediv_load_V4SF): Ditto.
            (lua_ssediv_V8SF): Ditto.
            (lua_ssediv_load_V8SF): Ditto.
            (lua_ssediv_SD): Ditto.
            (lua_ssediv_load_SD): Ditto.
            (lua_ssediv_V2DF): Ditto.
            (lua_ssediv_load_V2DF): Ditto.
            (lua_ssediv_V4DF): Ditto.
            (lua_ssediv_load_V4DF): Ditto.

Diff:
---
 gcc/config/i386/lujiazui.md | 58 +++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/gcc/config/i386/lujiazui.md b/gcc/config/i386/lujiazui.md
index 9046c09f283..58a230c70f4 100644
--- a/gcc/config/i386/lujiazui.md
+++ b/gcc/config/i386/lujiazui.md
@@ -19,8 +19,8 @@
 
 ;; Scheduling for ZHAOXIN lujiazui processor.
 
-;; Modeling automatons for decoders, execution pipes and AGU pipes.
-(define_automaton "lujiazui_decoder,lujiazui_core,lujiazui_agu")
+;; Modeling automatons for decoders, execution pipes, AGU pipes, and divider.
+(define_automaton "lujiazui_decoder,lujiazui_core,lujiazui_agu,lujiazui_div")
 
 ;; The rules for the decoder are simple:
 ;;  - an instruction with 1 uop can be decoded by any of the three
@@ -55,6 +55,8 @@
 (define_cpu_unit "lua_p0,lua_p1,lua_p2,lua_p3" "lujiazui_core")
 (define_cpu_unit "lua_p4,lua_p5" "lujiazui_agu")
 
+(define_cpu_unit "lua_div" "lujiazui_div")
+
 (define_reservation "lua_p03" "lua_p0|lua_p3")
 (define_reservation "lua_p12" "lua_p1|lua_p2")
 (define_reservation "lua_p1p2" "lua_p1+lua_p2")
@@ -229,56 +231,56 @@
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "QI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p0p1p2p3*21")
+			 "lua_decoder0,lua_p0p1p2p3,lua_div*21")
 
 (define_insn_reservation "lua_idiv_qi_load" 25
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "QI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p45,lua_p0p1p2p3*21")
+			 "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*21")
 
 (define_insn_reservation "lua_idiv_hi" 22
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "HI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p0p1p2p3*22")
+			 "lua_decoder0,lua_p0p1p2p3,lua_div*22")
 
 (define_insn_reservation "lua_idiv_hi_load" 26
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "HI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p45,lua_p0p1p2p3*22")
+			 "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*22")
 
 (define_insn_reservation "lua_idiv_si" 20
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "SI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p0p1p2p3*20")
+			 "lua_decoder0,lua_p0p1p2p3,lua_div*20")
 
 (define_insn_reservation "lua_idiv_si_load" 24
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "SI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p45,lua_p0p1p2p3*20")
+			 "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*20")
 
 (define_insn_reservation "lua_idiv_di" 150
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "DI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p0p1p2p3*150")
+			 "lua_decoder0,lua_p0p1p2p3,lua_div*150")
 
 (define_insn_reservation "lua_idiv_di_load" 154
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "DI")
 					(eq_attr "type" "idiv"))))
-			 "lua_decoder0,lua_p45,lua_p0p1p2p3*150")
+			 "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*150")
 
 ;; x87 floating point operations.
 
@@ -406,42 +408,42 @@
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "SF")
 				    (eq_attr "type" "fdiv,fpspc"))))
-			 "lua_decodern,lua_p0*15")
+			 "lua_decodern,lua_p0,lua_div*15")
 
 (define_insn_reservation "lua_fdiv_SF_load" 19
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "SF")
 				    (eq_attr "type" "fdiv,fpspc"))))
-			 "lua_decoder01,lua_p45,lua_p0*15")
+			 "lua_decoder01,lua_p45,lua_p0,lua_div*15")
 
 (define_insn_reservation "lua_fdiv_DF" 18
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "DF")
 				    (eq_attr "type" "fdiv,fpspc"))))
-			 "lua_decodern,lua_p0*18")
+			 "lua_decodern,lua_p0,lua_div*18")
 
 (define_insn_reservation "lua_fdiv_DF_load" 22
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "DF")
 				    (eq_attr "type" "fdiv,fpspc"))))
-			 "lua_decoder01,lua_p45,lua_p0*18")
+			 "lua_decoder01,lua_p45,lua_p0,lua_div*18")
 
 (define_insn_reservation "lua_fdiv_XF" 22
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "XF")
 				    (eq_attr "type" "fdiv,fpspc"))))
-			 "lua_decoder0,lua_p0*22")
+			 "lua_decoder0,lua_p0,lua_div*22")
 
 (define_insn_reservation "lua_fdiv_XF_load" 26
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "XF")
 				    (eq_attr "type" "fdiv,fpspc"))))
-			 "lua_decoder0,lua_p45,lua_p0*22")
+			 "lua_decoder0,lua_p45,lua_p0,lua_div*22")
 
 ;; MMX instructions.
 
@@ -593,84 +595,84 @@
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "SF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decodern,lua_p0*13")
+			 "lua_decodern,lua_p0,lua_div*13")
 
 (define_insn_reservation "lua_ssediv_load_SF" 17
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "SF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder01,lua_p45,lua_p0*13")
+			 "lua_decoder01,lua_p45,lua_p0,lua_div*13")
 
 (define_insn_reservation "lua_ssediv_V4SF" 23
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decodern,lua_p0*23")
+			 "lua_decodern,lua_p0,lua_div*23")
 
 (define_insn_reservation "lua_ssediv_load_V4SF" 27
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V4SF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder01,lua_p45,lua_p0*23")
+			 "lua_decoder01,lua_p45,lua_p0,lua_div*23")
 
 (define_insn_reservation "lua_ssediv_V8SF" 47
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V8SF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder0,lua_p0*47")
+			 "lua_decoder0,lua_p0,lua_div*47")
 
 (define_insn_reservation "lua_ssediv_load_V8SF" 51
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V8SF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder0,lua_p45,lua_p0*47")
+			 "lua_decoder0,lua_p45,lua_p0,lua_div*47")
 
 (define_insn_reservation "lua_ssediv_SD" 17
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "DF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decodern,lua_p0*17")
+			 "lua_decodern,lua_p0,lua_div*17")
 
 (define_insn_reservation "lua_ssediv_load_SD" 21
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "DF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder01,lua_p45,lua_p0*17")
+			 "lua_decoder01,lua_p45,lua_p0,lua_div*17")
 
 (define_insn_reservation "lua_ssediv_V2DF" 30
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V2DF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decodern,lua_p0*30")
+			 "lua_decodern,lua_p0,lua_div*30")
 
 (define_insn_reservation "lua_ssediv_load_V2DF" 34
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V2DF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder01,lua_p45,lua_p0*30")
+			 "lua_decoder01,lua_p45,lua_p0,lua_div*30")
 
 (define_insn_reservation "lua_ssediv_V4DF" 56
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "none")
 				   (and (eq_attr "mode" "V4DF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder0,lua_p0*56")
+			 "lua_decoder0,lua_p0,lua_div*56")
 
 (define_insn_reservation "lua_ssediv_load_V4DF" 60
 			 (and (eq_attr "cpu" "lujiazui")
 			      (and (eq_attr "memory" "load")
 				   (and (eq_attr "mode" "V4DF")
 					(eq_attr "type" "ssediv"))))
-			 "lua_decoder0,lua_p4p5,lua_p0*56")
+			 "lua_decoder0,lua_p4p5,lua_p0,lua_div*56")
 
 
 (define_insn_reservation "lua_sseicvt_si" 2

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-01-02 16:39 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-02 16:39 [gcc r13-4956] i386: correct division modeling in lujiazui.md Alexander Monakov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).