public inbox for
 help / color / mirror / Atom feed
From: Alexander Monakov <>
Cc: "Jan Hubička" <>,
	"Joshi, Tejas Sanjay" <>,
	"Kumar, Venkataramanan" <>,
	"Alexander Monakov" <>
Subject: [PATCH 1/2] i386: correct x87&SSE division modeling in znver.md
Date: Tue,  1 Nov 2022 19:26:36 +0300	[thread overview]
Message-ID: <> (raw)
In-Reply-To: <>

Correct modeling of division instructions in the SIMD/FP domain for
AMD Zen architectures and avoid combinatorial explosion of automaton
tables by modeling the separate floating-point division unit and
correcting reservations to reflect reciprocal throughput of the
corresponding instructions, similar to earlier commit
5cee5f94000 ("i386: correct integer division modeling in znver.md").

Division is partially pipelined and some instructions have fractional
throughput (e.g. Zen 3 can issue divss and divsd once every 3.5 and 4.5
cycles on average, respectively). Considering these CPUs implement
out-of-order execution, the model doesn't need to be exact to the last
cycle, so simplify it by using 4/5 cycles for SF/DF modes, and not
modeling the fact that the FP3 pipe is occupied for one cycle.

Top znver table sizes in insn-automata.o:

Before:

428108 r znver1_fp_min_issue_delay
856216 r znver1_fp_transitions

After:

30056 r znver1_fp_min_issue_delay
120224 r znver1_fp_transitions


	PR target/87832
	* config/i386/znver.md (znver1_fdiv): New automaton.
	(znver1-fdiv): New unit.
	(znver1_fp_op_div): Correct unit and cycles in the reservation.
	(znver1_fp_op_div_load): Ditto.
	(znver1_fp_op_idiv_load): Ditto.
	(znver2_fp_op_idiv_load): Ditto.
	(znver1_ssediv_ss_ps): Ditto.
	(znver1_ssediv_ss_ps_load): Ditto.
	(znver1_ssediv_sd_pd): Ditto.
	(znver1_ssediv_sd_pd_load): Ditto.
	(znver1_ssediv_avx256_ps): Ditto.
	(znver1_ssediv_avx256_ps_load): Ditto.
	(znver1_ssediv_avx256_pd): Ditto.
	(znver1_ssediv_avx256_pd_load): Ditto.
 gcc/config/i386/znver.md | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md
index 4aa098fd8..c52f8b532 100644
--- a/gcc/config/i386/znver.md
+++ b/gcc/config/i386/znver.md
@@ -24,7 +24,7 @@ (define_attr "znver1_decode" "direct,vector,double"
 ;; AMD znver1, znver2 and znver3 Scheduling
 ;; Modeling automatons for zen decoders, integer execution pipes,
 ;; SIMD/FP domain, AGU pipes, and dividers.
-(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver1_idiv")
+(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu, znver1_idiv, znver1_fdiv")
 ;; Decoders unit has 4 decoders and all of them can decode fast path
 ;; and vector type instructions.
@@ -95,6 +95,7 @@ (define_reservation "znver2-fvector" "znver1-fp0+znver1-fp1
 ;; Dividers
 (define_cpu_unit "znver1-idiv" "znver1_idiv")
+(define_cpu_unit "znver1-fdiv" "znver1_fdiv")
 ;; Call instruction
 (define_insn_reservation "znver1_call" 1
@@ -591,27 +592,27 @@ (define_insn_reservation "znver1_fp_op_div" 15
 			 (and (eq_attr "cpu" "znver1,znver2,znver3")
 			      (and (eq_attr "type" "fdiv")
 				   (eq_attr "memory" "none")))
-			 "znver1-direct,znver1-fp3*15")
+			 "znver1-direct,znver1-fdiv*6")
 (define_insn_reservation "znver1_fp_op_div_load" 22
 			 (and (eq_attr "cpu" "znver1,znver2,znver3")
 			      (and (eq_attr "type" "fdiv")
 				   (eq_attr "memory" "load")))
-			 "znver1-direct,znver1-load,znver1-fp3*15")
+			 "znver1-direct,znver1-load,znver1-fdiv*6")
 (define_insn_reservation "znver1_fp_op_idiv_load" 27
 			 (and (eq_attr "cpu" "znver1")
 			      (and (eq_attr "type" "fdiv")
 				   (and (eq_attr "fp_int_src" "true")
 					(eq_attr "memory" "load"))))
-			 "znver1-double,znver1-load,znver1-fp3*19")
+			 "znver1-double,znver1-load,znver1-fdiv*6")
 (define_insn_reservation "znver2_fp_op_idiv_load" 26
 			 (and (eq_attr "cpu" "znver2,znver3")
 			      (and (eq_attr "type" "fdiv")
 				   (and (eq_attr "fp_int_src" "true")
 					(eq_attr "memory" "load"))))
-			 "znver1-double,znver1-load,znver1-fp3*19")
+			 "znver1-double,znver1-load,znver1-fdiv*6")
 ;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
@@ -1088,7 +1089,7 @@ (define_insn_reservation "znver1_ssediv_ss_ps" 10
 					      (eq_attr "mode" "V8SF,V4SF,SF")))
 			      (and (eq_attr "type" "ssediv")
 				   (eq_attr "memory" "none")))
-			 "znver1-direct,znver1-fp3*10")
+			 "znver1-direct,znver1-fdiv*4")
 (define_insn_reservation "znver1_ssediv_ss_ps_load" 17
 			 (and (ior (and (eq_attr "cpu" "znver1")
@@ -1099,7 +1100,7 @@ (define_insn_reservation "znver1_ssediv_ss_ps_load" 17
 					      (eq_attr "mode" "V8SF,V4SF,SF")))
 			      (and (eq_attr "type" "ssediv")
 				   (eq_attr "memory" "load")))
-			 "znver1-direct,znver1-load,znver1-fp3*10")
+			 "znver1-direct,znver1-load,znver1-fdiv*4")
 (define_insn_reservation "znver1_ssediv_sd_pd" 13
 			 (and (ior (and (eq_attr "cpu" "znver1")
@@ -1110,7 +1111,7 @@ (define_insn_reservation "znver1_ssediv_sd_pd" 13
 					      (eq_attr "mode" "V4DF,V2DF,DF")))
 			      (and (eq_attr "type" "ssediv")
 				   (eq_attr "memory" "none")))
-			 "znver1-direct,znver1-fp3*13")
+			 "znver1-direct,znver1-fdiv*5")
 (define_insn_reservation "znver1_ssediv_sd_pd_load" 20
 			 (and (ior (and (eq_attr "cpu" "znver1")
@@ -1121,35 +1122,35 @@ (define_insn_reservation "znver1_ssediv_sd_pd_load" 20
 					      (eq_attr "mode" "V4DF,V2DF,DF")))
 			      (and (eq_attr "type" "ssediv")
 				   (eq_attr "memory" "load")))
-			 "znver1-direct,znver1-load,znver1-fp3*13")
+			 "znver1-direct,znver1-load,znver1-fdiv*5")
 (define_insn_reservation "znver1_ssediv_avx256_ps" 12
 			 (and (eq_attr "cpu" "znver1")
 			      (and (eq_attr "mode" "V8SF")
 				   (and (eq_attr "memory" "none")
 					(eq_attr "type" "ssediv"))))
-			 "znver1-double,znver1-fp3*12")
+			 "znver1-double,znver1-fdiv*8")
 (define_insn_reservation "znver1_ssediv_avx256_ps_load" 19
 			 (and (eq_attr "cpu" "znver1")
 			      (and (eq_attr "mode" "V8SF")
 				   (and (eq_attr "type" "ssediv")
 					(eq_attr "memory" "load"))))
-			 "znver1-double,znver1-load,znver1-fp3*12")
+			 "znver1-double,znver1-load,znver1-fdiv*8")
 (define_insn_reservation "znver1_ssediv_avx256_pd" 15
 			 (and (eq_attr "cpu" "znver1")
 			      (and (eq_attr "mode" "V4DF")
 				   (and (eq_attr "type" "ssediv")
 					(eq_attr "memory" "none"))))
-			 "znver1-double,znver1-fp3*15")
+			 "znver1-double,znver1-fdiv*10")
 (define_insn_reservation "znver1_ssediv_avx256_pd_load" 22 
 			 (and (eq_attr "cpu" "znver1")
 			      (and (eq_attr "mode" "V4DF")
 				   (and (eq_attr "type" "ssediv")
 					(eq_attr "memory" "load"))))
-			 "znver1-double,znver1-load,znver1-fp3*15")
+			 "znver1-double,znver1-load,znver1-fdiv*10")
 (define_insn_reservation "znver1_ssemul_ss_ps" 3
 			 (and (ior (and (eq_attr "cpu" "znver1")

  reply	other threads:[~2022-11-01 16:26 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-01 16:26 [PATCH 0/2] i386: slim down insn-automata [PR 87832] Alexander Monakov
2022-11-01 16:26 ` Alexander Monakov [this message]
2022-11-01 16:26 ` [PATCH 2/2] i386: correct x87&SSE multiplication modeling in Alexander Monakov
2022-11-16 11:53   ` Kumar, Venkataramanan
2022-11-16 12:21     ` Jan Hubička
2022-11-16 13:13       ` Alexander Monakov
2022-11-16 13:28         ` Jan Hubička
2022-11-07 11:27 ` [PATCH 0/2] i386: slim down insn-automata [PR 87832] Alexander Monakov
2022-11-14 11:19   ` Alexander Monakov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \ \ \ \ \ \ \

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).