From: Alexander Monakov <amonakov@ispras.ru>
To: gcc-patches@gcc.gnu.org
Cc: "Jan Hubička" <honza.hubicka@gmail.com>,
"Joshi, Tejas Sanjay" <TejasSanjay.Joshi@amd.com>,
"Kumar, Venkataramanan" <Venkataramanan.Kumar@amd.com>,
"Alexander Monakov" <amonakov@ispras.ru>
Subject: [PATCH 2/2] i386: correct x87&SSE multiplication modeling in znver.md
Date: Tue, 1 Nov 2022 19:26:37 +0300 [thread overview]
Message-ID: <20221101162637.14238-3-amonakov@ispras.ru> (raw)
In-Reply-To: <20221101162637.14238-1-amonakov@ispras.ru>
All multiplication instructions are fully pipelined, except AVX256
instructions on Zen 1, which issue over two cycles on a 128-bit unit.
Correct the model accordingly to reduce combinatorial explosion in
automaton tables.
Top znver table sizes in insn-automata.o:
Before:
30056 r znver1_fp_min_issue_delay
120224 r znver1_fp_transitions
After:
6720 r znver1_fp_min_issue_delay
53760 r znver1_fp_transitions
gcc/ChangeLog:
PR target/87832
* config/i386/znver.md (znver1_fp_op_mul): Correct cycles in
the reservation.
(znver1_fp_op_mul_load): Ditto.
(znver1_mmx_mul): Ditto.
(znver1_mmx_load): Ditto.
(znver1_ssemul_ss_ps): Ditto.
(znver1_ssemul_ss_ps_load): Ditto.
(znver1_ssemul_avx256_ps): Ditto.
(znver1_ssemul_avx256_ps_load): Ditto.
(znver1_ssemul_sd_pd): Ditto.
(znver1_ssemul_sd_pd_load): Ditto.
(znver2_ssemul_sd_pd): Ditto.
(znver2_ssemul_sd_pd_load): Ditto.
(znver1_ssemul_avx256_pd): Ditto.
(znver1_ssemul_avx256_pd_load): Ditto.
(znver1_sseimul): Ditto.
(znver1_sseimul_avx256): Ditto.
(znver1_sseimul_load): Ditto.
(znver1_sseimul_avx256_load): Ditto.
(znver1_sseimul_di): Ditto.
(znver1_sseimul_load_di): Ditto.
---
gcc/config/i386/znver.md | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md
index c52f8b532..882f250f1 100644
--- a/gcc/config/i386/znver.md
+++ b/gcc/config/i386/znver.md
@@ -573,13 +573,13 @@ (define_insn_reservation "znver1_fp_op_mul" 5
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp0*5")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_fp_op_mul_load" 12
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp0*5")
+ "znver1-direct,znver1-load,znver1-fp0")
(define_insn_reservation "znver1_fp_op_imul_load" 16
(and (eq_attr "cpu" "znver1,znver2,znver3")
@@ -684,13 +684,13 @@ (define_insn_reservation "znver1_mmx_mul" 3
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp0*3")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_mmx_load" 10
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp0*3")
+ "znver1-direct,znver1-load,znver1-fp0")
;; TODO
(define_insn_reservation "znver1_avx256_log" 1
@@ -1161,7 +1161,7 @@ (define_insn_reservation "znver1_ssemul_ss_ps" 3
(eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
- "znver1-direct,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_ss_ps_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1172,47 +1172,47 @@ (define_insn_reservation "znver1_ssemul_ss_ps_load" 10
(eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_avx256_ps" 3
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none"))))
- "znver1-double,(znver1-fp0|znver1-fp1)*3")
+ "znver1-double,znver1-fp0*2|znver1-fp1*2")
(define_insn_reservation "znver1_ssemul_avx256_ps_load" 10
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V8SF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*3")
+ "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2")
(define_insn_reservation "znver1_ssemul_sd_pd" 4
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V2DF,DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none"))))
- "znver1-direct,(znver1-fp0|znver1-fp1)*4")
+ "znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_sd_pd_load" 11
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V2DF,DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load"))))
- "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4")
+ "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver2_ssemul_sd_pd" 3
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
- "znver1-direct,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver2_ssemul_sd_pd_load" 10
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
+ "znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_ssemul_avx256_pd" 5
@@ -1220,14 +1220,14 @@ (define_insn_reservation "znver1_ssemul_avx256_pd" 5
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none"))))
- "znver1-double,(znver1-fp0|znver1-fp1)*4")
+ "znver1-double,znver1-fp0*2|znver1-fp1*2")
(define_insn_reservation "znver1_ssemul_avx256_pd_load" 12
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V4DF")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*4")
+ "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2")
;;SSE imul
(define_insn_reservation "znver1_sseimul" 3
@@ -1239,14 +1239,14 @@ (define_insn_reservation "znver1_sseimul" 3
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none")))
- "znver1-direct,znver1-fp0*3")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_sseimul_avx256" 4
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none"))))
- "znver1-double,znver1-fp0*4")
+ "znver1-double,znver1-fp0*2")
(define_insn_reservation "znver1_sseimul_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
@@ -1257,28 +1257,28 @@ (define_insn_reservation "znver1_sseimul_load" 10
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load")))
- "znver1-direct,znver1-load,znver1-fp0*3")
+ "znver1-direct,znver1-load,znver1-fp0")
(define_insn_reservation "znver1_sseimul_avx256_load" 11
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
- "znver1-double,znver1-load,znver1-fp0*4")
+ "znver1-double,znver1-load,znver1-fp0*2")
(define_insn_reservation "znver1_sseimul_di" 3
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "memory" "none")
(eq_attr "type" "sseimul"))))
- "znver1-direct,znver1-fp0*3")
+ "znver1-direct,znver1-fp0")
(define_insn_reservation "znver1_sseimul_load_di" 10
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
- "znver1-direct,znver1-load,znver1-fp0*3")
+ "znver1-direct,znver1-load,znver1-fp0")
;; SSE compares
(define_insn_reservation "znver1_sse_cmp" 1
--
2.37.2
next prev parent reply other threads:[~2022-11-01 16:26 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-01 16:26 [PATCH 0/2] i386: slim down insn-automata [PR 87832] Alexander Monakov
2022-11-01 16:26 ` [PATCH 1/2] i386: correct x87&SSE division modeling in znver.md Alexander Monakov
2022-11-01 16:26 ` Alexander Monakov [this message]
2022-11-16 11:53 ` [PATCH 2/2] i386: correct x87&SSE multiplication " Kumar, Venkataramanan
2022-11-16 12:21 ` Jan Hubička
2022-11-16 13:13 ` Alexander Monakov
2022-11-16 13:28 ` Jan Hubička
2022-11-07 11:27 ` [PATCH 0/2] i386: slim down insn-automata [PR 87832] Alexander Monakov
2022-11-14 11:19 ` Alexander Monakov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221101162637.14238-3-amonakov@ispras.ru \
--to=amonakov@ispras.ru \
--cc=TejasSanjay.Joshi@amd.com \
--cc=Venkataramanan.Kumar@amd.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=honza.hubicka@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).