From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1592) id 27C97395A40F; Wed, 16 Nov 2022 13:41:58 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 27C97395A40F DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1668606118; bh=iJYlfCugnSE1f1AC6xU7ixw2TKazCHiV61fBBHAYfWU=; h=From:To:Subject:Date:From; b=mvIqxZqT8Q82kFX+vrVbAcZoRdsBJg74OYymM605AJSD2TKSU3D0W2rgUKlPqlID7 pw1zryy/xp+i+2QPk9GpiNx50N6Cl89irHnmEtwlolGYPycQ75JYxcVFE3jU6IqQ8y Bd2iMaGkMapEtgnJq7lxzrwJbmqa47ArkEVJ2cfg= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Alexander Monakov To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-4093] i386: correct x87&SSE multiplication modeling in znver.md X-Act-Checkin: gcc X-Git-Author: Alexander Monakov X-Git-Refname: refs/heads/master X-Git-Oldrev: dd744f06c9952f92738b0860630085f0f0b99574 X-Git-Newrev: d4cc7a8c4a623b62dd0d486d7780d91b58eb6f1f Message-Id: <20221116134158.27C97395A40F@sourceware.org> Date: Wed, 16 Nov 2022 13:41:58 +0000 (GMT) List-Id: https://gcc.gnu.org/g:d4cc7a8c4a623b62dd0d486d7780d91b58eb6f1f commit r13-4093-gd4cc7a8c4a623b62dd0d486d7780d91b58eb6f1f Author: Alexander Monakov Date: Tue Nov 1 17:53:13 2022 +0300 i386: correct x87&SSE multiplication modeling in znver.md All multiplication instructions are fully pipelined, except AVX256 instructions on Zen 1, which issue over two cycles on a 128-bit unit. Correct the model accordingly to reduce combinatorial explosion in automaton tables. Top znver table sizes in insn-automata.o: Before: 30056 r znver1_fp_min_issue_delay 120224 r znver1_fp_transitions After: 6720 r znver1_fp_min_issue_delay 53760 r znver1_fp_transitions gcc/ChangeLog: PR target/87832 * config/i386/znver.md: (znver1_fp_op_mul): Correct cycles in the reservation. (znver1_fp_op_mul_load): Ditto. (znver1_mmx_mul): Ditto. (znver1_mmx_load): Ditto. (znver1_ssemul_ss_ps): Ditto. (znver1_ssemul_ss_ps_load): Ditto. (znver1_ssemul_avx256_ps): Ditto. (znver1_ssemul_avx256_ps_load): Ditto. (znver1_ssemul_sd_pd): Ditto. (znver1_ssemul_sd_pd_load): Ditto. (znver2_ssemul_sd_pd): Ditto. (znver2_ssemul_sd_pd_load): Ditto. (znver1_ssemul_avx256_pd): Ditto. (znver1_ssemul_avx256_pd_load): Ditto. (znver1_sseimul): Ditto. (znver1_sseimul_avx256): Ditto. (znver1_sseimul_load): Ditto. (znver1_sseimul_avx256_load): Ditto. (znver1_sseimul_di): Ditto. (znver1_sseimul_load_di): Ditto. Diff: --- gcc/config/i386/znver.md | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md index c52f8b532ec..882f250f1b6 100644 --- a/gcc/config/i386/znver.md +++ b/gcc/config/i386/znver.md @@ -573,13 +573,13 @@ (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fop,fmul") (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp0*5") + "znver1-direct,znver1-fp0") (define_insn_reservation "znver1_fp_op_mul_load" 12 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fop,fmul") (eq_attr "memory" "load"))) - "znver1-direct,znver1-load,znver1-fp0*5") + "znver1-direct,znver1-load,znver1-fp0") (define_insn_reservation "znver1_fp_op_imul_load" 16 (and (eq_attr "cpu" "znver1,znver2,znver3") @@ -684,13 +684,13 @@ (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxmul") (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp0*3") + "znver1-direct,znver1-fp0") (define_insn_reservation "znver1_mmx_load" 10 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxmul") (eq_attr "memory" "load"))) - "znver1-direct,znver1-load,znver1-fp0*3") + "znver1-direct,znver1-load,znver1-fp0") ;; TODO (define_insn_reservation "znver1_avx256_log" 1 @@ -1161,7 +1161,7 @@ (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))) (and (eq_attr "type" "ssemul") (eq_attr "memory" "none"))) - "znver1-direct,(znver1-fp0|znver1-fp1)*3") + "znver1-direct,znver1-fp0|znver1-fp1") (define_insn_reservation "znver1_ssemul_ss_ps_load" 10 (and (ior (and (eq_attr "cpu" "znver1") @@ -1172,47 +1172,47 @@ (eq_attr "mode" "V8SF,V4SF,SF"))) (and (eq_attr "type" "ssemul") (eq_attr "memory" "load"))) - "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3") + "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") (define_insn_reservation "znver1_ssemul_avx256_ps" 3 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V8SF") (and (eq_attr "type" "ssemul") (eq_attr "memory" "none")))) - "znver1-double,(znver1-fp0|znver1-fp1)*3") + "znver1-double,znver1-fp0*2|znver1-fp1*2") (define_insn_reservation "znver1_ssemul_avx256_ps_load" 10 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V8SF") (and (eq_attr "type" "ssemul") (eq_attr "memory" "load")))) - "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*3") + "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2") (define_insn_reservation "znver1_ssemul_sd_pd" 4 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V2DF,DF") (and (eq_attr "type" "ssemul") (eq_attr "memory" "none")))) - "znver1-direct,(znver1-fp0|znver1-fp1)*4") + "znver1-direct,znver1-fp0|znver1-fp1") (define_insn_reservation "znver1_ssemul_sd_pd_load" 11 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V2DF,DF") (and (eq_attr "type" "ssemul") (eq_attr "memory" "load")))) - "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4") + "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") (define_insn_reservation "znver2_ssemul_sd_pd" 3 (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ssemul") (eq_attr "memory" "none"))) - "znver1-direct,(znver1-fp0|znver1-fp1)*3") + "znver1-direct,znver1-fp0|znver1-fp1") (define_insn_reservation "znver2_ssemul_sd_pd_load" 10 (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ssemul") (eq_attr "memory" "load"))) - "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3") + "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") (define_insn_reservation "znver1_ssemul_avx256_pd" 5 @@ -1220,14 +1220,14 @@ (and (eq_attr "mode" "V4DF") (and (eq_attr "type" "ssemul") (eq_attr "memory" "none")))) - "znver1-double,(znver1-fp0|znver1-fp1)*4") + "znver1-double,znver1-fp0*2|znver1-fp1*2") (define_insn_reservation "znver1_ssemul_avx256_pd_load" 12 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V4DF") (and (eq_attr "type" "ssemul") (eq_attr "memory" "load")))) - "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*4") + "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2") ;;SSE imul (define_insn_reservation "znver1_sseimul" 3 @@ -1239,14 +1239,14 @@ (eq_attr "mode" "TI,OI"))) (and (eq_attr "type" "sseimul") (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp0*3") + "znver1-direct,znver1-fp0") (define_insn_reservation "znver1_sseimul_avx256" 4 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "OI") (and (eq_attr "type" "sseimul") (eq_attr "memory" "none")))) - "znver1-double,znver1-fp0*4") + "znver1-double,znver1-fp0*2") (define_insn_reservation "znver1_sseimul_load" 10 (and (ior (and (eq_attr "cpu" "znver1") @@ -1257,28 +1257,28 @@ (eq_attr "mode" "TI,OI"))) (and (eq_attr "type" "sseimul") (eq_attr "memory" "load"))) - "znver1-direct,znver1-load,znver1-fp0*3") + "znver1-direct,znver1-load,znver1-fp0") (define_insn_reservation "znver1_sseimul_avx256_load" 11 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "OI") (and (eq_attr "type" "sseimul") (eq_attr "memory" "load")))) - "znver1-double,znver1-load,znver1-fp0*4") + "znver1-double,znver1-load,znver1-fp0*2") (define_insn_reservation "znver1_sseimul_di" 3 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "DI") (and (eq_attr "memory" "none") (eq_attr "type" "sseimul")))) - "znver1-direct,znver1-fp0*3") + "znver1-direct,znver1-fp0") (define_insn_reservation "znver1_sseimul_load_di" 10 (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "DI") (and (eq_attr "type" "sseimul") (eq_attr "memory" "load")))) - "znver1-direct,znver1-load,znver1-fp0*3") + "znver1-direct,znver1-load,znver1-fp0") ;; SSE compares (define_insn_reservation "znver1_sse_cmp" 1