public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Michael Meissner <meissner@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work079)] Optimize multiply/add of DImode extended to TImode. Date: Tue, 1 Mar 2022 06:34:09 +0000 (GMT) [thread overview] Message-ID: <20220301063409.6865A3858C78@sourceware.org> (raw) https://gcc.gnu.org/g:7497ffea286bbe13cbe846a314761e45b5a024e8 commit 7497ffea286bbe13cbe846a314761e45b5a024e8 Author: Michael Meissner <meissner@linux.ibm.com> Date: Tue Mar 1 01:33:51 2022 -0500 Optimize multiply/add of DImode extended to TImode. On power9 and power10 systems, we have instructions that support doing 64-bit integers converted to 128-bit integers and producing 128-bit results. This patch adds support to generate these instructions. Previously we had define_expands to handle conversion of the 64-bit extend to 128-bit and multiply. This patch changes these define_expands to define_insn_and_split and then it provides combiner patterns to generate these multiply/add instructions. 2022-03-01 Michael Meissner <meissner@linux.ibm.com> gcc/ PR target/103109 * config/rs6000/rs6000.md (su_int32): New code attribute. (<u>mul<mode><dmode>3): Convert into define_insn_and_split. (maddld<mode>4): Add generator function. (<u>mulditi3_<u>adddi3): New insn. (<u>mulditi3_add_const): New insn. (addti3): Convert into define_insn_and_split. (subti3): Likewise. Diff: --- gcc/config/rs6000/rs6000.md | 160 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 17 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index fdfbc6566a5..b5fc1855c35 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -676,6 +676,9 @@ (float "") (unsigned_float "uns")]) +(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand") + (zero_extend "c32bit_cint_operand")]) + ; Various instructions that come in SI and DI forms. ; A generic w/d attribute, for things like cmpw/cmpd. 
(define_mode_attr wd [(QI "b") @@ -3199,13 +3202,16 @@ "mulhw<u> %0,%1,%2" [(set_attr "type" "mul")]) -(define_expand "<u>mul<mode><dmode>3" - [(set (match_operand:<DMODE> 0 "gpc_reg_operand") +(define_insn_and_split "<u>mul<mode><dmode>3" + [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r") (mult:<DMODE> (any_extend:<DMODE> - (match_operand:GPR 1 "gpc_reg_operand")) + (match_operand:GPR 1 "gpc_reg_operand" "r")) (any_extend:<DMODE> - (match_operand:GPR 2 "gpc_reg_operand"))))] + (match_operand:GPR 2 "gpc_reg_operand" "r"))))] "!(<MODE>mode == SImode && TARGET_POWERPC64)" + "#" + "&& 1" + [(pc)] { rtx l = gen_reg_rtx (<MODE>mode); rtx h = gen_reg_rtx (<MODE>mode); @@ -3214,9 +3220,10 @@ emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l); emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h); DONE; -}) +} + [(set_attr "length" "8")]) -(define_insn "*maddld<mode>4" +(define_insn "maddld<mode>4" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") (match_operand:GPR 2 "gpc_reg_operand" "r")) @@ -3225,6 +3232,113 @@ "maddld %0,%1,%2,%3" [(set_attr "type" "mul")]) +(define_insn_and_split "*<u>mulditi3_<u>adddi3" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r") + (plus:TI + (mult:TI + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))] + "TARGET_MADDLD && TARGET_POWERPC64" + "#" + "&& 1" + [(pc)] +{ + rtx dest = operands[0]; + rtx dest_hi = gen_highpart (DImode, dest); + rtx dest_lo = gen_lowpart (DImode, dest); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + rtx tmp_hi, tmp_lo; + + if (can_create_pseudo_p ()) + { + tmp_hi = gen_reg_rtx (DImode); + tmp_lo = gen_reg_rtx (DImode); + } + else + { + tmp_hi = dest_hi; + tmp_lo = dest_lo; + } + + emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3)); + emit_insn 
(gen_maddlddi4 (tmp_lo, op1, op2, op3)); + + if (can_create_pseudo_p ()) + { + emit_move_insn (dest_hi, tmp_hi); + emit_move_insn (dest_lo, tmp_lo); + } + DONE; +} + [(set_attr "length" "8")]) + +;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We +;; force the constant into a register to generate li, maddhd, and maddld, +;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern +;; with the pattern that handles registers, since constants don't have a sign +;; or zero extend around them. +(define_insn_and_split "*<u>mulditi3_add_const" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r") + (plus:TI + (mult:TI + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r"))) + (match_operand 3 "<su_int32>" "r")))] + "TARGET_MADDLD && TARGET_POWERPC64 +" + "#" + "&& 1" + [(pc)] +{ + rtx dest = operands[0]; + rtx dest_hi = gen_highpart (DImode, dest); + rtx dest_lo = gen_lowpart (DImode, dest); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = force_reg (DImode, operands[3]); + rtx tmp_hi, tmp_lo; + + if (can_create_pseudo_p ()) + { + tmp_hi = gen_reg_rtx (DImode); + tmp_lo = gen_reg_rtx (DImode); + } + else + { + tmp_hi = dest_hi; + tmp_lo = dest_lo; + } + + emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3)); + emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3)); + + if (can_create_pseudo_p ()) + { + emit_move_insn (dest_hi, tmp_hi); + emit_move_insn (dest_lo, tmp_lo); + } + DONE; +} + [(set_attr "length" "8")]) + +(define_insn "<u>mulditi3_<u>adddi3_upper" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (truncate:DI + (lshiftrt:TI + (plus:TI + (mult:TI + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))) + (const_int 64))))] + "TARGET_MADDLD && TARGET_POWERPC64" + "maddhd<u> %0,%1,%2,%3" + [(set_attr 
"type" "mul") + (set_attr "size" "64")]) + (define_insn "udiv<mode>3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") @@ -7029,12 +7143,19 @@ ;; allocator from allocating registers that overlap with the inputs ;; (for example, having an input in 7,8 and an output in 6,7). We ;; also allow for the output being the same as one of the inputs. - -(define_expand "addti3" - [(set (match_operand:TI 0 "gpc_reg_operand") - (plus:TI (match_operand:TI 1 "gpc_reg_operand") - (match_operand:TI 2 "reg_or_short_operand")))] +;; +;; Addti3/subti3 are define_insn_and_splits instead of define_expand, to allow +;; for combine to make things like multiply and add with extend operations. + +(define_insn_and_split "addti3" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r") + (plus:TI (match_operand:TI 1 "gpc_reg_operand" "r,0,r") + (match_operand:TI 2 "reg_or_short_operand" "rn,r,0"))) + (clobber (reg:DI CA_REGNO))] "TARGET_64BIT" + "#" + "&& 1" + [(pc)] { rtx lo0 = gen_lowpart (DImode, operands[0]); rtx lo1 = gen_lowpart (DImode, operands[1]); @@ -7051,13 +7172,17 @@ emit_insn (gen_adddi3_carry (lo0, lo1, lo2)); emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2)); DONE; -}) +} + [(set_attr "length" "8")]) -(define_expand "subti3" - [(set (match_operand:TI 0 "gpc_reg_operand") - (minus:TI (match_operand:TI 1 "reg_or_short_operand") - (match_operand:TI 2 "gpc_reg_operand")))] +(define_insn_and_split "subti3" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r") + (minus:TI (match_operand:TI 1 "reg_or_short_operand" "rn,0,r") + (match_operand:TI 2 "gpc_reg_operand" "r,r,0")))] "TARGET_64BIT" + "#" + "&& 1" + [(pc)] { rtx lo0 = gen_lowpart (DImode, operands[0]); rtx lo1 = gen_lowpart (DImode, operands[1]); @@ -7074,7 +7199,8 @@ emit_insn (gen_subfdi3_carry (lo0, lo2, lo1)); emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1)); DONE; -}) +} + [(set_attr "length" "8")]) \f ;; 128-bit logical operations expanders
next reply other threads:[~2022-03-01 6:34 UTC|newest] Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-03-01 6:34 Michael Meissner [this message] 2022-03-02 18:12 Michael Meissner 2022-03-02 19:28 Michael Meissner 2022-03-03 2:01 Michael Meissner
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220301063409.6865A3858C78@sourceware.org \ --to=meissner@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).