From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id D6550385802C; Wed, 2 Mar 2022 18:12:52 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D6550385802C Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work079)] Optimize multiply/add of DImode extended to TImode. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work079 X-Git-Oldrev: a18d3c65db16ed501a872c1fd99455bad33f732d X-Git-Newrev: 0696a98ba3b6ef146e5441670818a586e394cbe7 Message-Id: <20220302181252.D6550385802C@sourceware.org> Date: Wed, 2 Mar 2022 18:12:52 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 02 Mar 2022 18:12:52 -0000 https://gcc.gnu.org/g:0696a98ba3b6ef146e5441670818a586e394cbe7 commit 0696a98ba3b6ef146e5441670818a586e394cbe7 Author: Michael Meissner Date: Tue Mar 1 01:33:51 2022 -0500 Optimize multiply/add of DImode extended to TImode. On power9 and power10 systems, we have instructions that support doing 64-bit integers converted to 128-bit integers and producing 128-bit results. This patch adds support to generate these instructions. Previously we had define_expands to handle conversion of the 64-bit extend to 128-bit and multiply. This patch changes these define_expands to define_insn_and_split and then it provides combiner patterns to generate thes multiply/add instructions. 2022-03-01 Michael Meissner gcc/ PR target/103109 * config/rs6000/rs6000.md (su_int32): New code attribute. (mul3): Convert into define_insn_and_split. (maddld4): Add generator function. (mulditi3_adddi3): New insn. (mulditi3_add_const): New insn. (addti3): Convert into define_insn_and_split. (subti3): Likewise. Diff: --- gcc/config/rs6000/rs6000.md | 160 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 143 insertions(+), 17 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index fdfbc6566a5..b5fc1855c35 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -676,6 +676,9 @@ (float "") (unsigned_float "uns")]) +(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand") + (zero_extend "c32bit_cint_operand")]) + ; Various instructions that come in SI and DI forms. ; A generic w/d attribute, for things like cmpw/cmpd. (define_mode_attr wd [(QI "b") @@ -3199,13 +3202,16 @@ "mulhw %0,%1,%2" [(set_attr "type" "mul")]) -(define_expand "mul3" - [(set (match_operand: 0 "gpc_reg_operand") +(define_insn_and_split "mul3" + [(set (match_operand: 0 "gpc_reg_operand" "=&r") (mult: (any_extend: - (match_operand:GPR 1 "gpc_reg_operand")) + (match_operand:GPR 1 "gpc_reg_operand" "r")) (any_extend: - (match_operand:GPR 2 "gpc_reg_operand"))))] + (match_operand:GPR 2 "gpc_reg_operand" "r"))))] "!(mode == SImode && TARGET_POWERPC64)" + "#" + "&& 1" + [(pc)] { rtx l = gen_reg_rtx (mode); rtx h = gen_reg_rtx (mode); @@ -3214,9 +3220,10 @@ emit_move_insn (gen_lowpart (mode, operands[0]), l); emit_move_insn (gen_highpart (mode, operands[0]), h); DONE; -}) +} + [(set_attr "length" "8")]) -(define_insn "*maddld4" +(define_insn "maddld4" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") (match_operand:GPR 2 "gpc_reg_operand" "r")) @@ -3225,6 +3232,113 @@ "maddld %0,%1,%2,%3" [(set_attr "type" "mul")]) +(define_insn_and_split "*mulditi3_adddi3" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r") + (plus:TI + (mult:TI + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))] + "TARGET_MADDLD && TARGET_POWERPC64" + "#" + "&& 1" + [(pc)] +{ + rtx dest = operands[0]; + rtx dest_hi = gen_highpart (DImode, dest); + rtx dest_lo = gen_lowpart (DImode, dest); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = operands[3]; + rtx tmp_hi, tmp_lo; + + if (can_create_pseudo_p ()) + { + tmp_hi = gen_reg_rtx (DImode); + tmp_lo = gen_reg_rtx (DImode); + } + else + { + tmp_hi = dest_hi; + tmp_lo = dest_lo; + } + + emit_insn (gen_mulditi3_adddi3_upper (tmp_hi, op1, op2, op3)); + emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3)); + + if (can_create_pseudo_p ()) + { + emit_move_insn (dest_hi, tmp_hi); + emit_move_insn (dest_lo, tmp_lo); + } + DONE; +} + [(set_attr "length" "8")]) + +;; Optimize 128-bit multiply with zero/sign extend and adding a constant. We +;; force the constant into a register to generate li, maddhd, and maddld, +;; instead of mulld, mulhd, addic, and addze. We can't combine this pattern +;; with the pattern that handles registers, since constants don't have a sign +;; or zero extend around them. +(define_insn_and_split "*mulditi3_add_const" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r") + (plus:TI + (mult:TI + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r"))) + (match_operand 3 "" "r")))] + "TARGET_MADDLD && TARGET_POWERPC64 +" + "#" + "&& 1" + [(pc)] +{ + rtx dest = operands[0]; + rtx dest_hi = gen_highpart (DImode, dest); + rtx dest_lo = gen_lowpart (DImode, dest); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = force_reg (DImode, operands[3]); + rtx tmp_hi, tmp_lo; + + if (can_create_pseudo_p ()) + { + tmp_hi = gen_reg_rtx (DImode); + tmp_lo = gen_reg_rtx (DImode); + } + else + { + tmp_hi = dest_hi; + tmp_lo = dest_lo; + } + + emit_insn (gen_mulditi3_adddi3_upper (tmp_hi, op1, op2, op3)); + emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3)); + + if (can_create_pseudo_p ()) + { + emit_move_insn (dest_hi, tmp_hi); + emit_move_insn (dest_lo, tmp_lo); + } + DONE; +} + [(set_attr "length" "8")]) + +(define_insn "mulditi3_adddi3_upper" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (truncate:DI + (lshiftrt:TI + (plus:TI + (mult:TI + (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r")) + (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r"))) + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))) + (const_int 64))))] + "TARGET_MADDLD && TARGET_POWERPC64" + "maddhd %0,%1,%2,%3" + [(set_attr "type" "mul") + (set_attr "size" "64")]) + (define_insn "udiv3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") @@ -7029,12 +7143,19 @@ ;; allocator from allocating registers that overlap with the inputs ;; (for example, having an input in 7,8 and an output in 6,7). We ;; also allow for the output being the same as one of the inputs. - -(define_expand "addti3" - [(set (match_operand:TI 0 "gpc_reg_operand") - (plus:TI (match_operand:TI 1 "gpc_reg_operand") - (match_operand:TI 2 "reg_or_short_operand")))] +;; +;; Addti3/subti3 are define_insn_and_splits instead of define_expand, to allow +;; for combine to make things like multiply and add with extend operations. + +(define_insn_and_split "addti3" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r") + (plus:TI (match_operand:TI 1 "gpc_reg_operand" "r,0,r") + (match_operand:TI 2 "reg_or_short_operand" "rn,r,0"))) + (clobber (reg:DI CA_REGNO))] "TARGET_64BIT" + "#" + "&& 1" + [(pc)] { rtx lo0 = gen_lowpart (DImode, operands[0]); rtx lo1 = gen_lowpart (DImode, operands[1]); @@ -7051,13 +7172,17 @@ emit_insn (gen_adddi3_carry (lo0, lo1, lo2)); emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2)); DONE; -}) +} + [(set_attr "length" "8")]) -(define_expand "subti3" - [(set (match_operand:TI 0 "gpc_reg_operand") - (minus:TI (match_operand:TI 1 "reg_or_short_operand") - (match_operand:TI 2 "gpc_reg_operand")))] +(define_insn_and_split "subti3" + [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r") + (minus:TI (match_operand:TI 1 "reg_or_short_operand" "rn,0,r") + (match_operand:TI 2 "gpc_reg_operand" "r,r,0")))] "TARGET_64BIT" + "#" + "&& 1" + [(pc)] { rtx lo0 = gen_lowpart (DImode, operands[0]); rtx lo1 = gen_lowpart (DImode, operands[1]); @@ -7074,7 +7199,8 @@ emit_insn (gen_subfdi3_carry (lo0, lo2, lo1)); emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1)); DONE; -}) +} + [(set_attr "length" "8")]) ;; 128-bit logical operations expanders