amdgcn: Align VGPR pairs

Aligning the registers is not needed by the architecture, but doing so
allows us to remove the requirement for bug-prone early-clobber constraints
from many split patterns (and avoid adding more in future).

2020-02-20  Andrew Stubbs

gcc/
	* config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
	* config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
	(addv64di3_exec): Likewise.
	(subv64di3): Likewise.
	(subv64di3_exec): Likewise.
	(addv64di3_zext): Likewise.
	(addv64di3_zext_exec): Likewise.
	(addv64di3_zext_dup): Likewise.
	(addv64di3_zext_dup_exec): Likewise.
	(addv64di3_zext_dup2): Likewise.
	(addv64di3_zext_dup2_exec): Likewise.
	(addv64di3_sext_dup2): Likewise.
	(addv64di3_sext_dup2_exec): Likewise.
	(<expander>v64di3): Likewise.
	(<expander>v64di3_exec): Likewise.
	(*<reduc_op>_dpp_shr_v64di): Likewise.
	(*plus_carry_dpp_shr_v64di): Likewise.
	* config/gcn/gcn.md (adddi3): Likewise.
	(addptrdi3): Likewise.
	(<expander>di3): Likewise.
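The hazard the early-clobbers guarded against is easiest to see in a split
64-bit operation that writes its low half first.  Below is a minimal sketch
in plain C (not GCC code; the register file, emission order, and names are
illustrative stand-ins) of how a partially overlapping destination pair
corrupts an input high word before it is read, and why even-aligned pairs
cannot overlap that way:

    /* Minimal sketch of the split hazard; not GCC code.  A 64-bit value
       occupies two consecutive 32-bit "VGPRs", and a split 64-bit add is
       emitted as two 32-bit operations, low part first.  */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t vgpr[4];

    /* dst pair = a pair + constant B, low part first, as a split emits it.  */
    static void
    add64_split (int dst, int a, uint64_t b)
    {
      uint32_t lo = vgpr[a] + (uint32_t) b;
      uint32_t carry = lo < vgpr[a];    /* models the SCC/VCC carry out */
      vgpr[dst] = lo;                   /* clobbers vgpr[a + 1] when dst == a + 1 */
      vgpr[dst + 1] = vgpr[a + 1] + (uint32_t) (b >> 32) + carry;
    }

    int
    main (void)
    {
      vgpr[0] = 1;                      /* input pair v[0:1] = 0x200000001 */
      vgpr[1] = 2;
      add64_split (1, 0, 0);            /* v[1:2] = v[0:1] + 0, partial overlap */
      printf ("high:low = %" PRIx32 ":%" PRIx32 "\n", vgpr[2], vgpr[1]);
      /* Prints 1:1 instead of the correct 2:1: the low-part write destroyed
         the input's high word.  If pairs must start on an even register,
         dst and a are both even, so dst == a + 1 can never happen.  */
      return 0;
    }

Two even-aligned pairs are either the same pair or fully disjoint, which is
exactly the property the new gcn_hard_regno_mode_ok test below enforces.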
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) (clobber (reg:DI VCC_REG))] "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" @@ -1278,11 +1276,11 @@ (set_attr "length" "8")]) (define_insn_and_split "addv64di3_zext" - [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v") + [(set (match_operand:V64DI 0 "register_operand" "= v, v") (plus:V64DI (zero_extend:V64DI - (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB, vA, vB")) - (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA,0vDb,0vDA"))) + (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB")) + (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA"))) (clobber (reg:DI VCC_REG))] "" "#" @@ -1306,15 +1304,14 @@ (set_attr "length" "8")]) (define_insn_and_split "addv64di3_zext_exec" - [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v") + [(set (match_operand:V64DI 0 "register_operand" "= v, v") (vec_merge:V64DI (plus:V64DI (zero_extend:V64DI - (match_operand:V64SI 1 "gcn_alu_operand" "0vA, vA,0vB, vB")) - (match_operand:V64DI 2 "gcn_alu_operand" "vDb,0vDb,vDA,0vDA")) - (match_operand:V64DI 3 "gcn_register_or_unspec_operand" - " U0, U0, U0, U0") - (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e"))) + (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB")) + (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA")) + (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) (clobber (reg:DI VCC_REG))] "" "#" @@ -1343,12 +1340,12 @@ (set_attr "length" "8")]) (define_insn_and_split "addv64di3_zext_dup" - [(set (match_operand:V64DI 0 "register_operand" "= &v, &v") + [(set (match_operand:V64DI 0 "register_operand" "= v, v") (plus:V64DI (zero_extend:V64DI (vec_duplicate:V64SI - (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv"))) - (match_operand:V64DI 2 "gcn_alu_operand" "vDA0,vDb0"))) + (match_operand:SI 1 "gcn_alu_operand" "BSv,ASv"))) + (match_operand:V64DI 2 "gcn_alu_operand" "vDA,vDb"))) (clobber (reg:DI VCC_REG))] "" "#" @@ -1372,15 +1369,15 @@ (set_attr "length" "8")]) (define_insn_and_split "addv64di3_zext_dup_exec" - [(set (match_operand:V64DI 0 "register_operand" "= &v, &v") + [(set (match_operand:V64DI 0 "register_operand" "= v, v") (vec_merge:V64DI (plus:V64DI (zero_extend:V64DI (vec_duplicate:V64SI - (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv"))) - (match_operand:V64DI 2 "gcn_alu_operand" "vDb0,vDA0")) - (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0") - (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) + (match_operand:SI 1 "gcn_alu_operand" "ASv,BSv"))) + (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA")) + (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) (clobber (reg:DI VCC_REG))] "" "#" @@ -1409,7 +1406,7 @@ (set_attr "length" "8")]) (define_insn_and_split "addv64di3_zext_dup2" - [(set (match_operand:V64DI 0 "register_operand" "= &v") + [(set (match_operand:V64DI 0 "register_operand" "= v") (plus:V64DI (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA")) (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv")))) @@ -1435,7 +1432,7 @@ (set_attr "length" "8")]) (define_insn_and_split "addv64di3_zext_dup2_exec" - [(set (match_operand:V64DI 0 "register_operand" "=&v") + [(set (match_operand:V64DI 0 "register_operand" "= v") (vec_merge:V64DI (plus:V64DI (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" @@ -1472,7 +1469,7 @@ (set_attr "length" "8")]) (define_insn_and_split 
"addv64di3_sext_dup2" - [(set (match_operand:V64DI 0 "register_operand" "=&v") + [(set (match_operand:V64DI 0 "register_operand" "= v") (plus:V64DI (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA")) (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))) @@ -1500,7 +1497,7 @@ (set_attr "length" "8")]) (define_insn_and_split "addv64di3_sext_dup2_exec" - [(set (match_operand:V64DI 0 "register_operand" "=&v") + [(set (match_operand:V64DI 0 "register_operand" "= v") (vec_merge:V64DI (plus:V64DI (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" @@ -1907,10 +1904,10 @@ (set_attr "length" "8,8")]) (define_insn_and_split "v64di3" - [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD") + [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD") (bitop:V64DI - (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD") - (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))] + (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD") + (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))] "" "@ # @@ -1932,7 +1929,7 @@ (set_attr "length" "16,8")]) (define_insn_and_split "v64di3_exec" - [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD") + [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD") (vec_merge:V64DI (bitop:V64DI (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD") @@ -2963,11 +2960,11 @@ (set_attr "length" "8")]) (define_insn_and_split "*_dpp_shr_v64di" - [(set (match_operand:V64DI 0 "register_operand" "=&v") + [(set (match_operand:V64DI 0 "register_operand" "=v") (unspec:V64DI - [(match_operand:V64DI 1 "register_operand" "v0") - (match_operand:V64DI 2 "register_operand" "v0") - (match_operand:SI 3 "const_int_operand" "n")] + [(match_operand:V64DI 1 "register_operand" "v") + (match_operand:V64DI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] REDUC_2REG_UNSPEC))] "" "#" @@ -3029,11 +3026,11 @@ (set_attr "length" "8")]) (define_insn_and_split "*plus_carry_dpp_shr_v64di" - [(set (match_operand:V64DI 0 "register_operand" "=&v") + [(set (match_operand:V64DI 0 "register_operand" "=v") (unspec:V64DI - [(match_operand:V64DI 1 "register_operand" "v0") - (match_operand:V64DI 2 "register_operand" "v0") - (match_operand:SI 3 "const_int_operand" "n")] + [(match_operand:V64DI 1 "register_operand" "v") + (match_operand:V64DI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] "" diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index 0b80ee299bf..e6ac3796a58 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -458,7 +458,15 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode) || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode)) || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode)); if (VGPR_REGNO_P (regno)) - return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode) + /* Vector instructions do not care about the alignment of register + pairs, but where there is no 64-bit instruction, many of the + define_split do not work if the input and output registers partially + overlap. We tried to fix this with early clobber and match + constraints, but it was bug prone, added complexity, and conflicts + with the 'U0' constraints on vec_merge. + Therefore, we restrict ourselved to aligned registers. */ + return (vgpr_1reg_mode_p (mode) + || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode)) /* TImode is used by DImode compare_and_swap. 
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index a4705361d4a..b527d9a7a8b 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1065,22 +1065,16 @@
 ; through some RTL optimisation passes, and means the CC reg we set isn't
 ; dependent on the constraint alternative (which doesn't seem to work well).
 
-; There's an early clobber in the case where "v[0:1]=v[1:2]+?" but
-; "v[0:1]=v[0:1]+?" is fine (as is "v[1:2]=v[0:1]+?", but that's trickier).
-
 ; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
 ; used as an operand due to the read of VCC, so we restrict constants to the
 ; inlinable range for that alternative.
 
 (define_insn_and_split "adddi3"
-  [(set (match_operand:DI 0 "register_operand"
-			  "=&Sg,&Sg,&Sg,&Sg,&v,&v,&v,&v")
-	(plus:DI (match_operand:DI 1 "register_operand"
-			  "  Sg,  0,  0, Sg, v, 0, 0, v")
-		 (match_operand:DI 2 "nonmemory_operand"
-			  "   0,SgB,  0,SgB, 0,vA, 0,vA")))
-   (clobber (match_scratch:BI 3 "= cs, cs, cs, cs, X, X, X, X"))
-   (clobber (match_scratch:DI 4 "=  X,  X,  X,  X,cV,cV,cV,cV"))]
+  [(set (match_operand:DI 0 "register_operand"		  "=Sg,  v")
+	(plus:DI (match_operand:DI 1 "register_operand"	  " Sg,  v")
+		 (match_operand:DI 2 "nonmemory_operand" "SgB, vA")))
+   (clobber (match_scratch:BI 3			  "=cs,  X"))
+   (clobber (match_scratch:DI 4			  "= X, cV"))]
   ""
   "#"
   "&& reload_completed"
@@ -1109,7 +1103,7 @@
 			  cc));
     DONE;
   }
-  [(set_attr "type" "mult,mult,mult,mult,vmult,vmult,vmult,vmult")
+  [(set_attr "type" "mult,vmult")
   (set_attr "length" "8")])
 
 (define_expand "adddi3_scc"
@@ -1196,11 +1190,14 @@
 ; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
 ; Note that it is not safe to save/clobber/restore SCC because doing so will
 ; break data-flow analysis, so this must use vector registers.
+;
+; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
+; on testcase pr54713-2.c with -O0.  It's only an optimization hint anyway.
 
 (define_insn "addptrdi3"
-  [(set (match_operand:DI 0 "register_operand"		 "= &v")
-	(plus:DI (match_operand:DI 1 "register_operand"	 "  v0")
-		 (match_operand:DI 2 "nonmemory_operand" "vDA0")))]
+  [(set (match_operand:DI 0 "register_operand"		 "=  v")
+	(plus:DI (match_operand:DI 1 "register_operand"	 "  v0")
+		 (match_operand:DI 2 "nonmemory_operand" " vDA")))]
  ""
  {
    rtx new_operands[4] = { operands[0], operands[1], operands[2],
@@ -1470,15 +1467,14 @@
 (define_code_iterator vec_and_scalar64_com [and ior xor])
 
 (define_insn_and_split "<expander>di3"
-  [(set (match_operand:DI 0 "register_operand"  "= Sg,   &v,   &v")
+  [(set (match_operand:DI 0 "register_operand"  "= Sg,    v")
 	(vec_and_scalar64_com:DI
-	  (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB,vSvDB")
-	  (match_operand:DI 2 "gcn_alu_operand" " SgC,    v,    0")))
-   (clobber (match_scratch:BI 3			 "= cs,    X,    X"))]
+	  (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB")
+	  (match_operand:DI 2 "gcn_alu_operand" " SgC,    v")))
+   (clobber (match_scratch:BI 3			 "= cs,    X"))]
   ""
   "@
   s_<mnemonic>0\t%0, %1, %2
-  #
   #"
  "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 4)
@@ -1495,7 +1491,7 @@
    operands[8] = gcn_operand_part (DImode, operands[1], 1);
    operands[9] = gcn_operand_part (DImode, operands[2], 1);
  }
-  [(set_attr "type" "sop2,vop2,vop2")
+  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8")])
 
 (define_insn "<expander>di3"
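For reference, the adddi3 split implements an ordinary two-word
add-with-carry, with the low-part carry-out living in SCC (scalar) or VCC
(vector), which is why the pattern clobbers a BI or DI scratch.  A C model
of those semantics (function and variable names are mine, not the
backend's; the mnemonics in the comments are the instructions the split
ultimately emits):

    /* C model of what the adddi3 split expands to: a low-part add whose
       carry-out feeds a high-part add-with-carry.  */
    #include <assert.h>
    #include <stdint.h>

    static uint64_t
    add64_via_split (uint64_t x, uint64_t y)
    {
      uint32_t lo = (uint32_t) x + (uint32_t) y;	/* s_add_u32 / v_add  */
      uint32_t carry = lo < (uint32_t) x;		/* carry out -> SCC/VCC */
      uint32_t hi = (uint32_t) (x >> 32) + (uint32_t) (y >> 32) + carry;
							/* s_addc_u32 / v_addc_u32 */
      return ((uint64_t) hi << 32) | lo;
    }

    int
    main (void)
    {
      assert (add64_via_split (0xffffffffu, 1) == 0x100000000u);
      assert (add64_via_split (UINT64_MAX, 1) == 0);
      return 0;
    }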