public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ibm/heads/perf)] amdgcn: Align VGPR pairs
@ 2020-03-19 5:46 Jiu Fu Guo
0 siblings, 0 replies; only message in thread
From: Jiu Fu Guo @ 2020-03-19 5:46 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:3abfd4f3410af27060a11b8adaa9836d5a77eae1
commit 3abfd4f3410af27060a11b8adaa9836d5a77eae1
Author: Andrew Stubbs <ams@codesourcery.com>
Date: Fri Feb 21 11:07:55 2020 +0000
amdgcn: Align VGPR pairs
Aligning the registers is not needed by the architecture, but doing so
allows us to remove the requirement for bug-prone early-clobber
constraints from many split patterns (and avoid adding more in future).
2020-02-21 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
* config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
(addv64di3_exec): Likewise.
(subv64di3): Likewise.
(subv64di3_exec): Likewise.
(addv64di3_zext): Likewise.
(addv64di3_zext_exec): Likewise.
(addv64di3_zext_dup): Likewise.
(addv64di3_zext_dup_exec): Likewise.
(addv64di3_zext_dup2): Likewise.
(addv64di3_zext_dup2_exec): Likewise.
(addv64di3_sext_dup2): Likewise.
(addv64di3_sext_dup2_exec): Likewise.
(<expander>v64di3): Likewise.
(<expander>v64di3_exec): Likewise.
(*<reduc_op>_dpp_shr_v64di): Likewise.
(*plus_carry_dpp_shr_v64di): Likewise.
* config/gcn/gcn.md (adddi3): Likewise.
(addptrdi3): Likewise.
(<expander>di3): Likewise.
Diff:
---
gcc/ChangeLog | 23 +++++++++++
gcc/config/gcn/gcn-valu.md | 101 ++++++++++++++++++++++-----------------------
gcc/config/gcn/gcn.c | 10 ++++-
gcc/config/gcn/gcn.md | 38 ++++++++---------
4 files changed, 98 insertions(+), 74 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 543e95df1e0..1845563d0bb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2020-02-21 Andrew Stubbs <ams@codesourcery.com>
+
+ * config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
+ * config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
+ (addv64di3_exec): Likewise.
+ (subv64di3): Likewise.
+ (subv64di3_exec): Likewise.
+ (addv64di3_zext): Likewise.
+ (addv64di3_zext_exec): Likewise.
+ (addv64di3_zext_dup): Likewise.
+ (addv64di3_zext_dup_exec): Likewise.
+ (addv64di3_zext_dup2): Likewise.
+ (addv64di3_zext_dup2_exec): Likewise.
+ (addv64di3_sext_dup2): Likewise.
+ (addv64di3_sext_dup2_exec): Likewise.
+ (<expander>v64di3): Likewise.
+ (<expander>v64di3_exec): Likewise.
+ (*<reduc_op>_dpp_shr_v64di): Likewise.
+ (*plus_carry_dpp_shr_v64di): Likewise.
+ * config/gcn/gcn.md (adddi3): Likewise.
+ (addptrdi3): Likewise.
+ (<expander>di3): Likewise.
+
2020-02-21 Andrew Stubbs <ams@codesourcery.com>
* config/gcn/gcn-valu.md (vec_seriesv64di): Use gen_vec_duplicatev64di.
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index edac362fd46..d5e6d0a625a 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1171,10 +1171,10 @@
(set_attr "length" "4,8,4,8")])
(define_insn_and_split "addv64di3"
- [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
(plus:V64DI
- (match_operand:V64DI 1 "register_operand" "%vDb,vDb0")
- (match_operand:V64DI 2 "gcn_alu_operand" "vDb0, vDb")))
+ (match_operand:V64DI 1 "register_operand" "%vDb")
+ (match_operand:V64DI 2 "gcn_alu_operand" " vDb")))
(clobber (reg:DI VCC_REG))]
""
"#"
@@ -1200,14 +1200,13 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_exec"
- [(set (match_operand:V64DI 0 "register_operand" "= &v, &v, &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
(vec_merge:V64DI
(plus:V64DI
- (match_operand:V64DI 1 "register_operand" "%vDb,vDb0,vDb")
- (match_operand:V64DI 2 "gcn_alu_operand" "vDb0, vDb,vDb"))
- (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
- " U, U, 0")
- (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e")))
+ (match_operand:V64DI 1 "register_operand" "%vDb")
+ (match_operand:V64DI 2 "gcn_alu_operand" " vDb"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
(clobber (reg:DI VCC_REG))]
""
"#"
@@ -1238,10 +1237,10 @@
(set_attr "length" "8")])
(define_insn_and_split "subv64di3"
- [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
- (minus:V64DI
- (match_operand:V64DI 1 "gcn_alu_operand" "vDb,vDb0, v, v0")
- (match_operand:V64DI 2 "gcn_alu_operand" " v0, v,vDb0,vDb")))
+ [(set (match_operand:V64DI 0 "register_operand" "= v, v")
+ (minus:V64DI
+ (match_operand:V64DI 1 "gcn_alu_operand" "vDb, v")
+ (match_operand:V64DI 2 "gcn_alu_operand" " v,vDb")))
(clobber (reg:DI VCC_REG))]
""
"#"
@@ -1267,14 +1266,13 @@
(set_attr "length" "8")])
(define_insn_and_split "subv64di3_exec"
- [(set (match_operand:V64DI 0 "register_operand" "= &v, &v, &v, &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v, v")
(vec_merge:V64DI
(minus:V64DI
- (match_operand:V64DI 1 "gcn_alu_operand" "vSvB,vSvB0, v, v0")
- (match_operand:V64DI 2 "gcn_alu_operand" " v0, v,vSvB0,vSvB"))
- (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
- " U0, U0, U0, U0")
- (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e")))
+ (match_operand:V64DI 1 "gcn_alu_operand" "vSvB, v")
+ (match_operand:V64DI 2 "gcn_alu_operand" " v,vSvB"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
(clobber (reg:DI VCC_REG))]
"register_operand (operands[1], VOIDmode)
|| register_operand (operands[2], VOIDmode)"
@@ -1306,11 +1304,11 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext"
- [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v, v")
(plus:V64DI
(zero_extend:V64DI
- (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB, vA, vB"))
- (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA,0vDb,0vDA")))
+ (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA")))
(clobber (reg:DI VCC_REG))]
""
"#"
@@ -1334,15 +1332,14 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_exec"
- [(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v, v")
(vec_merge:V64DI
(plus:V64DI
(zero_extend:V64DI
- (match_operand:V64SI 1 "gcn_alu_operand" "0vA, vA,0vB, vB"))
- (match_operand:V64DI 2 "gcn_alu_operand" "vDb,0vDb,vDA,0vDA"))
- (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
- " U0, U0, U0, U0")
- (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e")))
+ (match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
(clobber (reg:DI VCC_REG))]
""
"#"
@@ -1371,12 +1368,12 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup"
- [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v, v")
(plus:V64DI
(zero_extend:V64DI
(vec_duplicate:V64SI
- (match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
- (match_operand:V64DI 2 "gcn_alu_operand" "vDA0,vDb0")))
+ (match_operand:SI 1 "gcn_alu_operand" "BSv,ASv")))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDA,vDb")))
(clobber (reg:DI VCC_REG))]
""
"#"
@@ -1400,15 +1397,15 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup_exec"
- [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v, v")
(vec_merge:V64DI
(plus:V64DI
(zero_extend:V64DI
(vec_duplicate:V64SI
- (match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
- (match_operand:V64DI 2 "gcn_alu_operand" "vDb0,vDA0"))
- (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
- (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
+ (match_operand:SI 1 "gcn_alu_operand" "ASv,BSv")))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
(clobber (reg:DI VCC_REG))]
""
"#"
@@ -1437,7 +1434,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup2"
- [(set (match_operand:V64DI 0 "register_operand" "= &v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
(plus:V64DI
(zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
(vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv"))))
@@ -1463,7 +1460,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup2_exec"
- [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
(vec_merge:V64DI
(plus:V64DI
(zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
@@ -1500,7 +1497,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_sext_dup2"
- [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
(plus:V64DI
(sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
(vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
@@ -1528,7 +1525,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_sext_dup2_exec"
- [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
(vec_merge:V64DI
(plus:V64DI
(sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
@@ -1935,10 +1932,10 @@
(set_attr "length" "8,8")])
(define_insn_and_split "<expander>v64di3"
- [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+ [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
(bitop:V64DI
- (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
- (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
+ (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
+ (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
""
"@
#
@@ -1960,7 +1957,7 @@
(set_attr "length" "16,8")])
(define_insn_and_split "<expander>v64di3_exec"
- [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+ [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
(vec_merge:V64DI
(bitop:V64DI
(match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
@@ -2999,11 +2996,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
- [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ [(set (match_operand:V64DI 0 "register_operand" "=v")
(unspec:V64DI
- [(match_operand:V64DI 1 "register_operand" "v0")
- (match_operand:V64DI 2 "register_operand" "v0")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(match_operand:V64DI 1 "register_operand" "v")
+ (match_operand:V64DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_2REG_UNSPEC))]
""
"#"
@@ -3065,11 +3062,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*plus_carry_dpp_shr_v64di"
- [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ [(set (match_operand:V64DI 0 "register_operand" "=v")
(unspec:V64DI
- [(match_operand:V64DI 1 "register_operand" "v0")
- (match_operand:V64DI 2 "register_operand" "v0")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(match_operand:V64DI 1 "register_operand" "v")
+ (match_operand:V64DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 5bcad7d1d41..531f47aff4c 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -458,7 +458,15 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|| (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
|| (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
if (VGPR_REGNO_P (regno))
- return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode)
+ /* Vector instructions do not care about the alignment of register
+ pairs, but where there is no 64-bit instruction, many of the
+ define_splits do not work if the input and output registers partially
+ overlap. We tried to fix this with early clobber and match
+ constraints, but it was bug prone, added complexity, and conflicts
+ with the 'U0' constraints on vec_merge.
+ Therefore, we restrict ourselves to aligned registers. */
+ return (vgpr_1reg_mode_p (mode)
+ || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
/* TImode is used by DImode compare_and_swap. */
|| mode == TImode);
return false;
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index a4705361d4a..b527d9a7a8b 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1065,22 +1065,16 @@
; through some RTL optimisation passes, and means the CC reg we set isn't
; dependent on the constraint alternative (which doesn't seem to work well).
-; There's an early clobber in the case where "v[0:1]=v[1:2]+?" but
-; "v[0:1]=v[0:1]+?" is fine (as is "v[1:2]=v[0:1]+?", but that's trickier).
-
; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
; used as an operand due to the read of VCC, so we restrict constants to the
; inlinable range for that alternative.
(define_insn_and_split "adddi3"
- [(set (match_operand:DI 0 "register_operand"
- "=&Sg,&Sg,&Sg,&Sg,&v,&v,&v,&v")
- (plus:DI (match_operand:DI 1 "register_operand"
- " Sg, 0, 0, Sg, v, 0, 0, v")
- (match_operand:DI 2 "nonmemory_operand"
- " 0,SgB, 0,SgB, 0,vA, 0,vA")))
- (clobber (match_scratch:BI 3 "= cs, cs, cs, cs, X, X, X, X"))
- (clobber (match_scratch:DI 4 "= X, X, X, X,cV,cV,cV,cV"))]
+ [(set (match_operand:DI 0 "register_operand" "=Sg, v")
+ (plus:DI (match_operand:DI 1 "register_operand" " Sg, v")
+ (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
+ (clobber (match_scratch:BI 3 "=cs, X"))
+ (clobber (match_scratch:DI 4 "= X,cV"))]
""
"#"
"&& reload_completed"
@@ -1109,7 +1103,7 @@
cc));
DONE;
}
- [(set_attr "type" "mult,mult,mult,mult,vmult,vmult,vmult,vmult")
+ [(set_attr "type" "mult,vmult")
(set_attr "length" "8")])
(define_expand "adddi3_scc"
@@ -1196,11 +1190,14 @@
; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
; Note that it is not safe to save/clobber/restore SCC because doing so will
; break data-flow analysis, so this must use vector registers.
+;
+; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
+; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway.
(define_insn "addptrdi3"
- [(set (match_operand:DI 0 "register_operand" "= &v")
- (plus:DI (match_operand:DI 1 "register_operand" " v0")
- (match_operand:DI 2 "nonmemory_operand" "vDA0")))]
+ [(set (match_operand:DI 0 "register_operand" "= v")
+ (plus:DI (match_operand:DI 1 "register_operand" " v0")
+ (match_operand:DI 2 "nonmemory_operand" "vDA")))]
""
{
rtx new_operands[4] = { operands[0], operands[1], operands[2],
@@ -1470,15 +1467,14 @@
(define_code_iterator vec_and_scalar64_com [and ior xor])
(define_insn_and_split "<expander>di3"
- [(set (match_operand:DI 0 "register_operand" "= Sg, &v, &v")
+ [(set (match_operand:DI 0 "register_operand" "= Sg, v")
(vec_and_scalar64_com:DI
- (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB,vSvDB")
- (match_operand:DI 2 "gcn_alu_operand" " SgC, v, 0")))
- (clobber (match_scratch:BI 3 "= cs, X, X"))]
+ (match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB")
+ (match_operand:DI 2 "gcn_alu_operand" " SgC, v")))
+ (clobber (match_scratch:BI 3 "= cs, X"))]
""
"@
s_<mnemonic>0\t%0, %1, %2
- #
#"
"reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
[(parallel [(set (match_dup 4)
@@ -1495,7 +1491,7 @@
operands[8] = gcn_operand_part (DImode, operands[1], 1);
operands[9] = gcn_operand_part (DImode, operands[2], 1);
}
- [(set_attr "type" "sop2,vop2,vop2")
+ [(set_attr "type" "sop2,vop2")
(set_attr "length" "8")])
(define_insn "<expander>di3"
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2020-03-19 5:46 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-19 5:46 [gcc(refs/vendors/ibm/heads/perf)] amdgcn: Align VGPR pairs Jiu Fu Guo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).