From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1464) id BBF063858002; Fri, 20 May 2022 23:00:49 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org BBF063858002 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Peter Bergner To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-8406] rs6000: Prefer assigning the MMA vector operands to altivec registers [PR105556] X-Act-Checkin: gcc X-Git-Author: Peter Bergner X-Git-Refname: refs/heads/releases/gcc-12 X-Git-Oldrev: fdf50499a40399a48ac5e5d521ef93ed302be157 X-Git-Newrev: c83d78585078d6918853fbe0f74a3a78e88e3e32 Message-Id: <20220520230049.BBF063858002@sourceware.org> Date: Fri, 20 May 2022 23:00:49 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 20 May 2022 23:00:49 -0000 https://gcc.gnu.org/g:c83d78585078d6918853fbe0f74a3a78e88e3e32 commit r12-8406-gc83d78585078d6918853fbe0f74a3a78e88e3e32 Author: Peter Bergner Date: Tue May 17 21:09:29 2022 -0500 rs6000: Prefer assigning the MMA vector operands to altivec registers [PR105556] When optimizing the DGEMM kernel in OpenBLAS to use MMA, the MMA code uses all 8 accumulators, which overlap all vs0-vs31 vector registers. Current trunk assigns one of the normal vector inputs to one of the MMA instructions, which forces us to spill one of the accumulators to memory, leading to poor performance. The solution here is to replace the "wa" constraints for the vector input operands in the MMA instruction patterns with "v,?wa" so that we prefer using the altivec registers vs32-vs63 over the vs0-vs31 registers. 2022-05-17 Peter Bergner Segher Boessenkool gcc/ PR target/105556 * config/rs6000/mma.md (mma_, mma_, mma_, mma_, mma_, mma_, mma_, mma_, mma_, mma_, mma_, mma_, mma_, mma_): Replace "wa" constraints with "v,?wa". Update other operands accordingly. (cherry picked from commit c6e36f05fbb081abb068958d8900ad34b303a70b) Diff: --- gcc/config/rs6000/mma.md | 150 +++++++++++++++++++++++------------------------ 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 907c9d6d516..a183b6a168a 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -490,50 +490,50 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] "TARGET_MMA" " %A0,%x1,%x2" [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] "TARGET_MMA" " %A0,%x2,%x3" [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] "TARGET_MMA" " %A0,%x1,%x2" [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:OO 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:OO 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] "TARGET_MMA" " %A0,%x2,%x3" [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "u8bit_cint_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "u8bit_cint_operand" "n,n")] MMA_VVI4I4I8))] "TARGET_MMA" " %A0,%x1,%x2,%3,%4,%5" @@ -541,13 +541,13 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n") - (match_operand:SI 6 "u8bit_cint_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "u8bit_cint_operand" "n,n")] MMA_AVVI4I4I8))] "TARGET_MMA" " %A0,%x2,%x3,%4,%5,%6" @@ -555,12 +555,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n")] MMA_VVI4I4I2))] "TARGET_MMA" " %A0,%x1,%x2,%3,%4,%5" @@ -568,13 +568,13 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n") - (match_operand:SI 6 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "const_0_to_3_operand" "n,n")] MMA_AVVI4I4I2))] "TARGET_MMA" " %A0,%x2,%x3,%4,%5,%6" @@ -582,11 +582,11 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n")] MMA_VVI4I4))] "TARGET_MMA" " %A0,%x1,%x2,%3,%4" @@ -594,12 +594,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n")] MMA_AVVI4I4))] "TARGET_MMA" " %A0,%x2,%x3,%4,%5" @@ -607,11 +607,11 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_3_operand" "n,n")] MMA_PVI4I2))] "TARGET_MMA" " %A0,%x1,%x2,%3,%4" @@ -619,12 +619,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:OO 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:OO 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n")] MMA_APVI4I2))] "TARGET_MMA" " %A0,%x2,%x3,%4,%5" @@ -632,12 +632,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n")] MMA_VVI4I4I4))] "TARGET_MMA" " %A0,%x1,%x2,%3,%4,%5" @@ -645,13 +645,13 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n") - (match_operand:SI 6 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "const_0_to_15_operand" "n,n")] MMA_AVVI4I4I4))] "TARGET_MMA" " %A0,%x2,%x3,%4,%5,%6"