[gcc(refs/users/meissner/heads/work079)] Optimize multiply/add of DImode extended to TImode.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work079)] Optimize multiply/add of DImode extended to TImode.
Date: Tue,  1 Mar 2022 06:34:09 +0000 (GMT)	[thread overview]
Message-ID: <20220301063409.6865A3858C78@sourceware.org> (raw)

https://gcc.gnu.org/g:7497ffea286bbe13cbe846a314761e45b5a024e8

commit 7497ffea286bbe13cbe846a314761e45b5a024e8
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 1 01:33:51 2022 -0500

    Optimize multiply/add of DImode extended to TImode.
    
    On power9 and power10 systems, we have instructions that multiply and
    add 64-bit integers extended to 128-bit integers, producing 128-bit
    results.  This patch adds support to generate these instructions.
    
    Previously we had define_expands to handle conversion of the 64-bit extend
    to 128-bit and multiply.  This patch changes these define_expands to
    define_insn_and_split and then it provides combiner patterns to generate
    these multiply/add instructions.
    
    2022-03-01   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            PR target/103109
            * config/rs6000/rs6000.md (su_int32): New code attribute.
            (<u>mul<mode><dmode>3): Convert into define_insn_and_split.
            (maddld<mode>4): Add generator function.
            (<u>mulditi3_<u>adddi3): New insn.
            (<u>mulditi3_add_const): New insn.
            (addti3): Convert into define_insn_and_split.
            (subti3): Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.md | 160 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 143 insertions(+), 17 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fdfbc6566a5..b5fc1855c35 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -676,6 +676,9 @@
 		       (float		"")
 		       (unsigned_float	"uns")])
 
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+			    (zero_extend "c32bit_cint_operand")])
+
 ; Various instructions that come in SI and DI forms.
 ; A generic w/d attribute, for things like cmpw/cmpd.
 (define_mode_attr wd [(QI    "b")
@@ -3199,13 +3202,16 @@
   "mulhw<u> %0,%1,%2"
   [(set_attr "type" "mul")])
 
-(define_expand "<u>mul<mode><dmode>3"
-  [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+  [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
 	(mult:<DMODE> (any_extend:<DMODE>
-			(match_operand:GPR 1 "gpc_reg_operand"))
+		       (match_operand:GPR 1 "gpc_reg_operand" "r"))
 		      (any_extend:<DMODE>
-			(match_operand:GPR 2 "gpc_reg_operand"))))]
+		       (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
   "!(<MODE>mode == SImode && TARGET_POWERPC64)"
+  "#"
+  "&& 1"
+  [(pc)]
 {
   rtx l = gen_reg_rtx (<MODE>mode);
   rtx h = gen_reg_rtx (<MODE>mode);
@@ -3214,9 +3220,10 @@
   emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
   emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
   DONE;
-})
+}
+  [(set_attr "length" "8")])
 
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 	(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
 			    (match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3225,6 +3232,113 @@
   "maddld %0,%1,%2,%3"
   [(set_attr "type" "mul")])
 
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+	(plus:TI
+	 (mult:TI
+	  (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+	  (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+	 (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+  "TARGET_MADDLD && TARGET_POWERPC64"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  rtx dest = operands[0];
+  rtx dest_hi = gen_highpart (DImode, dest);
+  rtx dest_lo = gen_lowpart (DImode, dest);
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+  rtx tmp_hi, tmp_lo;
+
+  if (can_create_pseudo_p ())
+    {
+      tmp_hi = gen_reg_rtx (DImode);
+      tmp_lo = gen_reg_rtx (DImode);
+    }
+  else
+    {
+      tmp_hi = dest_hi;
+      tmp_lo = dest_lo;
+    }
+
+  emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+  emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+  if (can_create_pseudo_p ())
+    {
+      emit_move_insn (dest_hi, tmp_hi);
+      emit_move_insn (dest_lo, tmp_lo);
+    }
+  DONE;
+}
+  [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant.  We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze.  We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+	(plus:TI
+	 (mult:TI
+	  (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+	  (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+	 (match_operand 3 "<su_int32>" "r")))]
+  "TARGET_MADDLD && TARGET_POWERPC64
+"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  rtx dest = operands[0];
+  rtx dest_hi = gen_highpart (DImode, dest);
+  rtx dest_lo = gen_lowpart (DImode, dest);
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = force_reg (DImode, operands[3]);
+  rtx tmp_hi, tmp_lo;
+
+  if (can_create_pseudo_p ())
+    {
+      tmp_hi = gen_reg_rtx (DImode);
+      tmp_lo = gen_reg_rtx (DImode);
+    }
+  else
+    {
+      tmp_hi = dest_hi;
+      tmp_lo = dest_lo;
+    }
+
+  emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+  emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+  if (can_create_pseudo_p ())
+    {
+      emit_move_insn (dest_hi, tmp_hi);
+      emit_move_insn (dest_lo, tmp_lo);
+    }
+  DONE;
+}
+  [(set_attr "length" "8")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+	(truncate:DI
+	 (lshiftrt:TI
+	  (plus:TI
+	   (mult:TI
+	    (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+	    (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+	   (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+	  (const_int 64))))]
+  "TARGET_MADDLD && TARGET_POWERPC64"
+  "maddhd<u> %0,%1,%2,%3"
+  [(set_attr "type" "mul")
+   (set_attr "size" "64")])
+
 (define_insn "udiv<mode>3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
         (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
@@ -7029,12 +7143,19 @@
 ;; allocator from allocating registers that overlap with the inputs
 ;; (for example, having an input in 7,8 and an output in 6,7).  We
 ;; also allow for the output being the same as one of the inputs.
-
-(define_expand "addti3"
-  [(set (match_operand:TI 0 "gpc_reg_operand")
-	(plus:TI (match_operand:TI 1 "gpc_reg_operand")
-		 (match_operand:TI 2 "reg_or_short_operand")))]
+;;
+;; Addti3/subti3 are define_insn_and_splits instead of define_expand, to allow
+;; for combine to make things like multiply and add with extend operations.
+
+(define_insn_and_split "addti3"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r")
+	(plus:TI (match_operand:TI 1 "gpc_reg_operand" "r,0,r")
+		 (match_operand:TI 2 "reg_or_short_operand" "rn,r,0")))
+   (clobber (reg:DI CA_REGNO))]
   "TARGET_64BIT"
+  "#"
+  "&& 1"
+  [(pc)]
 {
   rtx lo0 = gen_lowpart (DImode, operands[0]);
   rtx lo1 = gen_lowpart (DImode, operands[1]);
@@ -7051,13 +7172,17 @@
   emit_insn (gen_adddi3_carry (lo0, lo1, lo2));
   emit_insn (gen_adddi3_carry_in (hi0, hi1, hi2));
   DONE;
-})
+}
+  [(set_attr "length" "8")])
 
-(define_expand "subti3"
-  [(set (match_operand:TI 0 "gpc_reg_operand")
-	(minus:TI (match_operand:TI 1 "reg_or_short_operand")
-		  (match_operand:TI 2 "gpc_reg_operand")))]
+(define_insn_and_split "subti3"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,r,r")
+	(minus:TI (match_operand:TI 1 "reg_or_short_operand" "rn,0,r")
+		  (match_operand:TI 2 "gpc_reg_operand" "r,r,0")))]
   "TARGET_64BIT"
+  "#"
+  "&& 1"
+  [(pc)]
 {
   rtx lo0 = gen_lowpart (DImode, operands[0]);
   rtx lo1 = gen_lowpart (DImode, operands[1]);
@@ -7074,7 +7199,8 @@
   emit_insn (gen_subfdi3_carry (lo0, lo2, lo1));
   emit_insn (gen_subfdi3_carry_in (hi0, hi2, hi1));
   DONE;
-})
+}
+  [(set_attr "length" "8")])
 \f
 ;; 128-bit logical operations expanders

next             reply	other threads:[~2022-03-01  6:34 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-01  6:34 Michael Meissner [this message]
2022-03-02 18:12 Michael Meissner
2022-03-02 19:28 Michael Meissner
2022-03-03  2:01 Michael Meissner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220301063409.6865A3858C78@sourceware.org \
    --to=meissner@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).