public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: acsawdey@linux.ibm.com
To: gcc-patches@gcc.gnu.org
Cc: segher@kernel.crashing.org, wschmidt@linux.ibm.com,
	will_schmidt@vnet.ibm.com, Aaron Sawdey <acsawdey@linux.ibm.com>
Subject: [PATCH,rs6000 1/2] combine patterns for add-add fusion
Date: Mon, 26 Apr 2021 15:21:29 -0500	[thread overview]
Message-ID: <20210426202130.3882980-2-acsawdey@linux.ibm.com> (raw)
In-Reply-To: <20210426202130.3882980-1-acsawdey@linux.ibm.com>

From: Aaron Sawdey <acsawdey@linux.ibm.com>

This patch adds a function to genfusion.pl that generates two
more patterns, so that combine can fuse pairs of add instructions
and pairs of vaddudm instructions.

gcc/ChangeLog:

	* config/rs6000/genfusion.pl (gen_addadd): New function.
	* config/rs6000/fusion.md: Regenerate file.
	* config/rs6000/rs6000-cpus.def: Add
	OPTION_MASK_P10_FUSION_2ADD to masks.
	* config/rs6000/rs6000.c (rs6000_option_override_internal):
	Handle default value of OPTION_MASK_P10_FUSION_2ADD.
	* config/rs6000/rs6000.opt: Add -mpower10-fusion-2add.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/fusion-p10-addadd.c: New file.
---
 gcc/config/rs6000/fusion.md                   | 36 +++++++++++++++
 gcc/config/rs6000/genfusion.pl                | 44 +++++++++++++++++++
 gcc/config/rs6000/rs6000-cpus.def             |  4 +-
 gcc/config/rs6000/rs6000.c                    |  3 ++
 gcc/config/rs6000/rs6000.opt                  |  4 ++
 .../gcc.target/powerpc/fusion-p10-addadd.c    | 41 +++++++++++++++++
 6 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 6d71bc2df73..6dfe1fa4508 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -2658,3 +2658,39 @@ (define_insn "*fuse_vxor_vxor"
   [(set_attr "type" "fused_vector")
    (set_attr "cost" "6")
    (set_attr "length" "8")])
+
+;; add-add fusion pattern generated by gen_addadd
+(define_insn "*fuse_add_add"
+  [(set (match_operand:GPR 3 "gpc_reg_operand" "=0,1,&r,r")
+        (plus:GPR
+           (plus:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r")
+                     (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r"))
+           (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))
+   (clobber (match_scratch:GPR 4 "=X,X,X,&r"))]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
+  "@
+   add %3,%1,%0\;add %3,%3,%2
+   add %3,%1,%0\;add %3,%3,%2
+   add %3,%1,%0\;add %3,%3,%2
+   add %4,%1,%0\;add %3,%4,%2"
+  [(set_attr "type" "fuse_arithlog")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")])
+
+;; vaddudm-vaddudm fusion pattern generated by gen_addadd
+(define_insn "*fuse_vaddudm_vaddudm"
+  [(set (match_operand:V2DI 3 "altivec_register_operand" "=0,1,&v,v")
+        (plus:V2DI
+           (plus:V2DI (match_operand:V2DI 0 "altivec_register_operand" "v,v,v,v")
+                     (match_operand:V2DI 1 "altivec_register_operand" "%v,v,v,v"))
+           (match_operand:V2DI 2 "altivec_register_operand" "v,v,v,v")))
+   (clobber (match_scratch:V2DI 4 "=X,X,X,&v"))]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
+  "@
+   vaddudm %3,%1,%0\;vaddudm %3,%3,%2
+   vaddudm %3,%1,%0\;vaddudm %3,%3,%2
+   vaddudm %3,%1,%0\;vaddudm %3,%3,%2
+   vaddudm %4,%1,%0\;vaddudm %3,%4,%2"
+  [(set_attr "type" "fuse_vec")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")])
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index ce48fd94f95..8ed3c3617ec 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -240,8 +240,52 @@ EOF
   }
 }
 
+sub gen_addadd
+{
+    my ($kind, $vchr, $op, $ty, $mode, $pred, $constraint);
+  KIND: foreach $kind ('scalar','vector') {
+      if ( $kind eq 'vector' ) {
+	  $vchr = "v";
+	  $op = "vaddudm";
+	  $ty = "fuse_vec";
+	  $mode = "V2DI";
+	  $pred = "altivec_register_operand";
+	  $constraint = "v";
+      } else {
+	  $vchr = "";
+	  $op = "add";
+	  $ty = "fuse_arithlog";
+	  $mode = "GPR";
+	  $pred = "gpc_reg_operand";
+	  $constraint = "r";
+      }
+    my $c4 = "${constraint},${constraint},${constraint},${constraint}";
+    print <<"EOF";
+
+;; ${op}-${op} fusion pattern generated by gen_addadd
+(define_insn "*fuse_${op}_${op}"
+  [(set (match_operand:${mode} 3 "${pred}" "=0,1,&${constraint},${constraint}")
+        (plus:${mode}
+           (plus:${mode} (match_operand:${mode} 0 "${pred}" "${c4}")
+                     (match_operand:${mode} 1 "${pred}" "%${c4}"))
+           (match_operand:${mode} 2 "${pred}" "${c4}")))
+   (clobber (match_scratch:${mode} 4 "=X,X,X,&${constraint}"))]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
+  "@
+   ${op} %3,%1,%0\\;${op} %3,%3,%2
+   ${op} %3,%1,%0\\;${op} %3,%3,%2
+   ${op} %3,%1,%0\\;${op} %3,%3,%2
+   ${op} %4,%1,%0\\;${op} %3,%4,%2"
+  [(set_attr "type" "${ty}")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")])
+EOF
+  }
+}
+
 gen_ld_cmpi_p10();
 gen_2logical();
+gen_addadd();
 
 exit(0);
 
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index cbbb42c1b3a..d46a91dd11b 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -85,7 +85,8 @@
 				 | OTHER_POWER10_MASKS			\
 				 | OPTION_MASK_P10_FUSION		\
 				 | OPTION_MASK_P10_FUSION_LD_CMPI	\
-				 | OPTION_MASK_P10_FUSION_2LOGICAL)
+				 | OPTION_MASK_P10_FUSION_2LOGICAL	\
+				 | OPTION_MASK_P10_FUSION_2ADD)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -135,6 +136,7 @@
 				 | OPTION_MASK_P10_FUSION		\
 				 | OPTION_MASK_P10_FUSION_LD_CMPI	\
 				 | OPTION_MASK_P10_FUSION_2LOGICAL	\
+				 | OPTION_MASK_P10_FUSION_2ADD    	\
 				 | OPTION_MASK_HTM			\
 				 | OPTION_MASK_ISEL			\
 				 | OPTION_MASK_MFCRF			\
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 844fee88cf3..9488a54a1d7 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4467,6 +4467,9 @@ rs6000_option_override_internal (bool global_init_p)
   if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
     rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
 
+  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
+    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
+
   /* Turn off vector pair/mma options on non-power10 systems.  */
   else if (!TARGET_POWER10 && TARGET_MMA)
     {
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0dbdf753673..fc14325ed33 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -502,6 +502,10 @@ mpower10-fusion-2logical
 Target Undocumented Mask(P10_FUSION_2LOGICAL) Var(rs6000_isa_flags)
 Fuse certain integer operations together for better performance on power10.
 
+mpower10-fusion-2add
+Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags)
+Fuse certain add operations together for better performance on power10.
+
 mcrypto
 Target Mask(CRYPTO) Var(rs6000_isa_flags)
 Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c
new file mode 100644
index 00000000000..41d71dbf3cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c
@@ -0,0 +1,41 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-options "-mdejagnu-cpu=power10 -O3 -dp" } */
+
+long addadd0(long a, long b, long c)
+{
+  return a+b+c;
+}
+long addadd1(long a, long b, long c, long *t)
+{
+  long r=a+b+c;
+  *t = b;
+  return r;
+}
+long addadd2(long s, long a, long b, long c)
+{
+  return b+c+a;
+}
+
+typedef vector long vlong;
+vlong vaddadd(vlong a, vlong b, vlong c)
+{
+  return a+b+c;
+}
+vlong vaddadd1(vlong a, vlong b, vlong c, vlong *t)
+{
+  vlong r=a+b+c;
+  *t = b;
+  return r;
+}
+vlong vaddadd2(vlong s, vlong a, vlong b, vlong c)
+{
+  return a+b+c;
+}
+
+/* { dg-final { scan-assembler-times "fuse_add_add/0"                1 } } */
+/* { dg-final { scan-assembler-times "fuse_add_add/1"                1 } } */
+/* { dg-final { scan-assembler-times "fuse_add_add/2"                1 } } */
+/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0"        1 } } */
+/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/1"        1 } } */
+/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2"        1 } } */
-- 
2.27.0


  reply	other threads:[~2021-04-26 20:23 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-26 20:21 [PATCH,rs6000 0/2] p10 add-add and add-logical fusion series acsawdey
2021-04-26 20:21 ` acsawdey [this message]
2021-05-19 21:29   ` [PATCH,rs6000 1/2] combine patterns for add-add fusion Segher Boessenkool
2021-04-26 20:21 ` [PATCH,rs6000 2/2] Fusion patterns for add-logical/logical-add acsawdey
2021-05-19 22:15   ` Segher Boessenkool
2021-05-24 19:41     ` Aaron Sawdey
2021-05-11 13:50 ` [PATCH,rs6000 0/2] p10 add-add and add-logical fusion series Aaron Sawdey

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210426202130.3882980-2-acsawdey@linux.ibm.com \
    --to=acsawdey@linux.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=segher@kernel.crashing.org \
    --cc=will_schmidt@vnet.ibm.com \
    --cc=wschmidt@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).