From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1881) id 849D23850402; Mon, 14 Jun 2021 22:27:52 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 849D23850402 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Aaron Sawdey To: gcc-cvs@gcc.gnu.org Subject: [gcc r11-8574] combine patterns for add-add fusion X-Act-Checkin: gcc X-Git-Author: Aaron Sawdey X-Git-Refname: refs/heads/releases/gcc-11 X-Git-Oldrev: 2d5c7fc3ab945ef7cc63aa214a3eb5843e3d9e98 X-Git-Newrev: a677a2d513aa29f5963dd82ee50b83fe7d3d4227 Message-Id: <20210614222752.849D23850402@sourceware.org> Date: Mon, 14 Jun 2021 22:27:52 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 14 Jun 2021 22:27:52 -0000 https://gcc.gnu.org/g:a677a2d513aa29f5963dd82ee50b83fe7d3d4227 commit r11-8574-ga677a2d513aa29f5963dd82ee50b83fe7d3d4227 Author: Aaron Sawdey Date: Mon Jan 25 21:11:52 2021 -0600 combine patterns for add-add fusion This patch adds a function to genfusion.pl to add a couple more patterns so combine can do fusion of pairs of add and vaddudm instructions. Backport from mainline (rolled in a bugfix patch that followed the original one). gcc/ChangeLog: * config/rs6000/genfusion.pl (gen_addadd): New function. * config/rs6000/fusion.md: Regenerate file. * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION_2ADD to masks. * config/rs6000/rs6000.c (rs6000_option_override_internal): Handle default value of OPTION_MASK_P10_FUSION_2ADD. * config/rs6000/rs6000.opt: Add -mpower10-fusion-2add. gcc/testsuite/ChangeLog: * gcc.target/powerpc/fusion-p10-addadd.c: New file. Diff: --- gcc/config/rs6000/fusion.md | 36 ++++++++++++++++++ gcc/config/rs6000/genfusion.pl | 44 ++++++++++++++++++++++ gcc/config/rs6000/rs6000-cpus.def | 4 +- gcc/config/rs6000/rs6000.c | 10 ++++- gcc/config/rs6000/rs6000.opt | 4 ++ .../gcc.target/powerpc/fusion-p10-addadd.c | 40 ++++++++++++++++++++ 6 files changed, 135 insertions(+), 3 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 6d71bc2df73..4d810e6ba13 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -2658,3 +2658,39 @@ [(set_attr "type" "fused_vector") (set_attr "cost" "6") (set_attr "length" "8")]) + +;; add-add fusion pattern generated by gen_addadd +(define_insn "*fuse_add_add" + [(set (match_operand:GPR 3 "gpc_reg_operand" "=0,1,&r,r") + (plus:GPR + (plus:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r") + (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")) + (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) + (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] + "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)" + "@ + add %3,%1,%0\;add %3,%3,%2 + add %3,%1,%0\;add %3,%3,%2 + add %3,%1,%0\;add %3,%3,%2 + add %4,%1,%0\;add %3,%4,%2" + [(set_attr "type" "fused_arith_logical") + (set_attr "cost" "6") + (set_attr "length" "8")]) + +;; vaddudm-vaddudm fusion pattern generated by gen_addadd +(define_insn "*fuse_vaddudm_vaddudm" + [(set (match_operand:V2DI 3 "altivec_register_operand" "=0,1,&v,v") + (plus:V2DI + (plus:V2DI (match_operand:V2DI 0 "altivec_register_operand" "v,v,v,v") + (match_operand:V2DI 1 "altivec_register_operand" "%v,v,v,v")) + (match_operand:V2DI 2 "altivec_register_operand" "v,v,v,v"))) + (clobber (match_scratch:V2DI 4 "=X,X,X,&v"))] + "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)" + "@ + vaddudm %3,%1,%0\;vaddudm %3,%3,%2 + vaddudm %3,%1,%0\;vaddudm %3,%3,%2 + vaddudm %3,%1,%0\;vaddudm %3,%3,%2 + vaddudm %4,%1,%0\;vaddudm %3,%4,%2" + [(set_attr "type" "fused_vector") + (set_attr "cost" "6") + (set_attr "length" "8")]) diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index ce48fd94f95..1fd46cc0604 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -240,8 +240,52 @@ EOF } } +sub gen_addadd +{ + my ($kind, $vchr, $op, $type, $mode, $pred, $constraint); + foreach $kind ('scalar','vector') { + if ( $kind eq 'vector' ) { + $vchr = "v"; + $op = "vaddudm"; + $type = "fused_vector"; + $mode = "V2DI"; + $pred = "altivec_register_operand"; + $constraint = "v"; + } else { + $vchr = ""; + $op = "add"; + $type = "fused_arith_logical"; + $mode = "GPR"; + $pred = "gpc_reg_operand"; + $constraint = "r"; + } + my $c4 = "${constraint},${constraint},${constraint},${constraint}"; + print <<"EOF"; + +;; ${op}-${op} fusion pattern generated by gen_addadd +(define_insn "*fuse_${op}_${op}" + [(set (match_operand:${mode} 3 "${pred}" "=0,1,&${constraint},${constraint}") + (plus:${mode} + (plus:${mode} (match_operand:${mode} 0 "${pred}" "${c4}") + (match_operand:${mode} 1 "${pred}" "%${c4}")) + (match_operand:${mode} 2 "${pred}" "${c4}"))) + (clobber (match_scratch:${mode} 4 "=X,X,X,&${constraint}"))] + "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)" + "@ + ${op} %3,%1,%0\\;${op} %3,%3,%2 + ${op} %3,%1,%0\\;${op} %3,%3,%2 + ${op} %3,%1,%0\\;${op} %3,%3,%2 + ${op} %4,%1,%0\\;${op} %3,%4,%2" + [(set_attr "type" "${type}") + (set_attr "cost" "6") + (set_attr "length" "8")]) +EOF + } +} + gen_ld_cmpi_p10(); gen_2logical(); +gen_addadd; exit(0); diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index cbbb42c1b3a..d46a91dd11b 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -85,7 +85,8 @@ | OTHER_POWER10_MASKS \ | OPTION_MASK_P10_FUSION \ | OPTION_MASK_P10_FUSION_LD_CMPI \ - | OPTION_MASK_P10_FUSION_2LOGICAL) + | OPTION_MASK_P10_FUSION_2LOGICAL \ + | OPTION_MASK_P10_FUSION_2ADD) /* Flags that need to be turned off if -mno-power9-vector. */ #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ @@ -135,6 +136,7 @@ | OPTION_MASK_P10_FUSION \ | OPTION_MASK_P10_FUSION_LD_CMPI \ | OPTION_MASK_P10_FUSION_2LOGICAL \ + | OPTION_MASK_P10_FUSION_2ADD \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ | OPTION_MASK_MFCRF \ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 00ecf8ac908..ec60fcf2493 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4461,16 +4461,22 @@ rs6000_option_override_internal (bool global_init_p) if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0) rs6000_isa_flags |= OPTION_MASK_MMA; - if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0) + if (TARGET_POWER10 + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0) rs6000_isa_flags |= OPTION_MASK_P10_FUSION; if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0) rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI; - if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0) + if (TARGET_POWER10 + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0) rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL; + if (TARGET_POWER10 + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0) + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD; + /* Turn off vector pair/mma options on non-power10 systems. */ else if (!TARGET_POWER10 && TARGET_MMA) { diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 2685fa71517..e30dc040651 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -502,6 +502,10 @@ mpower10-fusion-2logical Target Undocumented Mask(P10_FUSION_2LOGICAL) Var(rs6000_isa_flags) Fuse certain integer operations together for better performance on power10. +mpower10-fusion-2add +Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags) +Fuse certain add operations together for better performance on power10. + mcrypto Target Mask(CRYPTO) Var(rs6000_isa_flags) Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions. diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c new file mode 100644 index 00000000000..494ccdb5cdf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power10 -O3 -dp" } */ + +long addadd0(long a, long b, long c) +{ + return a+b+c; +} +long addadd1(long a, long b, long c, long *t) +{ + long r=a+b+c; + *t = b; + return r; +} +long addadd2(long s, long a, long b, long c) +{ + return b+c+a; +} + +typedef vector long vlong; +vlong vaddadd(vlong a, vlong b, vlong c) +{ + return a+b+c; +} +vlong vaddadd1(vlong a, vlong b, vlong c, vlong *t) +{ + vlong r=a+b+c; + *t = b; + return r; +} +vlong vaddadd2(vlong s, vlong a, vlong b, vlong c) +{ + return a+b+c; +} + +/* { dg-final { scan-assembler-times "fuse_add_add/0" 1 } } */ +/* { dg-final { scan-assembler-times "fuse_add_add/1" 1 } } */ +/* { dg-final { scan-assembler-times "fuse_add_add/2" 1 } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0" 1 } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/1" 1 } } */ +/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2" 1 } } */