public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Pat Haugen <pthaugen@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r11-8996] Enable store fusion on Power10. Date: Wed, 15 Sep 2021 23:03:15 +0000 (GMT) [thread overview] Message-ID: <20210915230315.7ACDF3857835@sourceware.org> (raw) https://gcc.gnu.org/g:b429c81170ccfa49894404524dac12be3704c574 commit r11-8996-gb429c81170ccfa49894404524dac12be3704c574 Author: Pat Haugen <pthaugen@linux.ibm.com> Date: Mon Aug 30 10:58:21 2021 -0500 Enable store fusion on Power10. gcc/ChangeLog: * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add OPTION_MASK_P10_FUSION_2STORE. (POWERPC_MASKS): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): Enable store fusion for Power10. (is_fusable_store): New. (power10_sched_reorder): Likewise. (rs6000_sched_reorder): Do Power10 specific reordering. (rs6000_sched_reorder2): Likewise. * config/rs6000/rs6000.opt: Add new option. gcc/testsuite/ChangeLog: * gcc.target/powerpc/fusion-p10-stst.c: New test. * gcc.target/powerpc/fusion-p10-stst2.c: New test. Diff: --- gcc/config/rs6000/rs6000-cpus.def | 4 +- gcc/config/rs6000/rs6000.c | 95 ++++++++++++++++++++++ gcc/config/rs6000/rs6000.opt | 4 + gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c | 31 +++++++ .../gcc.target/powerpc/fusion-p10-stst2.c | 30 +++++++ 5 files changed, 163 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 6758296c0fd..f5812da0184 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -90,7 +90,8 @@ | OPTION_MASK_P10_FUSION_2LOGICAL \ | OPTION_MASK_P10_FUSION_LOGADD \ | OPTION_MASK_P10_FUSION_ADDLOG \ - | OPTION_MASK_P10_FUSION_2ADD) + | OPTION_MASK_P10_FUSION_2ADD \ + | OPTION_MASK_P10_FUSION_2STORE) /* Flags that need to be turned off if -mno-power9-vector. */ #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ @@ -143,6 +144,7 @@ | OPTION_MASK_P10_FUSION_LOGADD \ | OPTION_MASK_P10_FUSION_ADDLOG \ | OPTION_MASK_P10_FUSION_2ADD \ + | OPTION_MASK_P10_FUSION_2STORE \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ | OPTION_MASK_MFCRF \ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ca0fb85c44c..8cdb9369ee0 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4485,6 +4485,10 @@ rs6000_option_override_internal (bool global_init_p) && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0) rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD; + if (TARGET_POWER10 + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0) + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE; + /* Turn off vector pair/mma options on non-power10 systems. */ else if (!TARGET_POWER10 && TARGET_MMA) { @@ -18872,6 +18876,89 @@ power9_sched_reorder2 (rtx_insn **ready, int lastpos) return cached_can_issue_more; } +/* Determine if INSN is a store to memory that can be fused with a similar + adjacent store. */ + +static bool +is_fusable_store (rtx_insn *insn, rtx *str_mem) +{ + /* Insn must be a non-prefixed base+disp form store. */ + if (is_store_insn (insn, str_mem) + && get_attr_prefixed (insn) == PREFIXED_NO + && get_attr_update (insn) == UPDATE_NO + && get_attr_indexed (insn) == INDEXED_NO) + { + /* Further restrictions by mode and size. */ + if (!MEM_SIZE_KNOWN_P (*str_mem)) + return false; + + machine_mode mode = GET_MODE (*str_mem); + HOST_WIDE_INT size = MEM_SIZE (*str_mem); + + if (INTEGRAL_MODE_P (mode)) + /* Must be word or dword size. */ + return (size == 4 || size == 8); + else if (FLOAT_MODE_P (mode)) + /* Must be dword size. */ + return (size == 8); + } + + return false; +} + +/* Do Power10 specific reordering of the ready list. */ + +static int +power10_sched_reorder (rtx_insn **ready, int lastpos) +{ + rtx mem1; + + /* Do store fusion during sched2 only. */ + if (!reload_completed) + return cached_can_issue_more; + + /* If the prior insn finished off a store fusion pair then simply + reset the counter and return, nothing more to do. */ + if (load_store_pendulum != 0) + { + load_store_pendulum = 0; + return cached_can_issue_more; + } + + /* Try to pair certain store insns to adjacent memory locations + so that the hardware will fuse them to a single operation. */ + if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE + && is_fusable_store (last_scheduled_insn, &mem1)) + { + + /* A fusable store was just scheduled. Scan the ready list for another + store that it can fuse with. */ + int pos = lastpos; + while (pos >= 0) + { + rtx mem2; + /* GPR stores can be ascending or descending offsets, FPR/VSR stores + must be ascending only. */ + if (is_fusable_store (ready[pos], &mem2) + && ((INTEGRAL_MODE_P (GET_MODE (mem1)) + && adjacent_mem_locations (mem1, mem2)) + || (FLOAT_MODE_P (GET_MODE (mem1)) + && (adjacent_mem_locations (mem1, mem2) == mem1)))) + { + /* Found a fusable store. Move it to the end of the ready list + so it is scheduled next. */ + move_to_end_of_ready (ready, pos, lastpos); + + load_store_pendulum = -1; + break; + } + pos--; + } + } + + return cached_can_issue_more; +} + /* We are about to begin issuing insns for this clock cycle. */ static int @@ -18898,6 +18985,10 @@ rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose, if (rs6000_tune == PROCESSOR_POWER6) load_store_pendulum = 0; + /* Do Power10 dependent reordering. */ + if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn) + power10_sched_reorder (ready, n_ready - 1); + return rs6000_issue_rate (); } @@ -18919,6 +19010,10 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready, && recog_memoized (last_scheduled_insn) >= 0) return power9_sched_reorder2 (ready, *pn_ready - 1); + /* Do Power10 dependent reordering. */ + if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn) + return power10_sched_reorder (ready, *pn_ready - 1); + return cached_can_issue_more; } diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 0538db387dc..3753de19557 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -514,6 +514,10 @@ mpower10-fusion-2add Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags) Fuse dependent pairs of add or vaddudm instructions for better performance on power10. +mpower10-fusion-2store +Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags) +Fuse certain store operations together for better performance on power10. + mcrypto Target Mask(CRYPTO) Var(rs6000_isa_flags) Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions. diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c new file mode 100644 index 00000000000..528a7e542ab --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Verify store fusion is enabled */ + +void fuse_stw (int *i, int a, int b, int c) +{ + i[1] = a; + i[5] = b; + i[2] = c; +} + +void fuse_std (long *i, long a, long b, long c) +{ + i[1] = a; + i[5] = b; + i[2] = c; +} + +void fuse_stfd (double *i, double a, double b, double c) +{ + i[1] = a; + i[5] = b; + i[2] = c; +} + +/* { dg-final { scan-assembler-times {stw 4,4\(3\)\n\tstw 6,8\(3\)} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {stw 4,4\(3\)\n\tstw 6,8\(3\)} 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {std 4,8\(3\)\n\tstd 6,16\(3\)} 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times {stfd 1,8\(3\)\n\tstfd 3,16\(3\)} 1 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c new file mode 100644 index 00000000000..62f1a92c2b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power10 -mno-power10-fusion -O2" } */ + +/* Verify store fusion is disabled */ + +void fuse_stw (int *i, int a, int b, int c) +{ + i[1] = a; + i[5] = b; + i[2] = c; +} + +void fuse_std (long *i, long a, long b, long c) +{ + i[1] = a; + i[5] = b; + i[2] = c; +} + +void fuse_stfd (double *i, double a, double b, double c) +{ + i[1] = a; + i[5] = b; + i[2] = c; +} + + +/* { dg-final { scan-assembler-not {stw 4,4\(3\)\n\tstw 6,8\(3\)} } } */ +/* { dg-final { scan-assembler-not {std 4,8\(3\)\n\tstd 6,16\(3\)} { target lp64 } } } */ +/* { dg-final { scan-assembler-not {stfd 1,8\(3\)\n\tstfd 3,16\(3\)} } } */
reply other threads:[~2021-09-15 23:03 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210915230315.7ACDF3857835@sourceware.org \ --to=pthaugen@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).