public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-8996] Enable store fusion on Power10.
@ 2021-09-15 23:03 Pat Haugen
  0 siblings, 0 replies; only message in thread
From: Pat Haugen @ 2021-09-15 23:03 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b429c81170ccfa49894404524dac12be3704c574

commit r11-8996-gb429c81170ccfa49894404524dac12be3704c574
Author: Pat Haugen <pthaugen@linux.ibm.com>
Date:   Mon Aug 30 10:58:21 2021 -0500

    Enable store fusion on Power10.
    
    gcc/ChangeLog:
    
            * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
            OPTION_MASK_P10_FUSION_2STORE.
            (POWERPC_MASKS): Likewise.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Enable
            store fusion for Power10.
            (is_fusable_store): New.
            (power10_sched_reorder): Likewise.
            (rs6000_sched_reorder): Do Power10 specific reordering.
            (rs6000_sched_reorder2): Likewise.
            * config/rs6000/rs6000.opt: Add new option.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/powerpc/fusion-p10-stst.c: New test.
            * gcc.target/powerpc/fusion-p10-stst2.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-cpus.def                  |  4 +-
 gcc/config/rs6000/rs6000.c                         | 95 ++++++++++++++++++++++
 gcc/config/rs6000/rs6000.opt                       |  4 +
 gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c | 31 +++++++
 .../gcc.target/powerpc/fusion-p10-stst2.c          | 30 +++++++
 5 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 6758296c0fd..f5812da0184 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -90,7 +90,8 @@
 				 | OPTION_MASK_P10_FUSION_2LOGICAL	\
 				 | OPTION_MASK_P10_FUSION_LOGADD 	\
 				 | OPTION_MASK_P10_FUSION_ADDLOG	\
-				 | OPTION_MASK_P10_FUSION_2ADD)
+				 | OPTION_MASK_P10_FUSION_2ADD		\
+				 | OPTION_MASK_P10_FUSION_2STORE)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -143,6 +144,7 @@
 				 | OPTION_MASK_P10_FUSION_LOGADD 	\
 				 | OPTION_MASK_P10_FUSION_ADDLOG	\
 				 | OPTION_MASK_P10_FUSION_2ADD    	\
+				 | OPTION_MASK_P10_FUSION_2STORE	\
 				 | OPTION_MASK_HTM			\
 				 | OPTION_MASK_ISEL			\
 				 | OPTION_MASK_MFCRF			\
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ca0fb85c44c..8cdb9369ee0 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4485,6 +4485,10 @@ rs6000_option_override_internal (bool global_init_p)
       && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
     rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
 
+  if (TARGET_POWER10
+      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
+    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
+
   /* Turn off vector pair/mma options on non-power10 systems.  */
   else if (!TARGET_POWER10 && TARGET_MMA)
     {
@@ -18872,6 +18876,89 @@ power9_sched_reorder2 (rtx_insn **ready, int lastpos)
   return cached_can_issue_more;
 }
 
+/* Determine if INSN is a store to memory that can be fused with a similar
+   adjacent store.  */
+
+static bool
+is_fusable_store (rtx_insn *insn, rtx *str_mem)
+{
+  /* Insn must be a non-prefixed base+disp form store.  */
+  if (is_store_insn (insn, str_mem)
+      && get_attr_prefixed (insn) == PREFIXED_NO
+      && get_attr_update (insn) == UPDATE_NO
+      && get_attr_indexed (insn) == INDEXED_NO)
+    {
+      /* Further restrictions by mode and size.  */
+      if (!MEM_SIZE_KNOWN_P (*str_mem))
+	return false;
+
+      machine_mode mode = GET_MODE (*str_mem);
+      HOST_WIDE_INT size = MEM_SIZE (*str_mem);
+
+      if (INTEGRAL_MODE_P (mode))
+	/* Must be word or dword size.  */
+	return (size == 4 || size == 8);
+      else if (FLOAT_MODE_P (mode))
+	/* Must be dword size.  */
+	return (size == 8);
+    }
+
+  return false;
+}
+
+/* Do Power10 specific reordering of the ready list.  */
+
+static int
+power10_sched_reorder (rtx_insn **ready, int lastpos)
+{
+  rtx mem1;
+
+  /* Do store fusion during sched2 only.  */
+  if (!reload_completed)
+    return cached_can_issue_more;
+
+  /* If the prior insn finished off a store fusion pair then simply
+     reset the counter and return, nothing more to do.  */
+  if (load_store_pendulum != 0)
+    {
+      load_store_pendulum = 0;
+      return cached_can_issue_more;
+    }
+
+  /* Try to pair certain store insns to adjacent memory locations
+     so that the hardware will fuse them to a single operation.  */
+  if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE
+      && is_fusable_store (last_scheduled_insn, &mem1))
+    {
+
+      /* A fusable store was just scheduled.  Scan the ready list for another
+	 store that it can fuse with.  */
+      int pos = lastpos;
+      while (pos >= 0)
+	{
+	  rtx mem2;
+	  /* GPR stores can be ascending or descending offsets, FPR/VSR stores
+	     must be ascending only.  */
+	  if (is_fusable_store (ready[pos], &mem2)
+	      && ((INTEGRAL_MODE_P (GET_MODE (mem1))
+		   && adjacent_mem_locations (mem1, mem2))
+		  || (FLOAT_MODE_P (GET_MODE (mem1))
+		   && (adjacent_mem_locations (mem1, mem2) == mem1))))
+	    {
+	      /* Found a fusable store.  Move it to the end of the ready list
+		 so it is scheduled next.  */
+	      move_to_end_of_ready (ready, pos, lastpos);
+
+	      load_store_pendulum = -1;
+	      break;
+	    }
+	  pos--;
+	}
+    }
+
+  return cached_can_issue_more;
+}
+
 /* We are about to begin issuing insns for this clock cycle. */
 
 static int
@@ -18898,6 +18985,10 @@ rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
   if (rs6000_tune == PROCESSOR_POWER6)
     load_store_pendulum = 0;
 
+  /* Do Power10 dependent reordering.  */
+  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
+    power10_sched_reorder (ready, n_ready - 1);
+
   return rs6000_issue_rate ();
 }
 
@@ -18919,6 +19010,10 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
       && recog_memoized (last_scheduled_insn) >= 0)
     return power9_sched_reorder2 (ready, *pn_ready - 1);
 
+  /* Do Power10 dependent reordering.  */
+  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
+    return power10_sched_reorder (ready, *pn_ready - 1);
+
   return cached_can_issue_more;
 }
 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0538db387dc..3753de19557 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -514,6 +514,10 @@ mpower10-fusion-2add
 Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags)
 Fuse dependent pairs of add or vaddudm instructions for better performance on power10.
 
+mpower10-fusion-2store
+Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags)
+Fuse certain store operations together for better performance on power10.
+
 mcrypto
 Target Mask(CRYPTO) Var(rs6000_isa_flags)
 Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c
new file mode 100644
index 00000000000..528a7e542ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Verify store fusion is enabled */
+
+void fuse_stw (int *i, int a, int b, int c)
+{
+  i[1] = a;
+  i[5] = b;
+  i[2] = c;
+}
+
+void fuse_std (long *i, long a, long b, long c)
+{
+  i[1] = a;
+  i[5] = b;
+  i[2] = c;
+}
+
+void fuse_stfd (double *i, double a, double b, double c)
+{
+  i[1] = a;
+  i[5] = b;
+  i[2] = c;
+}
+
+/* { dg-final { scan-assembler-times {stw 4,4\(3\)\n\tstw 6,8\(3\)} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {stw 4,4\(3\)\n\tstw 6,8\(3\)} 2 { target ilp32 } } } */
+/* { dg-final { scan-assembler-times {std 4,8\(3\)\n\tstd 6,16\(3\)} 1 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {stfd 1,8\(3\)\n\tstfd 3,16\(3\)} 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c
new file mode 100644
index 00000000000..62f1a92c2b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-stst2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power10 -mno-power10-fusion -O2" } */
+
+/* Verify store fusion is disabled */
+
+void fuse_stw (int *i, int a, int b, int c)
+{
+  i[1] = a;
+  i[5] = b;
+  i[2] = c;
+}
+
+void fuse_std (long *i, long a, long b, long c)
+{
+  i[1] = a;
+  i[5] = b;
+  i[2] = c;
+}
+
+void fuse_stfd (double *i, double a, double b, double c)
+{
+  i[1] = a;
+  i[5] = b;
+  i[2] = c;
+}
+
+
+/* { dg-final { scan-assembler-not {stw 4,4\(3\)\n\tstw 6,8\(3\)} } } */
+/* { dg-final { scan-assembler-not {std 4,8\(3\)\n\tstd 6,16\(3\)} { target lp64 } } } */
+/* { dg-final { scan-assembler-not {stfd 1,8\(3\)\n\tstfd 3,16\(3\)} } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-15 23:03 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-15 23:03 [gcc r11-8996] Enable store fusion on Power10 Pat Haugen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).