public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] tree-optimization/106397 - array prefetch and LC SSA
@ 2022-07-22  8:16 Richard Biener
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Biener @ 2022-07-22  8:16 UTC (permalink / raw)
  To: gcc-patches

The following fixes maintaining LC SSA when array prefetch inserts
mfence instructions on loop exits that do not use memory.  It also
fixes the latent issue that it might split exit edges for this
which will break LC SSA for non-virtuals as well.  It should also
make the process cheaper by deferring the required (LC) SSA
update until the end of the pass.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

	PR tree-optimization/106397
	* tree-ssa-loop-prefetch.cc (emit_mfence_after_loop): Do
	not update SSA form here.
	(mark_nontemporal_stores): Return whether we marked any
	non-temporal stores and inserted mfence.
	(loop_prefetch_arrays): Note when we need to update SSA.
	(tree_ssa_prefetch_arrays): Perform required (LC) SSA update
	at the end of the pass.

	* gcc.dg/pr106397.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr106397.c | 17 +++++++++++++++++
 gcc/tree-ssa-loop-prefetch.cc   | 27 +++++++++++++++++----------
 2 files changed, 34 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr106397.c

diff --git a/gcc/testsuite/gcc.dg/pr106397.c b/gcc/testsuite/gcc.dg/pr106397.c
new file mode 100644
index 00000000000..a6b2e913346
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr106397.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fprefetch-loop-arrays --param l2-cache-size=0 --param prefetch-latency=3 -fprefetch-loop-arrays" } */
+
+int
+bar (void)
+{
+  /* No return statement. */
+}
+
+__attribute__ ((simd)) int
+foo (void)
+{
+  if (bar ())
+    return 0;
+
+  __builtin_unreachable ();
+}
diff --git a/gcc/tree-ssa-loop-prefetch.cc b/gcc/tree-ssa-loop-prefetch.cc
index 8f190ae469b..b6690b0e805 100644
--- a/gcc/tree-ssa-loop-prefetch.cc
+++ b/gcc/tree-ssa-loop-prefetch.cc
@@ -1308,8 +1308,6 @@ emit_mfence_after_loop (class loop *loop)
 
       gsi_insert_before (&bsi, call, GSI_NEW_STMT);
     }
-
-  update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 /* Returns true if we can use storent in loop, false otherwise.  */
@@ -1340,23 +1338,27 @@ may_use_storent_in_loop_p (class loop *loop)
 }
 
 /* Marks nontemporal stores in LOOP.  GROUPS contains the description of memory
-   references in the loop.  */
+   references in the loop.  Returns whether we inserted any mfence call.  */
 
-static void
+static bool
 mark_nontemporal_stores (class loop *loop, struct mem_ref_group *groups)
 {
   struct mem_ref *ref;
   bool any = false;
 
   if (!may_use_storent_in_loop_p (loop))
-    return;
+    return false;
 
   for (; groups; groups = groups->next)
     for (ref = groups->refs; ref; ref = ref->next)
       any |= mark_nontemporal_store (ref);
 
   if (any && FENCE_FOLLOWING_MOVNT != NULL_TREE)
-    emit_mfence_after_loop (loop);
+    {
+      emit_mfence_after_loop (loop);
+      return true;
+    }
+  return false;
 }
 
 /* Determines whether we can profitably unroll LOOP FACTOR times, and if
@@ -1874,10 +1876,11 @@ insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count,
 
 
 /* Issue prefetch instructions for array references in LOOP.  Returns
-   true if the LOOP was unrolled.  */
+   true if the LOOP was unrolled and updates NEED_LC_SSA_UPDATE if we need
+   to update SSA for virtual operands and LC SSA for a split edge.  */
 
 static bool
-loop_prefetch_arrays (class loop *loop)
+loop_prefetch_arrays (class loop *loop, bool &need_lc_ssa_update)
 {
   struct mem_ref_group *refs;
   unsigned ahead, ninsns, time, unroll_factor;
@@ -1952,7 +1955,7 @@ loop_prefetch_arrays (class loop *loop)
 					  unroll_factor))
     goto fail;
 
-  mark_nontemporal_stores (loop, refs);
+  need_lc_ssa_update |= mark_nontemporal_stores (loop, refs);
 
   /* Step 4: what to prefetch?  */
   if (!schedule_prefetches (refs, unroll_factor, ahead))
@@ -1980,6 +1983,7 @@ unsigned int
 tree_ssa_prefetch_arrays (void)
 {
   bool unrolled = false;
+  bool need_lc_ssa_update = false;
   int todo_flags = 0;
 
   if (!targetm.have_prefetch ()
@@ -2028,12 +2032,15 @@ tree_ssa_prefetch_arrays (void)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Processing loop %d:\n", loop->num);
 
-      unrolled |= loop_prefetch_arrays (loop);
+      unrolled |= loop_prefetch_arrays (loop, need_lc_ssa_update);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "\n\n");
     }
 
+  if (need_lc_ssa_update)
+    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
+
   if (unrolled)
     {
       scev_reset ();
-- 
2.35.3

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] tree-optimization/106397 - array prefetch and LC SSA
@ 2022-07-22  8:16 Richard Biener
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Biener @ 2022-07-22  8:16 UTC (permalink / raw)
  To: gcc-patches

The following fixes maintaining LC SSA when array prefetch inserts
mfence instructions on loop exits that do not use memory.  It also
fixes the latent issue that it might split exit edges for this
which will break LC SSA for non-virtuals as well.  It should also
make the process cheaper by deferring the required (LC) SSA
update until the end of the pass.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

	PR tree-optimization/106397
	* tree-ssa-loop-prefetch.cc (emit_mfence_after_loop): Do
	not update SSA form here.
	(mark_nontemporal_stores): Return whether we marked any
	non-temporal stores and inserted mfence.
	(loop_prefetch_arrays): Note when we need to update SSA.
	(tree_ssa_prefetch_arrays): Perform required (LC) SSA update
	at the end of the pass.

	* gcc.dg/pr106397.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr106397.c | 17 +++++++++++++++++
 gcc/tree-ssa-loop-prefetch.cc   | 27 +++++++++++++++++----------
 2 files changed, 34 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr106397.c

diff --git a/gcc/testsuite/gcc.dg/pr106397.c b/gcc/testsuite/gcc.dg/pr106397.c
new file mode 100644
index 00000000000..a6b2e913346
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr106397.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fprefetch-loop-arrays --param l2-cache-size=0 --param prefetch-latency=3 -fprefetch-loop-arrays" } */
+
+int
+bar (void)
+{
+  /* No return statement. */
+}
+
+__attribute__ ((simd)) int
+foo (void)
+{
+  if (bar ())
+    return 0;
+
+  __builtin_unreachable ();
+}
diff --git a/gcc/tree-ssa-loop-prefetch.cc b/gcc/tree-ssa-loop-prefetch.cc
index 8f190ae469b..b6690b0e805 100644
--- a/gcc/tree-ssa-loop-prefetch.cc
+++ b/gcc/tree-ssa-loop-prefetch.cc
@@ -1308,8 +1308,6 @@ emit_mfence_after_loop (class loop *loop)
 
       gsi_insert_before (&bsi, call, GSI_NEW_STMT);
     }
-
-  update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 /* Returns true if we can use storent in loop, false otherwise.  */
@@ -1340,23 +1338,27 @@ may_use_storent_in_loop_p (class loop *loop)
 }
 
 /* Marks nontemporal stores in LOOP.  GROUPS contains the description of memory
-   references in the loop.  */
+   references in the loop.  Returns whether we inserted any mfence call.  */
 
-static void
+static bool
 mark_nontemporal_stores (class loop *loop, struct mem_ref_group *groups)
 {
   struct mem_ref *ref;
   bool any = false;
 
   if (!may_use_storent_in_loop_p (loop))
-    return;
+    return false;
 
   for (; groups; groups = groups->next)
     for (ref = groups->refs; ref; ref = ref->next)
       any |= mark_nontemporal_store (ref);
 
   if (any && FENCE_FOLLOWING_MOVNT != NULL_TREE)
-    emit_mfence_after_loop (loop);
+    {
+      emit_mfence_after_loop (loop);
+      return true;
+    }
+  return false;
 }
 
 /* Determines whether we can profitably unroll LOOP FACTOR times, and if
@@ -1874,10 +1876,11 @@ insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count,
 
 
 /* Issue prefetch instructions for array references in LOOP.  Returns
-   true if the LOOP was unrolled.  */
+   true if the LOOP was unrolled and updates NEED_LC_SSA_UPDATE if we need
+   to update SSA for virtual operands and LC SSA for a split edge.  */
 
 static bool
-loop_prefetch_arrays (class loop *loop)
+loop_prefetch_arrays (class loop *loop, bool &need_lc_ssa_update)
 {
   struct mem_ref_group *refs;
   unsigned ahead, ninsns, time, unroll_factor;
@@ -1952,7 +1955,7 @@ loop_prefetch_arrays (class loop *loop)
 					  unroll_factor))
     goto fail;
 
-  mark_nontemporal_stores (loop, refs);
+  need_lc_ssa_update |= mark_nontemporal_stores (loop, refs);
 
   /* Step 4: what to prefetch?  */
   if (!schedule_prefetches (refs, unroll_factor, ahead))
@@ -1980,6 +1983,7 @@ unsigned int
 tree_ssa_prefetch_arrays (void)
 {
   bool unrolled = false;
+  bool need_lc_ssa_update = false;
   int todo_flags = 0;
 
   if (!targetm.have_prefetch ()
@@ -2028,12 +2032,15 @@ tree_ssa_prefetch_arrays (void)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Processing loop %d:\n", loop->num);
 
-      unrolled |= loop_prefetch_arrays (loop);
+      unrolled |= loop_prefetch_arrays (loop, need_lc_ssa_update);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "\n\n");
     }
 
+  if (need_lc_ssa_update)
+    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
+
   if (unrolled)
     {
       scev_reset ();
-- 
2.35.3

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] tree-optimization/106397 - array prefetch and LC SSA
@ 2022-07-22  8:16 Richard Biener
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Biener @ 2022-07-22  8:16 UTC (permalink / raw)
  To: gcc-patches

The following fixes maintaining LC SSA when array prefetch inserts
mfence instructions on loop exits that do not use memory.  It also
fixes the latent issue that it might split exit edges for this
which will break LC SSA for non-virtuals as well.  It should also
make the process cheaper by deferring the required (LC) SSA
update until the end of the pass.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

	PR tree-optimization/106397
	* tree-ssa-loop-prefetch.cc (emit_mfence_after_loop): Do
	not update SSA form here.
	(mark_nontemporal_stores): Return whether we marked any
	non-temporal stores and inserted mfence.
	(loop_prefetch_arrays): Note when we need to update SSA.
	(tree_ssa_prefetch_arrays): Perform required (LC) SSA update
	at the end of the pass.

	* gcc.dg/pr106397.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr106397.c | 17 +++++++++++++++++
 gcc/tree-ssa-loop-prefetch.cc   | 27 +++++++++++++++++----------
 2 files changed, 34 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr106397.c

diff --git a/gcc/testsuite/gcc.dg/pr106397.c b/gcc/testsuite/gcc.dg/pr106397.c
new file mode 100644
index 00000000000..a6b2e913346
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr106397.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fprefetch-loop-arrays --param l2-cache-size=0 --param prefetch-latency=3 -fprefetch-loop-arrays" } */
+
+int
+bar (void)
+{
+  /* No return statement. */
+}
+
+__attribute__ ((simd)) int
+foo (void)
+{
+  if (bar ())
+    return 0;
+
+  __builtin_unreachable ();
+}
diff --git a/gcc/tree-ssa-loop-prefetch.cc b/gcc/tree-ssa-loop-prefetch.cc
index 8f190ae469b..b6690b0e805 100644
--- a/gcc/tree-ssa-loop-prefetch.cc
+++ b/gcc/tree-ssa-loop-prefetch.cc
@@ -1308,8 +1308,6 @@ emit_mfence_after_loop (class loop *loop)
 
       gsi_insert_before (&bsi, call, GSI_NEW_STMT);
     }
-
-  update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 /* Returns true if we can use storent in loop, false otherwise.  */
@@ -1340,23 +1338,27 @@ may_use_storent_in_loop_p (class loop *loop)
 }
 
 /* Marks nontemporal stores in LOOP.  GROUPS contains the description of memory
-   references in the loop.  */
+   references in the loop.  Returns whether we inserted any mfence call.  */
 
-static void
+static bool
 mark_nontemporal_stores (class loop *loop, struct mem_ref_group *groups)
 {
   struct mem_ref *ref;
   bool any = false;
 
   if (!may_use_storent_in_loop_p (loop))
-    return;
+    return false;
 
   for (; groups; groups = groups->next)
     for (ref = groups->refs; ref; ref = ref->next)
       any |= mark_nontemporal_store (ref);
 
   if (any && FENCE_FOLLOWING_MOVNT != NULL_TREE)
-    emit_mfence_after_loop (loop);
+    {
+      emit_mfence_after_loop (loop);
+      return true;
+    }
+  return false;
 }
 
 /* Determines whether we can profitably unroll LOOP FACTOR times, and if
@@ -1874,10 +1876,11 @@ insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count,
 
 
 /* Issue prefetch instructions for array references in LOOP.  Returns
-   true if the LOOP was unrolled.  */
+   true if the LOOP was unrolled and updates NEED_LC_SSA_UPDATE if we need
+   to update SSA for virtual operands and LC SSA for a split edge.  */
 
 static bool
-loop_prefetch_arrays (class loop *loop)
+loop_prefetch_arrays (class loop *loop, bool &need_lc_ssa_update)
 {
   struct mem_ref_group *refs;
   unsigned ahead, ninsns, time, unroll_factor;
@@ -1952,7 +1955,7 @@ loop_prefetch_arrays (class loop *loop)
 					  unroll_factor))
     goto fail;
 
-  mark_nontemporal_stores (loop, refs);
+  need_lc_ssa_update |= mark_nontemporal_stores (loop, refs);
 
   /* Step 4: what to prefetch?  */
   if (!schedule_prefetches (refs, unroll_factor, ahead))
@@ -1980,6 +1983,7 @@ unsigned int
 tree_ssa_prefetch_arrays (void)
 {
   bool unrolled = false;
+  bool need_lc_ssa_update = false;
   int todo_flags = 0;
 
   if (!targetm.have_prefetch ()
@@ -2028,12 +2032,15 @@ tree_ssa_prefetch_arrays (void)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Processing loop %d:\n", loop->num);
 
-      unrolled |= loop_prefetch_arrays (loop);
+      unrolled |= loop_prefetch_arrays (loop, need_lc_ssa_update);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "\n\n");
     }
 
+  if (need_lc_ssa_update)
+    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
+
   if (unrolled)
     {
       scev_reset ();
-- 
2.35.3

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-07-22  8:16 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-22  8:16 [PATCH] tree-optimization/106397 - array prefetch and LC SSA Richard Biener
  -- strict thread matches above, loose matches on Subject: below --
2022-07-22  8:16 Richard Biener
2022-07-22  8:16 Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).