public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v8] tree-ssa-sink: Improve code sinking pass.
@ 2023-07-18 13:33 Ajit Agarwal
  2023-08-01  8:17 ` [PING^1] " Ajit Agarwal
  0 siblings, 1 reply; 11+ messages in thread
From: Ajit Agarwal @ 2023-07-18 13:33 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Jeff Law, Segher Boessenkool, Peter Bergner

Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
    {
      bar();
      j = l;
    }
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
    {
      l = a + b + c + d +e + f;     
      bar();
      j = l;
    }
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	PR tree-optimization/81953
	* tree-ssa-sink.cc (statement_sink_location): Move statements before
	calls.
	(def_use_same_block): New function.
	(select_best_block): Add heuristics to select the best blocks in the
	immediate post dominator.

gcc/testsuite/ChangeLog:

	PR tree-optimization/81953
	* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
	* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++++++
 gcc/tree-ssa-sink.cc                        | 59 ++++++++++++---------
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 00000000000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 00000000000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      if (b != 3)
+        x = 3;
+      else
+        x = 5;
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
    tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
    We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
 		   basic_block late_bb,
-		   gimple *stmt)
+		   gimple *stmt,
+		   gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+     operands, then increase the threshold by 7% as those are even more
+     profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+    {
+      threshold += 7;
+      if (threshold > 100)
+	threshold = 100;
+    }
 
   while (temp_bb != early_bb)
     {
@@ -203,34 +215,31 @@ select_best_block (basic_block early_bb,
       if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
 	best_bb = temp_bb;
 
+      /* Placing a statement before a setjmp-like function would be invalid
+	 (it cannot be reevaluated when execution follows an abnormal edge).
+	 If we selected a block with abnormal predecessors, just punt.  */
+      if (bb_has_abnormal_pred (temp_bb))
+	return early_bb;
+
+      /* if we have temp_bb post dominated by use block block then immediate
+       * dominator would be our best block.  */
+      if (use
+	  && bb_loop_depth(temp_bb) == bb_loop_depth (early_bb)
+	  && !(temp_bb->count * 100 >= early_bb->count * threshold)
+	  && dominated_by_p (CDI_POST_DOMINATORS, temp_bb, gimple_bb (use)))
+	best_bb = temp_bb;
+
       /* Walk up the dominator tree, hopefully we'll find a shallower
  	 loop nest.  */
       temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
     }
 
-  /* Placing a statement before a setjmp-like function would be invalid
-     (it cannot be reevaluated when execution follows an abnormal edge).
-     If we selected a block with abnormal predecessors, just punt.  */
-  if (bb_has_abnormal_pred (best_bb))
-    return early_bb;
-
   /* If we found a shallower loop nest, then we always consider that
      a win.  This will always give us the most control dependent block
      within that loop nest.  */
   if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb))
     return best_bb;
 
-  /* Get the sinking threshold.  If the statement to be moved has memory
-     operands, then increase the threshold by 7% as those are even more
-     profitable to avoid, clamping at 100%.  */
-  threshold = param_sink_frequency_threshold;
-  if (gimple_vuse (stmt) || gimple_vdef (stmt))
-    {
-      threshold += 7;
-      if (threshold > 100)
-	threshold = 100;
-    }
-
   /* If BEST_BB is at the same nesting level, then require it to have
      significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
@@ -439,7 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
       if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
 	return false;
 
-      commondom = select_best_block (frombb, commondom, stmt);
+      commondom = select_best_block (frombb, commondom, stmt, NULL);
 
       if (commondom == frombb)
 	return false;	
@@ -456,19 +465,17 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 	    continue;
 	  break;
 	}
+
       use = USE_STMT (one_use);
 
       if (gimple_code (use) != GIMPLE_PHI)
 	{
-	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
+	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt, use);
 
 	  if (sinkbb == frombb)
 	    return false;
 
-	  if (sinkbb == gimple_bb (use))
-	    *togsi = gsi_for_stmt (use);
-	  else
-	    *togsi = gsi_after_labels (sinkbb);
+	  *togsi = gsi_after_labels (sinkbb);
 
 	  return true;
 	}
@@ -480,7 +487,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
   if (!sinkbb)
     return false;
   
-  sinkbb = select_best_block (frombb, sinkbb, stmt);
+  sinkbb = select_best_block (frombb, sinkbb, stmt, NULL);
   if (!sinkbb || sinkbb == frombb)
     return false;
 
-- 
2.39.3


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PING^1] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
  2023-07-18 13:33 [PATCH v8] tree-ssa-sink: Improve code sinking pass Ajit Agarwal
@ 2023-08-01  8:17 ` Ajit Agarwal
  2023-08-21  6:44   ` [PING^2] " Ajit Agarwal
  0 siblings, 1 reply; 11+ messages in thread
From: Ajit Agarwal @ 2023-08-01  8:17 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Jeff Law, Peter Bergner, Segher Boessenkool,
	Rashmi.Sridhar

Ping! 


-------- Forwarded Message --------
Subject: [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 18 Jul 2023 19:03:37 +0530
From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
CC: Richard Biener <richard.guenther@gmail.com>, Jeff Law <jeffreyalaw@gmail.com>, Segher Boessenkool <segher@kernel.crashing.org>, Peter Bergner <bergner@linux.ibm.com>

Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
    {
      bar();
      j = l;
    }
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
    {
      l = a + b + c + d +e + f;     
      bar();
      j = l;
    }
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	PR tree-optimization/81953
	* tree-ssa-sink.cc (statement_sink_location): Move statements before
	calls.
	(def_use_same_block): New function.
	(select_best_block): Add heuristics to select the best blocks in the
	immediate post dominator.

gcc/testsuite/ChangeLog:

	PR tree-optimization/81953
	* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
	* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++++++
 gcc/tree-ssa-sink.cc                        | 59 ++++++++++++---------
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 00000000000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 00000000000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      if (b != 3)
+        x = 3;
+      else
+        x = 5;
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
    tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
    We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
 		   basic_block late_bb,
-		   gimple *stmt)
+		   gimple *stmt,
+		   gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+     operands, then increase the threshold by 7% as those are even more
+     profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+    {
+      threshold += 7;
+      if (threshold > 100)
+	threshold = 100;
+    }
 
   while (temp_bb != early_bb)
     {
@@ -203,34 +215,31 @@ select_best_block (basic_block early_bb,
       if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
 	best_bb = temp_bb;
 
+      /* Placing a statement before a setjmp-like function would be invalid
+	 (it cannot be reevaluated when execution follows an abnormal edge).
+	 If we selected a block with abnormal predecessors, just punt.  */
+      if (bb_has_abnormal_pred (temp_bb))
+	return early_bb;
+
+      /* if we have temp_bb post dominated by use block block then immediate
+       * dominator would be our best block.  */
+      if (use
+	  && bb_loop_depth(temp_bb) == bb_loop_depth (early_bb)
+	  && !(temp_bb->count * 100 >= early_bb->count * threshold)
+	  && dominated_by_p (CDI_POST_DOMINATORS, temp_bb, gimple_bb (use)))
+	best_bb = temp_bb;
+
       /* Walk up the dominator tree, hopefully we'll find a shallower
  	 loop nest.  */
       temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
     }
 
-  /* Placing a statement before a setjmp-like function would be invalid
-     (it cannot be reevaluated when execution follows an abnormal edge).
-     If we selected a block with abnormal predecessors, just punt.  */
-  if (bb_has_abnormal_pred (best_bb))
-    return early_bb;
-
   /* If we found a shallower loop nest, then we always consider that
      a win.  This will always give us the most control dependent block
      within that loop nest.  */
   if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb))
     return best_bb;
 
-  /* Get the sinking threshold.  If the statement to be moved has memory
-     operands, then increase the threshold by 7% as those are even more
-     profitable to avoid, clamping at 100%.  */
-  threshold = param_sink_frequency_threshold;
-  if (gimple_vuse (stmt) || gimple_vdef (stmt))
-    {
-      threshold += 7;
-      if (threshold > 100)
-	threshold = 100;
-    }
-
   /* If BEST_BB is at the same nesting level, then require it to have
      significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
@@ -439,7 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
       if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
 	return false;
 
-      commondom = select_best_block (frombb, commondom, stmt);
+      commondom = select_best_block (frombb, commondom, stmt, NULL);
 
       if (commondom == frombb)
 	return false;	
@@ -456,19 +465,17 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 	    continue;
 	  break;
 	}
+
       use = USE_STMT (one_use);
 
       if (gimple_code (use) != GIMPLE_PHI)
 	{
-	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
+	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt, use);
 
 	  if (sinkbb == frombb)
 	    return false;
 
-	  if (sinkbb == gimple_bb (use))
-	    *togsi = gsi_for_stmt (use);
-	  else
-	    *togsi = gsi_after_labels (sinkbb);
+	  *togsi = gsi_after_labels (sinkbb);
 
 	  return true;
 	}
@@ -480,7 +487,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
   if (!sinkbb)
     return false;
   
-  sinkbb = select_best_block (frombb, sinkbb, stmt);
+  sinkbb = select_best_block (frombb, sinkbb, stmt, NULL);
   if (!sinkbb || sinkbb == frombb)
     return false;
 
-- 
2.39.3


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PING^2] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
  2023-08-01  8:17 ` [PING^1] " Ajit Agarwal
@ 2023-08-21  6:44   ` Ajit Agarwal
  2023-09-12  7:46     ` [PING^3] " Ajit Agarwal
  0 siblings, 1 reply; 11+ messages in thread
From: Ajit Agarwal @ 2023-08-21  6:44 UTC (permalink / raw)
  To: gcc-patches
  Cc: Richard Biener, Jeff Law, Segher Boessenkool, Peter Bergner,
	Rashmi.Sridhar

Ping!


-------- Forwarded Message --------
Subject: [PING^1] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 1 Aug 2023 13:47:10 +0530
From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
CC: Richard Biener <richard.guenther@gmail.com>, Jeff Law <jeffreyalaw@gmail.com>, Peter Bergner <bergner@linux.ibm.com>, Segher Boessenkool <segher@kernel.crashing.org>, Rashmi.Sridhar@ibm.com

Ping! 


-------- Forwarded Message --------
Subject: [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 18 Jul 2023 19:03:37 +0530
From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
CC: Richard Biener <richard.guenther@gmail.com>, Jeff Law <jeffreyalaw@gmail.com>, Segher Boessenkool <segher@kernel.crashing.org>, Peter Bergner <bergner@linux.ibm.com>

Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
    {
      bar();
      j = l;
    }
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
    {
      l = a + b + c + d +e + f;     
      bar();
      j = l;
    }
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	PR tree-optimization/81953
	* tree-ssa-sink.cc (statement_sink_location): Move statements before
	calls.
	(def_use_same_block): New function.
	(select_best_block): Add heuristics to select the best blocks in the
	immediate post dominator.

gcc/testsuite/ChangeLog:

	PR tree-optimization/81953
	* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
	* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++++++
 gcc/tree-ssa-sink.cc                        | 59 ++++++++++++---------
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 00000000000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 00000000000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      if (b != 3)
+        x = 3;
+      else
+        x = 5;
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
    tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
    We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
 		   basic_block late_bb,
-		   gimple *stmt)
+		   gimple *stmt,
+		   gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+     operands, then increase the threshold by 7% as those are even more
+     profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+    {
+      threshold += 7;
+      if (threshold > 100)
+	threshold = 100;
+    }
 
   while (temp_bb != early_bb)
     {
@@ -203,34 +215,31 @@ select_best_block (basic_block early_bb,
       if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
 	best_bb = temp_bb;
 
+      /* Placing a statement before a setjmp-like function would be invalid
+	 (it cannot be reevaluated when execution follows an abnormal edge).
+	 If we selected a block with abnormal predecessors, just punt.  */
+      if (bb_has_abnormal_pred (temp_bb))
+	return early_bb;
+
+      /* if we have temp_bb post dominated by use block block then immediate
+       * dominator would be our best block.  */
+      if (use
+	  && bb_loop_depth(temp_bb) == bb_loop_depth (early_bb)
+	  && !(temp_bb->count * 100 >= early_bb->count * threshold)
+	  && dominated_by_p (CDI_POST_DOMINATORS, temp_bb, gimple_bb (use)))
+	best_bb = temp_bb;
+
       /* Walk up the dominator tree, hopefully we'll find a shallower
  	 loop nest.  */
       temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
     }
 
-  /* Placing a statement before a setjmp-like function would be invalid
-     (it cannot be reevaluated when execution follows an abnormal edge).
-     If we selected a block with abnormal predecessors, just punt.  */
-  if (bb_has_abnormal_pred (best_bb))
-    return early_bb;
-
   /* If we found a shallower loop nest, then we always consider that
      a win.  This will always give us the most control dependent block
      within that loop nest.  */
   if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb))
     return best_bb;
 
-  /* Get the sinking threshold.  If the statement to be moved has memory
-     operands, then increase the threshold by 7% as those are even more
-     profitable to avoid, clamping at 100%.  */
-  threshold = param_sink_frequency_threshold;
-  if (gimple_vuse (stmt) || gimple_vdef (stmt))
-    {
-      threshold += 7;
-      if (threshold > 100)
-	threshold = 100;
-    }
-
   /* If BEST_BB is at the same nesting level, then require it to have
      significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
@@ -439,7 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
       if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
 	return false;
 
-      commondom = select_best_block (frombb, commondom, stmt);
+      commondom = select_best_block (frombb, commondom, stmt, NULL);
 
       if (commondom == frombb)
 	return false;	
@@ -456,19 +465,17 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 	    continue;
 	  break;
 	}
+
       use = USE_STMT (one_use);
 
       if (gimple_code (use) != GIMPLE_PHI)
 	{
-	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
+	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt, use);
 
 	  if (sinkbb == frombb)
 	    return false;
 
-	  if (sinkbb == gimple_bb (use))
-	    *togsi = gsi_for_stmt (use);
-	  else
-	    *togsi = gsi_after_labels (sinkbb);
+	  *togsi = gsi_after_labels (sinkbb);
 
 	  return true;
 	}
@@ -480,7 +487,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
   if (!sinkbb)
     return false;
   
-  sinkbb = select_best_block (frombb, sinkbb, stmt);
+  sinkbb = select_best_block (frombb, sinkbb, stmt, NULL);
   if (!sinkbb || sinkbb == frombb)
     return false;
 
-- 
2.39.3


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PING^3] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
  2023-08-21  6:44   ` [PING^2] " Ajit Agarwal
@ 2023-09-12  7:46     ` Ajit Agarwal
  0 siblings, 0 replies; 11+ messages in thread
From: Ajit Agarwal @ 2023-09-12  7:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Jeff Law, Peter Bergner, Segher Boessenkool



Ping!
-------- Forwarded Message --------
Subject: [PING^2] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Mon, 21 Aug 2023 12:14:03 +0530
From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
CC: Richard Biener <richard.guenther@gmail.com>, Jeff Law <jeffreyalaw@gmail.com>, Segher Boessenkool <segher@kernel.crashing.org>, Peter Bergner <bergner@linux.ibm.com>, Rashmi.Sridhar@ibm.com

Ping!


-------- Forwarded Message --------
Subject: [PING^1] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 1 Aug 2023 13:47:10 +0530
From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
CC: Richard Biener <richard.guenther@gmail.com>, Jeff Law <jeffreyalaw@gmail.com>, Peter Bergner <bergner@linux.ibm.com>, Segher Boessenkool <segher@kernel.crashing.org>, Rashmi.Sridhar@ibm.com

Ping! 


-------- Forwarded Message --------
Subject: [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 18 Jul 2023 19:03:37 +0530
From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
CC: Richard Biener <richard.guenther@gmail.com>, Jeff Law <jeffreyalaw@gmail.com>, Segher Boessenkool <segher@kernel.crashing.org>, Peter Bergner <bergner@linux.ibm.com>

Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
    {
      bar();
      j = l;
    }
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
    {
      l = a + b + c + d +e + f;     
      bar();
      j = l;
    }
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	PR tree-optimization/81953
	* tree-ssa-sink.cc (statement_sink_location): Move statements before
	calls.
	(def_use_same_block): New function.
	(select_best_block): Add heuristics to select the best blocks in the
	immediate post dominator.

gcc/testsuite/ChangeLog:

	PR tree-optimization/81953
	* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
	* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++++++
 gcc/tree-ssa-sink.cc                        | 59 ++++++++++++---------
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 00000000000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 00000000000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      if (b != 3)
+        x = 3;
+      else
+        x = 5;
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
    tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
    We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
 		   basic_block late_bb,
-		   gimple *stmt)
+		   gimple *stmt,
+		   gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+     operands, then increase the threshold by 7% as those are even more
+     profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+    {
+      threshold += 7;
+      if (threshold > 100)
+	threshold = 100;
+    }
 
   while (temp_bb != early_bb)
     {
@@ -203,34 +215,31 @@ select_best_block (basic_block early_bb,
       if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
 	best_bb = temp_bb;
 
+      /* Placing a statement before a setjmp-like function would be invalid
+	 (it cannot be reevaluated when execution follows an abnormal edge).
+	 If we selected a block with abnormal predecessors, just punt.  */
+      if (bb_has_abnormal_pred (temp_bb))
+	return early_bb;
+
+      /* if we have temp_bb post dominated by use block block then immediate
+       * dominator would be our best block.  */
+      if (use
+	  && bb_loop_depth(temp_bb) == bb_loop_depth (early_bb)
+	  && !(temp_bb->count * 100 >= early_bb->count * threshold)
+	  && dominated_by_p (CDI_POST_DOMINATORS, temp_bb, gimple_bb (use)))
+	best_bb = temp_bb;
+
       /* Walk up the dominator tree, hopefully we'll find a shallower
  	 loop nest.  */
       temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
     }
 
-  /* Placing a statement before a setjmp-like function would be invalid
-     (it cannot be reevaluated when execution follows an abnormal edge).
-     If we selected a block with abnormal predecessors, just punt.  */
-  if (bb_has_abnormal_pred (best_bb))
-    return early_bb;
-
   /* If we found a shallower loop nest, then we always consider that
      a win.  This will always give us the most control dependent block
      within that loop nest.  */
   if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb))
     return best_bb;
 
-  /* Get the sinking threshold.  If the statement to be moved has memory
-     operands, then increase the threshold by 7% as those are even more
-     profitable to avoid, clamping at 100%.  */
-  threshold = param_sink_frequency_threshold;
-  if (gimple_vuse (stmt) || gimple_vdef (stmt))
-    {
-      threshold += 7;
-      if (threshold > 100)
-	threshold = 100;
-    }
-
   /* If BEST_BB is at the same nesting level, then require it to have
      significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
@@ -439,7 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
       if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb))
 	return false;
 
-      commondom = select_best_block (frombb, commondom, stmt);
+      commondom = select_best_block (frombb, commondom, stmt, NULL);
 
       if (commondom == frombb)
 	return false;	
@@ -456,19 +465,17 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 	    continue;
 	  break;
 	}
+
       use = USE_STMT (one_use);
 
       if (gimple_code (use) != GIMPLE_PHI)
 	{
-	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
+	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt, use);
 
 	  if (sinkbb == frombb)
 	    return false;
 
-	  if (sinkbb == gimple_bb (use))
-	    *togsi = gsi_for_stmt (use);
-	  else
-	    *togsi = gsi_after_labels (sinkbb);
+	  *togsi = gsi_after_labels (sinkbb);
 
 	  return true;
 	}
@@ -480,7 +487,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
   if (!sinkbb)
     return false;
   
-  sinkbb = select_best_block (frombb, sinkbb, stmt);
+  sinkbb = select_best_block (frombb, sinkbb, stmt, NULL);
   if (!sinkbb || sinkbb == frombb)
     return false;
 
-- 
2.39.3


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8] tree-ssa-sink: Improve code sinking pass
  2023-10-30 12:21       ` Ajit Agarwal
@ 2023-10-30 13:09         ` Ajit Agarwal
  0 siblings, 0 replies; 11+ messages in thread
From: Ajit Agarwal @ 2023-10-30 13:09 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Jeff Law, Segher Boessenkool, Peter Bergner



On 30/10/23 5:51 pm, Ajit Agarwal wrote:
> Hello Richard:
> 
> On 17/10/23 2:47 pm, Richard Biener wrote:
>> On Tue, Oct 17, 2023 at 10:53 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>
>>> Hello Richard:
>>>
>>> On 17/10/23 2:03 pm, Richard Biener wrote:
>>>> On Thu, Oct 12, 2023 at 10:42 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>>
>>>>> This patch improves code sinking pass to sink statements before call to reduce
>>>>> register pressure.
>>>>> Review comments are incorporated. Synced and modified with latest trunk sources.
>>>>>
>>>>> For example :
>>>>>
>>>>> void bar();
>>>>> int j;
>>>>> void foo(int a, int b, int c, int d, int e, int f)
>>>>> {
>>>>>   int l;
>>>>>   l = a + b + c + d +e + f;
>>>>>   if (a != 5)
>>>>>     {
>>>>>       bar();
>>>>>       j = l;
>>>>>     }
>>>>> }
>>>>>
>>>>> Code Sinking does the following:
>>>>>
>>>>> void bar();
>>>>> int j;
>>>>> void foo(int a, int b, int c, int d, int e, int f)
>>>>> {
>>>>>   int l;
>>>>>
>>>>>   if (a != 5)
>>>>>     {
>>>>>       l = a + b + c + d +e + f;
>>>>>       bar();
>>>>>       j = l;
>>>>>     }
>>>>> }
>>>>>
>>>>> Bootstrapped regtested on powerpc64-linux-gnu.
>>>>>
>>>>> Thanks & Regards
>>>>> Ajit
>>>>>
>>>>> tree-ssa-sink: Improve code sinking pass
>>>>>
>>>>> Currently, code sinking will sink code after function calls.  This increases
>>>>> register pressure for callee-saved registers.  The following patch improves
>>>>> code sinking by placing the sunk code before calls in the use block or in
>>>>> the immediate dominator of the use blocks.
>>>>
>>>> The patch no longer does what the description above says.
>>> Why you think so. Please let me know.
>>
>> You talk about calls above but the patch doesn't do anything about calls.  You
>> also don't do anything about register pressure, rather the effect of
>> your changes
>> are to move some stmts by a smaller "distance", whatever effect that has.
>>
>>>>
> 
> I have incorporated the changes in version 11 of the patch.
>>>> More comments below.
>>>>
>>>>> 2023-10-12  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>>>>
>>>>> gcc/ChangeLog:
>>>>>
>>>>>         PR tree-optimization/81953
>>>>>         * tree-ssa-sink.cc (statement_sink_location): Move statements before
>>>>>         calls.
>>>>>         (select_best_block): Add heuristics to select the best blocks in the
>>>>>         immediate post dominator.
>>>>>
>>>>> gcc/testsuite/ChangeLog:
>>>>>
>>>>>         PR tree-optimization/81953
>>>>>         * gcc.dg/tree-ssa/ssa-sink-20.c: New test.
>>>>>         * gcc.dg/tree-ssa/ssa-sink-21.c: New test.
>>>>> ---
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 ++++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c | 19 ++++++++++
>>>>>  gcc/tree-ssa-sink.cc                        | 39 ++++++++++++---------
>>>>>  3 files changed, 56 insertions(+), 17 deletions(-)
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>>>
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>>> new file mode 100644
>>>>> index 00000000000..d3b79ca5803
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>>> @@ -0,0 +1,15 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>>>>> +void bar();
>>>>> +int j;
>>>>> +void foo(int a, int b, int c, int d, int e, int f)
>>>>> +{
>>>>> +  int l;
>>>>> +  l = a + b + c + d +e + f;
>>>>> +  if (a != 5)
>>>>> +    {
>>>>> +      bar();
>>>>> +      j = l;
>>>>> +    }
>>>>> +}
>>>>> +/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>>> new file mode 100644
>>>>> index 00000000000..84e7938c54f
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>>> @@ -0,0 +1,19 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>>>>> +void bar();
>>>>> +int j, x;
>>>>> +void foo(int a, int b, int c, int d, int e, int f)
>>>>> +{
>>>>> +  int l;
>>>>> +  l = a + b + c + d +e + f;
>>>>> +  if (a != 5)
>>>>> +    {
>>>>> +      bar();
>>>>> +      if (b != 3)
>>>>> +        x = 3;
>>>>> +      else
>>>>> +        x = 5;
>>>>> +      j = l;
>>>>> +    }
>>>>> +}
>>>>> +/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
>>>>> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
>>>>> index a360c5cdd6e..95298bc8402 100644
>>>>> --- a/gcc/tree-ssa-sink.cc
>>>>> +++ b/gcc/tree-ssa-sink.cc
>>>>> @@ -174,7 +174,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
>>>>>
>>>>>  /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
>>>>>     tree, return the best basic block between them (inclusive) to place
>>>>> -   statements.
>>>>> +   statements. The best basic block should be an immediate dominator of
>>>>> +   best basic block if the use stmt is after the call.
>>>>>
>>>>>     We want the most control dependent block in the shallowest loop nest.
>>>>>
>>>>> @@ -196,6 +197,16 @@ select_best_block (basic_block early_bb,
>>>>>    basic_block best_bb = late_bb;
>>>>>    basic_block temp_bb = late_bb;
>>>>>    int threshold;
>>>>> +  /* Get the sinking threshold.  If the statement to be moved has memory
>>>>> +     operands, then increase the threshold by 7% as those are even more
>>>>> +     profitable to avoid, clamping at 100%.  */
>>>>> +  threshold = param_sink_frequency_threshold;
>>>>> +  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>>>>> +    {
>>>>> +      threshold += 7;
>>>>> +      if (threshold > 100)
>>>>> +       threshold = 100;
>>>>> +    }
>>>>>
>>>>>    while (temp_bb != early_bb)
>>>>>      {
>>>>> @@ -204,6 +215,14 @@ select_best_block (basic_block early_bb,
>>>>>        if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
>>>>>         best_bb = temp_bb;
>>>>>
>>>>> +      /* if we have temp_bb post dominated by use block block then immediate
>>>>> +       * dominator would be our best block.  */
>>>>> +      if (!gimple_vuse (stmt)
>>>>> +         && bb_loop_depth (temp_bb) == bb_loop_depth (early_bb)
>>>>> +         && !(temp_bb->count * 100 >= early_bb->count * threshold)
>>>>> +         && dominated_by_p (CDI_DOMINATORS, late_bb, temp_bb))
>>>>
>>>> this isn't a post-dominance check, in fact this always returns true.  This
>>>> also overrides the best found loop depth which probably means finding
>>>> both inside the same loop doesn't work.
>>>
>>> I can remove dominated check. You would like me to do in different loop than doing inside the same
>>> loop. Please let me know.
>>>
>>>
>>>> What's the intent of the change?
>>>
>>> The purpose of this change is to assign best_bb the immediate dominator if both early_bb and temp_bb have same loop depth.
>>
>> So why is the change then not simply
>>
>> -      if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
>> +     if (bb_loop_depth (temp_bb) <= bb_loop_depth (best_bb))
>>         best_bb = temp_bb;
> 
> I have made changes in version 10 of the patch with additional check of avoiding
> memory operand statements to move immediate dominator. I have not heard from you.
> I was wondering what wrong with my changes.
> 
> I have made changes in the version 11 of the patch as you have suggested above with later avoid sinking to immediate
> dominator with memory operands.
> 
> Please let me know if this is okay for trunk.

For gimple_vuse (stmt) true we do code sinking in nearest common dominator  with same nesting loop depth 
and moving to domoinator  of commondom breaks the code in gcc testsuite. Thats why I have made additional 
checks of gimple_vuse (stmt) to place in common dominator instead of moving to dominator of commondom.


Thanks & Regards
Ajit
>> ?  Not that I think this is desirable.  We want to sink to the least
>> executed place which
>> doesn't map 1:1 to loop depth but control flow forks.  The heuristic using
>> basic-block counts is prone to profile errors (but otherwise should cover the
>> general idea of the existing code).
>>
> 
> I have been looking at better heuristics on top of basic block count changes in original code, but that 
> will come in later patches after I commit the version 10/11 of the patch.
> 
> Thanks & Regards
> Ajit
>>> Thanks & Regards
>>> Ajit
>>>>
>>>>> +       best_bb = temp_bb;
>>>>> +
>>>>>        /* Walk up the dominator tree, hopefully we'll find a shallower
>>>>>          loop nest.  */
>>>>>        temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
>>>>> @@ -233,17 +252,6 @@ select_best_block (basic_block early_bb,
>>>>>        && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
>>>>>      return early_bb;
>>>>>
>>>>> -  /* Get the sinking threshold.  If the statement to be moved has memory
>>>>> -     operands, then increase the threshold by 7% as those are even more
>>>>> -     profitable to avoid, clamping at 100%.  */
>>>>> -  threshold = param_sink_frequency_threshold;
>>>>> -  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>>>>> -    {
>>>>> -      threshold += 7;
>>>>> -      if (threshold > 100)
>>>>> -       threshold = 100;
>>>>> -    }
>>>>> -
>>>>>    /* If BEST_BB is at the same nesting level, then require it to have
>>>>>       significantly lower execution frequency to avoid gratuitous movement.  */
>>>>>    if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
>>>>> @@ -430,6 +438,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>>>>             continue;
>>>>>           break;
>>>>>         }
>>>>> +
>>>>>        use = USE_STMT (one_use);
>>>>>
>>>>>        if (gimple_code (use) != GIMPLE_PHI)
>>>>> @@ -439,10 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>>>>           if (sinkbb == frombb)
>>>>>             return false;
>>>>>
>>>>> -         if (sinkbb == gimple_bb (use))
>>>>> -           *togsi = gsi_for_stmt (use);
>>>>> -         else
>>>>> -           *togsi = gsi_after_labels (sinkbb);
>>>>> +         *togsi = gsi_after_labels (sinkbb);
>>>>>
>>>>>           return true;
>>>>>         }
>>>>> @@ -825,7 +831,6 @@ pass_sink_code::execute (function *fun)
>>>>>    mark_dfs_back_edges (fun);
>>>>>    memset (&sink_stats, 0, sizeof (sink_stats));
>>>>>    calculate_dominance_info (CDI_DOMINATORS);
>>>>> -
>>>>>    virtual_operand_live vop_live;
>>>>>
>>>>>    int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
>>>>> --
>>>>> 2.39.3
>>>>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8] tree-ssa-sink: Improve code sinking pass
  2023-10-17  9:17     ` Richard Biener
  2023-10-17 13:23       ` Ajit Agarwal
@ 2023-10-30 12:21       ` Ajit Agarwal
  2023-10-30 13:09         ` Ajit Agarwal
  1 sibling, 1 reply; 11+ messages in thread
From: Ajit Agarwal @ 2023-10-30 12:21 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Jeff Law, Segher Boessenkool, Peter Bergner

Hello Richard:

On 17/10/23 2:47 pm, Richard Biener wrote:
> On Tue, Oct 17, 2023 at 10:53 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>> Hello Richard:
>>
>> On 17/10/23 2:03 pm, Richard Biener wrote:
>>> On Thu, Oct 12, 2023 at 10:42 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>
>>>> This patch improves code sinking pass to sink statements before call to reduce
>>>> register pressure.
>>>> Review comments are incorporated. Synced and modified with latest trunk sources.
>>>>
>>>> For example :
>>>>
>>>> void bar();
>>>> int j;
>>>> void foo(int a, int b, int c, int d, int e, int f)
>>>> {
>>>>   int l;
>>>>   l = a + b + c + d +e + f;
>>>>   if (a != 5)
>>>>     {
>>>>       bar();
>>>>       j = l;
>>>>     }
>>>> }
>>>>
>>>> Code Sinking does the following:
>>>>
>>>> void bar();
>>>> int j;
>>>> void foo(int a, int b, int c, int d, int e, int f)
>>>> {
>>>>   int l;
>>>>
>>>>   if (a != 5)
>>>>     {
>>>>       l = a + b + c + d +e + f;
>>>>       bar();
>>>>       j = l;
>>>>     }
>>>> }
>>>>
>>>> Bootstrapped regtested on powerpc64-linux-gnu.
>>>>
>>>> Thanks & Regards
>>>> Ajit
>>>>
>>>> tree-ssa-sink: Improve code sinking pass
>>>>
>>>> Currently, code sinking will sink code after function calls.  This increases
>>>> register pressure for callee-saved registers.  The following patch improves
>>>> code sinking by placing the sunk code before calls in the use block or in
>>>> the immediate dominator of the use blocks.
>>>
>>> The patch no longer does what the description above says.
>> Why you think so. Please let me know.
> 
> You talk about calls above but the patch doesn't do anything about calls.  You
> also don't do anything about register pressure, rather the effect of
> your changes
> are to move some stmts by a smaller "distance", whatever effect that has.
> 
>>>

I have incorporated the changes in version 11 of the patch.
>>> More comments below.
>>>
>>>> 2023-10-12  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>>         PR tree-optimization/81953
>>>>         * tree-ssa-sink.cc (statement_sink_location): Move statements before
>>>>         calls.
>>>>         (select_best_block): Add heuristics to select the best blocks in the
>>>>         immediate post dominator.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>>         PR tree-optimization/81953
>>>>         * gcc.dg/tree-ssa/ssa-sink-20.c: New test.
>>>>         * gcc.dg/tree-ssa/ssa-sink-21.c: New test.
>>>> ---
>>>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 ++++++++
>>>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c | 19 ++++++++++
>>>>  gcc/tree-ssa-sink.cc                        | 39 ++++++++++++---------
>>>>  3 files changed, 56 insertions(+), 17 deletions(-)
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>>
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>> new file mode 100644
>>>> index 00000000000..d3b79ca5803
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>> @@ -0,0 +1,15 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>>>> +void bar();
>>>> +int j;
>>>> +void foo(int a, int b, int c, int d, int e, int f)
>>>> +{
>>>> +  int l;
>>>> +  l = a + b + c + d +e + f;
>>>> +  if (a != 5)
>>>> +    {
>>>> +      bar();
>>>> +      j = l;
>>>> +    }
>>>> +}
>>>> +/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>> new file mode 100644
>>>> index 00000000000..84e7938c54f
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>> @@ -0,0 +1,19 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>>>> +void bar();
>>>> +int j, x;
>>>> +void foo(int a, int b, int c, int d, int e, int f)
>>>> +{
>>>> +  int l;
>>>> +  l = a + b + c + d +e + f;
>>>> +  if (a != 5)
>>>> +    {
>>>> +      bar();
>>>> +      if (b != 3)
>>>> +        x = 3;
>>>> +      else
>>>> +        x = 5;
>>>> +      j = l;
>>>> +    }
>>>> +}
>>>> +/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
>>>> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
>>>> index a360c5cdd6e..95298bc8402 100644
>>>> --- a/gcc/tree-ssa-sink.cc
>>>> +++ b/gcc/tree-ssa-sink.cc
>>>> @@ -174,7 +174,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
>>>>
>>>>  /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
>>>>     tree, return the best basic block between them (inclusive) to place
>>>> -   statements.
>>>> +   statements. The best basic block should be an immediate dominator of
>>>> +   best basic block if the use stmt is after the call.
>>>>
>>>>     We want the most control dependent block in the shallowest loop nest.
>>>>
>>>> @@ -196,6 +197,16 @@ select_best_block (basic_block early_bb,
>>>>    basic_block best_bb = late_bb;
>>>>    basic_block temp_bb = late_bb;
>>>>    int threshold;
>>>> +  /* Get the sinking threshold.  If the statement to be moved has memory
>>>> +     operands, then increase the threshold by 7% as those are even more
>>>> +     profitable to avoid, clamping at 100%.  */
>>>> +  threshold = param_sink_frequency_threshold;
>>>> +  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>>>> +    {
>>>> +      threshold += 7;
>>>> +      if (threshold > 100)
>>>> +       threshold = 100;
>>>> +    }
>>>>
>>>>    while (temp_bb != early_bb)
>>>>      {
>>>> @@ -204,6 +215,14 @@ select_best_block (basic_block early_bb,
>>>>        if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
>>>>         best_bb = temp_bb;
>>>>
>>>> +      /* if we have temp_bb post dominated by use block block then immediate
>>>> +       * dominator would be our best block.  */
>>>> +      if (!gimple_vuse (stmt)
>>>> +         && bb_loop_depth (temp_bb) == bb_loop_depth (early_bb)
>>>> +         && !(temp_bb->count * 100 >= early_bb->count * threshold)
>>>> +         && dominated_by_p (CDI_DOMINATORS, late_bb, temp_bb))
>>>
>>> this isn't a post-dominance check, in fact this always returns true.  This
>>> also overrides the best found loop depth which probably means finding
>>> both inside the same loop doesn't work.
>>
>> I can remove dominated check. You would like me to do in different loop than doing inside the same
>> loop. Please let me know.
>>
>>
>>> What's the intent of the change?
>>
>> The purpose of this change is to assign best_bb the immediate dominator if both early_bb and temp_bb have same loop depth.
> 
> So why is the change then not simply
> 
> -      if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
> +     if (bb_loop_depth (temp_bb) <= bb_loop_depth (best_bb))
>         best_bb = temp_bb;

I have made changes in version 10 of the patch with additional check of avoiding
memory operand statements to move immediate dominator. I have not heard from you.
I was wondering what wrong with my changes.

I have made changes in the version 11 of the patch as you have suggested above with later avoid sinking to immediate
dominator with memory operands.

Please let me know if this is okay for trunk.
> 
> ?  Not that I think this is desirable.  We want to sink to the least
> executed place which
> doesn't map 1:1 to loop depth but control flow forks.  The heuristic using
> basic-block counts is prone to profile errors (but otherwise should cover the
> general idea of the existing code).
> 

I have been looking at better heuristics on top of basic block count changes in original code, but that 
will come in later patches after I commit the version 10/11 of the patch.

Thanks & Regards
Ajit
>> Thanks & Regards
>> Ajit
>>>
>>>> +       best_bb = temp_bb;
>>>> +
>>>>        /* Walk up the dominator tree, hopefully we'll find a shallower
>>>>          loop nest.  */
>>>>        temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
>>>> @@ -233,17 +252,6 @@ select_best_block (basic_block early_bb,
>>>>        && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
>>>>      return early_bb;
>>>>
>>>> -  /* Get the sinking threshold.  If the statement to be moved has memory
>>>> -     operands, then increase the threshold by 7% as those are even more
>>>> -     profitable to avoid, clamping at 100%.  */
>>>> -  threshold = param_sink_frequency_threshold;
>>>> -  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>>>> -    {
>>>> -      threshold += 7;
>>>> -      if (threshold > 100)
>>>> -       threshold = 100;
>>>> -    }
>>>> -
>>>>    /* If BEST_BB is at the same nesting level, then require it to have
>>>>       significantly lower execution frequency to avoid gratuitous movement.  */
>>>>    if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
>>>> @@ -430,6 +438,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>>>             continue;
>>>>           break;
>>>>         }
>>>> +
>>>>        use = USE_STMT (one_use);
>>>>
>>>>        if (gimple_code (use) != GIMPLE_PHI)
>>>> @@ -439,10 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>>>           if (sinkbb == frombb)
>>>>             return false;
>>>>
>>>> -         if (sinkbb == gimple_bb (use))
>>>> -           *togsi = gsi_for_stmt (use);
>>>> -         else
>>>> -           *togsi = gsi_after_labels (sinkbb);
>>>> +         *togsi = gsi_after_labels (sinkbb);
>>>>
>>>>           return true;
>>>>         }
>>>> @@ -825,7 +831,6 @@ pass_sink_code::execute (function *fun)
>>>>    mark_dfs_back_edges (fun);
>>>>    memset (&sink_stats, 0, sizeof (sink_stats));
>>>>    calculate_dominance_info (CDI_DOMINATORS);
>>>> -
>>>>    virtual_operand_live vop_live;
>>>>
>>>>    int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
>>>> --
>>>> 2.39.3
>>>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8] tree-ssa-sink: Improve code sinking pass
  2023-10-17  9:17     ` Richard Biener
@ 2023-10-17 13:23       ` Ajit Agarwal
  2023-10-30 12:21       ` Ajit Agarwal
  1 sibling, 0 replies; 11+ messages in thread
From: Ajit Agarwal @ 2023-10-17 13:23 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Jeff Law, Segher Boessenkool, Peter Bergner

Hello Richard:

Below review comments are incorporated in version 10 of the patch,
Please review and let me know if its okay for trunk.


Thanks & Regards
Ajit

On 17/10/23 2:47 pm, Richard Biener wrote:
> On Tue, Oct 17, 2023 at 10:53 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>> Hello Richard:
>>
>> On 17/10/23 2:03 pm, Richard Biener wrote:
>>> On Thu, Oct 12, 2023 at 10:42 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>>>
>>>> This patch improves code sinking pass to sink statements before call to reduce
>>>> register pressure.
>>>> Review comments are incorporated. Synced and modified with latest trunk sources.
>>>>
>>>> For example :
>>>>
>>>> void bar();
>>>> int j;
>>>> void foo(int a, int b, int c, int d, int e, int f)
>>>> {
>>>>   int l;
>>>>   l = a + b + c + d +e + f;
>>>>   if (a != 5)
>>>>     {
>>>>       bar();
>>>>       j = l;
>>>>     }
>>>> }
>>>>
>>>> Code Sinking does the following:
>>>>
>>>> void bar();
>>>> int j;
>>>> void foo(int a, int b, int c, int d, int e, int f)
>>>> {
>>>>   int l;
>>>>
>>>>   if (a != 5)
>>>>     {
>>>>       l = a + b + c + d +e + f;
>>>>       bar();
>>>>       j = l;
>>>>     }
>>>> }
>>>>
>>>> Bootstrapped regtested on powerpc64-linux-gnu.
>>>>
>>>> Thanks & Regards
>>>> Ajit
>>>>
>>>> tree-ssa-sink: Improve code sinking pass
>>>>
>>>> Currently, code sinking will sink code after function calls.  This increases
>>>> register pressure for callee-saved registers.  The following patch improves
>>>> code sinking by placing the sunk code before calls in the use block or in
>>>> the immediate dominator of the use blocks.
>>>
>>> The patch no longer does what the description above says.
>> Why you think so. Please let me know.
> 
> You talk about calls above but the patch doesn't do anything about calls.  You
> also don't do anything about register pressure, rather the effect of
> your changes
> are to move some stmts by a smaller "distance", whatever effect that has.
> 
>>>
>>> More comments below.
>>>
>>>> 2023-10-12  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>>         PR tree-optimization/81953
>>>>         * tree-ssa-sink.cc (statement_sink_location): Move statements before
>>>>         calls.
>>>>         (select_best_block): Add heuristics to select the best blocks in the
>>>>         immediate post dominator.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>>         PR tree-optimization/81953
>>>>         * gcc.dg/tree-ssa/ssa-sink-20.c: New test.
>>>>         * gcc.dg/tree-ssa/ssa-sink-21.c: New test.
>>>> ---
>>>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 ++++++++
>>>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c | 19 ++++++++++
>>>>  gcc/tree-ssa-sink.cc                        | 39 ++++++++++++---------
>>>>  3 files changed, 56 insertions(+), 17 deletions(-)
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>>
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>> new file mode 100644
>>>> index 00000000000..d3b79ca5803
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>>> @@ -0,0 +1,15 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>>>> +void bar();
>>>> +int j;
>>>> +void foo(int a, int b, int c, int d, int e, int f)
>>>> +{
>>>> +  int l;
>>>> +  l = a + b + c + d +e + f;
>>>> +  if (a != 5)
>>>> +    {
>>>> +      bar();
>>>> +      j = l;
>>>> +    }
>>>> +}
>>>> +/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>> new file mode 100644
>>>> index 00000000000..84e7938c54f
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>>> @@ -0,0 +1,19 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>>>> +void bar();
>>>> +int j, x;
>>>> +void foo(int a, int b, int c, int d, int e, int f)
>>>> +{
>>>> +  int l;
>>>> +  l = a + b + c + d +e + f;
>>>> +  if (a != 5)
>>>> +    {
>>>> +      bar();
>>>> +      if (b != 3)
>>>> +        x = 3;
>>>> +      else
>>>> +        x = 5;
>>>> +      j = l;
>>>> +    }
>>>> +}
>>>> +/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
>>>> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
>>>> index a360c5cdd6e..95298bc8402 100644
>>>> --- a/gcc/tree-ssa-sink.cc
>>>> +++ b/gcc/tree-ssa-sink.cc
>>>> @@ -174,7 +174,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
>>>>
>>>>  /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
>>>>     tree, return the best basic block between them (inclusive) to place
>>>> -   statements.
>>>> +   statements. The best basic block should be an immediate dominator of
>>>> +   best basic block if the use stmt is after the call.
>>>>
>>>>     We want the most control dependent block in the shallowest loop nest.
>>>>
>>>> @@ -196,6 +197,16 @@ select_best_block (basic_block early_bb,
>>>>    basic_block best_bb = late_bb;
>>>>    basic_block temp_bb = late_bb;
>>>>    int threshold;
>>>> +  /* Get the sinking threshold.  If the statement to be moved has memory
>>>> +     operands, then increase the threshold by 7% as those are even more
>>>> +     profitable to avoid, clamping at 100%.  */
>>>> +  threshold = param_sink_frequency_threshold;
>>>> +  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>>>> +    {
>>>> +      threshold += 7;
>>>> +      if (threshold > 100)
>>>> +       threshold = 100;
>>>> +    }
>>>>
>>>>    while (temp_bb != early_bb)
>>>>      {
>>>> @@ -204,6 +215,14 @@ select_best_block (basic_block early_bb,
>>>>        if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
>>>>         best_bb = temp_bb;
>>>>
>>>> +      /* if we have temp_bb post dominated by use block block then immediate
>>>> +       * dominator would be our best block.  */
>>>> +      if (!gimple_vuse (stmt)
>>>> +         && bb_loop_depth (temp_bb) == bb_loop_depth (early_bb)
>>>> +         && !(temp_bb->count * 100 >= early_bb->count * threshold)
>>>> +         && dominated_by_p (CDI_DOMINATORS, late_bb, temp_bb))
>>>
>>> this isn't a post-dominance check, in fact this always returns true.  This
>>> also overrides the best found loop depth which probably means finding
>>> both inside the same loop doesn't work.
>>
>> I can remove dominated check. You would like me to do in different loop than doing inside the same
>> loop. Please let me know.
>>
>>
>>> What's the intent of the change?
>>
>> The purpose of this change is to assign best_bb the immediate dominator if both early_bb and temp_bb have same loop depth.
> 
> So why is the change then not simply
> 
> -      if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
> +     if (bb_loop_depth (temp_bb) <= bb_loop_depth (best_bb))
>         best_bb = temp_bb;
> 
> ?  Not that I think this is desirable.  We want to sink to the least
> executed place which
> doesn't map 1:1 to loop depth but control flow forks.  The heuristic using
> basic-block counts is prone to profile errors (but otherwise should cover the
> general idea of the existing code).
> 
>> Thanks & Regards
>> Ajit
>>>
>>>> +       best_bb = temp_bb;
>>>> +
>>>>        /* Walk up the dominator tree, hopefully we'll find a shallower
>>>>          loop nest.  */
>>>>        temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
>>>> @@ -233,17 +252,6 @@ select_best_block (basic_block early_bb,
>>>>        && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
>>>>      return early_bb;
>>>>
>>>> -  /* Get the sinking threshold.  If the statement to be moved has memory
>>>> -     operands, then increase the threshold by 7% as those are even more
>>>> -     profitable to avoid, clamping at 100%.  */
>>>> -  threshold = param_sink_frequency_threshold;
>>>> -  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>>>> -    {
>>>> -      threshold += 7;
>>>> -      if (threshold > 100)
>>>> -       threshold = 100;
>>>> -    }
>>>> -
>>>>    /* If BEST_BB is at the same nesting level, then require it to have
>>>>       significantly lower execution frequency to avoid gratuitous movement.  */
>>>>    if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
>>>> @@ -430,6 +438,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>>>             continue;
>>>>           break;
>>>>         }
>>>> +
>>>>        use = USE_STMT (one_use);
>>>>
>>>>        if (gimple_code (use) != GIMPLE_PHI)
>>>> @@ -439,10 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>>>           if (sinkbb == frombb)
>>>>             return false;
>>>>
>>>> -         if (sinkbb == gimple_bb (use))
>>>> -           *togsi = gsi_for_stmt (use);
>>>> -         else
>>>> -           *togsi = gsi_after_labels (sinkbb);
>>>> +         *togsi = gsi_after_labels (sinkbb);
>>>>
>>>>           return true;
>>>>         }
>>>> @@ -825,7 +831,6 @@ pass_sink_code::execute (function *fun)
>>>>    mark_dfs_back_edges (fun);
>>>>    memset (&sink_stats, 0, sizeof (sink_stats));
>>>>    calculate_dominance_info (CDI_DOMINATORS);
>>>> -
>>>>    virtual_operand_live vop_live;
>>>>
>>>>    int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
>>>> --
>>>> 2.39.3
>>>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8] tree-ssa-sink: Improve code sinking pass
  2023-10-17  8:53   ` Ajit Agarwal
@ 2023-10-17  9:17     ` Richard Biener
  2023-10-17 13:23       ` Ajit Agarwal
  2023-10-30 12:21       ` Ajit Agarwal
  0 siblings, 2 replies; 11+ messages in thread
From: Richard Biener @ 2023-10-17  9:17 UTC (permalink / raw)
  To: Ajit Agarwal; +Cc: gcc-patches, Jeff Law, Segher Boessenkool, Peter Bergner

On Tue, Oct 17, 2023 at 10:53 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
> Hello Richard:
>
> On 17/10/23 2:03 pm, Richard Biener wrote:
> > On Thu, Oct 12, 2023 at 10:42 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
> >>
> >> This patch improves code sinking pass to sink statements before call to reduce
> >> register pressure.
> >> Review comments are incorporated. Synced and modified with latest trunk sources.
> >>
> >> For example :
> >>
> >> void bar();
> >> int j;
> >> void foo(int a, int b, int c, int d, int e, int f)
> >> {
> >>   int l;
> >>   l = a + b + c + d +e + f;
> >>   if (a != 5)
> >>     {
> >>       bar();
> >>       j = l;
> >>     }
> >> }
> >>
> >> Code Sinking does the following:
> >>
> >> void bar();
> >> int j;
> >> void foo(int a, int b, int c, int d, int e, int f)
> >> {
> >>   int l;
> >>
> >>   if (a != 5)
> >>     {
> >>       l = a + b + c + d +e + f;
> >>       bar();
> >>       j = l;
> >>     }
> >> }
> >>
> >> Bootstrapped regtested on powerpc64-linux-gnu.
> >>
> >> Thanks & Regards
> >> Ajit
> >>
> >> tree-ssa-sink: Improve code sinking pass
> >>
> >> Currently, code sinking will sink code after function calls.  This increases
> >> register pressure for callee-saved registers.  The following patch improves
> >> code sinking by placing the sunk code before calls in the use block or in
> >> the immediate dominator of the use blocks.
> >
> > The patch no longer does what the description above says.
> Why you think so. Please let me know.

You talk about calls above but the patch doesn't do anything about calls.  You
also don't do anything about register pressure, rather the effect of
your changes
are to move some stmts by a smaller "distance", whatever effect that has.

> >
> > More comments below.
> >
> >> 2023-10-12  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
> >>
> >> gcc/ChangeLog:
> >>
> >>         PR tree-optimization/81953
> >>         * tree-ssa-sink.cc (statement_sink_location): Move statements before
> >>         calls.
> >>         (select_best_block): Add heuristics to select the best blocks in the
> >>         immediate post dominator.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>         PR tree-optimization/81953
> >>         * gcc.dg/tree-ssa/ssa-sink-20.c: New test.
> >>         * gcc.dg/tree-ssa/ssa-sink-21.c: New test.
> >> ---
> >>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 ++++++++
> >>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c | 19 ++++++++++
> >>  gcc/tree-ssa-sink.cc                        | 39 ++++++++++++---------
> >>  3 files changed, 56 insertions(+), 17 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> >> new file mode 100644
> >> index 00000000000..d3b79ca5803
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> >> @@ -0,0 +1,15 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
> >> +void bar();
> >> +int j;
> >> +void foo(int a, int b, int c, int d, int e, int f)
> >> +{
> >> +  int l;
> >> +  l = a + b + c + d +e + f;
> >> +  if (a != 5)
> >> +    {
> >> +      bar();
> >> +      j = l;
> >> +    }
> >> +}
> >> +/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
> >> new file mode 100644
> >> index 00000000000..84e7938c54f
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
> >> @@ -0,0 +1,19 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
> >> +void bar();
> >> +int j, x;
> >> +void foo(int a, int b, int c, int d, int e, int f)
> >> +{
> >> +  int l;
> >> +  l = a + b + c + d +e + f;
> >> +  if (a != 5)
> >> +    {
> >> +      bar();
> >> +      if (b != 3)
> >> +        x = 3;
> >> +      else
> >> +        x = 5;
> >> +      j = l;
> >> +    }
> >> +}
> >> +/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
> >> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
> >> index a360c5cdd6e..95298bc8402 100644
> >> --- a/gcc/tree-ssa-sink.cc
> >> +++ b/gcc/tree-ssa-sink.cc
> >> @@ -174,7 +174,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
> >>
> >>  /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
> >>     tree, return the best basic block between them (inclusive) to place
> >> -   statements.
> >> +   statements. The best basic block should be an immediate dominator of
> >> +   best basic block if the use stmt is after the call.
> >>
> >>     We want the most control dependent block in the shallowest loop nest.
> >>
> >> @@ -196,6 +197,16 @@ select_best_block (basic_block early_bb,
> >>    basic_block best_bb = late_bb;
> >>    basic_block temp_bb = late_bb;
> >>    int threshold;
> >> +  /* Get the sinking threshold.  If the statement to be moved has memory
> >> +     operands, then increase the threshold by 7% as those are even more
> >> +     profitable to avoid, clamping at 100%.  */
> >> +  threshold = param_sink_frequency_threshold;
> >> +  if (gimple_vuse (stmt) || gimple_vdef (stmt))
> >> +    {
> >> +      threshold += 7;
> >> +      if (threshold > 100)
> >> +       threshold = 100;
> >> +    }
> >>
> >>    while (temp_bb != early_bb)
> >>      {
> >> @@ -204,6 +215,14 @@ select_best_block (basic_block early_bb,
> >>        if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
> >>         best_bb = temp_bb;
> >>
> >> +      /* if we have temp_bb post dominated by use block block then immediate
> >> +       * dominator would be our best block.  */
> >> +      if (!gimple_vuse (stmt)
> >> +         && bb_loop_depth (temp_bb) == bb_loop_depth (early_bb)
> >> +         && !(temp_bb->count * 100 >= early_bb->count * threshold)
> >> +         && dominated_by_p (CDI_DOMINATORS, late_bb, temp_bb))
> >
> > this isn't a post-dominance check, in fact this always returns true.  This
> > also overrides the best found loop depth which probably means finding
> > both inside the same loop doesn't work.
>
> I can remove dominated check. You would like me to do in different loop than doing inside the same
> loop. Please let me know.
>
>
> > What's the intent of the change?
>
> The purpose of this change is to assign best_bb the immediate dominator if both early_bb and temp_bb have same loop depth.

So why is the change then not simply

-      if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
+     if (bb_loop_depth (temp_bb) <= bb_loop_depth (best_bb))
        best_bb = temp_bb;

?  Not that I think this is desirable.  We want to sink to the least
executed place which
doesn't map 1:1 to loop depth but control flow forks.  The heuristic using
basic-block counts is prone to profile errors (but otherwise should cover the
general idea of the existing code).

> Thanks & Regards
> Ajit
> >
> >> +       best_bb = temp_bb;
> >> +
> >>        /* Walk up the dominator tree, hopefully we'll find a shallower
> >>          loop nest.  */
> >>        temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
> >> @@ -233,17 +252,6 @@ select_best_block (basic_block early_bb,
> >>        && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
> >>      return early_bb;
> >>
> >> -  /* Get the sinking threshold.  If the statement to be moved has memory
> >> -     operands, then increase the threshold by 7% as those are even more
> >> -     profitable to avoid, clamping at 100%.  */
> >> -  threshold = param_sink_frequency_threshold;
> >> -  if (gimple_vuse (stmt) || gimple_vdef (stmt))
> >> -    {
> >> -      threshold += 7;
> >> -      if (threshold > 100)
> >> -       threshold = 100;
> >> -    }
> >> -
> >>    /* If BEST_BB is at the same nesting level, then require it to have
> >>       significantly lower execution frequency to avoid gratuitous movement.  */
> >>    if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
> >> @@ -430,6 +438,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
> >>             continue;
> >>           break;
> >>         }
> >> +
> >>        use = USE_STMT (one_use);
> >>
> >>        if (gimple_code (use) != GIMPLE_PHI)
> >> @@ -439,10 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
> >>           if (sinkbb == frombb)
> >>             return false;
> >>
> >> -         if (sinkbb == gimple_bb (use))
> >> -           *togsi = gsi_for_stmt (use);
> >> -         else
> >> -           *togsi = gsi_after_labels (sinkbb);
> >> +         *togsi = gsi_after_labels (sinkbb);
> >>
> >>           return true;
> >>         }
> >> @@ -825,7 +831,6 @@ pass_sink_code::execute (function *fun)
> >>    mark_dfs_back_edges (fun);
> >>    memset (&sink_stats, 0, sizeof (sink_stats));
> >>    calculate_dominance_info (CDI_DOMINATORS);
> >> -
> >>    virtual_operand_live vop_live;
> >>
> >>    int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
> >> --
> >> 2.39.3
> >>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8] tree-ssa-sink: Improve code sinking pass
  2023-10-17  8:33 ` Richard Biener
@ 2023-10-17  8:53   ` Ajit Agarwal
  2023-10-17  9:17     ` Richard Biener
  0 siblings, 1 reply; 11+ messages in thread
From: Ajit Agarwal @ 2023-10-17  8:53 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Jeff Law, Segher Boessenkool, Peter Bergner

Hello Richard:

On 17/10/23 2:03 pm, Richard Biener wrote:
> On Thu, Oct 12, 2023 at 10:42 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>> This patch improves code sinking pass to sink statements before call to reduce
>> register pressure.
>> Review comments are incorporated. Synced and modified with latest trunk sources.
>>
>> For example :
>>
>> void bar();
>> int j;
>> void foo(int a, int b, int c, int d, int e, int f)
>> {
>>   int l;
>>   l = a + b + c + d +e + f;
>>   if (a != 5)
>>     {
>>       bar();
>>       j = l;
>>     }
>> }
>>
>> Code Sinking does the following:
>>
>> void bar();
>> int j;
>> void foo(int a, int b, int c, int d, int e, int f)
>> {
>>   int l;
>>
>>   if (a != 5)
>>     {
>>       l = a + b + c + d +e + f;
>>       bar();
>>       j = l;
>>     }
>> }
>>
>> Bootstrapped regtested on powerpc64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>> tree-ssa-sink: Improve code sinking pass
>>
>> Currently, code sinking will sink code after function calls.  This increases
>> register pressure for callee-saved registers.  The following patch improves
>> code sinking by placing the sunk code before calls in the use block or in
>> the immediate dominator of the use blocks.
> 
> The patch no longer does what the description above says.
Why you think so. Please let me know.
> 
> More comments below.
> 
>> 2023-10-12  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>
>> gcc/ChangeLog:
>>
>>         PR tree-optimization/81953
>>         * tree-ssa-sink.cc (statement_sink_location): Move statements before
>>         calls.
>>         (select_best_block): Add heuristics to select the best blocks in the
>>         immediate post dominator.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         PR tree-optimization/81953
>>         * gcc.dg/tree-ssa/ssa-sink-20.c: New test.
>>         * gcc.dg/tree-ssa/ssa-sink-21.c: New test.
>> ---
>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 ++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c | 19 ++++++++++
>>  gcc/tree-ssa-sink.cc                        | 39 ++++++++++++---------
>>  3 files changed, 56 insertions(+), 17 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>> new file mode 100644
>> index 00000000000..d3b79ca5803
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>> +void bar();
>> +int j;
>> +void foo(int a, int b, int c, int d, int e, int f)
>> +{
>> +  int l;
>> +  l = a + b + c + d +e + f;
>> +  if (a != 5)
>> +    {
>> +      bar();
>> +      j = l;
>> +    }
>> +}
>> +/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>> new file mode 100644
>> index 00000000000..84e7938c54f
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>> +void bar();
>> +int j, x;
>> +void foo(int a, int b, int c, int d, int e, int f)
>> +{
>> +  int l;
>> +  l = a + b + c + d +e + f;
>> +  if (a != 5)
>> +    {
>> +      bar();
>> +      if (b != 3)
>> +        x = 3;
>> +      else
>> +        x = 5;
>> +      j = l;
>> +    }
>> +}
>> +/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
>> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
>> index a360c5cdd6e..95298bc8402 100644
>> --- a/gcc/tree-ssa-sink.cc
>> +++ b/gcc/tree-ssa-sink.cc
>> @@ -174,7 +174,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
>>
>>  /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
>>     tree, return the best basic block between them (inclusive) to place
>> -   statements.
>> +   statements. The best basic block should be an immediate dominator of
>> +   best basic block if the use stmt is after the call.
>>
>>     We want the most control dependent block in the shallowest loop nest.
>>
>> @@ -196,6 +197,16 @@ select_best_block (basic_block early_bb,
>>    basic_block best_bb = late_bb;
>>    basic_block temp_bb = late_bb;
>>    int threshold;
>> +  /* Get the sinking threshold.  If the statement to be moved has memory
>> +     operands, then increase the threshold by 7% as those are even more
>> +     profitable to avoid, clamping at 100%.  */
>> +  threshold = param_sink_frequency_threshold;
>> +  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>> +    {
>> +      threshold += 7;
>> +      if (threshold > 100)
>> +       threshold = 100;
>> +    }
>>
>>    while (temp_bb != early_bb)
>>      {
>> @@ -204,6 +215,14 @@ select_best_block (basic_block early_bb,
>>        if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
>>         best_bb = temp_bb;
>>
>> +      /* if we have temp_bb post dominated by use block block then immediate
>> +       * dominator would be our best block.  */
>> +      if (!gimple_vuse (stmt)
>> +         && bb_loop_depth (temp_bb) == bb_loop_depth (early_bb)
>> +         && !(temp_bb->count * 100 >= early_bb->count * threshold)
>> +         && dominated_by_p (CDI_DOMINATORS, late_bb, temp_bb))
> 
> this isn't a post-dominance check, in fact this always returns true.  This
> also overrides the best found loop depth which probably means finding
> both inside the same loop doesn't work.

I can remove dominated check. You would like me to do in different loop than doing inside the same
loop. Please let me know.


> What's the intent of the change?

The purpose of this change is to assign best_bb the immediate dominator if both early_bb and temp_bb have same loop depth.

Thanks & Regards
Ajit
> 
>> +       best_bb = temp_bb;
>> +
>>        /* Walk up the dominator tree, hopefully we'll find a shallower
>>          loop nest.  */
>>        temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
>> @@ -233,17 +252,6 @@ select_best_block (basic_block early_bb,
>>        && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
>>      return early_bb;
>>
>> -  /* Get the sinking threshold.  If the statement to be moved has memory
>> -     operands, then increase the threshold by 7% as those are even more
>> -     profitable to avoid, clamping at 100%.  */
>> -  threshold = param_sink_frequency_threshold;
>> -  if (gimple_vuse (stmt) || gimple_vdef (stmt))
>> -    {
>> -      threshold += 7;
>> -      if (threshold > 100)
>> -       threshold = 100;
>> -    }
>> -
>>    /* If BEST_BB is at the same nesting level, then require it to have
>>       significantly lower execution frequency to avoid gratuitous movement.  */
>>    if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
>> @@ -430,6 +438,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>             continue;
>>           break;
>>         }
>> +
>>        use = USE_STMT (one_use);
>>
>>        if (gimple_code (use) != GIMPLE_PHI)
>> @@ -439,10 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>>           if (sinkbb == frombb)
>>             return false;
>>
>> -         if (sinkbb == gimple_bb (use))
>> -           *togsi = gsi_for_stmt (use);
>> -         else
>> -           *togsi = gsi_after_labels (sinkbb);
>> +         *togsi = gsi_after_labels (sinkbb);
>>
>>           return true;
>>         }
>> @@ -825,7 +831,6 @@ pass_sink_code::execute (function *fun)
>>    mark_dfs_back_edges (fun);
>>    memset (&sink_stats, 0, sizeof (sink_stats));
>>    calculate_dominance_info (CDI_DOMINATORS);
>> -
>>    virtual_operand_live vop_live;
>>
>>    int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
>> --
>> 2.39.3
>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8] tree-ssa-sink: Improve code sinking pass
  2023-10-12  8:42 Ajit Agarwal
@ 2023-10-17  8:33 ` Richard Biener
  2023-10-17  8:53   ` Ajit Agarwal
  0 siblings, 1 reply; 11+ messages in thread
From: Richard Biener @ 2023-10-17  8:33 UTC (permalink / raw)
  To: Ajit Agarwal; +Cc: gcc-patches, Jeff Law, Segher Boessenkool, Peter Bergner

On Thu, Oct 12, 2023 at 10:42 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
> This patch improves code sinking pass to sink statements before call to reduce
> register pressure.
> Review comments are incorporated. Synced and modified with latest trunk sources.
>
> For example :
>
> void bar();
> int j;
> void foo(int a, int b, int c, int d, int e, int f)
> {
>   int l;
>   l = a + b + c + d +e + f;
>   if (a != 5)
>     {
>       bar();
>       j = l;
>     }
> }
>
> Code Sinking does the following:
>
> void bar();
> int j;
> void foo(int a, int b, int c, int d, int e, int f)
> {
>   int l;
>
>   if (a != 5)
>     {
>       l = a + b + c + d +e + f;
>       bar();
>       j = l;
>     }
> }
>
> Bootstrapped regtested on powerpc64-linux-gnu.
>
> Thanks & Regards
> Ajit
>
> tree-ssa-sink: Improve code sinking pass
>
> Currently, code sinking will sink code after function calls.  This increases
> register pressure for callee-saved registers.  The following patch improves
> code sinking by placing the sunk code before calls in the use block or in
> the immediate dominator of the use blocks.

The patch no longer does what the description above says.

More comments below.

> 2023-10-12  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>
> gcc/ChangeLog:
>
>         PR tree-optimization/81953
>         * tree-ssa-sink.cc (statement_sink_location): Move statements before
>         calls.
>         (select_best_block): Add heuristics to select the best blocks in the
>         immediate post dominator.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/81953
>         * gcc.dg/tree-ssa/ssa-sink-20.c: New test.
>         * gcc.dg/tree-ssa/ssa-sink-21.c: New test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c | 19 ++++++++++
>  gcc/tree-ssa-sink.cc                        | 39 ++++++++++++---------
>  3 files changed, 56 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> new file mode 100644
> index 00000000000..d3b79ca5803
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
> +void bar();
> +int j;
> +void foo(int a, int b, int c, int d, int e, int f)
> +{
> +  int l;
> +  l = a + b + c + d +e + f;
> +  if (a != 5)
> +    {
> +      bar();
> +      j = l;
> +    }
> +}
> +/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
> new file mode 100644
> index 00000000000..84e7938c54f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
> +void bar();
> +int j, x;
> +void foo(int a, int b, int c, int d, int e, int f)
> +{
> +  int l;
> +  l = a + b + c + d +e + f;
> +  if (a != 5)
> +    {
> +      bar();
> +      if (b != 3)
> +        x = 3;
> +      else
> +        x = 5;
> +      j = l;
> +    }
> +}
> +/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
> index a360c5cdd6e..95298bc8402 100644
> --- a/gcc/tree-ssa-sink.cc
> +++ b/gcc/tree-ssa-sink.cc
> @@ -174,7 +174,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
>
>  /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
>     tree, return the best basic block between them (inclusive) to place
> -   statements.
> +   statements. The best basic block should be an immediate dominator of
> +   best basic block if the use stmt is after the call.
>
>     We want the most control dependent block in the shallowest loop nest.
>
> @@ -196,6 +197,16 @@ select_best_block (basic_block early_bb,
>    basic_block best_bb = late_bb;
>    basic_block temp_bb = late_bb;
>    int threshold;
> +  /* Get the sinking threshold.  If the statement to be moved has memory
> +     operands, then increase the threshold by 7% as those are even more
> +     profitable to avoid, clamping at 100%.  */
> +  threshold = param_sink_frequency_threshold;
> +  if (gimple_vuse (stmt) || gimple_vdef (stmt))
> +    {
> +      threshold += 7;
> +      if (threshold > 100)
> +       threshold = 100;
> +    }
>
>    while (temp_bb != early_bb)
>      {
> @@ -204,6 +215,14 @@ select_best_block (basic_block early_bb,
>        if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
>         best_bb = temp_bb;
>
> +      /* if we have temp_bb post dominated by use block block then immediate
> +       * dominator would be our best block.  */
> +      if (!gimple_vuse (stmt)
> +         && bb_loop_depth (temp_bb) == bb_loop_depth (early_bb)
> +         && !(temp_bb->count * 100 >= early_bb->count * threshold)
> +         && dominated_by_p (CDI_DOMINATORS, late_bb, temp_bb))

this isn't a post-dominance check, in fact this always returns true.  This
also overrides the best found loop depth which probably means finding
both inside the same loop doesn't work.

What's the intent of the change?

> +       best_bb = temp_bb;
> +
>        /* Walk up the dominator tree, hopefully we'll find a shallower
>          loop nest.  */
>        temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
> @@ -233,17 +252,6 @@ select_best_block (basic_block early_bb,
>        && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
>      return early_bb;
>
> -  /* Get the sinking threshold.  If the statement to be moved has memory
> -     operands, then increase the threshold by 7% as those are even more
> -     profitable to avoid, clamping at 100%.  */
> -  threshold = param_sink_frequency_threshold;
> -  if (gimple_vuse (stmt) || gimple_vdef (stmt))
> -    {
> -      threshold += 7;
> -      if (threshold > 100)
> -       threshold = 100;
> -    }
> -
>    /* If BEST_BB is at the same nesting level, then require it to have
>       significantly lower execution frequency to avoid gratuitous movement.  */
>    if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
> @@ -430,6 +438,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>             continue;
>           break;
>         }
> +
>        use = USE_STMT (one_use);
>
>        if (gimple_code (use) != GIMPLE_PHI)
> @@ -439,10 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>           if (sinkbb == frombb)
>             return false;
>
> -         if (sinkbb == gimple_bb (use))
> -           *togsi = gsi_for_stmt (use);
> -         else
> -           *togsi = gsi_after_labels (sinkbb);
> +         *togsi = gsi_after_labels (sinkbb);
>
>           return true;
>         }
> @@ -825,7 +831,6 @@ pass_sink_code::execute (function *fun)
>    mark_dfs_back_edges (fun);
>    memset (&sink_stats, 0, sizeof (sink_stats));
>    calculate_dominance_info (CDI_DOMINATORS);
> -
>    virtual_operand_live vop_live;
>
>    int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
> --
> 2.39.3
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v8] tree-ssa-sink: Improve code sinking pass
@ 2023-10-12  8:42 Ajit Agarwal
  2023-10-17  8:33 ` Richard Biener
  0 siblings, 1 reply; 11+ messages in thread
From: Ajit Agarwal @ 2023-10-12  8:42 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Jeff Law, Segher Boessenkool, Peter Bergner

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated. Synced and modified with latest trunk sources.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
    {
      bar();
      j = l;
    }
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;

  if (a != 5)
    {
      l = a + b + c + d +e + f;
      bar();
      j = l;
    }
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-10-12  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	PR tree-optimization/81953
	* tree-ssa-sink.cc (statement_sink_location): Move statements before
	calls.
	(select_best_block): Add heuristics to select the best blocks in the
	immediate post dominator.

gcc/testsuite/ChangeLog:

	PR tree-optimization/81953
	* gcc.dg/tree-ssa/ssa-sink-20.c: New test.
	* gcc.dg/tree-ssa/ssa-sink-21.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 15 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c | 19 ++++++++++
 gcc/tree-ssa-sink.cc                        | 39 ++++++++++++---------
 3 files changed, 56 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 00000000000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
new file mode 100644
index 00000000000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-22.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+    {
+      bar();
+      if (b != 3)
+        x = 3;
+      else
+        x = 5;
+      j = l;
+    }
+}
+/* { dg-final { scan-tree-dump {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index a360c5cdd6e..95298bc8402 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -174,7 +174,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool *debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
    tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
    We want the most control dependent block in the shallowest loop nest.
 
@@ -196,6 +197,16 @@ select_best_block (basic_block early_bb,
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+     operands, then increase the threshold by 7% as those are even more
+     profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+    {
+      threshold += 7;
+      if (threshold > 100)
+	threshold = 100;
+    }
 
   while (temp_bb != early_bb)
     {
@@ -204,6 +215,14 @@ select_best_block (basic_block early_bb,
       if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
 	best_bb = temp_bb;
 
+      /* if we have temp_bb post dominated by use block block then immediate
+       * dominator would be our best block.  */
+      if (!gimple_vuse (stmt)
+	  && bb_loop_depth (temp_bb) == bb_loop_depth (early_bb)
+	  && !(temp_bb->count * 100 >= early_bb->count * threshold)
+	  && dominated_by_p (CDI_DOMINATORS, late_bb, temp_bb))
+	best_bb = temp_bb;
+
       /* Walk up the dominator tree, hopefully we'll find a shallower
  	 loop nest.  */
       temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb);
@@ -233,17 +252,6 @@ select_best_block (basic_block early_bb,
       && !dominated_by_p (CDI_DOMINATORS, best_bb->loop_father->latch, best_bb))
     return early_bb;
 
-  /* Get the sinking threshold.  If the statement to be moved has memory
-     operands, then increase the threshold by 7% as those are even more
-     profitable to avoid, clamping at 100%.  */
-  threshold = param_sink_frequency_threshold;
-  if (gimple_vuse (stmt) || gimple_vdef (stmt))
-    {
-      threshold += 7;
-      if (threshold > 100)
-	threshold = 100;
-    }
-
   /* If BEST_BB is at the same nesting level, then require it to have
      significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
@@ -430,6 +438,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 	    continue;
 	  break;
 	}
+
       use = USE_STMT (one_use);
 
       if (gimple_code (use) != GIMPLE_PHI)
@@ -439,10 +448,7 @@ statement_sink_location (gimple *stmt, basic_block frombb,
 	  if (sinkbb == frombb)
 	    return false;
 
-	  if (sinkbb == gimple_bb (use))
-	    *togsi = gsi_for_stmt (use);
-	  else
-	    *togsi = gsi_after_labels (sinkbb);
+	  *togsi = gsi_after_labels (sinkbb);
 
 	  return true;
 	}
@@ -825,7 +831,6 @@ pass_sink_code::execute (function *fun)
   mark_dfs_back_edges (fun);
   memset (&sink_stats, 0, sizeof (sink_stats));
   calculate_dominance_info (CDI_DOMINATORS);
-
   virtual_operand_live vop_live;
 
   int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
-- 
2.39.3


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-10-30 13:22 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-18 13:33 [PATCH v8] tree-ssa-sink: Improve code sinking pass Ajit Agarwal
2023-08-01  8:17 ` [PING^1] " Ajit Agarwal
2023-08-21  6:44   ` [PING^2] " Ajit Agarwal
2023-09-12  7:46     ` [PING^3] " Ajit Agarwal
2023-10-12  8:42 Ajit Agarwal
2023-10-17  8:33 ` Richard Biener
2023-10-17  8:53   ` Ajit Agarwal
2023-10-17  9:17     ` Richard Biener
2023-10-17 13:23       ` Ajit Agarwal
2023-10-30 12:21       ` Ajit Agarwal
2023-10-30 13:09         ` Ajit Agarwal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).