public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] A jump threading opportunity for condition branch
@ 2019-05-21 13:45 Jiufu Guo
  2019-05-22 12:38 ` Richard Biener
  2019-05-29 20:12 ` Jeff Law
  0 siblings, 2 replies; 38+ messages in thread
From: Jiufu Guo @ 2019-05-21 13:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: jakub, rguenther, dberlin, segher, wschmidt

Hi,

This patch implements a new opportunity of jump threading for PR77820.
In this optimization, conditional jumps are merged with an unconditional jump,
and then the move of the CMP result to a GPR is eliminated.

It looks like below:

  <P0>
  p0 = a CMP b
  goto <X>;

  <P1>
  p1 = c CMP d
  goto <X>;

  <X>
  # phi = PHI <p0 (P0), p1 (P1)>
  if (phi != 0) goto <Y>; else goto <Z>;

Could be transformed to:

  <P0>
  p0 = a CMP b
  if (p0 != 0) goto <Y>; else goto <Z>;

  <P1>
  p1 = c CMP d
  if (p1 != 0) goto <Y>; else goto <Z>;


This optimization eliminates:
1. saving CMP result: p0 = a CMP b.
2. additional CMP on branch: if (phi != 0).
3. converting the CMP result, if there is a conversion like phi = (INT_CONV) p0.

Bootstrapped and tested on powerpc64le with no regressions (one case is improved)
and new testcases are added. Is this ok for trunk?

Thanks!
Jiufu Guo


[gcc]
2019-05-21  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* tree-ssa-threadedge.c (cmp_from_unconditional_block): New function.
	* tree-ssa-threadedge.c (is_trivial_join_block): New function.
	* tree-ssa-threadedge.c (thread_across_edge): Call is_trivial_join_block.

[gcc/testsuite]
2019-05-21  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.

---
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 32 +++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 27 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 31 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
 gcc/tree-ssa-threadedge.c                        | 91 +++++++++++++++++++++++-
 6 files changed, 219 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
new file mode 100644
index 0000000..ad4890a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, long x)
+{
+  _Bool t;
+  if (x)
+    {
+      g (a + 1);
+      t = a < b;
+      c = d + x;
+    }
+  else
+    {
+      g (b + 1);
+      a = c + d;
+      t = c > d;
+    }
+
+  if (t)
+    {
+      g1 (c);
+    }
+
+  g (a);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
new file mode 100644
index 0000000..ca67d65
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  _Bool t;
+  if (x)
+    {
+      t = c < d;
+    }
+  else
+    {
+      t = a < b;
+    }
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
new file mode 100644
index 0000000..a126e97
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  if (x)
+    {
+      t = a < b;
+    }
+  else if (d == x)
+    {
+      t = c < b;
+    }
+  else
+    {
+      t = d > c;
+    }
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
new file mode 100644
index 0000000..5a50c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  _Bool l1 = 0, l2 = 0;
+  if (x)
+    {
+      g (a);
+      c = a + b;
+      t = a < b;
+      l1 = 1;
+    }
+  else
+    {
+      g1 (b);
+      t = c > d;
+      d = c + b;
+      l2 = 1;
+    }
+
+  if (t)
+    {
+      if (l1 | l2)
+      g1 (c);
+    }
+  else
+    {
+      g (d);
+      g1 (a + b);
+    }
+  g (c + d);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
index e9b4f26..1d7b587 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
@@ -69,4 +69,4 @@ lookharder (string)
     }
 }
 
-/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
+/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index c3ea2d6..23000f6 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -1157,6 +1157,90 @@ thread_through_normal_block (edge e,
   return 0;
 }
 
+/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
+   defined in the incoming basic block. Otherwise return false.  */
+static bool
+cmp_from_unconditional_block (gphi *phi, int index)
+{
+  tree value = gimple_phi_arg_def (phi, index);
+  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
+    return false;
+
+  gimple *def = SSA_NAME_DEF_STMT (value);
+
+  if (!is_gimple_assign (def))
+    return false;
+
+  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
+    {
+      value = gimple_assign_rhs1 (def);
+      if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
+	return false;
+
+      def = SSA_NAME_DEF_STMT (value);
+
+      if (!is_gimple_assign (def))
+	return false;
+    }
+
+  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
+    return false;
+
+  /* Check if phi's incoming value is defined in the incoming basic_block.  */
+  edge e = gimple_phi_arg_edge (phi, index);
+  if (def->bb != e->src)
+    return false;
+
+  if (!single_succ_p (def->bb))
+    return false;
+
+  return true;
+}
+
+/* There are basic blocks look like:
+  <P0>
+  p0 = a CMP b ; or p0 = (INT)( a CMP b)
+  goto <X>;
+
+  <P1>
+  p1 = c CMP d
+  goto <X>;
+
+  <X>
+  # phi = PHI <p0 (P0), p1 (P1)>
+  if (phi != 0) goto <Y>; else goto <Z>;
+
+  Then, <X>: a trivial join block.
+
+ Check if BB is <X> in like above.  */
+
+bool
+is_trivial_join_block (basic_block bb)
+{
+  gimple *gs = last_and_only_stmt (bb);
+  if (gs == NULL)
+    return false;
+
+  if (gimple_code (gs) != GIMPLE_COND)
+    return false;
+
+  tree cond = gimple_cond_lhs (gs);
+
+  if(TREE_CODE (cond) != SSA_NAME)
+    return false;
+
+  if (gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI)
+    return false;
+
+  gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));
+
+  for (unsigned int i = 0; i < phi->nargs; i++)
+    if (!cmp_from_unconditional_block (phi, i))
+      return false;
+
+  return true;
+}
+
 /* We are exiting E->src, see if E->dest ends with a conditional
    jump which has a known value when reached via E.
 
@@ -1317,10 +1401,11 @@ thread_across_edge (gcond *dummy_cond,
 
 	/* If we were able to thread through a successor of E->dest, then
 	   record the jump threading opportunity.  */
-	if (found)
+	if (found || is_trivial_join_block (e->dest))
 	  {
-	    propagate_threaded_block_debug_into (path->last ()->e->dest,
-						 taken_edge->dest);
+	    if (taken_edge->dest != path->last ()->e->dest)
+	      propagate_threaded_block_debug_into (path->last ()->e->dest,
+						   taken_edge->dest);
 	    register_jump_thread (path);
 	  }
 	else
-- 
2.7.4

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-21 13:45 [PATCH] A jump threading opportunity for condition branch Jiufu Guo
@ 2019-05-22 12:38 ` Richard Biener
  2019-05-23 12:06   ` Jiufu Guo
  2019-05-29 20:12 ` Jeff Law
  1 sibling, 1 reply; 38+ messages in thread
From: Richard Biener @ 2019-05-22 12:38 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

On Tue, 21 May 2019, Jiufu Guo wrote:

> Hi,
> 
> This patch implements a new opportunity of jump threading for PR77820.
> In this optimization, conditional jumps are merged with unconditional jump.
> And then moving CMP result to GPR is eliminated.
> 
> It looks like below:
> 
>   <P0>
>   p0 = a CMP b
>   goto <X>;
> 
>   <P1>
>   p1 = c CMP d
>   goto <X>;
> 
>   <X>
>   # phi = PHI <p0 (P0), p1 (P1)>
>   if (phi != 0) goto <Y>; else goto <Z>;
> 
> Could be transformed to:
> 
>   <P0>
>   p0 = a CMP b
>   if (p0 != 0) goto <Y>; else goto <Z>;
> 
>   <P1>
>   p1 = c CMP d
>   if (p1 != 0) goto <Y>; else goto <Z>;
> 
> 
> This optimization eliminates:
> 1. saving CMP result: p0 = a CMP b.
> 2. additional CMP on branch: if (phi != 0).
> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
> 
> Bootstrapped and tested on powerpc64le with no regressions(one case is improved)
> and new testcases are added. Is this ok for trunk?
> 
> Thanks!
> Jiufu Guo
> 
> 
> [gcc]
> 2019-05-21  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
> 
> 	PR tree-optimization/77820
> 	* tree-ssa-threadedge.c (cmp_from_unconditional_block): New function.
> 	* tree-ssa-threadedge.c (is_trivial_join_block): New function.
> 	* tree-ssa-threadedge.c (thread_across_edge): Call is_trivial_join_block.
> 
> [gcc/testsuite]
> 2019-05-21  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
> 
> 	PR tree-optimization/77820
> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
> 
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 32 +++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 27 +++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 31 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>  gcc/tree-ssa-threadedge.c                        | 91 +++++++++++++++++++++++-
>  6 files changed, 219 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> new file mode 100644
> index 0000000..ad4890a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, long x)
> +{
> +  _Bool t;
> +  if (x)
> +    {
> +      g (a + 1);
> +      t = a < b;
> +      c = d + x;
> +    }
> +  else
> +    {
> +      g (b + 1);
> +      a = c + d;
> +      t = c > d;
> +    }
> +
> +  if (t)
> +    {
> +      g1 (c);
> +    }
> +
> +  g (a);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> new file mode 100644
> index 0000000..ca67d65
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  _Bool t;
> +  if (x)
> +    {
> +      t = c < d;
> +    }
> +  else
> +    {
> +      t = a < b;
> +    }
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> new file mode 100644
> index 0000000..a126e97
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  if (x)
> +    {
> +      t = a < b;
> +    }
> +  else if (d == x)
> +    {
> +      t = c < b;
> +    }
> +  else
> +    {
> +      t = d > c;
> +    }
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> new file mode 100644
> index 0000000..5a50c2d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  _Bool l1 = 0, l2 = 0;
> +  if (x)
> +    {
> +      g (a);
> +      c = a + b;
> +      t = a < b;
> +      l1 = 1;
> +    }
> +  else
> +    {
> +      g1 (b);
> +      t = c > d;
> +      d = c + b;
> +      l2 = 1;
> +    }
> +
> +  if (t)
> +    {
> +      if (l1 | l2)
> +      g1 (c);
> +    }
> +  else
> +    {
> +      g (d);
> +      g1 (a + b);
> +    }
> +  g (c + d);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> index e9b4f26..1d7b587 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> @@ -69,4 +69,4 @@ lookharder (string)
>      }
>  }
>  
> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> index c3ea2d6..23000f6 100644
> --- a/gcc/tree-ssa-threadedge.c
> +++ b/gcc/tree-ssa-threadedge.c
> @@ -1157,6 +1157,90 @@ thread_through_normal_block (edge e,
>    return 0;
>  }
>  
> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
> +   defined in the incoming basic block. Otherwise return false.  */
> +static bool
> +cmp_from_unconditional_block (gphi *phi, int index)
> +{
> +  tree value = gimple_phi_arg_def (phi, index);
> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
> +    return false;

Not sure why we should reject a constant here but I guess we
expect it to find a simplified condition anyways ;)

> +
> +  gimple *def = SSA_NAME_DEF_STMT (value);
> +
> +  if (!is_gimple_assign (def))
> +    return false;
> +
> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> +    {
> +      value = gimple_assign_rhs1 (def);
> +      if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
> +	return false;
> +
> +      def = SSA_NAME_DEF_STMT (value);
> +
> +      if (!is_gimple_assign (def))
> +	return false;

too much vertial space.

> +    }
> +
> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> +    return false;
> +
> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
> +  edge e = gimple_phi_arg_edge (phi, index);
> +  if (def->bb != e->src)
> +    return false;

why does this matter?

> +
> +  if (!single_succ_p (def->bb))
> +    return false;

Or this?  The actual threading will ensure this will hold true.

> +  return true;
> +}
> +
> +/* There are basic blocks look like:
> +  <P0>
> +  p0 = a CMP b ; or p0 = (INT)( a CMP b)
> +  goto <X>;
> +
> +  <P1>
> +  p1 = c CMP d
> +  goto <X>;
> +
> +  <X>
> +  # phi = PHI <p0 (P0), p1 (P1)>
> +  if (phi != 0) goto <Y>; else goto <Z>;
> +
> +  Then, <X>: a trivial join block.
> +
> + Check if BB is <X> in like above.  */
> +
> +bool
> +is_trivial_join_block (basic_block bb)

I'd make this work on a specific edge.

edge_forwards_conditional_to_conditional_jump_through_empty_bb_p (edge e)
{
  basic_block b = e->dest;

maybe too elaborate name ;)

> +{
> +  gimple *gs = last_and_only_stmt (bb);
> +  if (gs == NULL)
> +    return false;
> +
> +  if (gimple_code (gs) != GIMPLE_COND)
> +    return false;
> +
> +  tree cond = gimple_cond_lhs (gs);
> +
> +  if(TREE_CODE (cond) != SSA_NAME)
> +    return false;

space after if( too much vertical space in this function
for my taste btw.

For the forwarding to work we want a NE_EXPR or EQ_EXPR
as gimple_cond_code and integer_one_p or integer_zero_p
gimple_cond_rhs.

> +
> +  if (gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI)
> +    return false;
> +
> +  gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));

I think to match your pattern you want to check that
gimple_bb (phi) == bb as well here.

> +  for (unsigned int i = 0; i < phi->nargs; i++)
> +    if (!cmp_from_unconditional_block (phi, i))

Just process the incoming edge argument and inline the
helper.  You can use PHI_ARG_DEF_FROM_EDGE here.

Thanks for integrating this into jump-threading - it does look
like a good fit.

How often does this trigger during bootstrap?

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-22 12:38 ` Richard Biener
@ 2019-05-23 12:06   ` Jiufu Guo
  2019-05-23 12:11     ` Richard Biener
  2019-05-29 20:18     ` Jeff Law
  0 siblings, 2 replies; 38+ messages in thread
From: Jiufu Guo @ 2019-05-23 12:06 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

Hi,

Richard Biener <rguenther@suse.de> writes:

> On Tue, 21 May 2019, Jiufu Guo wrote:
>
>> Hi,
>> 
>> This patch implements a new opportunity of jump threading for PR77820.
>> In this optimization, conditional jumps are merged with unconditional jump.
>> And then moving CMP result to GPR is eliminated.
>> 
>> It looks like below:
>> 
>>   <P0>
>>   p0 = a CMP b
>>   goto <X>;
>> 
>>   <P1>
>>   p1 = c CMP d
>>   goto <X>;
>> 
>>   <X>
>>   # phi = PHI <p0 (P0), p1 (P1)>
>>   if (phi != 0) goto <Y>; else goto <Z>;
>> 
>> Could be transformed to:
>> 
>>   <P0>
>>   p0 = a CMP b
>>   if (p0 != 0) goto <Y>; else goto <Z>;
>> 
>>   <P1>
>>   p1 = c CMP d
>>   if (p1 != 0) goto <Y>; else goto <Z>;
>> 
>> 
>> This optimization eliminates:
>> 1. saving CMP result: p0 = a CMP b.
>> 2. additional CMP on branch: if (phi != 0).
>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>> 
>> Bootstrapped and tested on powerpc64le with no regressions(one case is improved)
>> and new testcases are added. Is this ok for trunk?
>> 
>> Thanks!
>> Jiufu Guo
>> 
...
>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>> index c3ea2d6..23000f6 100644
>> --- a/gcc/tree-ssa-threadedge.c
>> +++ b/gcc/tree-ssa-threadedge.c
>> @@ -1157,6 +1157,90 @@ thread_through_normal_block (edge e,
>>    return 0;
>>  }
>>  
>> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
>> +   defined in the incoming basic block. Otherwise return false.  */
>> +static bool
>> +cmp_from_unconditional_block (gphi *phi, int index)
>> +{
>> +  tree value = gimple_phi_arg_def (phi, index);
>> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>> +    return false;
>
> Not sure why we should reject a constant here but I guess we
> expect it to find a simplified condition anyways ;)
>
A constant could be accepted here, like "# t_9 = PHI <5(3), t_17(4)>".  I
found this case is already handled by other jump-threading code, like the
'ethread' pass.

>> +
>> +  gimple *def = SSA_NAME_DEF_STMT (value);
>> +
>> +  if (!is_gimple_assign (def))
>> +    return false;
>> +
>> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>> +    {
>> +      value = gimple_assign_rhs1 (def);
>> +      if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>> +	return false;
>> +
>> +      def = SSA_NAME_DEF_STMT (value);
>> +
>> +      if (!is_gimple_assign (def))
>> +	return false;
>
> too much vertial space.
>
Thanks, I will refine it. 
>> +    }
>> +
>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>> +    return false;
>> +
>> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
>> +  edge e = gimple_phi_arg_edge (phi, index);
>> +  if (def->bb != e->src)
>> +    return false;
>
> why does this matter?
>
Through preparing paths and duplicating blocks, this transform can also
help to combine a cmp in the previous block and a gcond in the current block.
"if (def->bb != e->src)" makes sure the cmp is defined in the incoming
block of the current one; then combining the cmp with the gcond is safe.  If
the cmp is defined far from the incoming block, it would be hard to
achieve the combining, and the transform may not be needed.

>> +
>> +  if (!single_succ_p (def->bb))
>> +    return false;
>
> Or this?  The actual threading will ensure this will hold true.
>
Yes, other threading code checks this and ensures it is true, like the
function thread_through_normal_block.  Since this new function is invoked
outside thread_through_normal_block, checking single_succ_p is also
needed for this case.

>> +  return true;
>> +}
>> +
>> +/* There are basic blocks look like:
>> +  <P0>
>> +  p0 = a CMP b ; or p0 = (INT)( a CMP b)
>> +  goto <X>;
>> +
>> +  <P1>
>> +  p1 = c CMP d
>> +  goto <X>;
>> +
>> +  <X>
>> +  # phi = PHI <p0 (P0), p1 (P1)>
>> +  if (phi != 0) goto <Y>; else goto <Z>;
>> +
>> +  Then, <X>: a trivial join block.
>> +
>> + Check if BB is <X> in like above.  */
>> +
>> +bool
>> +is_trivial_join_block (basic_block bb)
>
> I'd make this work on a specific edge.
>
> edge_forwards_conditional_to_conditional_jump_through_empty_bb_p (edge e)
> {
>   basic_block b = e->dest;
>
> maybe too elaborate name ;)
>
Thanks for helping to name the function!  It is very valuable to me ;)
>> +{
>> +  gimple *gs = last_and_only_stmt (bb);
>> +  if (gs == NULL)
>> +    return false;
>> +
>> +  if (gimple_code (gs) != GIMPLE_COND)
>> +    return false;
>> +
>> +  tree cond = gimple_cond_lhs (gs);
>> +
>> +  if (TREE_CODE (cond) != SSA_NAME)
>> +    return false;
>
> space after if( too much vertical space in this function
> for my taste btw.
Will update this.
>
> For the forwarding to work we want a NE_EXPR or EQ_EXPR
> as gimple_cond_code and integer_one_p or integer_zero_p
> gimple_cond_rhs.
Right, checking those would be safer.  Since no issue was found during
bootstrap and regression tests, I did not add these checks.  I will
add this checking.
>
>> +
>> +  if (gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI)
>> +    return false;
>> +
>> +  gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));
>
> I think to match your pattern you want to check that
> gimple_bb (phi) == bb as well here.
Right, it should be checked. I will update.
>
>> +  for (unsigned int i = 0; i < phi->nargs; i++)
>> +    if (!cmp_from_unconditional_block (phi, i))
>
> Just process the incoming edge argument and inline the
> helper.  You can use PHI_ARG_DEF_FROM_EDGE here.
I will refine code, and try to use it.
>
> Thanks for integrating this into jump-threading - it does look
> like a good fit.
>
> How often does this trigger during bootstrap?
Thanks for your suggestion, this could help to evaluate the patch.  During
bootstrap (stage 2 or 3), in the gcc source code, 1300-1500 basic blocks
fulfill this transform.
> Thanks,
> Richard.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-23 12:06   ` Jiufu Guo
@ 2019-05-23 12:11     ` Richard Biener
  2019-05-23 14:40       ` Jiufu Guo
  2019-05-29 20:22       ` Jeff Law
  2019-05-29 20:18     ` Jeff Law
  1 sibling, 2 replies; 38+ messages in thread
From: Richard Biener @ 2019-05-23 12:11 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

On Thu, 23 May 2019, Jiufu Guo wrote:

> Hi,
> 
> Richard Biener <rguenther@suse.de> writes:
> 
> > On Tue, 21 May 2019, Jiufu Guo wrote:
> >
> >> Hi,
> >> 
> >> This patch implements a new opportunity of jump threading for PR77820.
> >> In this optimization, conditional jumps are merged with unconditional jump.
> >> And then moving CMP result to GPR is eliminated.
> >> 
> >> It looks like below:
> >> 
> >>   <P0>
> >>   p0 = a CMP b
> >>   goto <X>;
> >> 
> >>   <P1>
> >>   p1 = c CMP d
> >>   goto <X>;
> >> 
> >>   <X>
> >>   # phi = PHI <p0 (P0), p1 (P1)>
> >>   if (phi != 0) goto <Y>; else goto <Z>;
> >> 
> >> Could be transformed to:
> >> 
> >>   <P0>
> >>   p0 = a CMP b
> >>   if (p0 != 0) goto <Y>; else goto <Z>;
> >> 
> >>   <P1>
> >>   p1 = c CMP d
> >>   if (p1 != 0) goto <Y>; else goto <Z>;
> >> 
> >> 
> >> This optimization eliminates:
> >> 1. saving CMP result: p0 = a CMP b.
> >> 2. additional CMP on branch: if (phi != 0).
> >> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
> >> 
> >> Bootstrapped and tested on powerpc64le with no regressions(one case is improved)
> >> and new testcases are added. Is this ok for trunk?
> >> 
> >> Thanks!
> >> Jiufu Guo
> >> 
> ...
> >> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> >> index c3ea2d6..23000f6 100644
> >> --- a/gcc/tree-ssa-threadedge.c
> >> +++ b/gcc/tree-ssa-threadedge.c
> >> @@ -1157,6 +1157,90 @@ thread_through_normal_block (edge e,
> >>    return 0;
> >>  }
> >>  
> >> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
> >> +   defined in the incoming basic block. Otherwise return false.  */
> >> +static bool
> >> +cmp_from_unconditional_block (gphi *phi, int index)
> >> +{
> >> +  tree value = gimple_phi_arg_def (phi, index);
> >> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
> >> +    return false;
> >
> > Not sure why we should reject a constant here but I guess we
> > expect it to find a simplified condition anyways ;)
> >
> Const could be accepted here, like "# t_9 = PHI <5(3), t_17(4)>". I
> found this case is already handled by other jump-threading code, like
> 'ethread' pass.
> 
> >> +
> >> +  gimple *def = SSA_NAME_DEF_STMT (value);
> >> +
> >> +  if (!is_gimple_assign (def))
> >> +    return false;
> >> +
> >> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> >> +    {
> >> +      value = gimple_assign_rhs1 (def);
> >> +      if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
> >> +	return false;
> >> +
> >> +      def = SSA_NAME_DEF_STMT (value);
> >> +
> >> +      if (!is_gimple_assign (def))
> >> +	return false;
> >
> > too much vertial space.
> >
> Thanks, I will refine it. 
> >> +    }
> >> +
> >> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> >> +    return false;
> >> +
> >> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
> >> +  edge e = gimple_phi_arg_edge (phi, index);
> >> +  if (def->bb != e->src)
> >> +    return false;
> >
> > why does this matter?
> >
> Through preparing pathes and duplicating block, this transform can also
> help to combine a cmp in previous block and a gcond in current block.
> "if (def->bb != e->src)" make sure the cmp is define in the incoming
> block of the current; and then combining "cmp with gcond" is safe.  If
> the cmp is defined far from the incoming block, it would be hard to
> achieve the combining, and the transform may not needed.

We're in SSA form so the "combining" doesn't really care where the
definition comes from.

> >> +
> >> +  if (!single_succ_p (def->bb))
> >> +    return false;
> >
> > Or this?  The actual threading will ensure this will hold true.
> >
> Yes, other thread code check this and ensure it to be true, like
> function thread_through_normal_block. Since this new function is invoked
> outside thread_through_normal_block, so, checking single_succ_p is also
> needed for this case.

I mean threading will isolate the path making this trivially true.
It's also no requirement for combining, in fact due to the single-use
check the definition can be sinked across the edge already (if
the edges dest didn't have multiple predecessors which this threading
will fix as well).

> >> +  return true;
> >> +}
> >> +
> >> +/* There are basic blocks look like:
> >> +  <P0>
> >> +  p0 = a CMP b ; or p0 = (INT)( a CMP b)
> >> +  goto <X>;
> >> +
> >> +  <P1>
> >> +  p1 = c CMP d
> >> +  goto <X>;
> >> +
> >> +  <X>
> >> +  # phi = PHI <p0 (P0), p1 (P1)>
> >> +  if (phi != 0) goto <Y>; else goto <Z>;
> >> +
> >> +  Then, <X>: a trivial join block.
> >> +
> >> + Check if BB is <X> in like above.  */
> >> +
> >> +bool
> >> +is_trivial_join_block (basic_block bb)
> >
> > I'd make this work on a specific edge.
> >
> > edge_forwards_conditional_to_conditional_jump_through_empty_bb_p (edge e)
> > {
> >   basic_block b = e->dest;
> >
> > maybe too elaborate name ;)
> >
> Thanks for help to name the function!  It is very valuable for me ;)
> >> +{
> >> +  gimple *gs = last_and_only_stmt (bb);
> >> +  if (gs == NULL)
> >> +    return false;
> >> +
> >> +  if (gimple_code (gs) != GIMPLE_COND)
> >> +    return false;
> >> +
> >> +  tree cond = gimple_cond_lhs (gs);
> >> +
> >> +  if (TREE_CODE (cond) != SSA_NAME)
> >> +    return false;
> >
> > space after if( too much vertical space in this function
> > for my taste btw.
> Will update this.
> >
> > For the forwarding to work we want a NE_EXPR or EQ_EXPR
> > as gimple_cond_code and integer_one_p or integer_zero_p
> > gimple_cond_rhs.
> Right, checking those would be more safe.  Since no issue found, during
> bootstrap and regression tests, so I did not add these checking.  I will
> add this checking.
> >
> >> +
> >> +  if (gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI)
> >> +    return false;
> >> +
> >> +  gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));
> >
> > I think to match your pattern you want to check that
> > gimple_bb (phi) == bb as well here.
> Right, it should be checked. I will update.
> >
> >> +  for (unsigned int i = 0; i < phi->nargs; i++)
> >> +    if (!cmp_from_unconditional_block (phi, i))
> >
> > Just process the incoming edge argument and inline the
> > helper.  You can use PHI_ARG_DEF_FROM_EDGE here.
> I will refine code, and try to use it.
> >
> > Thanks for integrating this into jump-threading - it does look
> > like a good fit.
> >
> > How often does this trigger during bootstrap?
> Thanks for your sugguestion, this could help to evaluate patch. During
> bootstrap(stage 2 or 3), in gcc source code, 1300-1500 basic blocks are
> fullfile this tranform.

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-23 12:11     ` Richard Biener
@ 2019-05-23 14:40       ` Jiufu Guo
  2019-05-24 12:45         ` Richard Biener
  2019-05-29 20:22       ` Jeff Law
  1 sibling, 1 reply; 38+ messages in thread
From: Jiufu Guo @ 2019-05-23 14:40 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

Richard Biener <rguenther@suse.de> writes:

> On Thu, 23 May 2019, Jiufu Guo wrote:
>
>> Hi,
>> 
>> Richard Biener <rguenther@suse.de> writes:
>> 
>> > On Tue, 21 May 2019, Jiufu Guo wrote:
>> >
>> >> Hi,
>> >> 
>> >> This patch implements a new opportunity of jump threading for PR77820.
>> >> In this optimization, conditional jumps are merged with unconditional jump.
>> >> And then moving CMP result to GPR is eliminated.
>> >> 
>> >> It looks like below:
>> >> 
>> >>   <P0>
>> >>   p0 = a CMP b
>> >>   goto <X>;
>> >> 
>> >>   <P1>
>> >>   p1 = c CMP d
>> >>   goto <X>;
>> >> 
>> >>   <X>
>> >>   # phi = PHI <p0 (P0), p1 (P1)>
>> >>   if (phi != 0) goto <Y>; else goto <Z>;
>> >> 
>> >> Could be transformed to:
>> >> 
>> >>   <P0>
>> >>   p0 = a CMP b
>> >>   if (p0 != 0) goto <Y>; else goto <Z>;
>> >> 
>> >>   <P1>
>> >>   p1 = c CMP d
>> >>   if (p1 != 0) goto <Y>; else goto <Z>;
>> >> 
>> >> 
>> >> This optimization eliminates:
>> >> 1. saving CMP result: p0 = a CMP b.
>> >> 2. additional CMP on branch: if (phi != 0).
>> >> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>> >> 
>> >> Bootstrapped and tested on powerpc64le with no regressions(one case is improved)
>> >> and new testcases are added. Is this ok for trunk?
>> >> 
>> >> Thanks!
>> >> Jiufu Guo
>> >> 
>> ...
>> >> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>> >> index c3ea2d6..23000f6 100644
>> >> --- a/gcc/tree-ssa-threadedge.c
>> >> +++ b/gcc/tree-ssa-threadedge.c
>> >> @@ -1157,6 +1157,90 @@ thread_through_normal_block (edge e,
>> >>    return 0;
>> >>  }
>> >>  
>> >> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
>> >> +   defined in the incoming basic block. Otherwise return false.  */
>> >> +static bool
>> >> +cmp_from_unconditional_block (gphi *phi, int index)
>> >> +{
>> >> +  tree value = gimple_phi_arg_def (phi, index);
>> >> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>> >> +    return false;
>> >
>> > Not sure why we should reject a constant here but I guess we
>> > expect it to find a simplified condition anyways ;)
>> >
>> Const could be accepted here, like "# t_9 = PHI <5(3), t_17(4)>". I
>> found this case is already handled by other jump-threading code, like
>> 'ethread' pass.
>> 
>> >> +
>> >> +  gimple *def = SSA_NAME_DEF_STMT (value);
>> >> +
>> >> +  if (!is_gimple_assign (def))
>> >> +    return false;
>> >> +
>> >> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>> >> +    {
>> >> +      value = gimple_assign_rhs1 (def);
>> >> +      if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>> >> +	return false;
>> >> +
>> >> +      def = SSA_NAME_DEF_STMT (value);
>> >> +
>> >> +      if (!is_gimple_assign (def))
>> >> +	return false;
>> >
>> > too much vertial space.
>> >
>> Thanks, I will refine it. 
>> >> +    }
>> >> +
>> >> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>> >> +    return false;
>> >> +
>> >> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
>> >> +  edge e = gimple_phi_arg_edge (phi, index);
>> >> +  if (def->bb != e->src)
>> >> +    return false;
>> >
>> > why does this matter?
>> >
>> Through preparing pathes and duplicating block, this transform can also
>> help to combine a cmp in previous block and a gcond in current block.
>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>> block of the current; and then combining "cmp with gcond" is safe.  If
>> the cmp is defined far from the incoming block, it would be hard to
>> achieve the combining, and the transform may not needed.
>
> We're in SSA form so the "combining" doesn't really care where the
> definition comes from.
>
>> >> +
>> >> +  if (!single_succ_p (def->bb))
>> >> +    return false;
>> >
>> > Or this?  The actual threading will ensure this will hold true.
>> >
>> Yes, other thread code check this and ensure it to be true, like
>> function thread_through_normal_block. Since this new function is invoked
>> outside thread_through_normal_block, so, checking single_succ_p is also
>> needed for this case.
>
> I mean threading will isolate the path making this trivially true.
> It's also no requirement for combining, in fact due to the single-use
> check the definition can be sinked across the edge already (if
> the edges dest didn't have multiple predecessors which this threading
> will fix as well).
>
I will relax these checks and add a test.

And I refactor the code a little as below. Thanks for any comments!

/* Return true if edge E forwards a comparison result, through its empty
   destination block, to a conditional jump.  That is, E->dest contains only

     # phi = PHI <p0 (P0), p1 (P1), ...>
     if (phi !=/== 0/1) goto <Y>; else goto <Z>;

   and the PHI argument coming in through E is (possibly an integral
   conversion of) the result of a comparison.  Tail-duplicating such a
   block for E lets the comparison feed the branch directly.  */

bool
edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
{
  basic_block bb = e->dest;

  /* See if there is only one stmt which is a gcond.  */
  gimple *gs = last_and_only_stmt (bb);
  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
    return false;

  /* See if gcond's condition is "(phi !=/== 0/1)": the code must be
     NE_EXPR or EQ_EXPR, AND the rhs must be integer zero or one.  */
  tree cond = gimple_cond_lhs (gs);
  enum tree_code code = gimple_cond_code (gs);
  tree rhs = gimple_cond_rhs (gs);
  if (TREE_CODE (cond) != SSA_NAME
      || (code != NE_EXPR && code != EQ_EXPR)
      || (!integer_onep (rhs) && !integer_zerop (rhs)))
    return false;

  /* The condition must be defined by a PHI in BB itself.  */
  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
  if (phi == NULL || gimple_bb (phi) != bb)
    return false;

  /* Only the PHI argument flowing in through E matters: even when the
     other incoming values are not comparisons, duplicating BB for this
     edge is still worthwhile.  */
  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
  if (TREE_CODE (value) != SSA_NAME
      || !has_single_use (value))
    return false;
  gimple *def = SSA_NAME_DEF_STMT (value);
  if (!is_gimple_assign (def))
    return false;

  /* Look through an integral conversion: phi = (INTCONV) (a CMP b).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
    {
      value = gimple_assign_rhs1 (def);
      if (TREE_CODE (value) != SSA_NAME
	  || !has_single_use (value))
	return false;
      def = SSA_NAME_DEF_STMT (value);
      if (!is_gimple_assign (def))
	return false;
    }

  /* Finally, the incoming value must be the result of a comparison.  */
  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
    return false;

  return true;
}

Thanks,
Jiufu Guo
>> >> +  return true;
>> >> +}
>> >> +
>> >> +/* There are basic blocks look like:
>> >> +  <P0>
>> >> +  p0 = a CMP b ; or p0 = (INT)( a CMP b)
>> >> +  goto <X>;
>> >> +
>> >> +  <P1>
>> >> +  p1 = c CMP d
>> >> +  goto <X>;
>> >> +
>> >> +  <X>
>> >> +  # phi = PHI <p0 (P0), p1 (P1)>
>> >> +  if (phi != 0) goto <Y>; else goto <Z>;
>> >> +
>> >> +  Then, <X>: a trivial join block.
>> >> +
>> >> + Check if BB is <X> in like above.  */
>> >> +
>> >> +bool
>> >> +is_trivial_join_block (basic_block bb)
>> >
>> > I'd make this work on a specific edge.
>> >
>> > edge_forwards_conditional_to_conditional_jump_through_empty_bb_p (edge e)
>> > {
>> >   basic_block b = e->dest;
>> >
>> > maybe too elaborate name ;)
>> >
>> Thanks for help to name the function!  It is very valuable for me ;)
>> >> +{
>> >> +  gimple *gs = last_and_only_stmt (bb);
>> >> +  if (gs == NULL)
>> >> +    return false;
>> >> +
>> >> +  if (gimple_code (gs) != GIMPLE_COND)
>> >> +    return false;
>> >> +
>> >> +  tree cond = gimple_cond_lhs (gs);
>> >> +
>> >> +  if (TREE_CODE (cond) != SSA_NAME)
>> >> +    return false;
>> >
>> > space after if( too much vertical space in this function
>> > for my taste btw.
>> Will update this.
>> >
>> > For the forwarding to work we want a NE_EXPR or EQ_EXPR
>> > as gimple_cond_code and integer_one_p or integer_zero_p
>> > gimple_cond_rhs.
>> Right, checking those would be more safe.  Since no issue found, during
>> bootstrap and regression tests, so I did not add these checking.  I will
>> add this checking.
>> >
>> >> +
>> >> +  if (gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI)
>> >> +    return false;
>> >> +
>> >> +  gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));
>> >
>> > I think to match your pattern you want to check that
>> > gimple_bb (phi) == bb as well here.
>> Right, it should be checked. I will update.
>> >
>> >> +  for (unsigned int i = 0; i < phi->nargs; i++)
>> >> +    if (!cmp_from_unconditional_block (phi, i))
>> >
>> > Just process the incoming edge argument and inline the
>> > helper.  You can use PHI_ARG_DEF_FROM_EDGE here.
>> I will refine code, and try to use it.
>> >
>> > Thanks for integrating this into jump-threading - it does look
>> > like a good fit.
>> >
>> > How often does this trigger during bootstrap?
>> Thanks for your sugguestion, this could help to evaluate patch. During
>> bootstrap(stage 2 or 3), in gcc source code, 1300-1500 basic blocks are
>> fullfile this tranform.
>
> Thanks,
> Richard.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-23 14:40       ` Jiufu Guo
@ 2019-05-24 12:45         ` Richard Biener
  2019-05-24 14:52           ` Jiufu Guo
                             ` (2 more replies)
  0 siblings, 3 replies; 38+ messages in thread
From: Richard Biener @ 2019-05-24 12:45 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

[-- Attachment #1: Type: text/plain, Size: 10019 bytes --]

On Thu, 23 May 2019, Jiufu Guo wrote:

> Richard Biener <rguenther@suse.de> writes:
> 
> > On Thu, 23 May 2019, Jiufu Guo wrote:
> >
> >> Hi,
> >> 
> >> Richard Biener <rguenther@suse.de> writes:
> >> 
> >> > On Tue, 21 May 2019, Jiufu Guo wrote:
> >> >
> >> >> Hi,
> >> >> 
> >> >> This patch implements a new opportunity of jump threading for PR77820.
> >> >> In this optimization, conditional jumps are merged with unconditional jump.
> >> >> And then moving CMP result to GPR is eliminated.
> >> >> 
> >> >> It looks like below:
> >> >> 
> >> >>   <P0>
> >> >>   p0 = a CMP b
> >> >>   goto <X>;
> >> >> 
> >> >>   <P1>
> >> >>   p1 = c CMP d
> >> >>   goto <X>;
> >> >> 
> >> >>   <X>
> >> >>   # phi = PHI <p0 (P0), p1 (P1)>
> >> >>   if (phi != 0) goto <Y>; else goto <Z>;
> >> >> 
> >> >> Could be transformed to:
> >> >> 
> >> >>   <P0>
> >> >>   p0 = a CMP b
> >> >>   if (p0 != 0) goto <Y>; else goto <Z>;
> >> >> 
> >> >>   <P1>
> >> >>   p1 = c CMP d
> >> >>   if (p1 != 0) goto <Y>; else goto <Z>;
> >> >> 
> >> >> 
> >> >> This optimization eliminates:
> >> >> 1. saving CMP result: p0 = a CMP b.
> >> >> 2. additional CMP on branch: if (phi != 0).
> >> >> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
> >> >> 
> >> >> Bootstrapped and tested on powerpc64le with no regressions(one case is improved)
> >> >> and new testcases are added. Is this ok for trunk?
> >> >> 
> >> >> Thanks!
> >> >> Jiufu Guo
> >> >> 
> >> ...
> >> >> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> >> >> index c3ea2d6..23000f6 100644
> >> >> --- a/gcc/tree-ssa-threadedge.c
> >> >> +++ b/gcc/tree-ssa-threadedge.c
> >> >> @@ -1157,6 +1157,90 @@ thread_through_normal_block (edge e,
> >> >>    return 0;
> >> >>  }
> >> >>  
> >> >> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
> >> >> +   defined in the incoming basic block. Otherwise return false.  */
> >> >> +static bool
> >> >> +cmp_from_unconditional_block (gphi *phi, int index)
> >> >> +{
> >> >> +  tree value = gimple_phi_arg_def (phi, index);
> >> >> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
> >> >> +    return false;
> >> >
> >> > Not sure why we should reject a constant here but I guess we
> >> > expect it to find a simplified condition anyways ;)
> >> >
> >> Const could be accepted here, like "# t_9 = PHI <5(3), t_17(4)>". I
> >> found this case is already handled by other jump-threading code, like
> >> 'ethread' pass.
> >> 
> >> >> +
> >> >> +  gimple *def = SSA_NAME_DEF_STMT (value);
> >> >> +
> >> >> +  if (!is_gimple_assign (def))
> >> >> +    return false;
> >> >> +
> >> >> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> >> >> +    {
> >> >> +      value = gimple_assign_rhs1 (def);
> >> >> +      if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
> >> >> +	return false;
> >> >> +
> >> >> +      def = SSA_NAME_DEF_STMT (value);
> >> >> +
> >> >> +      if (!is_gimple_assign (def))
> >> >> +	return false;
> >> >
> >> > too much vertial space.
> >> >
> >> Thanks, I will refine it. 
> >> >> +    }
> >> >> +
> >> >> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> >> >> +    return false;
> >> >> +
> >> >> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
> >> >> +  edge e = gimple_phi_arg_edge (phi, index);
> >> >> +  if (def->bb != e->src)
> >> >> +    return false;
> >> >
> >> > why does this matter?
> >> >
> >> Through preparing pathes and duplicating block, this transform can also
> >> help to combine a cmp in previous block and a gcond in current block.
> >> "if (def->bb != e->src)" make sure the cmp is define in the incoming
> >> block of the current; and then combining "cmp with gcond" is safe.  If
> >> the cmp is defined far from the incoming block, it would be hard to
> >> achieve the combining, and the transform may not needed.
> >
> > We're in SSA form so the "combining" doesn't really care where the
> > definition comes from.
> >
> >> >> +
> >> >> +  if (!single_succ_p (def->bb))
> >> >> +    return false;
> >> >
> >> > Or this?  The actual threading will ensure this will hold true.
> >> >
> >> Yes, other thread code check this and ensure it to be true, like
> >> function thread_through_normal_block. Since this new function is invoked
> >> outside thread_through_normal_block, so, checking single_succ_p is also
> >> needed for this case.
> >
> > I mean threading will isolate the path making this trivially true.
> > It's also no requirement for combining, in fact due to the single-use
> > check the definition can be sinked across the edge already (if
> > the edges dest didn't have multiple predecessors which this threading
> > will fix as well).
> >
> I would relax these check and have a test.
> 
> And I refactor the code a little as below. Thanks for any comments!
> 
> bool
> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
> {
>   basic_block bb = e->dest;
> 
>   /* See if there is only one stmt which is gcond.  */
>   gimple *gs = last_and_only_stmt (bb);
>   if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
>     return false;
> 
>   /* See if gcond's condition is "(phi !=/== 0/1)".  */
>   tree cond = gimple_cond_lhs (gs);
>   if (TREE_CODE (cond) != SSA_NAME
>       || gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI
>       || gimple_bb (SSA_NAME_DEF_STMT (cond)) != bb)
>     return false;
>   enum tree_code code = gimple_cond_code (gs);
>   tree rhs = gimple_cond_rhs (gs);
>   if (!(code == NE_EXPR || code == EQ_EXPR || integer_onep (rhs)
> 	|| integer_zerop (rhs)))

GCCs coding standard says that if a condition doesn't fit on
a single line you should split after each || or &&

>     return false;
> 
>   gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));

If you had used dyn_cast <gphi *> () above the GIMPLE_PHI
check would have been for phi != NULL and you'd save a line
of code.

>   edge_iterator ei;
>   edge in_e;
>   FOR_EACH_EDGE (in_e, ei, bb->preds)
>     {

As said in my first review I'd just check whether for the
edge we want to thread through the definition comes from a CMP.
Suppose you have

 # val_1 = PHI <a_2, b_3, c_4>
 if (val_1 != 0)

and only one edge has a b_3 = d_5 != 0 condition it's still
worth tail-duplicating the if block.

otherwise it looks ok to me.

Thanks,
Richard.

>       /* Check if phi's incoming value is CMP */
>       gimple *def;
>       tree value = PHI_ARG_DEF_FROM_EDGE (phi, in_e);
>       if (TREE_CODE (value) == SSA_NAME && has_single_use (value)
> 	  && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
> 	def = SSA_NAME_DEF_STMT (value);
>       else
> 	return false;
> 
>       /* Or if it is (INTCONV) (a CMP b). */
>       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> 	{
> 	  value = gimple_assign_rhs1 (def);
> 	  if (TREE_CODE (value) == SSA_NAME && has_single_use (value)
> 	      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
> 	    def = SSA_NAME_DEF_STMT (value);
> 	  else
> 	    return false;
> 	}
>       if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> 	return false;
>     }
> 
>   return true;
> }
> 
> Thanks,
> Jiufu Guo
> >> >> +  return true;
> >> >> +}
> >> >> +
> >> >> +/* There are basic blocks look like:
> >> >> +  <P0>
> >> >> +  p0 = a CMP b ; or p0 = (INT)( a CMP b)
> >> >> +  goto <X>;
> >> >> +
> >> >> +  <P1>
> >> >> +  p1 = c CMP d
> >> >> +  goto <X>;
> >> >> +
> >> >> +  <X>
> >> >> +  # phi = PHI <p0 (P0), p1 (P1)>
> >> >> +  if (phi != 0) goto <Y>; else goto <Z>;
> >> >> +
> >> >> +  Then, <X>: a trivial join block.
> >> >> +
> >> >> + Check if BB is <X> in like above.  */
> >> >> +
> >> >> +bool
> >> >> +is_trivial_join_block (basic_block bb)
> >> >
> >> > I'd make this work on a specific edge.
> >> >
> >> > edge_forwards_conditional_to_conditional_jump_through_empty_bb_p (edge e)
> >> > {
> >> >   basic_block b = e->dest;
> >> >
> >> > maybe too elaborate name ;)
> >> >
> >> Thanks for help to name the function!  It is very valuable for me ;)
> >> >> +{
> >> >> +  gimple *gs = last_and_only_stmt (bb);
> >> >> +  if (gs == NULL)
> >> >> +    return false;
> >> >> +
> >> >> +  if (gimple_code (gs) != GIMPLE_COND)
> >> >> +    return false;
> >> >> +
> >> >> +  tree cond = gimple_cond_lhs (gs);
> >> >> +
> >> >> +  if (TREE_CODE (cond) != SSA_NAME)
> >> >> +    return false;
> >> >
> >> > space after if( too much vertical space in this function
> >> > for my taste btw.
> >> Will update this.
> >> >
> >> > For the forwarding to work we want a NE_EXPR or EQ_EXPR
> >> > as gimple_cond_code and integer_one_p or integer_zero_p
> >> > gimple_cond_rhs.
> >> Right, checking those would be more safe.  Since no issue found, during
> >> bootstrap and regression tests, so I did not add these checking.  I will
> >> add this checking.
> >> >
> >> >> +
> >> >> +  if (gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI)
> >> >> +    return false;
> >> >> +
> >> >> +  gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));
> >> >
> >> > I think to match your pattern you want to check that
> >> > gimple_bb (phi) == bb as well here.
> >> Right, it should be checked. I will update.
> >> >
> >> >> +  for (unsigned int i = 0; i < phi->nargs; i++)
> >> >> +    if (!cmp_from_unconditional_block (phi, i))
> >> >
> >> > Just process the incoming edge argument and inline the
> >> > helper.  You can use PHI_ARG_DEF_FROM_EDGE here.
> >> I will refine code, and try to use it.
> >> >
> >> > Thanks for integrating this into jump-threading - it does look
> >> > like a good fit.
> >> >
> >> > How often does this trigger during bootstrap?
> >> Thanks for your sugguestion, this could help to evaluate patch. During
> >> bootstrap(stage 2 or 3), in gcc source code, 1300-1500 basic blocks are
> >> fullfile this tranform.
> >
> > Thanks,
> > Richard.
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-24 12:45         ` Richard Biener
@ 2019-05-24 14:52           ` Jiufu Guo
  2019-05-28 14:07           ` [PATCH V2] " Jiufu Guo
  2019-05-29 20:26           ` [PATCH] " Jeff Law
  2 siblings, 0 replies; 38+ messages in thread
From: Jiufu Guo @ 2019-05-24 14:52 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

Richard Biener <rguenther@suse.de> writes:

> On Thu, 23 May 2019, Jiufu Guo wrote:
>
>> Richard Biener <rguenther@suse.de> writes:
>> 
>> > On Thu, 23 May 2019, Jiufu Guo wrote:
>> >
>> >> Hi,
>> >> 
>> >> Richard Biener <rguenther@suse.de> writes:
>> >> 
>> >> > On Tue, 21 May 2019, Jiufu Guo wrote:
>> >> >
>> >> >> Hi,
>> >> >> 
>> >> >> This patch implements a new opportunity of jump threading for PR77820.
>> >> >> In this optimization, conditional jumps are merged with unconditional jump.
>> >> >> And then moving CMP result to GPR is eliminated.
>> >> >> 
>> >> >> It looks like below:
>> >> >> 
>> >> >>   <P0>
>> >> >>   p0 = a CMP b
>> >> >>   goto <X>;
>> >> >> 
>> >> >>   <P1>
>> >> >>   p1 = c CMP d
>> >> >>   goto <X>;
>> >> >> 
>> >> >>   <X>
>> >> >>   # phi = PHI <p0 (P0), p1 (P1)>
>> >> >>   if (phi != 0) goto <Y>; else goto <Z>;
>> >> >> 
>> >> >> Could be transformed to:
>> >> >> 
>> >> >>   <P0>
>> >> >>   p0 = a CMP b
>> >> >>   if (p0 != 0) goto <Y>; else goto <Z>;
>> >> >> 
>> >> >>   <P1>
>> >> >>   p1 = c CMP d
>> >> >>   if (p1 != 0) goto <Y>; else goto <Z>;
>> >> >> 
>> >> >> 
>> >> >> This optimization eliminates:
>> >> >> 1. saving CMP result: p0 = a CMP b.
>> >> >> 2. additional CMP on branch: if (phi != 0).
>> >> >> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>> >> >> 
>> >> >> Bootstrapped and tested on powerpc64le with no regressions(one case is improved)
>> >> >> and new testcases are added. Is this ok for trunk?
>> >> >> 
>> >> >> Thanks!
>> >> >> Jiufu Guo
>> >> >> 
>> >> ...
>> >> >> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>> >> >> index c3ea2d6..23000f6 100644
>> >> >> --- a/gcc/tree-ssa-threadedge.c
>> >> >> +++ b/gcc/tree-ssa-threadedge.c
>> >> >> @@ -1157,6 +1157,90 @@ thread_through_normal_block (edge e,
>> >> >>    return 0;
>> >> >>  }
>> >> >>  
>> >> >> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
>> >> >> +   defined in the incoming basic block. Otherwise return false.  */
>> >> >> +static bool
>> >> >> +cmp_from_unconditional_block (gphi *phi, int index)
>> >> >> +{
>> >> >> +  tree value = gimple_phi_arg_def (phi, index);
>> >> >> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>> >> >> +    return false;
>> >> >
>> >> > Not sure why we should reject a constant here but I guess we
>> >> > expect it to find a simplified condition anyways ;)
>> >> >
>> >> Const could be accepted here, like "# t_9 = PHI <5(3), t_17(4)>". I
>> >> found this case is already handled by other jump-threading code, like
>> >> 'ethread' pass.
>> >> 
>> >> >> +
>> >> >> +  gimple *def = SSA_NAME_DEF_STMT (value);
>> >> >> +
>> >> >> +  if (!is_gimple_assign (def))
>> >> >> +    return false;
>> >> >> +
>> >> >> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>> >> >> +    {
>> >> >> +      value = gimple_assign_rhs1 (def);
>> >> >> +      if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>> >> >> +	return false;
>> >> >> +
>> >> >> +      def = SSA_NAME_DEF_STMT (value);
>> >> >> +
>> >> >> +      if (!is_gimple_assign (def))
>> >> >> +	return false;
>> >> >
>> >> > too much vertial space.
>> >> >
>> >> Thanks, I will refine it. 
>> >> >> +    }
>> >> >> +
>> >> >> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>> >> >> +    return false;
>> >> >> +
>> >> >> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
>> >> >> +  edge e = gimple_phi_arg_edge (phi, index);
>> >> >> +  if (def->bb != e->src)
>> >> >> +    return false;
>> >> >
>> >> > why does this matter?
>> >> >
>> >> Through preparing pathes and duplicating block, this transform can also
>> >> help to combine a cmp in previous block and a gcond in current block.
>> >> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>> >> block of the current; and then combining "cmp with gcond" is safe.  If
>> >> the cmp is defined far from the incoming block, it would be hard to
>> >> achieve the combining, and the transform may not needed.
>> >
>> > We're in SSA form so the "combining" doesn't really care where the
>> > definition comes from.
>> >
>> >> >> +
>> >> >> +  if (!single_succ_p (def->bb))
>> >> >> +    return false;
>> >> >
>> >> > Or this?  The actual threading will ensure this will hold true.
>> >> >
>> >> Yes, other thread code check this and ensure it to be true, like
>> >> function thread_through_normal_block. Since this new function is invoked
>> >> outside thread_through_normal_block, so, checking single_succ_p is also
>> >> needed for this case.
>> >
>> > I mean threading will isolate the path making this trivially true.
>> > It's also no requirement for combining, in fact due to the single-use
>> > check the definition can be sinked across the edge already (if
>> > the edges dest didn't have multiple predecessors which this threading
>> > will fix as well).
>> >
>> I would relax these check and have a test.
>> 
>> And I refactor the code a little as below. Thanks for any comments!
>> 
>> bool
>> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>> {
>>   basic_block bb = e->dest;
>> 
>>   /* See if there is only one stmt which is gcond.  */
>>   gimple *gs = last_and_only_stmt (bb);
>>   if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
>>     return false;
>> 
>>   /* See if gcond's condition is "(phi !=/== 0/1)".  */
>>   tree cond = gimple_cond_lhs (gs);
>>   if (TREE_CODE (cond) != SSA_NAME
>>       || gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI
>>       || gimple_bb (SSA_NAME_DEF_STMT (cond)) != bb)
>>     return false;
>>   enum tree_code code = gimple_cond_code (gs);
>>   tree rhs = gimple_cond_rhs (gs);
>>   if (!(code == NE_EXPR || code == EQ_EXPR || integer_onep (rhs)
>> 	|| integer_zerop (rhs)))
>
> GCCs coding standard says that if a condition doesn't fit on
> a single line you should split after each || or &&
Get it.
>
>>     return false;
>> 
>>   gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));
>
> If you had used dyn_cast <gphi *> () above the GIMPLE_PHI
> check would have been for phi != NULL and you'd save a line
> of code.
>
>>   edge_iterator ei;
>>   edge in_e;
>>   FOR_EACH_EDGE (in_e, ei, bb->preds)
>>     {
>
> As said in my first review I'd just check whether for the
> edge we want to thread through the definition comes from a CMP.
> Suppose you have
>
>  # val_1 = PHI <a_2, b_3, c_4>
>  if (val_1 != 0)
>
> and only one edge has a b_3 = d_5 != 0 condition it's still
> worth tail-duplicating the if block.
Right.
>
> otherwise it looks ok to me.
>
> Thanks,
> Richard.
>
I will update accordingly and run the tests. If they pass, I will send the
refined patch and ask for approval to commit the code.

Thanks!
Jiufu Guo.
>>       /* Check if phi's incoming value is CMP */
>>       gimple *def;
>>       tree value = PHI_ARG_DEF_FROM_EDGE (phi, in_e);
>>       if (TREE_CODE (value) == SSA_NAME && has_single_use (value)
>> 	  && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
>> 	def = SSA_NAME_DEF_STMT (value);
>>       else
>> 	return false;
>> 
>>       /* Or if it is (INTCONV) (a CMP b). */
>>       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>> 	{
>> 	  value = gimple_assign_rhs1 (def);
>> 	  if (TREE_CODE (value) == SSA_NAME && has_single_use (value)
>> 	      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
>> 	    def = SSA_NAME_DEF_STMT (value);
>> 	  else
>> 	    return false;
>> 	}
>>       if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>> 	return false;
>>     }
>> 
>>   return true;
>> }
>> 
>> Thanks,
>> Jiufu Guo
>> >> >> +  return true;
>> >> >> +}
>> >> >> +
>> >> >> +/* There are basic blocks look like:
>> >> >> +  <P0>
>> >> >> +  p0 = a CMP b ; or p0 = (INT)( a CMP b)
>> >> >> +  goto <X>;
>> >> >> +
>> >> >> +  <P1>
>> >> >> +  p1 = c CMP d
>> >> >> +  goto <X>;
>> >> >> +
>> >> >> +  <X>
>> >> >> +  # phi = PHI <p0 (P0), p1 (P1)>
>> >> >> +  if (phi != 0) goto <Y>; else goto <Z>;
>> >> >> +
>> >> >> +  Then, <X>: a trivial join block.
>> >> >> +
>> >> >> + Check if BB is <X> in like above.  */
>> >> >> +
>> >> >> +bool
>> >> >> +is_trivial_join_block (basic_block bb)
>> >> >
>> >> > I'd make this work on a specific edge.
>> >> >
>> >> > edge_forwards_conditional_to_conditional_jump_through_empty_bb_p (edge e)
>> >> > {
>> >> >   basic_block b = e->dest;
>> >> >
>> >> > maybe too elaborate name ;)
>> >> >
>> >> Thanks for help to name the function!  It is very valuable for me ;)
>> >> >> +{
>> >> >> +  gimple *gs = last_and_only_stmt (bb);
>> >> >> +  if (gs == NULL)
>> >> >> +    return false;
>> >> >> +
>> >> >> +  if (gimple_code (gs) != GIMPLE_COND)
>> >> >> +    return false;
>> >> >> +
>> >> >> +  tree cond = gimple_cond_lhs (gs);
>> >> >> +
>> >> >> +  if (TREE_CODE (cond) != SSA_NAME)
>> >> >> +    return false;
>> >> >
>> >> > space after if( too much vertical space in this function
>> >> > for my taste btw.
>> >> Will update this.
>> >> >
>> >> > For the forwarding to work we want a NE_EXPR or EQ_EXPR
>> >> > as gimple_cond_code and integer_one_p or integer_zero_p
>> >> > gimple_cond_rhs.
>> >> Right, checking those would be more safe.  Since no issue found, during
>> >> bootstrap and regression tests, so I did not add these checking.  I will
>> >> add this checking.
>> >> >
>> >> >> +
>> >> >> +  if (gimple_code (SSA_NAME_DEF_STMT (cond)) != GIMPLE_PHI)
>> >> >> +    return false;
>> >> >> +
>> >> >> +  gphi *phi = as_a<gphi *> (SSA_NAME_DEF_STMT (cond));
>> >> >
>> >> > I think to match your pattern you want to check that
>> >> > gimple_bb (phi) == bb as well here.
>> >> Right, it should be checked. I will update.
>> >> >
>> >> >> +  for (unsigned int i = 0; i < phi->nargs; i++)
>> >> >> +    if (!cmp_from_unconditional_block (phi, i))
>> >> >
>> >> > Just process the incoming edge argument and inline the
>> >> > helper.  You can use PHI_ARG_DEF_FROM_EDGE here.
>> >> I will refine code, and try to use it.
>> >> >
>> >> > Thanks for integrating this into jump-threading - it does look
>> >> > like a good fit.
>> >> >
>> >> > How often does this trigger during bootstrap?
>> >> Thanks for your sugguestion, this could help to evaluate patch. During
>> >> bootstrap(stage 2 or 3), in gcc source code, 1300-1500 basic blocks are
>> >> fullfile this tranform.
>> >
>> > Thanks,
>> > Richard.
>> 
>> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH V2] A jump threading opportunity for condition branch
  2019-05-24 12:45         ` Richard Biener
  2019-05-24 14:52           ` Jiufu Guo
@ 2019-05-28 14:07           ` Jiufu Guo
  2019-05-29  1:51             ` Jiufu Guo
                               ` (2 more replies)
  2019-05-29 20:26           ` [PATCH] " Jeff Law
  2 siblings, 3 replies; 38+ messages in thread
From: Jiufu Guo @ 2019-05-28 14:07 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

Hi,

This patch implements a new opportunity of jump threading for PR77820.
In this optimization, conditional jumps are merged with unconditional
jump. And then moving CMP result to GPR is eliminated.

This version is based on the proposal of Richard, Jeff and Andrew, and
refined to incorporate comments.  Thanks for the reviews!

Bootstrapped and tested on powerpc64le and powerpc64be with no
regressions (one case is improved) and new testcases are added. Is this
ok for trunk?

Example of this opportunity looks like below:

  <P0>
  p0 = a CMP b
  goto <X>;

  <P1>
  p1 = c CMP d
  goto <X>;

  <X>
  # phi = PHI <p0 (P0), p1 (P1)>
  if (phi != 0) goto <Y>; else goto <Z>;

Could be transformed to:

  <P0>
  p0 = a CMP b
  if (p0 != 0) goto <Y>; else goto <Z>;

  <P1>
  p1 = c CMP d
  if (p1 != 0) goto <Y>; else goto <Z>;


This optimization eliminates:
1. saving CMP result: p0 = a CMP b.
2. additional CMP on branch: if (phi != 0).
3. converting CMP result if there is phi = (INT_CONV) p0 if there is.

Thanks!
Jiufu Guo


[gcc]
2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* tree-ssa-threadedge.c
	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
	function.
	(thread_across_edge): Add call to
	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.

[gcc/testsuite]
2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.

---
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
 gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
 6 files changed, 192 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
new file mode 100644
index 0000000..5227c87
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, long x)
+{
+  _Bool t;
+  if (x)
+    {
+      g (a + 1);
+      t = a < b;
+      c = d + x;
+    }
+  else
+    {
+      g (b + 1);
+      a = c + d;
+      t = c > d;
+    }
+
+  if (t)
+    g1 (c);
+
+  g (a);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
new file mode 100644
index 0000000..eaf89bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  _Bool t;
+  if (x)
+    t = c < d;
+  else
+    t = a < b;
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
new file mode 100644
index 0000000..d5a1e0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  if (x)
+    t = a < b;
+  else if (d == x)
+    t = c < b;
+  else
+    t = d > c;
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
new file mode 100644
index 0000000..53acabc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  _Bool l1 = 0, l2 = 0;
+  if (x)
+    {
+      g (a);
+      c = a + b;
+      t = a < b;
+      l1 = 1;
+    }
+  else
+    {
+      g1 (b);
+      t = c > d;
+      d = c + b;
+      l2 = 1;
+    }
+
+  if (t)
+    {
+      if (l1 | l2)
+	g1 (c);
+    }
+  else
+    {
+      g (d);
+      g1 (a + b);
+    }
+  g (c + d);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
index e9b4f26..1d7b587 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
@@ -69,4 +69,4 @@ lookharder (string)
     }
 }
 
-/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
+/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index c3ea2d6..36c413a 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
   return 0;
 }
 
+/* There are basic blocks look like:
+   <P0>
+   p0 = a CMP b ; or p0 = (INT) (a CMP b)
+   goto <X>;
+
+   <P1>
+   p1 = c CMP d
+   goto <X>;
+
+   <X>
+   # phi = PHI <p0 (P0), p1 (P1)>
+   if (phi != 0) goto <Y>; else goto <Z>;
+
+   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
+   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
+
+   Return true if E is (P0,X) or (P1,X)  */
+
+bool
+edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
+{
+  basic_block bb = e->dest;
+
+  /* See if there is only one stmt which is gcond.  */
+  gimple *gs = last_and_only_stmt (bb);
+  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
+    return false;
+
+  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
+  tree cond = gimple_cond_lhs (gs);
+  enum tree_code code = gimple_cond_code (gs);
+  tree rhs = gimple_cond_rhs (gs);
+  if (TREE_CODE (cond) != SSA_NAME
+      || (code != NE_EXPR && code != EQ_EXPR)
+      || (!integer_onep (rhs) && !integer_zerop (rhs)))
+    return false;
+  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
+  if (phi == NULL || gimple_bb (phi) != bb)
+    return false;
+
+  /* Check if phi's incoming value is CMP.  */
+  gimple *def;
+  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
+  if (TREE_CODE (value) == SSA_NAME 
+      && has_single_use (value)
+      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
+    def = SSA_NAME_DEF_STMT (value);
+  else
+    return false;
+
+  /* Or if it is (INT) (a CMP b).  */
+  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
+    {
+      value = gimple_assign_rhs1 (def);
+      if (TREE_CODE (value) == SSA_NAME 
+	  && has_single_use (value)
+	  && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
+	def = SSA_NAME_DEF_STMT (value);
+      else
+	return false;
+    }
+
+  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
+    return false;
+
+  return true;
+}
+
 /* We are exiting E->src, see if E->dest ends with a conditional
    jump which has a known value when reached via E.
 
@@ -1317,10 +1385,12 @@ thread_across_edge (gcond *dummy_cond,
 
 	/* If we were able to thread through a successor of E->dest, then
 	   record the jump threading opportunity.  */
-	if (found)
+	if (found
+	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
 	  {
-	    propagate_threaded_block_debug_into (path->last ()->e->dest,
-						 taken_edge->dest);
+	    if (taken_edge->dest != path->last ()->e->dest)
+	      propagate_threaded_block_debug_into (path->last ()->e->dest,
+						   taken_edge->dest);
 	    register_jump_thread (path);
 	  }
 	else
-- 
2.7.4


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-28 14:07           ` [PATCH V2] " Jiufu Guo
@ 2019-05-29  1:51             ` Jiufu Guo
  2019-05-29 12:40             ` Richard Biener
  2019-05-30 15:34             ` Jeff Law
  2 siblings, 0 replies; 38+ messages in thread
From: Jiufu Guo @ 2019-05-29  1:51 UTC (permalink / raw)
  To: Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

Jiufu Guo <guojiufu@linux.ibm.com> writes:

> Hi,
>
> This patch implements a new opportunity of jump threading for PR77820.
> In this optimization, conditional jumps are merged with unconditional
> jump. And then moving CMP result to GPR is eliminated.
>
> This version is based on the proposal of Richard, Jeff and Andrew, and
> refined to incorporate comments.  Thanks for the reviews!
>
> Bootstrapped and tested on powerpc64le and powerpc64be with no
> regressions (one case is improved) and new testcases are added. Is this
> ok for trunk?
To be accurate, the tested targets are powerpc64-unknown-none and
powerpc64le-unknown-none.  split-path-6.c is the improved case: one more
jump threading opportunity is also found there, and further optimizations
then happen on it.
>
> Example of this opportunity looks like below:
>
>   <P0>
>   p0 = a CMP b
>   goto <X>;
>
>   <P1>
>   p1 = c CMP d
>   goto <X>;
>
>   <X>
>   # phi = PHI <p0 (P0), p1 (P1)>
>   if (phi != 0) goto <Y>; else goto <Z>;
>
> Could be transformed to:
>
>   <P0>
>   p0 = a CMP b
>   if (p0 != 0) goto <Y>; else goto <Z>;
>
>   <P1>
>   p1 = c CMP d
>   if (p1 != 0) goto <Y>; else goto <Z>;
>
>
> This optimization eliminates:
> 1. saving CMP result: p0 = a CMP b.
> 2. additional CMP on branch: if (phi != 0).
> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>
> Thanks!
> Jiufu Guo
>
>
> [gcc]
> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
>
> 	PR tree-optimization/77820
> 	* tree-ssa-threadedge.c
> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
> 	function.
> 	(thread_across_edge): Add call to
> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>
> [gcc/testsuite]
> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
>
> 	PR tree-optimization/77820
> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>  gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
>  6 files changed, 192 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> new file mode 100644
> index 0000000..5227c87
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, long x)
> +{
> +  _Bool t;
> +  if (x)
> +    {
> +      g (a + 1);
> +      t = a < b;
> +      c = d + x;
> +    }
> +  else
> +    {
> +      g (b + 1);
> +      a = c + d;
> +      t = c > d;
> +    }
> +
> +  if (t)
> +    g1 (c);
> +
> +  g (a);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> new file mode 100644
> index 0000000..eaf89bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  _Bool t;
> +  if (x)
> +    t = c < d;
> +  else
> +    t = a < b;
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> new file mode 100644
> index 0000000..d5a1e0b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  if (x)
> +    t = a < b;
> +  else if (d == x)
> +    t = c < b;
> +  else
> +    t = d > c;
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> new file mode 100644
> index 0000000..53acabc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  _Bool l1 = 0, l2 = 0;
> +  if (x)
> +    {
> +      g (a);
> +      c = a + b;
> +      t = a < b;
> +      l1 = 1;
> +    }
> +  else
> +    {
> +      g1 (b);
> +      t = c > d;
> +      d = c + b;
> +      l2 = 1;
> +    }
> +
> +  if (t)
> +    {
> +      if (l1 | l2)
> +	g1 (c);
> +    }
> +  else
> +    {
> +      g (d);
> +      g1 (a + b);
> +    }
> +  g (c + d);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> index e9b4f26..1d7b587 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> @@ -69,4 +69,4 @@ lookharder (string)
>      }
>  }
>  
> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> index c3ea2d6..36c413a 100644
> --- a/gcc/tree-ssa-threadedge.c
> +++ b/gcc/tree-ssa-threadedge.c
> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
>    return 0;
>  }
>  
> +/* There are basic blocks look like:
> +   <P0>
> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
> +   goto <X>;
> +
> +   <P1>
> +   p1 = c CMP d
> +   goto <X>;
> +
> +   <X>
> +   # phi = PHI <p0 (P0), p1 (P1)>
> +   if (phi != 0) goto <Y>; else goto <Z>;
> +
> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
> +
> +   Return true if E is (P0,X) or (P1,X)  */
> +
> +bool
> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
> +{
> +  basic_block bb = e->dest;
> +
> +  /* See if there is only one stmt which is gcond.  */
> +  gimple *gs = last_and_only_stmt (bb);
> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
> +    return false;
> +
> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
> +  tree cond = gimple_cond_lhs (gs);
> +  enum tree_code code = gimple_cond_code (gs);
> +  tree rhs = gimple_cond_rhs (gs);
> +  if (TREE_CODE (cond) != SSA_NAME
> +      || (code != NE_EXPR && code != EQ_EXPR)
> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
> +    return false;
> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
> +  if (phi == NULL || gimple_bb (phi) != bb)
> +    return false;
> +
> +  /* Check if phi's incoming value is CMP.  */
> +  gimple *def;
> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
> +  if (TREE_CODE (value) == SSA_NAME 
> +      && has_single_use (value)
> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
> +    def = SSA_NAME_DEF_STMT (value);
> +  else
> +    return false;
> +
> +  /* Or if it is (INT) (a CMP b).  */
> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> +    {
> +      value = gimple_assign_rhs1 (def);
> +      if (TREE_CODE (value) == SSA_NAME 
> +	  && has_single_use (value)
> +	  && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
> +	def = SSA_NAME_DEF_STMT (value);
> +      else
> +	return false;
> +    }
> +
> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> +    return false;
> +
> +  return true;
> +}
> +
>  /* We are exiting E->src, see if E->dest ends with a conditional
>     jump which has a known value when reached via E.
>  
> @@ -1317,10 +1385,12 @@ thread_across_edge (gcond *dummy_cond,
>  
>  	/* If we were able to thread through a successor of E->dest, then
>  	   record the jump threading opportunity.  */
> -	if (found)
> +	if (found
> +	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
>  	  {
> -	    propagate_threaded_block_debug_into (path->last ()->e->dest,
> -						 taken_edge->dest);
> +	    if (taken_edge->dest != path->last ()->e->dest)
> +	      propagate_threaded_block_debug_into (path->last ()->e->dest,
> +						   taken_edge->dest);
>  	    register_jump_thread (path);
>  	  }
>  	else

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-28 14:07           ` [PATCH V2] " Jiufu Guo
  2019-05-29  1:51             ` Jiufu Guo
@ 2019-05-29 12:40             ` Richard Biener
  2019-05-29 19:47               ` Jeff Law
  2019-05-30 15:34             ` Jeff Law
  2 siblings, 1 reply; 38+ messages in thread
From: Richard Biener @ 2019-05-29 12:40 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt, law

[-- Attachment #1: Type: text/plain, Size: 10110 bytes --]

On Tue, 28 May 2019, Jiufu Guo wrote:

> Hi,
> 
> This patch implements a new opportunity of jump threading for PR77820.
> In this optimization, conditional jumps are merged with unconditional
> jump. And then moving CMP result to GPR is eliminated.
> 
> This version is based on the proposal of Richard, Jeff and Andrew, and
> refined to incorporate comments.  Thanks for the reviews!
> 
> Bootstrapped and tested on powerpc64le and powerpc64be with no
> regressions (one case is improved) and new testcases are added. Is this
> ok for trunk?
> 
> Example of this opportunity looks like below:
> 
>   <P0>
>   p0 = a CMP b
>   goto <X>;
> 
>   <P1>
>   p1 = c CMP d
>   goto <X>;
> 
>   <X>
>   # phi = PHI <p0 (P0), p1 (P1)>
>   if (phi != 0) goto <Y>; else goto <Z>;
> 
> Could be transformed to:
> 
>   <P0>
>   p0 = a CMP b
>   if (p0 != 0) goto <Y>; else goto <Z>;
> 
>   <P1>
>   p1 = c CMP d
>   if (p1 != 0) goto <Y>; else goto <Z>;
> 
> 
> This optimization eliminates:
> 1. saving CMP result: p0 = a CMP b.
> 2. additional CMP on branch: if (phi != 0).
> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
> 
> Thanks!
> Jiufu Guo
> 
> 
> [gcc]
> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
> 
> 	PR tree-optimization/77820
> 	* tree-ssa-threadedge.c
> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
> 	function.
> 	(thread_across_edge): Add call to
> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
> 
> [gcc/testsuite]
> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
> 
> 	PR tree-optimization/77820
> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
> 
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>  gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
>  6 files changed, 192 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> new file mode 100644
> index 0000000..5227c87
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, long x)
> +{
> +  _Bool t;
> +  if (x)
> +    {
> +      g (a + 1);
> +      t = a < b;
> +      c = d + x;
> +    }
> +  else
> +    {
> +      g (b + 1);
> +      a = c + d;
> +      t = c > d;
> +    }
> +
> +  if (t)
> +    g1 (c);
> +
> +  g (a);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> new file mode 100644
> index 0000000..eaf89bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  _Bool t;
> +  if (x)
> +    t = c < d;
> +  else
> +    t = a < b;
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> new file mode 100644
> index 0000000..d5a1e0b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  if (x)
> +    t = a < b;
> +  else if (d == x)
> +    t = c < b;
> +  else
> +    t = d > c;
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> new file mode 100644
> index 0000000..53acabc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  _Bool l1 = 0, l2 = 0;
> +  if (x)
> +    {
> +      g (a);
> +      c = a + b;
> +      t = a < b;
> +      l1 = 1;
> +    }
> +  else
> +    {
> +      g1 (b);
> +      t = c > d;
> +      d = c + b;
> +      l2 = 1;
> +    }
> +
> +  if (t)
> +    {
> +      if (l1 | l2)
> +	g1 (c);
> +    }
> +  else
> +    {
> +      g (d);
> +      g1 (a + b);
> +    }
> +  g (c + d);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> index e9b4f26..1d7b587 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> @@ -69,4 +69,4 @@ lookharder (string)
>      }
>  }
>  
> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> index c3ea2d6..36c413a 100644
> --- a/gcc/tree-ssa-threadedge.c
> +++ b/gcc/tree-ssa-threadedge.c
> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
>    return 0;
>  }
>  
> +/* There are basic blocks look like:
> +   <P0>
> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
> +   goto <X>;
> +
> +   <P1>
> +   p1 = c CMP d
> +   goto <X>;
> +
> +   <X>
> +   # phi = PHI <p0 (P0), p1 (P1)>
> +   if (phi != 0) goto <Y>; else goto <Z>;
> +
> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
> +
> +   Return true if E is (P0,X) or (P1,X)  */
> +
> +bool
> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
> +{
> +  basic_block bb = e->dest;
> +
> +  /* See if there is only one stmt which is gcond.  */
> +  gimple *gs = last_and_only_stmt (bb);
> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
> +    return false;

     gcond *gs;
     if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb))))
       return false;

makes the following gimple_cond_ accesses more efficient when
checking is enabled.

> +
> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
> +  tree cond = gimple_cond_lhs (gs);
> +  enum tree_code code = gimple_cond_code (gs);
> +  tree rhs = gimple_cond_rhs (gs);
> +  if (TREE_CODE (cond) != SSA_NAME
> +      || (code != NE_EXPR && code != EQ_EXPR)
> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
> +    return false;
> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
> +  if (phi == NULL || gimple_bb (phi) != bb)
> +    return false;
> +
> +  /* Check if phi's incoming value is CMP.  */
> +  gimple *def;
> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
> +  if (TREE_CODE (value) == SSA_NAME 
> +      && has_single_use (value)
> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
> +    def = SSA_NAME_DEF_STMT (value);

Same is true here and below if you rewrite to

     gassign *def;
     tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
     if (TREE_CODE (value) != SSA_NAME
         || !has_single_use (value)
         || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
       return false;

Otherwise it looks good.  I'd like to have Jeff's opinion and
final ACK here because we touch jump threading and he's most
familiar with that detail and the place you hook into.

Thanks,
Richard.

> +  else
> +    return false;
> +
> +  /* Or if it is (INT) (a CMP b).  */
> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> +    {
> +      value = gimple_assign_rhs1 (def);
> +      if (TREE_CODE (value) == SSA_NAME 
> +	  && has_single_use (value)
> +	  && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
> +	def = SSA_NAME_DEF_STMT (value);
> +      else
> +	return false;
> +    }
> +
> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> +    return false;
> +
> +  return true;
> +}
> +
>  /* We are exiting E->src, see if E->dest ends with a conditional
>     jump which has a known value when reached via E.
>  
> @@ -1317,10 +1385,12 @@ thread_across_edge (gcond *dummy_cond,
>  
>  	/* If we were able to thread through a successor of E->dest, then
>  	   record the jump threading opportunity.  */
> -	if (found)
> +	if (found
> +	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
>  	  {
> -	    propagate_threaded_block_debug_into (path->last ()->e->dest,
> -						 taken_edge->dest);
> +	    if (taken_edge->dest != path->last ()->e->dest)
> +	      propagate_threaded_block_debug_into (path->last ()->e->dest,
> +						   taken_edge->dest);
>  	    register_jump_thread (path);
>  	  }
>  	else
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-29 12:40             ` Richard Biener
@ 2019-05-29 19:47               ` Jeff Law
  2019-05-30 15:09                 ` Jiufu Guo
  0 siblings, 1 reply; 38+ messages in thread
From: Jeff Law @ 2019-05-29 19:47 UTC (permalink / raw)
  To: Richard Biener, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/29/19 6:36 AM, Richard Biener wrote:
> On Tue, 28 May 2019, Jiufu Guo wrote:
> 
>> Hi,
>>
>> This patch implements a new opportunity of jump threading for PR77820.
>> In this optimization, conditional jumps are merged with unconditional
>> jump. And then moving CMP result to GPR is eliminated.
>>
>> This version is based on the proposal of Richard, Jeff and Andrew, and
>> refined to incorporate comments.  Thanks for the reviews!
>>
>> Bootstrapped and tested on powerpc64le and powerpc64be with no
>> regressions (one case is improved) and new testcases are added. Is this
>> ok for trunk?
>>
>> Example of this opportunity looks like below:
>>
>>   <P0>
>>   p0 = a CMP b
>>   goto <X>;
>>
>>   <P1>
>>   p1 = c CMP d
>>   goto <X>;
>>
>>   <X>
>>   # phi = PHI <p0 (P0), p1 (P1)>
>>   if (phi != 0) goto <Y>; else goto <Z>;
>>
>> Could be transformed to:
>>
>>   <P0>
>>   p0 = a CMP b
>>   if (p0 != 0) goto <Y>; else goto <Z>;
>>
>>   <P1>
>>   p1 = c CMP d
>>   if (p1 != 0) goto <Y>; else goto <Z>;
>>
>>
>> This optimization eliminates:
>> 1. saving CMP result: p0 = a CMP b.
>> 2. additional CMP on branch: if (phi != 0).
>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>>
>> Thanks!
>> Jiufu Guo
>>
>>
>> [gcc]
>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>> 	    Lijia He  <helijia@linux.ibm.com>
>>
>> 	PR tree-optimization/77820
>> 	* tree-ssa-threadedge.c
>> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
>> 	function.
>> 	(thread_across_edge): Add call to
>> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>>
>> [gcc/testsuite]
>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>> 	    Lijia He  <helijia@linux.ibm.com>
>>
>> 	PR tree-optimization/77820
>> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
>> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
>> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>>
>> ---
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>>  gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
>>  6 files changed, 192 insertions(+), 4 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>> new file mode 100644
>> index 0000000..5227c87
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>> @@ -0,0 +1,30 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (int);
>> +void g1 (int);
>> +
>> +void
>> +f (long a, long b, long c, long d, long x)
>> +{
>> +  _Bool t;
>> +  if (x)
>> +    {
>> +      g (a + 1);
>> +      t = a < b;
>> +      c = d + x;
>> +    }
>> +  else
>> +    {
>> +      g (b + 1);
>> +      a = c + d;
>> +      t = c > d;
>> +    }
>> +
>> +  if (t)
>> +    g1 (c);
>> +
>> +  g (a);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>> new file mode 100644
>> index 0000000..eaf89bb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (void);
>> +void g1 (void);
>> +
>> +void
>> +f (long a, long b, long c, long d, int x)
>> +{
>> +  _Bool t;
>> +  if (x)
>> +    t = c < d;
>> +  else
>> +    t = a < b;
>> +
>> +  if (t)
>> +    {
>> +      g1 ();
>> +      g ();
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>> new file mode 100644
>> index 0000000..d5a1e0b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>> @@ -0,0 +1,25 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (void);
>> +void g1 (void);
>> +
>> +void
>> +f (long a, long b, long c, long d, int x)
>> +{
>> +  int t;
>> +  if (x)
>> +    t = a < b;
>> +  else if (d == x)
>> +    t = c < b;
>> +  else
>> +    t = d > c;
>> +
>> +  if (t)
>> +    {
>> +      g1 ();
>> +      g ();
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>> new file mode 100644
>> index 0000000..53acabc
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>> @@ -0,0 +1,40 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (int);
>> +void g1 (int);
>> +
>> +void
>> +f (long a, long b, long c, long d, int x)
>> +{
>> +  int t;
>> +  _Bool l1 = 0, l2 = 0;
>> +  if (x)
>> +    {
>> +      g (a);
>> +      c = a + b;
>> +      t = a < b;
>> +      l1 = 1;
>> +    }
>> +  else
>> +    {
>> +      g1 (b);
>> +      t = c > d;
>> +      d = c + b;
>> +      l2 = 1;
>> +    }
>> +
>> +  if (t)
>> +    {
>> +      if (l1 | l2)
>> +	g1 (c);
>> +    }
>> +  else
>> +    {
>> +      g (d);
>> +      g1 (a + b);
>> +    }
>> +  g (c + d);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>> index e9b4f26..1d7b587 100644
>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>> @@ -69,4 +69,4 @@ lookharder (string)
>>      }
>>  }
>>  
>> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
>> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>> index c3ea2d6..36c413a 100644
>> --- a/gcc/tree-ssa-threadedge.c
>> +++ b/gcc/tree-ssa-threadedge.c
>> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
>>    return 0;
>>  }
>>  
>> +/* There are basic blocks look like:
>> +   <P0>
>> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
>> +   goto <X>;
>> +
>> +   <P1>
>> +   p1 = c CMP d
>> +   goto <X>;
>> +
>> +   <X>
>> +   # phi = PHI <p0 (P0), p1 (P1)>
>> +   if (phi != 0) goto <Y>; else goto <Z>;
>> +
>> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
>> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
>> +
>> +   Return true if E is (P0,X) or (P1,X)  */
>> +
>> +bool
>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>> +{
>> +  basic_block bb = e->dest;
>> +
>> +  /* See if there is only one stmt which is gcond.  */
>> +  gimple *gs = last_and_only_stmt (bb);
>> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
>> +    return false;
>      gcond *gs;
>      if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb))))
>        return false;
> 
> makes the following gimple_cond_ accesses more efficient when
> checking is enabled.
> 
>> +
>> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>> +  tree cond = gimple_cond_lhs (gs);
>> +  enum tree_code code = gimple_cond_code (gs);
>> +  tree rhs = gimple_cond_rhs (gs);
>> +  if (TREE_CODE (cond) != SSA_NAME
>> +      || (code != NE_EXPR && code != EQ_EXPR)
>> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
>> +    return false;
>> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>> +  if (phi == NULL || gimple_bb (phi) != bb)
>> +    return false;
>> +
>> +  /* Check if phi's incoming value is CMP.  */
>> +  gimple *def;
>> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>> +  if (TREE_CODE (value) == SSA_NAME 
>> +      && has_single_use (value)
>> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
>> +    def = SSA_NAME_DEF_STMT (value);
> Same is true here and below if you rewrite to
> 
>      gassign *def;
>      tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>      if (TREE_CODE (value) != SSA_NAME
>          || !has_single_use (value)
>          || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>        return false;
> 
> Otherwise it looks good.  I'd like to have Jeffs opinion and
> final ACK here because we touch jump-threading and he's most
> familiar with that detail and the place you hook into.
I've got the full thread to look over.  At a high level I wouldn't have
guessed it'd be this easy to get the threader to handle this, but
occasionally we are surprised in a good way.  Anyway, I'll be looking
through the full discussion.

Jeff


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-21 13:45 [PATCH] A jump threading opportunity for condition branch Jiufu Guo
  2019-05-22 12:38 ` Richard Biener
@ 2019-05-29 20:12 ` Jeff Law
  1 sibling, 0 replies; 38+ messages in thread
From: Jeff Law @ 2019-05-29 20:12 UTC (permalink / raw)
  To: Jiufu Guo, gcc-patches; +Cc: jakub, rguenther, dberlin, segher, wschmidt

On 5/21/19 7:44 AM, Jiufu Guo wrote:
> Hi,
> 
> This patch implements a new opportunity of jump threading for PR77820.
> In this optimization, conditional jumps are merged with unconditional jump.
> And then moving CMP result to GPR is eliminated.
> 
> It looks like below:
> 
>   <P0>
>   p0 = a CMP b
>   goto <X>;
> 
>   <P1>
>   p1 = c CMP d
>   goto <X>;
> 
>   <X>
>   # phi = PHI <p0 (P0), p1 (P1)>
>   if (phi != 0) goto <Y>; else goto <Z>;
> 
> Could be transformed to:
> 
>   <P0>
>   p0 = a CMP b
>   if (p0 != 0) goto <Y>; else goto <Z>;
> 
>   <P1>
>   p1 = c CMP d
>   if (p1 != 0) goto <Y>; else goto <Z>;
A few high level notes.

I think LLVM does this in their jump threading pass as well, mostly
because it enables discovering additional jump threading opportunities
IIRC.   But it appears to me to be inherently good on its own as well as
it eliminates a dynamic unconditional jump.

It's also the case that after this transformation we may be able to
combine the assignment and test resulting in something like this:

>   <P0>
>   if (a CMP b) goto <Y>; else goto <Z>;
>
>   <P1>
>   if (c CMP d) goto <Y>; else goto <Z>;
Which is inherently good *and* the blocks no longer have side effects
which can have secondary positive effects in the jump threader.

I wouldn't be surprised if this was particularly useful for chained
boolean logical tests where some of the arms collapse down to single tests.

Jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-23 12:06   ` Jiufu Guo
  2019-05-23 12:11     ` Richard Biener
@ 2019-05-29 20:18     ` Jeff Law
  2019-05-30  6:41       ` Richard Biener
  1 sibling, 1 reply; 38+ messages in thread
From: Jeff Law @ 2019-05-29 20:18 UTC (permalink / raw)
  To: Jiufu Guo, Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/23/19 6:05 AM, Jiufu Guo wrote:
> Hi,
> 
> Richard Biener <rguenther@suse.de> writes:
> 
>> On Tue, 21 May 2019, Jiufu Guo wrote:
>>

>>>  
>>> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the CMP is
>>> +   defined in the incoming basic block. Otherwise return false.  */
>>> +static bool
>>> +cmp_from_unconditional_block (gphi *phi, int index)
>>> +{
>>> +  tree value = gimple_phi_arg_def (phi, index);
>>> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>>> +    return false;
>> Not sure why we should reject a constant here but I guess we
>> expect it to find a simplified condition anyways ;)
>>
> Const could be accepted here, like "# t_9 = PHI <5(3), t_17(4)>". I
> found this case is already handled by other jump-threading code, like
> 'ethread' pass.
Right.  There's no need to handle constants here.  They'll result in
trivially discoverable jump threading opportunities.

>>> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>> +  if (def->bb != e->src)
>>> +    return false;
>> why does this matter?
>>
> Through preparing pathes and duplicating block, this transform can also
> help to combine a cmp in previous block and a gcond in current block.
> "if (def->bb != e->src)" make sure the cmp is define in the incoming
> block of the current; and then combining "cmp with gcond" is safe.  If
> the cmp is defined far from the incoming block, it would be hard to
> achieve the combining, and the transform may not needed.
I don't think it's strictly needed in the long term and could be
addressed in a follow-up if we can find cases where it helps.  I think
we'd just need to double check insertion of the new conditional branch
to relax this if we cared.

However, I would expect sinking to have done its job here and would be
surprised if trying to handle this actually improved any real world code.
> 
>>> +
>>> +  if (!single_succ_p (def->bb))
>>> +    return false;
>> Or this?  The actual threading will ensure this will hold true.
>>
> Yes, other thread code check this and ensure it to be true, like
> function thread_through_normal_block. Since this new function is invoked
> outside thread_through_normal_block, so, checking single_succ_p is also
> needed for this case.
Agreed that it's needed.  Consider if the source block has multiple
successors.  Where do we insert the copy of the conditional branch?


>>> +{
>>> +  gimple *gs = last_and_only_stmt (bb);
>>> +  if (gs == NULL)
>>> +    return false;
>>> +
>>> +  if (gimple_code (gs) != GIMPLE_COND)
>>> +    return false;
>>> +
>>> +  tree cond = gimple_cond_lhs (gs);
>>> +
>>> +  if (TREE_CODE (cond) != SSA_NAME)
>>> +    return false;
>> space after if( too much vertical space in this function
>> for my taste btw.
> Will update this.
>> For the forwarding to work we want a NE_EXPR or EQ_EXPR
>> as gimple_cond_code and integer_one_p or integer_zero_p
>> gimple_cond_rhs.
> Right, checking those would be more safe.  Since no issue found, during
> bootstrap and regression tests, so I did not add these checking.  I will
> add this checking.
Definitely want to verify that we're dealing with an equality test
against 0/1.

Jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-23 12:11     ` Richard Biener
  2019-05-23 14:40       ` Jiufu Guo
@ 2019-05-29 20:22       ` Jeff Law
  2019-05-30  6:40         ` Jiufu Guo
  2019-05-30  6:44         ` Richard Biener
  1 sibling, 2 replies; 38+ messages in thread
From: Jeff Law @ 2019-05-29 20:22 UTC (permalink / raw)
  To: Richard Biener, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/23/19 6:11 AM, Richard Biener wrote:
> On Thu, 23 May 2019, Jiufu Guo wrote:
> 
>> Hi,
>>
>> Richard Biener <rguenther@suse.de> writes:
>>
>>> On Tue, 21 May 2019, Jiufu Guo wrote:

>>>> +    }
>>>> +
>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>>>> +    return false;
>>>> +
>>>> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>> +  if (def->bb != e->src)
>>>> +    return false;
>>> why does this matter?
>>>
>> Through preparing pathes and duplicating block, this transform can also
>> help to combine a cmp in previous block and a gcond in current block.
>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>> block of the current; and then combining "cmp with gcond" is safe.  If
>> the cmp is defined far from the incoming block, it would be hard to
>> achieve the combining, and the transform may not needed.
> We're in SSA form so the "combining" doesn't really care where the
> definition comes from.
Combining doesn't care, but we need to make sure the copy of the
conditional ends up in the right block since it wouldn't necessarily be
associated with def->bb anymore.  But I'd expect the sinking pass to
make this a non-issue in practice anyway.

> 
>>>> +
>>>> +  if (!single_succ_p (def->bb))
>>>> +    return false;
>>> Or this?  The actual threading will ensure this will hold true.
>>>
>> Yes, other thread code check this and ensure it to be true, like
>> function thread_through_normal_block. Since this new function is invoked
>> outside thread_through_normal_block, so, checking single_succ_p is also
>> needed for this case.
> I mean threading will isolate the path making this trivially true.
> It's also no requirement for combining, in fact due to the single-use
> check the definition can be sinked across the edge already (if
> the edges dest didn't have multiple predecessors which this threading
> will fix as well).
I don't think so.  The CMP source block could end with a call and have
an abnormal edge (for example).  We can't put the copied conditional
before the call and putting it after the call essentially means creating
a new block.

The CMP source block could also end with a conditional.  Where do we put
the one we want to copy into the CMP source block in that case? :-)

This is something else we'd want to check if we ever allowed the CMP
defining block to not be the immediate predecessor of the conditional
jump block.  If we did that we'd need to validate that the block where
we're going to insert the copy of the jump has a single successor.


Jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-24 12:45         ` Richard Biener
  2019-05-24 14:52           ` Jiufu Guo
  2019-05-28 14:07           ` [PATCH V2] " Jiufu Guo
@ 2019-05-29 20:26           ` Jeff Law
  2019-05-30  6:57             ` Richard Biener
  2 siblings, 1 reply; 38+ messages in thread
From: Jeff Law @ 2019-05-29 20:26 UTC (permalink / raw)
  To: Richard Biener, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/24/19 6:45 AM, Richard Biener wrote:
[ Aggressive snipping ]

> As said in my first review I'd just check whether for the
> edge we want to thread through the definition comes from a CMP.
> Suppose you have
> 
>  # val_1 = PHI <a_2, b_3, c_4>
>  if (val_1 != 0)
> 
> and only one edge has a b_3 = d_5 != 0 condition it's still
> worth tail-duplicating the if block.
Agreed.  The cost of tail duplicating here is so small we should be
doing it highly aggressively.  About the only case where we might not
want to would be if we're optimizing for size rather than speed.  That
case isn't clearly a win either way.

jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-29 20:22       ` Jeff Law
@ 2019-05-30  6:40         ` Jiufu Guo
  2019-05-30  6:44         ` Richard Biener
  1 sibling, 0 replies; 38+ messages in thread
From: Jiufu Guo @ 2019-05-30  6:40 UTC (permalink / raw)
  To: Jeff Law
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

Jeff Law <law@redhat.com> writes:

> On 5/23/19 6:11 AM, Richard Biener wrote:
>> On Thu, 23 May 2019, Jiufu Guo wrote:
>> 
>>> Hi,
>>>
>>> Richard Biener <rguenther@suse.de> writes:
>>>
>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
>
>>>>> +    }
>>>>> +
>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>>>>> +    return false;
>>>>> +
>>>>> +  /* Check if phi's incoming value is defined in the incoming basic_block.  */
>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>>> +  if (def->bb != e->src)
>>>>> +    return false;
>>>> why does this matter?
>>>>
>>> Through preparing pathes and duplicating block, this transform can also
>>> help to combine a cmp in previous block and a gcond in current block.
>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>>> block of the current; and then combining "cmp with gcond" is safe.  If
>>> the cmp is defined far from the incoming block, it would be hard to
>>> achieve the combining, and the transform may not needed.
>> We're in SSA form so the "combining" doesn't really care where the
>> definition comes from.
> Combining doesn't care, but we need to make sure the copy of the
> conditional ends up in the right block since it wouldn't necessarily be
> associated with def->bb anymore.  But I'd expect the sinking pass to
> make this a non-issue in practice anyway.
>
>> 
>>>>> +
>>>>> +  if (!single_succ_p (def->bb))
>>>>> +    return false;
>>>> Or this?  The actual threading will ensure this will hold true.
>>>>
>>> Yes, other thread code check this and ensure it to be true, like
>>> function thread_through_normal_block. Since this new function is invoked
>>> outside thread_through_normal_block, so, checking single_succ_p is also
>>> needed for this case.
>> I mean threading will isolate the path making this trivially true.
>> It's also no requirement for combining, in fact due to the single-use
>> check the definition can be sinked across the edge already (if
>> the edges dest didn't have multiple predecessors which this threading
>> will fix as well).
> I don't think so.  The CMP source block could end with a call and have
> an abnormal edge (for example).  We can't put the copied conditional
> before the call and putting it after the call essentially means creating
> a new block.
>
> The CMP source block could also end with a conditional.  Where do we put
> the one we want to copy into the CMP source block in that case? :-)
>
> This is something else we'd want to check if we ever allowed the the CMP
> defining block to not be the immediate predecessor of the conditional
> jump block.  If we did that we'd need to validate that the block where
> we're going to insert the copy of the jump has a single successor.
OK.  Adding single_succ_p (e->src) could make sure the copied jump is
inserted at the end of the immediate predecessor, instead of the defining
block of the CMP, if def->bb != e->src.
>
>
> Jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-29 20:18     ` Jeff Law
@ 2019-05-30  6:41       ` Richard Biener
  0 siblings, 0 replies; 38+ messages in thread
From: Richard Biener @ 2019-05-30  6:41 UTC (permalink / raw)
  To: Jeff Law, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On May 29, 2019 10:12:31 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>On 5/23/19 6:05 AM, Jiufu Guo wrote:
>> Hi,
>> 
>> Richard Biener <rguenther@suse.de> writes:
>> 
>>> On Tue, 21 May 2019, Jiufu Guo wrote:
>>>
>
>>>>  
>>>> +/* Return true if PHI's INDEX-th incoming value is a CMP, and the
>CMP is
>>>> +   defined in the incoming basic block. Otherwise return false. 
>*/
>>>> +static bool
>>>> +cmp_from_unconditional_block (gphi *phi, int index)
>>>> +{
>>>> +  tree value = gimple_phi_arg_def (phi, index);
>>>> +  if (!(TREE_CODE (value) == SSA_NAME && has_single_use (value)))
>>>> +    return false;
>>> Not sure why we should reject a constant here but I guess we
>>> expect it to find a simplified condition anyways ;)
>>>
>> Const could be accepted here, like "# t_9 = PHI <5(3), t_17(4)>". I
>> found this case is already handled by other jump-threading code, like
>> 'ethread' pass.
>Right.  There's no need to handle constants here.  They'll result in
>trivially discoverable jump threading opportunities.
>
>>>> +  /* Check if phi's incoming value is defined in the incoming
>basic_block.  */
>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>> +  if (def->bb != e->src)
>>>> +    return false;
>>> why does this matter?
>>>
>> Through preparing pathes and duplicating block, this transform can
>also
>> help to combine a cmp in previous block and a gcond in current block.
>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>> block of the current; and then combining "cmp with gcond" is safe. 
>If
>> the cmp is defined far from the incoming block, it would be hard to
>> achieve the combining, and the transform may not needed.
>I don't think it's strictly needed in the long term and could be
>addressed in a follow-up if we can find cases where it helps.  I think
>we'd just need to double check insertion of the new conditional branch
>to relax this if we cared.
>
>However, I would expect sinking to have done is job here and would be
>surprised if trying to handle this actually improved any real world
>code.
>> 
>>>> +
>>>> +  if (!single_succ_p (def->bb))
>>>> +    return false;
>>> Or this?  The actual threading will ensure this will hold true.
>>>
>> Yes, other thread code check this and ensure it to be true, like
>> function thread_through_normal_block. Since this new function is
>invoked
>> outside thread_through_normal_block, so, checking single_succ_p is
>also
>> needed for this case.
>Agreed that it's needed.  Consider if the source block has multiple
>successors.  Where do we insert the copy of the conditional branch?

We're duplicating its block? That is, we are isolating a path into a conditional - that's always possible? I wanted to make sure that when threading threads through a conditional in the block with the compare we'd add the extra tail duplication? AFAIK we're still looking at unmodified CFG here?

>
>>>> +{
>>>> +  gimple *gs = last_and_only_stmt (bb);
>>>> +  if (gs == NULL)
>>>> +    return false;
>>>> +
>>>> +  if (gimple_code (gs) != GIMPLE_COND)
>>>> +    return false;
>>>> +
>>>> +  tree cond = gimple_cond_lhs (gs);
>>>> +
>>>> +  if (TREE_CODE (cond) != SSA_NAME)
>>>> +    return false;
>>> space after if( too much vertical space in this function
>>> for my taste btw.
>> Will update this.
>>> For the forwarding to work we want a NE_EXPR or EQ_EXPR
>>> as gimple_cond_code and integer_one_p or integer_zero_p
>>> gimple_cond_rhs.
>> Right, checking those would be more safe.  Since no issue found,
>during
>> bootstrap and regression tests, so I did not add these checking.  I
>will
>> add this checking.
>Definitely want to verify that we're dealing with an equality test
>against 0/1.
>
>Jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-29 20:22       ` Jeff Law
  2019-05-30  6:40         ` Jiufu Guo
@ 2019-05-30  6:44         ` Richard Biener
  2019-05-30 20:17           ` Jeff Law
  1 sibling, 1 reply; 38+ messages in thread
From: Richard Biener @ 2019-05-30  6:44 UTC (permalink / raw)
  To: Jeff Law, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>On 5/23/19 6:11 AM, Richard Biener wrote:
>> On Thu, 23 May 2019, Jiufu Guo wrote:
>> 
>>> Hi,
>>>
>>> Richard Biener <rguenther@suse.de> writes:
>>>
>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
>
>>>>> +    }
>>>>> +
>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
>tcc_comparison)
>>>>> +    return false;
>>>>> +
>>>>> +  /* Check if phi's incoming value is defined in the incoming
>basic_block.  */
>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>>> +  if (def->bb != e->src)
>>>>> +    return false;
>>>> why does this matter?
>>>>
>>> Through preparing pathes and duplicating block, this transform can
>also
>>> help to combine a cmp in previous block and a gcond in current
>block.
>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>>> block of the current; and then combining "cmp with gcond" is safe. 
>If
>>> the cmp is defined far from the incoming block, it would be hard to
>>> achieve the combining, and the transform may not needed.
>> We're in SSA form so the "combining" doesn't really care where the
>> definition comes from.
>Combining doesn't care, but we need to make sure the copy of the
>conditional ends up in the right block since it wouldn't necessarily be
>associated with def->bb anymore.  But I'd expect the sinking pass to
>make this a non-issue in practice anyway.
>
>> 
>>>>> +
>>>>> +  if (!single_succ_p (def->bb))
>>>>> +    return false;
>>>> Or this?  The actual threading will ensure this will hold true.
>>>>
>>> Yes, other thread code check this and ensure it to be true, like
>>> function thread_through_normal_block. Since this new function is
>invoked
>>> outside thread_through_normal_block, so, checking single_succ_p is
>also
>>> needed for this case.
>> I mean threading will isolate the path making this trivially true.
>> It's also no requirement for combining, in fact due to the single-use
>> check the definition can be sinked across the edge already (if
>> the edges dest didn't have multiple predecessors which this threading
>> will fix as well).
>I don't think so.  The CMP source block could end with a call and have
>an abnormal edge (for example).  We can't put the copied conditional
>before the call and putting it after the call essentially means
>creating
>a new block.
>
>The CMP source block could also end with a conditional.  Where do we
>put
>the one we want to copy into the CMP source block in that case? :-)
>
>This is something else we'd want to check if we ever allowed the the
>CMP
>defining block to not be the immediate predecessor of the conditional
>jump block.  If we did that we'd need to validate that the block where
>we're going to insert the copy of the jump has a single successor.

But we're just isolating a path here. The actual combine job is left to follow-up cleanups. 

Richard. 

>
>Jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-29 20:26           ` [PATCH] " Jeff Law
@ 2019-05-30  6:57             ` Richard Biener
  2019-05-30  6:58               ` Jiufu Guo
  2019-05-30 15:03               ` Jeff Law
  0 siblings, 2 replies; 38+ messages in thread
From: Richard Biener @ 2019-05-30  6:57 UTC (permalink / raw)
  To: Jeff Law, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On May 29, 2019 10:21:46 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>On 5/24/19 6:45 AM, Richard Biener wrote:
>[ Aggressive snipping ]
>
>> As said in my first review I'd just check whether for the
>> edge we want to thread through the definition comes from a CMP.
>> Suppose you have
>> 
>>  # val_1 = PHI <a_2, b_3, c_4>
>>  if (val_1 != 0)
>> 
>> and only one edge has a b_3 = d_5 != 0 condition it's still
>> worth tail-duplicating the if block.
>Agreed.  The cost of tail duplicating here is so small we should be
>doing it highly aggressively.  About the only case where we might not
>want to would be if we're optimizing for size rather than speed.  That
>case isn't clearly a win either way.

Even there the PHI likely causes edge copies to be inserted. So I wouldn't care for the moment. The proper check would be ! Optimize_edge_for_size_p (e). 

Richard. 

>jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-30  6:57             ` Richard Biener
@ 2019-05-30  6:58               ` Jiufu Guo
  2019-05-30 14:59                 ` Jeff Law
  2019-05-30 15:03               ` Jeff Law
  1 sibling, 1 reply; 38+ messages in thread
From: Jiufu Guo @ 2019-05-30  6:58 UTC (permalink / raw)
  To: Richard Biener
  Cc: Jeff Law, gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

Richard Biener <rguenther@suse.de> writes:

> On May 29, 2019 10:21:46 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>>On 5/24/19 6:45 AM, Richard Biener wrote:
>>[ Aggressive snipping ]
>>
>>> As said in my first review I'd just check whether for the
>>> edge we want to thread through the definition comes from a CMP.
>>> Suppose you have
>>> 
>>>  # val_1 = PHI <a_2, b_3, c_4>
>>>  if (val_1 != 0)
>>> 
>>> and only one edge has a b_3 = d_5 != 0 condition it's still
>>> worth tail-duplicating the if block.
>>Agreed.  The cost of tail duplicating here is so small we should be
>>doing it highly aggressively.  About the only case where we might not
>>want to would be if we're optimizing for size rather than speed.  That
>>case isn't clearly a win either way.
>
> Even there the PHI likely causes edge copies to be inserted. So I
> wouldn't care for the moment. The proper check would be !
> Optimize_edge_for_size_p (e).
For most cases of this kind, where the bb contains just one conditional
jump stmt, it may not increase the size, especially since there is
combining in follow-up passes -- it may even save size ;)

>
> Richard. 
>
>>jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-30  6:58               ` Jiufu Guo
@ 2019-05-30 14:59                 ` Jeff Law
  0 siblings, 0 replies; 38+ messages in thread
From: Jeff Law @ 2019-05-30 14:59 UTC (permalink / raw)
  To: Jiufu Guo, Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/30/19 12:57 AM, Jiufu Guo wrote:
> Richard Biener <rguenther@suse.de> writes:
> 
>> On May 29, 2019 10:21:46 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>>> On 5/24/19 6:45 AM, Richard Biener wrote:
>>> [ Aggressive snipping ]
>>>
>>>> As said in my first review I'd just check whether for the
>>>> edge we want to thread through the definition comes from a CMP.
>>>> Suppose you have
>>>>
>>>>  # val_1 = PHI <a_2, b_3, c_4>
>>>>  if (val_1 != 0)
>>>>
>>>> and only one edge has a b_3 = d_5 != 0 condition it's still
>>>> worth tail-duplicating the if block.
>>> Agreed.  The cost of tail duplicating here is so small we should be
>>> doing it highly aggressively.  About the only case where we might not
>>> want to would be if we're optimizing for size rather than speed.  That
>>> case isn't clearly a win either way.
>>
>> Even there the PHI likely causes edge copies to be inserted. So I
>> wouldn't care for the moment. The proper check would be !
>> Optimize_edge_for_size_p (e).
> For most of this kind of case where the bb contains just one conditional
> jump stmt, it may not increase the size especially for there are
> combinings in follow passes -- it may save size ;)
My point was it's not as clear cut.  Regardless I think we've gone
pretty deep into the weeds.  I think we could easily handle that case as
a follow-up.

jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-30  6:57             ` Richard Biener
  2019-05-30  6:58               ` Jiufu Guo
@ 2019-05-30 15:03               ` Jeff Law
  1 sibling, 0 replies; 38+ messages in thread
From: Jeff Law @ 2019-05-30 15:03 UTC (permalink / raw)
  To: Richard Biener, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/30/19 12:44 AM, Richard Biener wrote:
> On May 29, 2019 10:21:46 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>> On 5/24/19 6:45 AM, Richard Biener wrote:
>> [ Aggressive snipping ]
>>
>>> As said in my first review I'd just check whether for the
>>> edge we want to thread through the definition comes from a CMP.
>>> Suppose you have
>>>
>>>  # val_1 = PHI <a_2, b_3, c_4>
>>>  if (val_1 != 0)
>>>
>>> and only one edge has a b_3 = d_5 != 0 condition it's still
>>> worth tail-duplicating the if block.
>> Agreed.  The cost of tail duplicating here is so small we should be
>> doing it highly aggressively.  About the only case where we might not
>> want to would be if we're optimizing for size rather than speed.  That
>> case isn't clearly a win either way.
> 
> Even there the PHI likely causes edge copies to be inserted. So I wouldn't care for the moment. The proper check would be ! Optimize_edge_for_size_p (e). 
Agreed, with capitalization fixed :-)
jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-29 19:47               ` Jeff Law
@ 2019-05-30 15:09                 ` Jiufu Guo
  2019-05-30 23:55                   ` Jeff Law
  0 siblings, 1 reply; 38+ messages in thread
From: Jiufu Guo @ 2019-05-30 15:09 UTC (permalink / raw)
  To: Jeff Law
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

Jeff Law <law@redhat.com> writes:

> On 5/29/19 6:36 AM, Richard Biener wrote:
>> On Tue, 28 May 2019, Jiufu Guo wrote:
>> 
>>> Hi,
>>>
>>> This patch implements a new opportunity of jump threading for PR77820.
>>> In this optimization, conditional jumps are merged with unconditional
>>> jump. And then moving CMP result to GPR is eliminated.
>>>
>>> This version is based on the proposal of Richard, Jeff and Andrew, and
>>> refined to incorporate comments.  Thanks for the reviews!
>>>
>>> Bootstrapped and tested on powerpc64le and powerpc64be with no
>>> regressions (one case is improved) and new testcases are added. Is this
>>> ok for trunk?
>>>
>>> Example of this opportunity looks like below:
>>>
>>>   <P0>
>>>   p0 = a CMP b
>>>   goto <X>;
>>>
>>>   <P1>
>>>   p1 = c CMP d
>>>   goto <X>;
>>>
>>>   <X>
>>>   # phi = PHI <p0 (P0), p1 (P1)>
>>>   if (phi != 0) goto <Y>; else goto <Z>;
>>>
>>> Could be transformed to:
>>>
>>>   <P0>
>>>   p0 = a CMP b
>>>   if (p0 != 0) goto <Y>; else goto <Z>;
>>>
>>>   <P1>
>>>   p1 = c CMP d
>>>   if (p1 != 0) goto <Y>; else goto <Z>;
>>>
>>>
>>> This optimization eliminates:
>>> 1. saving CMP result: p0 = a CMP b.
>>> 2. additional CMP on branch: if (phi != 0).
>>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>>>
>>> Thanks!
>>> Jiufu Guo
>>>
>>>
>>> [gcc]
>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>
>>> 	PR tree-optimization/77820
>>> 	* tree-ssa-threadedge.c
>>> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
>>> 	function.
>>> 	(thread_across_edge): Add call to
>>> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>>>
>>> [gcc/testsuite]
>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>
>>> 	PR tree-optimization/77820
>>> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>>> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>>> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
>>> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
>>> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>>>
>>> ---
>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
>>>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>>>  gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
>>>  6 files changed, 192 insertions(+), 4 deletions(-)
>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>> new file mode 100644
>>> index 0000000..5227c87
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>> @@ -0,0 +1,30 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>> +
>>> +void g (int);
>>> +void g1 (int);
>>> +
>>> +void
>>> +f (long a, long b, long c, long d, long x)
>>> +{
>>> +  _Bool t;
>>> +  if (x)
>>> +    {
>>> +      g (a + 1);
>>> +      t = a < b;
>>> +      c = d + x;
>>> +    }
>>> +  else
>>> +    {
>>> +      g (b + 1);
>>> +      a = c + d;
>>> +      t = c > d;
>>> +    }
>>> +
>>> +  if (t)
>>> +    g1 (c);
>>> +
>>> +  g (a);
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>> new file mode 100644
>>> index 0000000..eaf89bb
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>> @@ -0,0 +1,23 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>> +
>>> +void g (void);
>>> +void g1 (void);
>>> +
>>> +void
>>> +f (long a, long b, long c, long d, int x)
>>> +{
>>> +  _Bool t;
>>> +  if (x)
>>> +    t = c < d;
>>> +  else
>>> +    t = a < b;
>>> +
>>> +  if (t)
>>> +    {
>>> +      g1 ();
>>> +      g ();
>>> +    }
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>> new file mode 100644
>>> index 0000000..d5a1e0b
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>> @@ -0,0 +1,25 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>> +
>>> +void g (void);
>>> +void g1 (void);
>>> +
>>> +void
>>> +f (long a, long b, long c, long d, int x)
>>> +{
>>> +  int t;
>>> +  if (x)
>>> +    t = a < b;
>>> +  else if (d == x)
>>> +    t = c < b;
>>> +  else
>>> +    t = d > c;
>>> +
>>> +  if (t)
>>> +    {
>>> +      g1 ();
>>> +      g ();
>>> +    }
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>> new file mode 100644
>>> index 0000000..53acabc
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>> @@ -0,0 +1,40 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>> +
>>> +void g (int);
>>> +void g1 (int);
>>> +
>>> +void
>>> +f (long a, long b, long c, long d, int x)
>>> +{
>>> +  int t;
>>> +  _Bool l1 = 0, l2 = 0;
>>> +  if (x)
>>> +    {
>>> +      g (a);
>>> +      c = a + b;
>>> +      t = a < b;
>>> +      l1 = 1;
>>> +    }
>>> +  else
>>> +    {
>>> +      g1 (b);
>>> +      t = c > d;
>>> +      d = c + b;
>>> +      l2 = 1;
>>> +    }
>>> +
>>> +  if (t)
>>> +    {
>>> +      if (l1 | l2)
>>> +	g1 (c);
>>> +    }
>>> +  else
>>> +    {
>>> +      g (d);
>>> +      g1 (a + b);
>>> +    }
>>> +  g (c + d);
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>> index e9b4f26..1d7b587 100644
>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>> @@ -69,4 +69,4 @@ lookharder (string)
>>>      }
>>>  }
>>>  
>>> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
>>> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
>>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>>> index c3ea2d6..36c413a 100644
>>> --- a/gcc/tree-ssa-threadedge.c
>>> +++ b/gcc/tree-ssa-threadedge.c
>>> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
>>>    return 0;
>>>  }
>>>  
>>> +/* There are basic blocks look like:
>>> +   <P0>
>>> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
>>> +   goto <X>;
>>> +
>>> +   <P1>
>>> +   p1 = c CMP d
>>> +   goto <X>;
>>> +
>>> +   <X>
>>> +   # phi = PHI <p0 (P0), p1 (P1)>
>>> +   if (phi != 0) goto <Y>; else goto <Z>;
>>> +
>>> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
>>> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
>>> +
>>> +   Return true if E is (P0,X) or (P1,X)  */
>>> +
>>> +bool
>>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>>> +{
>>> +  basic_block bb = e->dest;
>>> +
>>> +  /* See if there is only one stmt which is gcond.  */
>>> +  gimple *gs = last_and_only_stmt (bb);
>>> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
>>> +    return false;
>>      gcond *gs;
>>      if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb))))
>>        return false;
>> 
>> makes the following gimple_cond_ accesses more efficient when
>> checking is enabled.
>> 
>>> +
>>> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>>> +  tree cond = gimple_cond_lhs (gs);
>>> +  enum tree_code code = gimple_cond_code (gs);
>>> +  tree rhs = gimple_cond_rhs (gs);
>>> +  if (TREE_CODE (cond) != SSA_NAME
>>> +      || (code != NE_EXPR && code != EQ_EXPR)
>>> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
>>> +    return false;
>>> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>>> +  if (phi == NULL || gimple_bb (phi) != bb)
>>> +    return false;
>>> +
>>> +  /* Check if phi's incoming value is CMP.  */
>>> +  gimple *def;
>>> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>> +  if (TREE_CODE (value) == SSA_NAME 
>>> +      && has_single_use (value)
>>> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
>>> +    def = SSA_NAME_DEF_STMT (value);
>> Same is true here and below if you rewrite to
>> 
>>      gassign *def;
>>      tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>      if (TREE_CODE (value) != SSA_NAME
>>          || !has_single_use (value)
>>          || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>>        return false;
>> 
>> Otherwise it looks good.  I'd like to have Jeffs opinion and
>> final ACK here because we touch jump-threading and he's most
>> familiar with that detail and the place you hook into.
> I've got the full thread to look over.  At a high level I wouldn't have
> guessed it'd be this easy to get the threader handle this, but
> occasionally we are surprised in a good way.  Anyway, I'll be looking
> through the full discussion.
>
> Jeff

Hi Jeff, Richard and all,

Thanks a lot for your great comments in all threads. Based on those
comments, I refined the code as below:

/* There are basic blocks that look like:

   <P0>
   p0 = a CMP b ; or p0 = (INT) (a CMP b)
   goto <X>;

   <P1>
   p1 = c CMP d
   goto <X>;

   <X>
   # phi = PHI <p0 (P0), p1 (P1)>
   if (phi != 0) goto <Y>; else goto <Z>;

   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD,
   and edges (X,Y) and (X,Z) as EDGE_COPY_SRC_JOINER_BLOCK.

   Return true if E is (P0,X) or (P1,X).  */

bool
edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
{
  /* See if E->dest contains only one stmt, and it is a gcond.  */
  gcond *gs;
  if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (e->dest))))
    return false;

  /* See if gcond's condition is "(phi !=/== 0/1)" in that basic block.  */
  tree cond = gimple_cond_lhs (gs);
  enum tree_code code = gimple_cond_code (gs);
  tree rhs = gimple_cond_rhs (gs);
  if (TREE_CODE (cond) != SSA_NAME
      || (code != NE_EXPR && code != EQ_EXPR)
      || (!integer_onep (rhs) && !integer_zerop (rhs)))
    return false;
  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
  if (phi == NULL || gimple_bb (phi) != e->dest)
    return false;

  /* Check if phi's incoming value is a CMP.  */
  gassign *def;
  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
  if (TREE_CODE (value) != SSA_NAME
      || !has_single_use (value)
      || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
    return false;

  /* Or if it is (INT) (a CMP b).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
    {
      value = gimple_assign_rhs1 (def);
      if (TREE_CODE (value) != SSA_NAME
	  || !has_single_use (value)
	  || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
	return false;
    }

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
    return false;

  /* Require a single successor on E->src so the copied conditional jump
     has an unambiguous insertion point, which may also reduce copying.  */
  if (!single_succ_p (e->src))
    return false;

  return true;
}


In this code, I placed the "if (!single_succ_p (e->src))" check at the
end; it may help reduce the number of copied blocks.

Thanks,
Jiufu Guo.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-28 14:07           ` [PATCH V2] " Jiufu Guo
  2019-05-29  1:51             ` Jiufu Guo
  2019-05-29 12:40             ` Richard Biener
@ 2019-05-30 15:34             ` Jeff Law
  2019-06-03  2:18               ` [PATCH V3] " Jiufu Guo
  2 siblings, 1 reply; 38+ messages in thread
From: Jeff Law @ 2019-05-30 15:34 UTC (permalink / raw)
  To: Jiufu Guo, Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/28/19 7:53 AM, Jiufu Guo wrote:
> Hi,
> 
> This patch implements a new opportunity of jump threading for PR77820.
> In this optimization, conditional jumps are merged with unconditional
> jump. And then moving CMP result to GPR is eliminated.
> 
> This version is based on the proposal of Richard, Jeff and Andrew, and
> refined to incorporate comments.  Thanks for the reviews!
> 
> Bootstrapped and tested on powerpc64le and powerpc64be with no
> regressions (one case is improved) and new testcases are added. Is this
> ok for trunk?
> 
> Example of this opportunity looks like below:
> 
>   <P0>
>   p0 = a CMP b
>   goto <X>;
> 
>   <P1>
>   p1 = c CMP d
>   goto <X>;
> 
>   <X>
>   # phi = PHI <p0 (P0), p1 (P1)>
>   if (phi != 0) goto <Y>; else goto <Z>;
> 
> Could be transformed to:
> 
>   <P0>
>   p0 = a CMP b
>   if (p0 != 0) goto <Y>; else goto <Z>;
> 
>   <P1>
>   p1 = c CMP d
>   if (p1 != 0) goto <Y>; else goto <Z>;
> 
> 
> This optimization eliminates:
> 1. saving CMP result: p0 = a CMP b.
> 2. additional CMP on branch: if (phi != 0).
> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
Just an FYI, I threw the V2 patch into my tester overnight.  THere's
still several targets to build, but so far no regressions.  In my spot
checks of the embedded targets, they're passing the new tests -- odds
are the tests are simple enough to not run into BRANCH_COST issues so
we're not going to have the insane xfail lists or alternate expected
outputs we've had for other tests in this space.

jeff
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-30  6:44         ` Richard Biener
@ 2019-05-30 20:17           ` Jeff Law
  2019-05-31  7:30             ` Richard Biener
  0 siblings, 1 reply; 38+ messages in thread
From: Jeff Law @ 2019-05-30 20:17 UTC (permalink / raw)
  To: Richard Biener, Jiufu Guo
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/30/19 12:41 AM, Richard Biener wrote:
> On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>> On 5/23/19 6:11 AM, Richard Biener wrote:
>>> On Thu, 23 May 2019, Jiufu Guo wrote:
>>>
>>>> Hi,
>>>>
>>>> Richard Biener <rguenther@suse.de> writes:
>>>>
>>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
>>
>>>>>> +    }
>>>>>> +
>>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
>> tcc_comparison)
>>>>>> +    return false;
>>>>>> +
>>>>>> +  /* Check if phi's incoming value is defined in the incoming
>> basic_block.  */
>>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>>>> +  if (def->bb != e->src)
>>>>>> +    return false;
>>>>> why does this matter?
>>>>>
>>>> Through preparing pathes and duplicating block, this transform can
>> also
>>>> help to combine a cmp in previous block and a gcond in current
>> block.
>>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>>>> block of the current; and then combining "cmp with gcond" is safe. 
>> If
>>>> the cmp is defined far from the incoming block, it would be hard to
>>>> achieve the combining, and the transform may not needed.
>>> We're in SSA form so the "combining" doesn't really care where the
>>> definition comes from.
>> Combining doesn't care, but we need to make sure the copy of the
>> conditional ends up in the right block since it wouldn't necessarily be
>> associated with def->bb anymore.  But I'd expect the sinking pass to
>> make this a non-issue in practice anyway.
>>
>>>
>>>>>> +
>>>>>> +  if (!single_succ_p (def->bb))
>>>>>> +    return false;
>>>>> Or this?  The actual threading will ensure this will hold true.
>>>>>
>>>> Yes, other thread code check this and ensure it to be true, like
>>>> function thread_through_normal_block. Since this new function is
>> invoked
>>>> outside thread_through_normal_block, so, checking single_succ_p is
>> also
>>>> needed for this case.
>>> I mean threading will isolate the path making this trivially true.
>>> It's also no requirement for combining, in fact due to the single-use
>>> check the definition can be sinked across the edge already (if
>>> the edges dest didn't have multiple predecessors which this threading
>>> will fix as well).
>> I don't think so.  The CMP source block could end with a call and have
>> an abnormal edge (for example).  We can't put the copied conditional
>> before the call and putting it after the call essentially means
>> creating
>> a new block.
>>
>> The CMP source block could also end with a conditional.  Where do we
>> put
>> the one we want to copy into the CMP source block in that case? :-)
>>
>> This is something else we'd want to check if we ever allowed the the
>> CMP
>> defining block to not be the immediate predecessor of the conditional
>> jump block.  If we did that we'd need to validate that the block where
>> we're going to insert the copy of the jump has a single successor.
> 
> But were just isolating a path here. The actual combine job is left to followup cleanups. 
Absolutely agreed.  My point was that there's some additional stuff we'd
have to verify does the right thing if we wanted to allow the CMP to be
somewhere other than in the immediate predecessor of the conditional
jump block.

Jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-30 15:09                 ` Jiufu Guo
@ 2019-05-30 23:55                   ` Jeff Law
  2019-05-31  7:34                     ` Richard Biener
  2019-06-04  3:03                     ` Jiufu Guo
  0 siblings, 2 replies; 38+ messages in thread
From: Jeff Law @ 2019-05-30 23:55 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

[-- Attachment #1: Type: text/plain, Size: 12087 bytes --]

On 5/30/19 9:03 AM, Jiufu Guo wrote:
> Jeff Law <law@redhat.com> writes:
> 
>> On 5/29/19 6:36 AM, Richard Biener wrote:
>>> On Tue, 28 May 2019, Jiufu Guo wrote:
>>>
>>>> Hi,
>>>>
>>>> This patch implements a new opportunity of jump threading for PR77820.
>>>> In this optimization, conditional jumps are merged with unconditional
>>>> jump. And then moving CMP result to GPR is eliminated.
>>>>
>>>> This version is based on the proposal of Richard, Jeff and Andrew, and
>>>> refined to incorporate comments.  Thanks for the reviews!
>>>>
>>>> Bootstrapped and tested on powerpc64le and powerpc64be with no
>>>> regressions (one case is improved) and new testcases are added. Is this
>>>> ok for trunk?
>>>>
>>>> Example of this opportunity looks like below:
>>>>
>>>>   <P0>
>>>>   p0 = a CMP b
>>>>   goto <X>;
>>>>
>>>>   <P1>
>>>>   p1 = c CMP d
>>>>   goto <X>;
>>>>
>>>>   <X>
>>>>   # phi = PHI <p0 (P0), p1 (P1)>
>>>>   if (phi != 0) goto <Y>; else goto <Z>;
>>>>
>>>> Could be transformed to:
>>>>
>>>>   <P0>
>>>>   p0 = a CMP b
>>>>   if (p0 != 0) goto <Y>; else goto <Z>;
>>>>
>>>>   <P1>
>>>>   p1 = c CMP d
>>>>   if (p1 != 0) goto <Y>; else goto <Z>;
>>>>
>>>>
>>>> This optimization eliminates:
>>>> 1. saving CMP result: p0 = a CMP b.
>>>> 2. additional CMP on branch: if (phi != 0).
>>>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>>>>
>>>> Thanks!
>>>> Jiufu Guo
>>>>
>>>>
>>>> [gcc]
>>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>>
>>>> 	PR tree-optimization/77820
>>>> 	* tree-ssa-threadedge.c
>>>> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
>>>> 	function.
>>>> 	(thread_across_edge): Add call to
>>>> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>>>>
>>>> [gcc/testsuite]
>>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>>
>>>> 	PR tree-optimization/77820
>>>> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>>>> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>>>> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
>>>> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
>>>> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>>>>
>>>> ---
>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
>>>>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>>>>  gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
>>>>  6 files changed, 192 insertions(+), 4 deletions(-)
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>>
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>> new file mode 100644
>>>> index 0000000..5227c87
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>> @@ -0,0 +1,30 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>> +
>>>> +void g (int);
>>>> +void g1 (int);
>>>> +
>>>> +void
>>>> +f (long a, long b, long c, long d, long x)
>>>> +{
>>>> +  _Bool t;
>>>> +  if (x)
>>>> +    {
>>>> +      g (a + 1);
>>>> +      t = a < b;
>>>> +      c = d + x;
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      g (b + 1);
>>>> +      a = c + d;
>>>> +      t = c > d;
>>>> +    }
>>>> +
>>>> +  if (t)
>>>> +    g1 (c);
>>>> +
>>>> +  g (a);
>>>> +}
>>>> +
>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>> new file mode 100644
>>>> index 0000000..eaf89bb
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>> @@ -0,0 +1,23 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>> +
>>>> +void g (void);
>>>> +void g1 (void);
>>>> +
>>>> +void
>>>> +f (long a, long b, long c, long d, int x)
>>>> +{
>>>> +  _Bool t;
>>>> +  if (x)
>>>> +    t = c < d;
>>>> +  else
>>>> +    t = a < b;
>>>> +
>>>> +  if (t)
>>>> +    {
>>>> +      g1 ();
>>>> +      g ();
>>>> +    }
>>>> +}
>>>> +
>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>> new file mode 100644
>>>> index 0000000..d5a1e0b
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>> @@ -0,0 +1,25 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>> +
>>>> +void g (void);
>>>> +void g1 (void);
>>>> +
>>>> +void
>>>> +f (long a, long b, long c, long d, int x)
>>>> +{
>>>> +  int t;
>>>> +  if (x)
>>>> +    t = a < b;
>>>> +  else if (d == x)
>>>> +    t = c < b;
>>>> +  else
>>>> +    t = d > c;
>>>> +
>>>> +  if (t)
>>>> +    {
>>>> +      g1 ();
>>>> +      g ();
>>>> +    }
>>>> +}
>>>> +
>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>> new file mode 100644
>>>> index 0000000..53acabc
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>> @@ -0,0 +1,40 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>> +
>>>> +void g (int);
>>>> +void g1 (int);
>>>> +
>>>> +void
>>>> +f (long a, long b, long c, long d, int x)
>>>> +{
>>>> +  int t;
>>>> +  _Bool l1 = 0, l2 = 0;
>>>> +  if (x)
>>>> +    {
>>>> +      g (a);
>>>> +      c = a + b;
>>>> +      t = a < b;
>>>> +      l1 = 1;
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      g1 (b);
>>>> +      t = c > d;
>>>> +      d = c + b;
>>>> +      l2 = 1;
>>>> +    }
>>>> +
>>>> +  if (t)
>>>> +    {
>>>> +      if (l1 | l2)
>>>> +	g1 (c);
>>>> +    }
>>>> +  else
>>>> +    {
>>>> +      g (d);
>>>> +      g1 (a + b);
>>>> +    }
>>>> +  g (c + d);
>>>> +}
>>>> +
>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>> index e9b4f26..1d7b587 100644
>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>> @@ -69,4 +69,4 @@ lookharder (string)
>>>>      }
>>>>  }
>>>>  
>>>> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
>>>> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
>>>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>>>> index c3ea2d6..36c413a 100644
>>>> --- a/gcc/tree-ssa-threadedge.c
>>>> +++ b/gcc/tree-ssa-threadedge.c
>>>> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
>>>>    return 0;
>>>>  }
>>>>  
>>>> +/* There are basic blocks look like:
>>>> +   <P0>
>>>> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
>>>> +   goto <X>;
>>>> +
>>>> +   <P1>
>>>> +   p1 = c CMP d
>>>> +   goto <X>;
>>>> +
>>>> +   <X>
>>>> +   # phi = PHI <p0 (P0), p1 (P1)>
>>>> +   if (phi != 0) goto <Y>; else goto <Z>;
>>>> +
>>>> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
>>>> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
>>>> +
>>>> +   Return true if E is (P0,X) or (P1,X)  */
>>>> +
>>>> +bool
>>>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>>>> +{
>>>> +  basic_block bb = e->dest;
>>>> +
>>>> +  /* See if there is only one stmt which is gcond.  */
>>>> +  gimple *gs = last_and_only_stmt (bb);
>>>> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
>>>> +    return false;
>>>      gcond *gs;
>>>      if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb))))
>>>        return false;
>>>
>>> makes the following gimple_cond_ accesses more efficient when
>>> checking is enabled.
>>>
>>>> +
>>>> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>>>> +  tree cond = gimple_cond_lhs (gs);
>>>> +  enum tree_code code = gimple_cond_code (gs);
>>>> +  tree rhs = gimple_cond_rhs (gs);
>>>> +  if (TREE_CODE (cond) != SSA_NAME
>>>> +      || (code != NE_EXPR && code != EQ_EXPR)
>>>> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
>>>> +    return false;
>>>> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>>>> +  if (phi == NULL || gimple_bb (phi) != bb)
>>>> +    return false;
>>>> +
>>>> +  /* Check if phi's incoming value is CMP.  */
>>>> +  gimple *def;
>>>> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>>> +  if (TREE_CODE (value) == SSA_NAME 
>>>> +      && has_single_use (value)
>>>> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
>>>> +    def = SSA_NAME_DEF_STMT (value);
>>> Same is true here and below if you rewrite to
>>>
>>>      gassign *def;
>>>      tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>>      if (TREE_CODE (value) != SSA_NAME
>>>          || !has_single_use (value)
>>>          || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>>>        return false;
>>>
>>> Otherwise it looks good.  I'd like to have Jeffs opinion and
>>> final ACK here because we touch jump-threading and he's most
>>> familiar with that detail and the place you hook into.
>> I've got the full thread to look over.  At a high level I wouldn't have
>> guessed it'd be this easy to get the threader handle this, but
>> occasionally we are surprised in a good way.  Anyway, I'll be looking
>> through the full discussion.
>>
>> Jeff
> 
> Hi Jeff, Richard and all,
> 
> Thanks a lot for your great comments in all threads. Based on those
> comments, I refined the code as below:
> 
> bool
> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
> {
>   /* See if there is only one stmt which is gcond.  */
>   gcond *gs;
>   if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
>     return false;
>   
>   /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>   tree cond = gimple_cond_lhs (gs);
>   enum tree_code code = gimple_cond_code (gs);
>   tree rhs = gimple_cond_rhs (gs);
>   if (TREE_CODE (cond) != SSA_NAME
>       || (code != NE_EXPR && code != EQ_EXPR)
>       || (!integer_onep (rhs) && !integer_zerop (rhs)))
>     return false;
>   gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>   if (phi == NULL || gimple_bb (phi) != e->dest)
>     return false;
> 
>   /* Check if phi's incoming value is CMP.  */
>   gassign *def;
>   tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>   if (TREE_CODE (value) != SSA_NAME
>       || !has_single_use (value)
>       || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>     return false;
> 
>   /* Or if it is (INT) (a CMP b).  */
>   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>     {
>       value = gimple_assign_rhs1 (def);
>       if (TREE_CODE (value) != SSA_NAME
> 	  || !has_single_use (value)
> 	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
> 	return false;
>     }
> 
>   if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>     return false;
> 
>   if (!single_succ_p (e->src))
>     return false;
> 
>   return true;
> }
> 
> 
> In this code, I put "if (!single_succ_p (e->src))" there, which may be
> helpful for reducing the copy block.
Sounds good.  My testing did show a regression on the sh port.

For pr51244-20 on the SH port your changes make the ultimate resulting
code better, but compromise the test.  We can restore the shape of the
CFG and get the testing coverage for sh_treg_combine by disabling VRP
and DOM.

Can you please include the attached patch in your next update?

Jeff

[-- Attachment #2: P --]
[-- Type: text/plain, Size: 571 bytes --]

diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c
index c342163160b..be265cd16af 100644
--- a/gcc/testsuite/gcc.target/sh/pr51244-20.c
+++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c
@@ -1,7 +1,7 @@
 /* Check that the SH specific sh_treg_combine RTL optimization pass works as
    expected.  */
 /* { dg-do compile }  */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */
 
 /* { dg-final { scan-assembler-not "not\t" } } */
 /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-30 20:17           ` Jeff Law
@ 2019-05-31  7:30             ` Richard Biener
  2019-05-31 15:28               ` Jeff Law
  0 siblings, 1 reply; 38+ messages in thread
From: Richard Biener @ 2019-05-31  7:30 UTC (permalink / raw)
  To: Jeff Law
  Cc: Jiufu Guo, gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On Thu, 30 May 2019, Jeff Law wrote:

> On 5/30/19 12:41 AM, Richard Biener wrote:
> > On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
> >> On 5/23/19 6:11 AM, Richard Biener wrote:
> >>> On Thu, 23 May 2019, Jiufu Guo wrote:
> >>>
> >>>> Hi,
> >>>>
> >>>> Richard Biener <rguenther@suse.de> writes:
> >>>>
> >>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
> >>
> >>>>>> +    }
> >>>>>> +
> >>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
> >> tcc_comparison)
> >>>>>> +    return false;
> >>>>>> +
> >>>>>> +  /* Check if phi's incoming value is defined in the incoming
> >> basic_block.  */
> >>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
> >>>>>> +  if (def->bb != e->src)
> >>>>>> +    return false;
> >>>>> why does this matter?
> >>>>>
> >>>> Through preparing pathes and duplicating block, this transform can
> >> also
> >>>> help to combine a cmp in previous block and a gcond in current
> >> block.
> >>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
> >>>> block of the current; and then combining "cmp with gcond" is safe. 
> >> If
> >>>> the cmp is defined far from the incoming block, it would be hard to
> >>>> achieve the combining, and the transform may not needed.
> >>> We're in SSA form so the "combining" doesn't really care where the
> >>> definition comes from.
> >> Combining doesn't care, but we need to make sure the copy of the
> >> conditional ends up in the right block since it wouldn't necessarily be
> >> associated with def->bb anymore.  But I'd expect the sinking pass to
> >> make this a non-issue in practice anyway.
> >>
> >>>
> >>>>>> +
> >>>>>> +  if (!single_succ_p (def->bb))
> >>>>>> +    return false;
> >>>>> Or this?  The actual threading will ensure this will hold true.
> >>>>>
> >>>> Yes, other thread code check this and ensure it to be true, like
> >>>> function thread_through_normal_block. Since this new function is
> >> invoked
> >>>> outside thread_through_normal_block, so, checking single_succ_p is
> >> also
> >>>> needed for this case.
> >>> I mean threading will isolate the path making this trivially true.
> >>> It's also no requirement for combining, in fact due to the single-use
> >>> check the definition can be sinked across the edge already (if
> >>> the edges dest didn't have multiple predecessors which this threading
> >>> will fix as well).
> >> I don't think so.  The CMP source block could end with a call and have
> >> an abnormal edge (for example).  We can't put the copied conditional
> >> before the call and putting it after the call essentially means
> >> creating
> >> a new block.
> >>
> >> The CMP source block could also end with a conditional.  Where do we
> >> put
> >> the one we want to copy into the CMP source block in that case? :-)
> >>
> >> This is something else we'd want to check if we ever allowed the the
> >> CMP
> >> defining block to not be the immediate predecessor of the conditional
> >> jump block.  If we did that we'd need to validate that the block where
> >> we're going to insert the copy of the jump has a single successor.
> > 
> > But were just isolating a path here. The actual combine job is left to followup cleanups. 
> Absolutely agreed.  My point was that there's some additional stuff we'd
> have to verify does the right thing if we wanted to allow the CMP to be
> somewhere other than in the immediate predecessor of the conditional
> jump block.

For correctness?  No.  For the CMP to be forwarded?  No.  For optimality
maybe - forwarding a binary operation always incurs register pressure
increase.

Btw, as you already said sinking should have sinked the CMP to the
predecessor (since we have a single use in the PHI).

So I hardly see the point of making this difference.

Richard.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-30 23:55                   ` Jeff Law
@ 2019-05-31  7:34                     ` Richard Biener
  2019-06-04  3:03                     ` Jiufu Guo
  1 sibling, 0 replies; 38+ messages in thread
From: Richard Biener @ 2019-05-31  7:34 UTC (permalink / raw)
  To: Jeff Law
  Cc: Jiufu Guo, gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

[-- Attachment #1: Type: text/plain, Size: 13457 bytes --]

On Thu, 30 May 2019, Jeff Law wrote:

> On 5/30/19 9:03 AM, Jiufu Guo wrote:
> > Jeff Law <law@redhat.com> writes:
> > 
> >> On 5/29/19 6:36 AM, Richard Biener wrote:
> >>> On Tue, 28 May 2019, Jiufu Guo wrote:
> >>>
> >>>> Hi,
> >>>>
> >>>> This patch implements a new opportunity of jump threading for PR77820.
> >>>> In this optimization, conditional jumps are merged with unconditional
> >>>> jump. And then moving CMP result to GPR is eliminated.
> >>>>
> >>>> This version is based on the proposal of Richard, Jeff and Andrew, and
> >>>> refined to incorporate comments.  Thanks for the reviews!
> >>>>
> >>>> Bootstrapped and tested on powerpc64le and powerpc64be with no
> >>>> regressions (one case is improved) and new testcases are added. Is this
> >>>> ok for trunk?
> >>>>
> >>>> Example of this opportunity looks like below:
> >>>>
> >>>>   <P0>
> >>>>   p0 = a CMP b
> >>>>   goto <X>;
> >>>>
> >>>>   <P1>
> >>>>   p1 = c CMP d
> >>>>   goto <X>;
> >>>>
> >>>>   <X>
> >>>>   # phi = PHI <p0 (P0), p1 (P1)>
> >>>>   if (phi != 0) goto <Y>; else goto <Z>;
> >>>>
> >>>> Could be transformed to:
> >>>>
> >>>>   <P0>
> >>>>   p0 = a CMP b
> >>>>   if (p0 != 0) goto <Y>; else goto <Z>;
> >>>>
> >>>>   <P1>
> >>>>   p1 = c CMP d
> >>>>   if (p1 != 0) goto <Y>; else goto <Z>;
> >>>>
> >>>>
> >>>> This optimization eliminates:
> >>>> 1. saving CMP result: p0 = a CMP b.
> >>>> 2. additional CMP on branch: if (phi != 0).
> >>>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
> >>>>
> >>>> Thanks!
> >>>> Jiufu Guo
> >>>>
> >>>>
> >>>> [gcc]
> >>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
> >>>> 	    Lijia He  <helijia@linux.ibm.com>
> >>>>
> >>>> 	PR tree-optimization/77820
> >>>> 	* tree-ssa-threadedge.c
> >>>> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
> >>>> 	function.
> >>>> 	(thread_across_edge): Add call to
> >>>> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
> >>>>
> >>>> [gcc/testsuite]
> >>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
> >>>> 	    Lijia He  <helijia@linux.ibm.com>
> >>>>
> >>>> 	PR tree-optimization/77820
> >>>> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
> >>>> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
> >>>> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
> >>>> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
> >>>> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
> >>>>
> >>>> ---
> >>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
> >>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
> >>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
> >>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
> >>>>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
> >>>>  gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
> >>>>  6 files changed, 192 insertions(+), 4 deletions(-)
> >>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> >>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> >>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> >>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> >>>>
> >>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> >>>> new file mode 100644
> >>>> index 0000000..5227c87
> >>>> --- /dev/null
> >>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> >>>> @@ -0,0 +1,30 @@
> >>>> +/* { dg-do compile } */
> >>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> >>>> +
> >>>> +void g (int);
> >>>> +void g1 (int);
> >>>> +
> >>>> +void
> >>>> +f (long a, long b, long c, long d, long x)
> >>>> +{
> >>>> +  _Bool t;
> >>>> +  if (x)
> >>>> +    {
> >>>> +      g (a + 1);
> >>>> +      t = a < b;
> >>>> +      c = d + x;
> >>>> +    }
> >>>> +  else
> >>>> +    {
> >>>> +      g (b + 1);
> >>>> +      a = c + d;
> >>>> +      t = c > d;
> >>>> +    }
> >>>> +
> >>>> +  if (t)
> >>>> +    g1 (c);
> >>>> +
> >>>> +  g (a);
> >>>> +}
> >>>> +
> >>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> >>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> >>>> new file mode 100644
> >>>> index 0000000..eaf89bb
> >>>> --- /dev/null
> >>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> >>>> @@ -0,0 +1,23 @@
> >>>> +/* { dg-do compile } */
> >>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> >>>> +
> >>>> +void g (void);
> >>>> +void g1 (void);
> >>>> +
> >>>> +void
> >>>> +f (long a, long b, long c, long d, int x)
> >>>> +{
> >>>> +  _Bool t;
> >>>> +  if (x)
> >>>> +    t = c < d;
> >>>> +  else
> >>>> +    t = a < b;
> >>>> +
> >>>> +  if (t)
> >>>> +    {
> >>>> +      g1 ();
> >>>> +      g ();
> >>>> +    }
> >>>> +}
> >>>> +
> >>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> >>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> >>>> new file mode 100644
> >>>> index 0000000..d5a1e0b
> >>>> --- /dev/null
> >>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> >>>> @@ -0,0 +1,25 @@
> >>>> +/* { dg-do compile } */
> >>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> >>>> +
> >>>> +void g (void);
> >>>> +void g1 (void);
> >>>> +
> >>>> +void
> >>>> +f (long a, long b, long c, long d, int x)
> >>>> +{
> >>>> +  int t;
> >>>> +  if (x)
> >>>> +    t = a < b;
> >>>> +  else if (d == x)
> >>>> +    t = c < b;
> >>>> +  else
> >>>> +    t = d > c;
> >>>> +
> >>>> +  if (t)
> >>>> +    {
> >>>> +      g1 ();
> >>>> +      g ();
> >>>> +    }
> >>>> +}
> >>>> +
> >>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> >>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> >>>> new file mode 100644
> >>>> index 0000000..53acabc
> >>>> --- /dev/null
> >>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> >>>> @@ -0,0 +1,40 @@
> >>>> +/* { dg-do compile } */
> >>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> >>>> +
> >>>> +void g (int);
> >>>> +void g1 (int);
> >>>> +
> >>>> +void
> >>>> +f (long a, long b, long c, long d, int x)
> >>>> +{
> >>>> +  int t;
> >>>> +  _Bool l1 = 0, l2 = 0;
> >>>> +  if (x)
> >>>> +    {
> >>>> +      g (a);
> >>>> +      c = a + b;
> >>>> +      t = a < b;
> >>>> +      l1 = 1;
> >>>> +    }
> >>>> +  else
> >>>> +    {
> >>>> +      g1 (b);
> >>>> +      t = c > d;
> >>>> +      d = c + b;
> >>>> +      l2 = 1;
> >>>> +    }
> >>>> +
> >>>> +  if (t)
> >>>> +    {
> >>>> +      if (l1 | l2)
> >>>> +	g1 (c);
> >>>> +    }
> >>>> +  else
> >>>> +    {
> >>>> +      g (d);
> >>>> +      g1 (a + b);
> >>>> +    }
> >>>> +  g (c + d);
> >>>> +}
> >>>> +
> >>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> >>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> >>>> index e9b4f26..1d7b587 100644
> >>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> >>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> >>>> @@ -69,4 +69,4 @@ lookharder (string)
> >>>>      }
> >>>>  }
> >>>>  
> >>>> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
> >>>> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
> >>>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> >>>> index c3ea2d6..36c413a 100644
> >>>> --- a/gcc/tree-ssa-threadedge.c
> >>>> +++ b/gcc/tree-ssa-threadedge.c
> >>>> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
> >>>>    return 0;
> >>>>  }
> >>>>  
> >>>> +/* There are basic blocks look like:
> >>>> +   <P0>
> >>>> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
> >>>> +   goto <X>;
> >>>> +
> >>>> +   <P1>
> >>>> +   p1 = c CMP d
> >>>> +   goto <X>;
> >>>> +
> >>>> +   <X>
> >>>> +   # phi = PHI <p0 (P0), p1 (P1)>
> >>>> +   if (phi != 0) goto <Y>; else goto <Z>;
> >>>> +
> >>>> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
> >>>> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
> >>>> +
> >>>> +   Return true if E is (P0,X) or (P1,X)  */
> >>>> +
> >>>> +bool
> >>>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
> >>>> +{
> >>>> +  basic_block bb = e->dest;
> >>>> +
> >>>> +  /* See if there is only one stmt which is gcond.  */
> >>>> +  gimple *gs = last_and_only_stmt (bb);
> >>>> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
> >>>> +    return false;
> >>>      gcond *gs;
> >>>      if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb))))
> >>>        return false;
> >>>
> >>> makes the following gimple_cond_ accesses more efficient when
> >>> checking is enabled.
> >>>
> >>>> +
> >>>> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
> >>>> +  tree cond = gimple_cond_lhs (gs);
> >>>> +  enum tree_code code = gimple_cond_code (gs);
> >>>> +  tree rhs = gimple_cond_rhs (gs);
> >>>> +  if (TREE_CODE (cond) != SSA_NAME
> >>>> +      || (code != NE_EXPR && code != EQ_EXPR)
> >>>> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
> >>>> +    return false;
> >>>> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
> >>>> +  if (phi == NULL || gimple_bb (phi) != bb)
> >>>> +    return false;
> >>>> +
> >>>> +  /* Check if phi's incoming value is CMP.  */
> >>>> +  gimple *def;
> >>>> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
> >>>> +  if (TREE_CODE (value) == SSA_NAME 
> >>>> +      && has_single_use (value)
> >>>> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
> >>>> +    def = SSA_NAME_DEF_STMT (value);
> >>> Same is true here and below if you rewrite to
> >>>
> >>>      gassign *def;
> >>>      tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
> >>>      if (TREE_CODE (value) != SSA_NAME
> >>>          || !has_single_use (value)
> >>>          || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
> >>>        return false;
> >>>
> >>> Otherwise it looks good.  I'd like to have Jeffs opinion and
> >>> final ACK here because we touch jump-threading and he's most
> >>> familiar with that detail and the place you hook into.
> >> I've got the full thread to look over.  At a high level I wouldn't have
> >> guessed it'd be this easy to get the threader handle this, but
> >> occasionally we are surprised in a good way.  Anyway, I'll be looking
> >> through the full discussion.
> >>
> >> Jeff
> > 
> > Hi Jeff, Richard and all,
> > 
> > Thanks a lot for your great comments in all threads. Based on those
> > comments, I refined the code as below:
> > 
> > bool
> > edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
> > {
> >   /* See if there is only one stmt which is gcond.  */
> >   gcond *gs;
> >   if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
> >     return false;
> >   
> >   /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
> >   tree cond = gimple_cond_lhs (gs);
> >   enum tree_code code = gimple_cond_code (gs);
> >   tree rhs = gimple_cond_rhs (gs);
> >   if (TREE_CODE (cond) != SSA_NAME
> >       || (code != NE_EXPR && code != EQ_EXPR)
> >       || (!integer_onep (rhs) && !integer_zerop (rhs)))
> >     return false;
> >   gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
> >   if (phi == NULL || gimple_bb (phi) != e->dest)
> >     return false;
> > 
> >   /* Check if phi's incoming value is CMP.  */
> >   gassign *def;
> >   tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
> >   if (TREE_CODE (value) != SSA_NAME
> >       || !has_single_use (value)
> >       || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
> >     return false;
> > 
> >   /* Or if it is (INT) (a CMP b).  */
> >   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> >     {
> >       value = gimple_assign_rhs1 (def);
> >       if (TREE_CODE (value) != SSA_NAME
> > 	  || !has_single_use (value)
> > 	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
> > 	return false;
> >     }
> > 
> >   if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> >     return false;
> > 
> >   if (!single_succ_p (e->src))
> >     return false;

This new check warrants a comment.  As said, I don't necessarily see
why this should prevent tail-duplicating the block; the CMP can
still be forwarded into the tail-duplicated condition.  Again
we should see sinking to do its job, creating a forwarder between
the CMP and the condition block (because sinking splits critical
edges).  But no need to "wait" for that here.

Richard.

> >   return true;
> > }
> > 
> > 
> > In this code, I put "if (!single_succ_p (e->src))" there, which may be
> > helpful for reducing the copy block.
> Sounds good.  My testing did show a regression on the sh port.
> 
> For pr51244-20 on the SH port your changes make the ultimate resulting
> code better, but compromise the test.  We can restore the shape of the
> CFG and get the testing coverage for sh_treg_combine by disabling VRP
> and DOM.
> 
> Can you please include the attached patch in your next update?
> 
> Jeff
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-31  7:30             ` Richard Biener
@ 2019-05-31 15:28               ` Jeff Law
  2019-06-04  5:19                 ` Jiufu Guo
  2019-06-07  0:05                 ` Jeff Law
  0 siblings, 2 replies; 38+ messages in thread
From: Jeff Law @ 2019-05-31 15:28 UTC (permalink / raw)
  To: Richard Biener
  Cc: Jiufu Guo, gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/31/19 1:24 AM, Richard Biener wrote:
> On Thu, 30 May 2019, Jeff Law wrote:
> 
>> On 5/30/19 12:41 AM, Richard Biener wrote:
>>> On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>>>> On 5/23/19 6:11 AM, Richard Biener wrote:
>>>>> On Thu, 23 May 2019, Jiufu Guo wrote:
>>>>>
>>>>>> Hi,
>>>>>>
>>>>>> Richard Biener <rguenther@suse.de> writes:
>>>>>>
>>>>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
>>>>
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
>>>> tcc_comparison)
>>>>>>>> +    return false;
>>>>>>>> +
>>>>>>>> +  /* Check if phi's incoming value is defined in the incoming
>>>> basic_block.  */
>>>>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>>>>>> +  if (def->bb != e->src)
>>>>>>>> +    return false;
>>>>>>> why does this matter?
>>>>>>>
>>>>>> Through preparing pathes and duplicating block, this transform can
>>>> also
>>>>>> help to combine a cmp in previous block and a gcond in current
>>>> block.
>>>>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>>>>>> block of the current; and then combining "cmp with gcond" is safe. 
>>>> If
>>>>>> the cmp is defined far from the incoming block, it would be hard to
>>>>>> achieve the combining, and the transform may not needed.
>>>>> We're in SSA form so the "combining" doesn't really care where the
>>>>> definition comes from.
>>>> Combining doesn't care, but we need to make sure the copy of the
>>>> conditional ends up in the right block since it wouldn't necessarily be
>>>> associated with def->bb anymore.  But I'd expect the sinking pass to
>>>> make this a non-issue in practice anyway.
>>>>
>>>>>
>>>>>>>> +
>>>>>>>> +  if (!single_succ_p (def->bb))
>>>>>>>> +    return false;
>>>>>>> Or this?  The actual threading will ensure this will hold true.
>>>>>>>
>>>>>> Yes, other thread code check this and ensure it to be true, like
>>>>>> function thread_through_normal_block. Since this new function is
>>>> invoked
>>>>>> outside thread_through_normal_block, so, checking single_succ_p is
>>>> also
>>>>>> needed for this case.
>>>>> I mean threading will isolate the path making this trivially true.
>>>>> It's also no requirement for combining, in fact due to the single-use
>>>>> check the definition can be sinked across the edge already (if
>>>>> the edges dest didn't have multiple predecessors which this threading
>>>>> will fix as well).
>>>> I don't think so.  The CMP source block could end with a call and have
>>>> an abnormal edge (for example).  We can't put the copied conditional
>>>> before the call and putting it after the call essentially means
>>>> creating
>>>> a new block.
>>>>
>>>> The CMP source block could also end with a conditional.  Where do we
>>>> put
>>>> the one we want to copy into the CMP source block in that case? :-)
>>>>
>>>> This is something else we'd want to check if we ever allowed the the
>>>> CMP
>>>> defining block to not be the immediate predecessor of the conditional
>>>> jump block.  If we did that we'd need to validate that the block where
>>>> we're going to insert the copy of the jump has a single successor.
>>>
>>> But were just isolating a path here. The actual combine job is left to followup cleanups. 
>> Absolutely agreed.  My point was that there's some additional stuff we'd
>> have to verify does the right thing if we wanted to allow the CMP to be
>> somewhere other than in the immediate predecessor of the conditional
>> jump block.
> 
> For correctness?  No.  For the CMP to be forwarded?  No.  For optimality
> maybe - forwarding a binary operation always incurs register pressure
> increase.
For correctness of the patch.  Conceptually I have _no_ issues with
having the CMP in a different block than an immediate predecessor of the
conditional jump block.  But the patch does certain code which would
need to be audited with that change in mind.

> 
> Btw, as you already said sinking should have sinked the CMP to the
> predecessor (since we have a single use in the PHI).
> 
> So I hardly see the point of making this difference.
:-)

jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH V3] A jump threading opportunity for condition branch
  2019-05-30 15:34             ` Jeff Law
@ 2019-06-03  2:18               ` Jiufu Guo
  2019-06-04  5:30                 ` [PATCH V4] " Jiufu Guo
  0 siblings, 1 reply; 38+ messages in thread
From: Jiufu Guo @ 2019-06-03  2:18 UTC (permalink / raw)
  To: Jeff Law, Richard Biener
  Cc: gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt


Hi,

This patch implements a new jump-threading opportunity for PR77820.
In this optimization, conditional jumps are merged with an unconditional
jump, and the move of the CMP result to a GPR is then eliminated.

This version is based on the proposal of Richard, Jeff and Andrew on
previous versions, and refined to incorporate comments.
Thanks for the reviews!

Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help
from Jeff) with no regressions (two cases are improved and updated
to keep original test coverage) and new testcases are added.
Is this ok for trunk?

Example of this opportunity looks like below:

  <P0>
  p0 = a CMP b
  goto <X>;

  <P1>
  p1 = c CMP d
  goto <X>;

  <X>
  # phi = PHI <p0 (P0), p1 (P1)>
  if (phi != 0) goto <Y>; else goto <Z>;

Could be transformed to:

  <P0>
  p0 = a CMP b
  if (p0 != 0) goto <Y>; else goto <Z>;

  <P1>
  p1 = c CMP d
  if (p1 != 0) goto <Y>; else goto <Z>;


This optimization eliminates:
1. saving CMP result: p0 = a CMP b. 
2. additional CMP on branch: if (phi != 0).
3. converting the CMP result: phi = (INT) p0, if such a conversion is present.

Thanks!
Jiufu Guo

[gcc]
2019-05-31  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* tree-ssa-threadedge.c
	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
	function.
	(thread_across_edge): Add call to
	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.

[gcc/testsuite]
2019-05-31  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
	* gcc.target/sh/pr51244-20.c: Update testcase.


---
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
 gcc/testsuite/gcc.target/sh/pr51244-20.c         |  2 +-
 gcc/tree-ssa-threadedge.c                        | 73 +++++++++++++++++++++++-
 7 files changed, 190 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
new file mode 100644
index 0000000..5227c87
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, long x)
+{
+  _Bool t;
+  if (x)
+    {
+      g (a + 1);
+      t = a < b;
+      c = d + x;
+    }
+  else
+    {
+      g (b + 1);
+      a = c + d;
+      t = c > d;
+    }
+
+  if (t)
+    g1 (c);
+
+  g (a);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
new file mode 100644
index 0000000..eaf89bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  _Bool t;
+  if (x)
+    t = c < d;
+  else
+    t = a < b;
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
new file mode 100644
index 0000000..d5a1e0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  if (x)
+    t = a < b;
+  else if (d == x)
+    t = c < b;
+  else
+    t = d > c;
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
new file mode 100644
index 0000000..53acabc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  _Bool l1 = 0, l2 = 0;
+  if (x)
+    {
+      g (a);
+      c = a + b;
+      t = a < b;
+      l1 = 1;
+    }
+  else
+    {
+      g1 (b);
+      t = c > d;
+      d = c + b;
+      l2 = 1;
+    }
+
+  if (t)
+    {
+      if (l1 | l2)
+	g1 (c);
+    }
+  else
+    {
+      g (d);
+      g1 (a + b);
+    }
+  g (c + d);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
index e9b4f26..fb171cd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */
+/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -fno-tree-dominator-opts -fno-tree-vrp -w" } */
 
 struct __sFILE
 {
diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c
index c342163..be265cd 100644
--- a/gcc/testsuite/gcc.target/sh/pr51244-20.c
+++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c
@@ -1,7 +1,7 @@
 /* Check that the SH specific sh_treg_combine RTL optimization pass works as
    expected.  */
 /* { dg-do compile }  */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */
 
 /* { dg-final { scan-assembler-not "not\t" } } */
 /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index c3ea2d6..d632ad0 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -1157,6 +1157,71 @@ thread_through_normal_block (edge e,
   return 0;
 }
 
+/* There are basic blocks look like:
+   <P0>
+   p0 = a CMP b ; or p0 = (INT) (a CMP b)
+   goto <X>;
+
+   <P1>
+   p1 = c CMP d
+   goto <X>;
+
+   <X>
+   # phi = PHI <p0 (P0), p1 (P1)>
+   if (phi != 0) goto <Y>; else goto <Z>;
+
+   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
+   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
+
+   Return true if E is (P0,X) or (P1,X)  */
+
+bool
+edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
+{
+  /* See if there is only one stmt which is gcond.  */
+  gcond *gs;
+  if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
+    return false;
+
+  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
+  tree cond = gimple_cond_lhs (gs);
+  enum tree_code code = gimple_cond_code (gs);
+  tree rhs = gimple_cond_rhs (gs);
+  if (TREE_CODE (cond) != SSA_NAME
+      || (code != NE_EXPR && code != EQ_EXPR)
+      || (!integer_onep (rhs) && !integer_zerop (rhs)))
+    return false;
+  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
+  if (phi == NULL || gimple_bb (phi) != e->dest)
+    return false;
+
+  /* Check if phi's incoming value is CMP.  */
+  gassign *def;
+  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
+  if (TREE_CODE (value) != SSA_NAME
+      || !has_single_use (value)
+      || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
+    return false;
+
+  /* Or if it is (INT) (a CMP b).  */
+  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
+    {
+      value = gimple_assign_rhs1 (def);
+      if (TREE_CODE (value) != SSA_NAME
+	  || !has_single_use (value)
+	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
+	return false;
+    }
+
+  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
+    return false;
+
+  if (!single_succ_p (e->src))
+    return false;
+
+  return true;
+}
+
 /* We are exiting E->src, see if E->dest ends with a conditional
    jump which has a known value when reached via E.
 
@@ -1317,10 +1382,12 @@ thread_across_edge (gcond *dummy_cond,
 
 	/* If we were able to thread through a successor of E->dest, then
 	   record the jump threading opportunity.  */
-	if (found)
+	if (found
+	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
 	  {
-	    propagate_threaded_block_debug_into (path->last ()->e->dest,
-						 taken_edge->dest);
+	    if (taken_edge->dest != path->last ()->e->dest)
+	      propagate_threaded_block_debug_into (path->last ()->e->dest,
+						   taken_edge->dest);
 	    register_jump_thread (path);
 	  }
 	else
-- 
2.7.4

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V2] A jump threading opportunity for condition branch
  2019-05-30 23:55                   ` Jeff Law
  2019-05-31  7:34                     ` Richard Biener
@ 2019-06-04  3:03                     ` Jiufu Guo
  1 sibling, 0 replies; 38+ messages in thread
From: Jiufu Guo @ 2019-06-04  3:03 UTC (permalink / raw)
  To: Jeff Law
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

Jeff Law <law@redhat.com> writes:

> On 5/30/19 9:03 AM, Jiufu Guo wrote:
>> Jeff Law <law@redhat.com> writes:
>> 
>>> On 5/29/19 6:36 AM, Richard Biener wrote:
>>>> On Tue, 28 May 2019, Jiufu Guo wrote:
>>>>
>>>>> Hi,
>>>>>
>>>>> This patch implements a new opportunity of jump threading for PR77820.
>>>>> In this optimization, conditional jumps are merged with unconditional
>>>>> jump. And then moving CMP result to GPR is eliminated.
>>>>>
>>>>> This version is based on the proposal of Richard, Jeff and Andrew, and
>>>>> refined to incorporate comments.  Thanks for the reviews!
>>>>>
>>>>> Bootstrapped and tested on powerpc64le and powerpc64be with no
>>>>> regressions (one case is improved) and new testcases are added. Is this
>>>>> ok for trunk?
>>>>>
>>>>> Example of this opportunity looks like below:
>>>>>
>>>>>   <P0>
>>>>>   p0 = a CMP b
>>>>>   goto <X>;
>>>>>
>>>>>   <P1>
>>>>>   p1 = c CMP d
>>>>>   goto <X>;
>>>>>
>>>>>   <X>
>>>>>   # phi = PHI <p0 (P0), p1 (P1)>
>>>>>   if (phi != 0) goto <Y>; else goto <Z>;
>>>>>
>>>>> Could be transformed to:
>>>>>
>>>>>   <P0>
>>>>>   p0 = a CMP b
>>>>>   if (p0 != 0) goto <Y>; else goto <Z>;
>>>>>
>>>>>   <P1>
>>>>>   p1 = c CMP d
>>>>>   if (p1 != 0) goto <Y>; else goto <Z>;
>>>>>
>>>>>
>>>>> This optimization eliminates:
>>>>> 1. saving CMP result: p0 = a CMP b.
>>>>> 2. additional CMP on branch: if (phi != 0).
>>>>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>>>>>
>>>>> Thanks!
>>>>> Jiufu Guo
>>>>>
>>>>>
>>>>> [gcc]
>>>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>>>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>>>
>>>>> 	PR tree-optimization/77820
>>>>> 	* tree-ssa-threadedge.c
>>>>> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
>>>>> 	function.
>>>>> 	(thread_across_edge): Add call to
>>>>> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>>>>>
>>>>> [gcc/testsuite]
>>>>> 2019-05-28  Jiufu Guo  <guojiufu@linux.ibm.com>
>>>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>>>
>>>>> 	PR tree-optimization/77820
>>>>> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>>>>> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>>>>> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
>>>>> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
>>>>> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>>>>>
>>>>> ---
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>>>>>  gcc/tree-ssa-threadedge.c                        | 76 +++++++++++++++++++++++-
>>>>>  6 files changed, 192 insertions(+), 4 deletions(-)
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>>>
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>>> new file mode 100644
>>>>> index 0000000..5227c87
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>>> @@ -0,0 +1,30 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (int);
>>>>> +void g1 (int);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, long x)
>>>>> +{
>>>>> +  _Bool t;
>>>>> +  if (x)
>>>>> +    {
>>>>> +      g (a + 1);
>>>>> +      t = a < b;
>>>>> +      c = d + x;
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      g (b + 1);
>>>>> +      a = c + d;
>>>>> +      t = c > d;
>>>>> +    }
>>>>> +
>>>>> +  if (t)
>>>>> +    g1 (c);
>>>>> +
>>>>> +  g (a);
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>>> new file mode 100644
>>>>> index 0000000..eaf89bb
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>>> @@ -0,0 +1,23 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (void);
>>>>> +void g1 (void);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, int x)
>>>>> +{
>>>>> +  _Bool t;
>>>>> +  if (x)
>>>>> +    t = c < d;
>>>>> +  else
>>>>> +    t = a < b;
>>>>> +
>>>>> +  if (t)
>>>>> +    {
>>>>> +      g1 ();
>>>>> +      g ();
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>>> new file mode 100644
>>>>> index 0000000..d5a1e0b
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>>> @@ -0,0 +1,25 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (void);
>>>>> +void g1 (void);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, int x)
>>>>> +{
>>>>> +  int t;
>>>>> +  if (x)
>>>>> +    t = a < b;
>>>>> +  else if (d == x)
>>>>> +    t = c < b;
>>>>> +  else
>>>>> +    t = d > c;
>>>>> +
>>>>> +  if (t)
>>>>> +    {
>>>>> +      g1 ();
>>>>> +      g ();
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>>> new file mode 100644
>>>>> index 0000000..53acabc
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>>> @@ -0,0 +1,40 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (int);
>>>>> +void g1 (int);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, int x)
>>>>> +{
>>>>> +  int t;
>>>>> +  _Bool l1 = 0, l2 = 0;
>>>>> +  if (x)
>>>>> +    {
>>>>> +      g (a);
>>>>> +      c = a + b;
>>>>> +      t = a < b;
>>>>> +      l1 = 1;
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      g1 (b);
>>>>> +      t = c > d;
>>>>> +      d = c + b;
>>>>> +      l2 = 1;
>>>>> +    }
>>>>> +
>>>>> +  if (t)
>>>>> +    {
>>>>> +      if (l1 | l2)
>>>>> +	g1 (c);
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      g (d);
>>>>> +      g1 (a + b);
>>>>> +    }
>>>>> +  g (c + d);
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>>> index e9b4f26..1d7b587 100644
>>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>>> @@ -69,4 +69,4 @@ lookharder (string)
>>>>>      }
>>>>>  }
>>>>>  
>>>>> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 "split-paths" } } */
>>>>> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 "split-paths" } } */
>>>>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>>>>> index c3ea2d6..36c413a 100644
>>>>> --- a/gcc/tree-ssa-threadedge.c
>>>>> +++ b/gcc/tree-ssa-threadedge.c
>>>>> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
>>>>>    return 0;
>>>>>  }
>>>>>  
>>>>> +/* There are basic blocks look like:
>>>>> +   <P0>
>>>>> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
>>>>> +   goto <X>;
>>>>> +
>>>>> +   <P1>
>>>>> +   p1 = c CMP d
>>>>> +   goto <X>;
>>>>> +
>>>>> +   <X>
>>>>> +   # phi = PHI <p0 (P0), p1 (P1)>
>>>>> +   if (phi != 0) goto <Y>; else goto <Z>;
>>>>> +
>>>>> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
>>>>> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
>>>>> +
>>>>> +   Return true if E is (P0,X) or (P1,X)  */
>>>>> +
>>>>> +bool
>>>>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>>>>> +{
>>>>> +  basic_block bb = e->dest;
>>>>> +
>>>>> +  /* See if there is only one stmt which is gcond.  */
>>>>> +  gimple *gs = last_and_only_stmt (bb);
>>>>> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
>>>>> +    return false;
>>>>      gcond *gs;
>>>>      if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb))))
>>>>        return false;
>>>>
>>>> makes the following gimple_cond_ accesses more efficient when
>>>> checking is enabled.
>>>>
>>>>> +
>>>>> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>>>>> +  tree cond = gimple_cond_lhs (gs);
>>>>> +  enum tree_code code = gimple_cond_code (gs);
>>>>> +  tree rhs = gimple_cond_rhs (gs);
>>>>> +  if (TREE_CODE (cond) != SSA_NAME
>>>>> +      || (code != NE_EXPR && code != EQ_EXPR)
>>>>> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
>>>>> +    return false;
>>>>> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>>>>> +  if (phi == NULL || gimple_bb (phi) != bb)
>>>>> +    return false;
>>>>> +
>>>>> +  /* Check if phi's incoming value is CMP.  */
>>>>> +  gimple *def;
>>>>> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>>>> +  if (TREE_CODE (value) == SSA_NAME 
>>>>> +      && has_single_use (value)
>>>>> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
>>>>> +    def = SSA_NAME_DEF_STMT (value);
>>>> Same is true here and below if you rewrite to
>>>>
>>>>      gassign *def;
>>>>      tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>>>      if (TREE_CODE (value) != SSA_NAME
>>>>          || !has_single_use (value)
>>>>          || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>>>>        return false;
>>>>
>>>> Otherwise it looks good.  I'd like to have Jeffs opinion and
>>>> final ACK here because we touch jump-threading and he's most
>>>> familiar with that detail and the place you hook into.
>>> I've got the full thread to look over.  At a high level I wouldn't have
>>> guessed it'd be this easy to get the threader handle this, but
>>> occasionally we are surprised in a good way.  Anyway, I'll be looking
>>> through the full discussion.
>>>
>>> Jeff
>> 
>> Hi Jeff, Richard and all,
>> 
>> Thanks a lot for your great comments in all threads. Based on those
>> comments, I refined the code as below:
>> 
>> bool
>> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>> {
>>   /* See if there is only one stmt which is gcond.  */
>>   gcond *gs;
>>   if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
>>     return false;
>>   
>>   /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>>   tree cond = gimple_cond_lhs (gs);
>>   enum tree_code code = gimple_cond_code (gs);
>>   tree rhs = gimple_cond_rhs (gs);
>>   if (TREE_CODE (cond) != SSA_NAME
>>       || (code != NE_EXPR && code != EQ_EXPR)
>>       || (!integer_onep (rhs) && !integer_zerop (rhs)))
>>     return false;
>>   gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>>   if (phi == NULL || gimple_bb (phi) != e->dest)
>>     return false;
>> 
>>   /* Check if phi's incoming value is CMP.  */
>>   gassign *def;
>>   tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>   if (TREE_CODE (value) != SSA_NAME
>>       || !has_single_use (value)
>>       || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>>     return false;
>> 
>>   /* Or if it is (INT) (a CMP b).  */
>>   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>>     {
>>       value = gimple_assign_rhs1 (def);
>>       if (TREE_CODE (value) != SSA_NAME
>> 	  || !has_single_use (value)
>> 	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
>> 	return false;
>>     }
>> 
>>   if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>>     return false;
>> 
>>   if (!single_succ_p (e->src))
>>     return false;
>> 
>>   return true;
>> }
>> 
>> 
>> In this code, I put "if (!single_succ_p (e->src))" there, which may be
>> helpful for reducing the copy block.
> Sounds good.  My testing did show a regression on the sh port.
>
> For pr51244-20 on the SH port your changes make the ultimate resulting
> code better, but compromise the test.  We can restore the shape of the
> CFG and get the testing coverage for sh_treg_combine by disabling VRP
> and DOM.
>
> Can you please include the attached patch in your next update?

Thanks, I sent out a new version patch to include this update.

Jiufu Guo
>
> Jeff
> diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c
> index c342163160b..be265cd16af 100644
> --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c
> +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c
> @@ -1,7 +1,7 @@
>  /* Check that the SH specific sh_treg_combine RTL optimization pass works as
>     expected.  */
>  /* { dg-do compile }  */
> -/* { dg-options "-O2" } */
> +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */
>  
>  /* { dg-final { scan-assembler-not "not\t" } } */
>  /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-31 15:28               ` Jeff Law
@ 2019-06-04  5:19                 ` Jiufu Guo
  2019-06-04  7:07                   ` Richard Biener
  2019-06-07  0:05                 ` Jeff Law
  1 sibling, 1 reply; 38+ messages in thread
From: Jiufu Guo @ 2019-06-04  5:19 UTC (permalink / raw)
  To: Jeff Law
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

Jeff Law <law@redhat.com> writes:

> On 5/31/19 1:24 AM, Richard Biener wrote:
>> On Thu, 30 May 2019, Jeff Law wrote:
>> 
>>> On 5/30/19 12:41 AM, Richard Biener wrote:
>>>> On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>>>>> On 5/23/19 6:11 AM, Richard Biener wrote:
>>>>>> On Thu, 23 May 2019, Jiufu Guo wrote:
>>>>>>
>>>>>>> Hi,
>>>>>>>
>>>>>>> Richard Biener <rguenther@suse.de> writes:
>>>>>>>
>>>>>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
>>>>>
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
>>>>> tcc_comparison)
>>>>>>>>> +    return false;
>>>>>>>>> +
>>>>>>>>> +  /* Check if phi's incoming value is defined in the incoming
>>>>> basic_block.  */
>>>>>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>>>>>>> +  if (def->bb != e->src)
>>>>>>>>> +    return false;
>>>>>>>> why does this matter?
>>>>>>>>
>>>>>>> Through preparing pathes and duplicating block, this transform can
>>>>> also
>>>>>>> help to combine a cmp in previous block and a gcond in current
>>>>> block.
>>>>>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>>>>>>> block of the current; and then combining "cmp with gcond" is safe. 
>>>>> If
>>>>>>> the cmp is defined far from the incoming block, it would be hard to
>>>>>>> achieve the combining, and the transform may not needed.
>>>>>> We're in SSA form so the "combining" doesn't really care where the
>>>>>> definition comes from.
>>>>> Combining doesn't care, but we need to make sure the copy of the
>>>>> conditional ends up in the right block since it wouldn't necessarily be
>>>>> associated with def->bb anymore.  But I'd expect the sinking pass to
>>>>> make this a non-issue in practice anyway.
>>>>>
>>>>>>
>>>>>>>>> +
>>>>>>>>> +  if (!single_succ_p (def->bb))
>>>>>>>>> +    return false;
>>>>>>>> Or this?  The actual threading will ensure this will hold true.
>>>>>>>>
>>>>>>> Yes, other thread code check this and ensure it to be true, like
>>>>>>> function thread_through_normal_block. Since this new function is
>>>>> invoked
>>>>>>> outside thread_through_normal_block, so, checking single_succ_p is
>>>>> also
>>>>>>> needed for this case.
>>>>>> I mean threading will isolate the path making this trivially true.
>>>>>> It's also no requirement for combining, in fact due to the single-use
>>>>>> check the definition can be sinked across the edge already (if
>>>>>> the edges dest didn't have multiple predecessors which this threading
>>>>>> will fix as well).
>>>>> I don't think so.  The CMP source block could end with a call and have
>>>>> an abnormal edge (for example).  We can't put the copied conditional
>>>>> before the call and putting it after the call essentially means
>>>>> creating
>>>>> a new block.
>>>>>
>>>>> The CMP source block could also end with a conditional.  Where do we
>>>>> put
>>>>> the one we want to copy into the CMP source block in that case? :-)
>>>>>
>>>>> This is something else we'd want to check if we ever allowed the the
>>>>> CMP
>>>>> defining block to not be the immediate predecessor of the conditional
>>>>> jump block.  If we did that we'd need to validate that the block where
>>>>> we're going to insert the copy of the jump has a single successor.
>>>>
>>>> But were just isolating a path here. The actual combine job is left to followup cleanups. 
>>> Absolutely agreed.  My point was that there's some additional stuff we'd
>>> have to verify does the right thing if we wanted to allow the CMP to be
>>> somewhere other than in the immediate predecessor of the conditional
>>> jump block.
>> 
>> For correctness?  No.  For the CMP to be forwarded?  No.  For optimality
>> maybe - forwarding a binary operation always incurs register pressure
>> increase.
> For correctness of the patch.  Conceptually I have _no_ issues with
> having the CMP in a different block than an immediate predecessor of the
> conditional jump block.  But the patch does certain code which would
> need to be audited with that change in mind.
Thanks for all your great comments! It is right: if the immediate predecessor
of the conditional jump block has more than one successor, the conditional
jump block can be duplicated to split the path, and the conditional jump
will stay in the duplicated block instead of being inserted into the
predecessor.  From a functionality aspect, it is still correct.  However, it
does not merge the CMP with the conditional jump in this pass, so it may not
directly help to eliminate the CMP.  I also agree this path may provide other
optimization opportunities in following passes.

I just ran a check with a gcc bootstrap, and found there are ~1800 edges
where !single_succ_p (e->src), and a similar number of edges where
single_succ_p (e->src).  It would be valuable to take the opportunity for
these edges with !single_succ_p (e->src).

Jiufu Guo
>
>> 
>> Btw, as you already said sinking should have sinked the CMP to the
>> predecessor (since we have a single use in the PHI).
>> 
>> So I hardly see the point of making this difference.
> :-)
>
> jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH V4] A jump threading opportunity for condition branch
  2019-06-03  2:18               ` [PATCH V3] " Jiufu Guo
@ 2019-06-04  5:30                 ` Jiufu Guo
  2019-06-13 18:56                   ` Jeff Law
  0 siblings, 1 reply; 38+ messages in thread
From: Jiufu Guo @ 2019-06-04  5:30 UTC (permalink / raw)
  To: Jeff Law
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt


Hi,

This patch implements a new opportunity of jump threading for PR77820.
In this optimization, conditional jumps are merged with unconditional
jump. And then moving CMP result to GPR is eliminated.

This version is based on the proposal of Richard, Jeff and Andrew on
previous versions, and refined to incorporate comments, such as accept
the path with single_succ_p (e->src).
Thanks for the reviews!

Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help
from Jeff) with no regressions (two cases are improved and updated
to keep original test coverage) and new testcases are added.
Is this ok for trunk?

Example of this opportunity looks like below:

  <P0>
  p0 = a CMP b
  goto <X>;

  <P1>
  p1 = c CMP d
  goto <X>;

  <X>
  # phi = PHI <p0 (P0), p1 (P1)>
  if (phi != 0) goto <Y>; else goto <Z>;

Could be transformed to:

  <P0>
  p0 = a CMP b
  if (p0 != 0) goto <Y>; else goto <Z>;

  <P1>
  p1 = c CMP d
  if (p1 != 0) goto <Y>; else goto <Z>;


This optimization eliminates:
1. saving CMP result: p0 = a CMP b. 
2. additional CMP on branch: if (phi != 0).
3. converting the CMP result when there is phi = (INT) p0.

Thanks!
Jiufu Guo

[gcc]
2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* tree-ssa-threadedge.c
	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
	function.
	(thread_across_edge): Add call to
	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.

[gcc/testsuite]
2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
	    Lijia He  <helijia@linux.ibm.com>

	PR tree-optimization/77820
	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
	* gcc.target/sh/pr51244-20.c: Update testcase.


---
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 +++++++++
 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 ++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
 gcc/testsuite/gcc.target/sh/pr51244-20.c         |  2 +-
 gcc/tree-ssa-threadedge.c                        | 70 +++++++++++++++++++++++-
 7 files changed, 187 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
new file mode 100644
index 0000000..5227c87
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, long x)
+{
+  _Bool t;
+  if (x)
+    {
+      g (a + 1);
+      t = a < b;
+      c = d + x;
+    }
+  else
+    {
+      g (b + 1);
+      a = c + d;
+      t = c > d;
+    }
+
+  if (t)
+    g1 (c);
+
+  g (a);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
new file mode 100644
index 0000000..eaf89bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  _Bool t;
+  if (x)
+    t = c < d;
+  else
+    t = a < b;
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
new file mode 100644
index 0000000..d5a1e0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (void);
+void g1 (void);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  if (x)
+    t = a < b;
+  else if (d == x)
+    t = c < b;
+  else
+    t = d > c;
+
+  if (t)
+    {
+      g1 ();
+      g ();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
new file mode 100644
index 0000000..53acabc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-vrp1" } */
+
+void g (int);
+void g1 (int);
+
+void
+f (long a, long b, long c, long d, int x)
+{
+  int t;
+  _Bool l1 = 0, l2 = 0;
+  if (x)
+    {
+      g (a);
+      c = a + b;
+      t = a < b;
+      l1 = 1;
+    }
+  else
+    {
+      g1 (b);
+      t = c > d;
+      d = c + b;
+      l2 = 1;
+    }
+
+  if (t)
+    {
+      if (l1 | l2)
+	g1 (c);
+    }
+  else
+    {
+      g (d);
+      g1 (a + b);
+    }
+  g (c + d);
+}
+
+/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
index e9b4f26..fb171cd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */
+/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -fno-tree-dominator-opts -fno-tree-vrp -w" } */
 
 struct __sFILE
 {
diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c
index c342163..be265cd 100644
--- a/gcc/testsuite/gcc.target/sh/pr51244-20.c
+++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c
@@ -1,7 +1,7 @@
 /* Check that the SH specific sh_treg_combine RTL optimization pass works as
    expected.  */
 /* { dg-do compile }  */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */
 
 /* { dg-final { scan-assembler-not "not\t" } } */
 /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index c3ea2d6..785227d 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -1157,6 +1157,68 @@ thread_through_normal_block (edge e,
   return 0;
 }
 
+/* There are basic blocks look like:
+   <P0>
+   p0 = a CMP b ; or p0 = (INT) (a CMP b)
+   goto <X>;
+
+   <P1>
+   p1 = c CMP d
+   goto <X>;
+
+   <X>
+   # phi = PHI <p0 (P0), p1 (P1)>
+   if (phi != 0) goto <Y>; else goto <Z>;
+
+   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
+   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
+
+   Return true if E is (P0,X) or (P1,X)  */
+
+bool
+edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
+{
+  /* See if there is only one stmt which is gcond.  */
+  gcond *gs;
+  if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
+    return false;
+
+  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
+  tree cond = gimple_cond_lhs (gs);
+  enum tree_code code = gimple_cond_code (gs);
+  tree rhs = gimple_cond_rhs (gs);
+  if (TREE_CODE (cond) != SSA_NAME
+      || (code != NE_EXPR && code != EQ_EXPR)
+      || (!integer_onep (rhs) && !integer_zerop (rhs)))
+    return false;
+  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
+  if (phi == NULL || gimple_bb (phi) != e->dest)
+    return false;
+
+  /* Check if phi's incoming value is CMP.  */
+  gassign *def;
+  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
+  if (TREE_CODE (value) != SSA_NAME
+      || !has_single_use (value)
+      || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
+    return false;
+
+  /* Or if it is (INT) (a CMP b).  */
+  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
+    {
+      value = gimple_assign_rhs1 (def);
+      if (TREE_CODE (value) != SSA_NAME
+	  || !has_single_use (value)
+	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
+	return false;
+    }
+
+  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
+    return false;
+
+  return true;
+}
+
 /* We are exiting E->src, see if E->dest ends with a conditional
    jump which has a known value when reached via E.
 
@@ -1317,10 +1379,12 @@ thread_across_edge (gcond *dummy_cond,
 
 	/* If we were able to thread through a successor of E->dest, then
 	   record the jump threading opportunity.  */
-	if (found)
+	if (found
+	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
 	  {
-	    propagate_threaded_block_debug_into (path->last ()->e->dest,
-						 taken_edge->dest);
+	    if (taken_edge->dest != path->last ()->e->dest)
+	      propagate_threaded_block_debug_into (path->last ()->e->dest,
+						   taken_edge->dest);
 	    register_jump_thread (path);
 	  }
 	else
-- 
2.7.4

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-06-04  5:19                 ` Jiufu Guo
@ 2019-06-04  7:07                   ` Richard Biener
  0 siblings, 0 replies; 38+ messages in thread
From: Richard Biener @ 2019-06-04  7:07 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: Jeff Law, gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

[-- Attachment #1: Type: text/plain, Size: 6022 bytes --]

On Tue, 4 Jun 2019, Jiufu Guo wrote:

> Jeff Law <law@redhat.com> writes:
> 
> > On 5/31/19 1:24 AM, Richard Biener wrote:
> >> On Thu, 30 May 2019, Jeff Law wrote:
> >> 
> >>> On 5/30/19 12:41 AM, Richard Biener wrote:
> >>>> On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
> >>>>> On 5/23/19 6:11 AM, Richard Biener wrote:
> >>>>>> On Thu, 23 May 2019, Jiufu Guo wrote:
> >>>>>>
> >>>>>>> Hi,
> >>>>>>>
> >>>>>>> Richard Biener <rguenther@suse.de> writes:
> >>>>>>>
> >>>>>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
> >>>>>
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
> >>>>> tcc_comparison)
> >>>>>>>>> +    return false;
> >>>>>>>>> +
> >>>>>>>>> +  /* Check if phi's incoming value is defined in the incoming
> >>>>> basic_block.  */
> >>>>>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
> >>>>>>>>> +  if (def->bb != e->src)
> >>>>>>>>> +    return false;
> >>>>>>>> why does this matter?
> >>>>>>>>
> >>>>>>> Through preparing pathes and duplicating block, this transform can
> >>>>> also
> >>>>>>> help to combine a cmp in previous block and a gcond in current
> >>>>> block.
> >>>>>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
> >>>>>>> block of the current; and then combining "cmp with gcond" is safe. 
> >>>>> If
> >>>>>>> the cmp is defined far from the incoming block, it would be hard to
> >>>>>>> achieve the combining, and the transform may not needed.
> >>>>>> We're in SSA form so the "combining" doesn't really care where the
> >>>>>> definition comes from.
> >>>>> Combining doesn't care, but we need to make sure the copy of the
> >>>>> conditional ends up in the right block since it wouldn't necessarily be
> >>>>> associated with def->bb anymore.  But I'd expect the sinking pass to
> >>>>> make this a non-issue in practice anyway.
> >>>>>
> >>>>>>
> >>>>>>>>> +
> >>>>>>>>> +  if (!single_succ_p (def->bb))
> >>>>>>>>> +    return false;
> >>>>>>>> Or this?  The actual threading will ensure this will hold true.
> >>>>>>>>
> >>>>>>> Yes, other thread code check this and ensure it to be true, like
> >>>>>>> function thread_through_normal_block. Since this new function is
> >>>>> invoked
> >>>>>>> outside thread_through_normal_block, so, checking single_succ_p is
> >>>>> also
> >>>>>>> needed for this case.
> >>>>>> I mean threading will isolate the path making this trivially true.
> >>>>>> It's also no requirement for combining, in fact due to the single-use
> >>>>>> check the definition can be sinked across the edge already (if
> >>>>>> the edges dest didn't have multiple predecessors which this threading
> >>>>>> will fix as well).
> >>>>> I don't think so.  The CMP source block could end with a call and have
> >>>>> an abnormal edge (for example).  We can't put the copied conditional
> >>>>> before the call and putting it after the call essentially means
> >>>>> creating
> >>>>> a new block.
> >>>>>
> >>>>> The CMP source block could also end with a conditional.  Where do we
> >>>>> put
> >>>>> the one we want to copy into the CMP source block in that case? :-)
> >>>>>
> >>>>> This is something else we'd want to check if we ever allowed the the
> >>>>> CMP
> >>>>> defining block to not be the immediate predecessor of the conditional
> >>>>> jump block.  If we did that we'd need to validate that the block where
> >>>>> we're going to insert the copy of the jump has a single successor.
> >>>>
> >>>> But were just isolating a path here. The actual combine job is left to followup cleanups. 
> >>> Absolutely agreed.  My point was that there's some additional stuff we'd
> >>> have to verify does the right thing if we wanted to allow the CMP to be
> >>> somewhere other than in the immediate predecessor of the conditional
> >>> jump block.
> >> 
> >> For correctness?  No.  For the CMP to be forwarded?  No.  For optimality
> >> maybe - forwarding a binary operation always incurs register pressure
> >> increase.
> > For correctness of the patch.  Conceptually I have _no_ issues with
> > having the CMP in a different block than an immediate predecessor of the
> > conditional jump block.  But the patch does certain code which would
> > need to be audited with that change in mind.
> Thanks for all your great comments! It is right, if immediate predecessor
> of the conditional jump block has more than one successor, the conditional
> jump block can be duplicated to split the path; and the conditional jump
> will remain in the duplicated block instead of being inserted into the
> predecessor.  From a functionality aspect, it is still correct.  While it
> does not merge the CMP with the conditional jump in this pass, it may not
> directly help to eliminate the CMP.  I also agree this path may provide
> other optimization opportunities in following passes.
> 
> I just did a check with a gcc bootstrap, and found there are ~1800 edges
> with !single_succ_p (e->src).  A similar number of edges are single_succ_p
> (e->src).  It would be valuable to take the opportunity for these edges
> with !single_succ_p (e->src).

Thanks for checking that.  It would be interesting to see how many
cases in the IL after the jump threading pass still have
!single_succ_p.  That is, I'd at least have allowed a regular
jump threading opportunity path ending in the condition block
extended to tail-duplicate that block where previous to applying
the jump-threading there's !single_succ_p but after jump threading
finished it would have been single_succ_p.

Creating twisted CFG examples is probably easiest done by feeding
DOM with a GIMPLE FE testcase.

Richard.


> Jiufu Guo
> >
> >> 
> >> Btw, as you already said sinking should have sinked the CMP to the
> >> predecessor (since we have a single use in the PHI).
> >> 
> >> So I hardly see the point of making this difference.
> > :-)
> >
> > jeff
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] A jump threading opportunity for condition branch
  2019-05-31 15:28               ` Jeff Law
  2019-06-04  5:19                 ` Jiufu Guo
@ 2019-06-07  0:05                 ` Jeff Law
  1 sibling, 0 replies; 38+ messages in thread
From: Jeff Law @ 2019-06-07  0:05 UTC (permalink / raw)
  To: Richard Biener
  Cc: Jiufu Guo, gcc-patches, Jakub Jelinek, Daniel Berlin, segher, wschmidt

On 5/31/19 9:03 AM, Jeff Law wrote:
> On 5/31/19 1:24 AM, Richard Biener wrote:
>> On Thu, 30 May 2019, Jeff Law wrote:
>>
>>> On 5/30/19 12:41 AM, Richard Biener wrote:
>>>> On May 29, 2019 10:18:01 PM GMT+02:00, Jeff Law <law@redhat.com> wrote:
>>>>> On 5/23/19 6:11 AM, Richard Biener wrote:
>>>>>> On Thu, 23 May 2019, Jiufu Guo wrote:
>>>>>>
>>>>>>> Hi,
>>>>>>>
>>>>>>> Richard Biener <rguenther@suse.de> writes:
>>>>>>>
>>>>>>>> On Tue, 21 May 2019, Jiufu Guo wrote:
>>>>>
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) !=
>>>>> tcc_comparison)
>>>>>>>>> +    return false;
>>>>>>>>> +
>>>>>>>>> +  /* Check if phi's incoming value is defined in the incoming
>>>>> basic_block.  */
>>>>>>>>> +  edge e = gimple_phi_arg_edge (phi, index);
>>>>>>>>> +  if (def->bb != e->src)
>>>>>>>>> +    return false;
>>>>>>>> why does this matter?
>>>>>>>>
>>>>>>> Through preparing pathes and duplicating block, this transform can
>>>>> also
>>>>>>> help to combine a cmp in previous block and a gcond in current
>>>>> block.
>>>>>>> "if (def->bb != e->src)" make sure the cmp is define in the incoming
>>>>>>> block of the current; and then combining "cmp with gcond" is safe. 
>>>>> If
>>>>>>> the cmp is defined far from the incoming block, it would be hard to
>>>>>>> achieve the combining, and the transform may not needed.
>>>>>> We're in SSA form so the "combining" doesn't really care where the
>>>>>> definition comes from.
>>>>> Combining doesn't care, but we need to make sure the copy of the
>>>>> conditional ends up in the right block since it wouldn't necessarily be
>>>>> associated with def->bb anymore.  But I'd expect the sinking pass to
>>>>> make this a non-issue in practice anyway.
>>>>>
>>>>>>
>>>>>>>>> +
>>>>>>>>> +  if (!single_succ_p (def->bb))
>>>>>>>>> +    return false;
>>>>>>>> Or this?  The actual threading will ensure this will hold true.
>>>>>>>>
>>>>>>> Yes, other thread code check this and ensure it to be true, like
>>>>>>> function thread_through_normal_block. Since this new function is
>>>>> invoked
>>>>>>> outside thread_through_normal_block, so, checking single_succ_p is
>>>>> also
>>>>>>> needed for this case.
>>>>>> I mean threading will isolate the path making this trivially true.
>>>>>> It's also no requirement for combining, in fact due to the single-use
>>>>>> check the definition can be sinked across the edge already (if
>>>>>> the edges dest didn't have multiple predecessors which this threading
>>>>>> will fix as well).
>>>>> I don't think so.  The CMP source block could end with a call and have
>>>>> an abnormal edge (for example).  We can't put the copied conditional
>>>>> before the call and putting it after the call essentially means
>>>>> creating
>>>>> a new block.
>>>>>
>>>>> The CMP source block could also end with a conditional.  Where do we
>>>>> put
>>>>> the one we want to copy into the CMP source block in that case? :-)
>>>>>
>>>>> This is something else we'd want to check if we ever allowed the the
>>>>> CMP
>>>>> defining block to not be the immediate predecessor of the conditional
>>>>> jump block.  If we did that we'd need to validate that the block where
>>>>> we're going to insert the copy of the jump has a single successor.
>>>>
>>>> But were just isolating a path here. The actual combine job is left to followup cleanups. 
>>> Absolutely agreed.  My point was that there's some additional stuff we'd
>>> have to verify does the right thing if we wanted to allow the CMP to be
>>> somewhere other than in the immediate predecessor of the conditional
>>> jump block.
>>
>> For correctness?  No.  For the CMP to be forwarded?  No.  For optimality
>> maybe - forwarding a binary operation always incurs register pressure
>> increase.
> For correctness of the patch.  Conceptually I have _no_ issues with
> having the CMP in a different block than an immediate predecessor of the
> conditional jump block.  But the patch does certain code which would
> need to be audited with that change in mind.
> 
>>
>> Btw, as you already said sinking should have sinked the CMP to the
>> predecessor (since we have a single use in the PHI).
>>
>> So I hardly see the point of making this difference.
> :-)
So just to satisfy my curiosity I put in some instrumentation to check
for cases where the CMP is not in an immediate predecessor of the
conditional branch.  It happens.  It's not terribly common though.  I'd
guess it's cases where this code is running before sinking.

I went ahead and audited the patch for this case so that we could just
eliminate that test.  The key thing is that we don't use the block
with the CMP insn at all in this code.  So there's no possibility of
duplicating the conditional into the wrong block or anything like that.

Since this code is running from within thread_across_edge it can't be
called with complex/abnormal edges or any other cases that can't be
handled since we filter those out before calling thread_across_edge.

So it should be safe to just eliminate that conditional.

There's been a lot of testing instability lately, so it's hard to know
if my testruns were clean or not since they update from the trunk each
time.  I'm going to do a fresh round and hopefully get usable results.

jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V4] A jump threading opportunity for condition branch
  2019-06-04  5:30                 ` [PATCH V4] " Jiufu Guo
@ 2019-06-13 18:56                   ` Jeff Law
  2019-06-14 12:51                     ` Jiufu Guo
  0 siblings, 1 reply; 38+ messages in thread
From: Jeff Law @ 2019-06-13 18:56 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

On 6/3/19 11:28 PM, Jiufu Guo wrote:
> 
> Hi,
> 
> This patch implements a new opportunity of jump threading for PR77820.
> In this optimization, conditional jumps are merged with unconditional
> jump. And then moving CMP result to GPR is eliminated.
> 
> This version is based on the proposal of Richard, Jeff and Andrew on
> previous versions, and refined to incorporate comments, such as accept
> the path with single_succ_p (e->src).
> Thanks for the reviews!
> 
> Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help
> from Jeff) with no regressions (two cases are improved and updated
> to keep original test coverage) and new testcases are added.
> Is this ok for trunk?
> 
> Example of this opportunity looks like below:
> 
>   <P0>
>   p0 = a CMP b
>   goto <X>;
> 
>   <P1>
>   p1 = c CMP d
>   goto <X>;
> 
>   <X>
>   # phi = PHI <p0 (P0), p1 (P1)>
>   if (phi != 0) goto <Y>; else goto <Z>;
> 
> Could be transformed to:
> 
>   <P0>
>   p0 = a CMP b
>   if (p0 != 0) goto <Y>; else goto <Z>;
> 
>   <P1>
>   p1 = c CMP d
>   if (p1 != 0) goto <Y>; else goto <Z>;
> 
> 
> This optimization eliminates:
> 1. saving CMP result: p0 = a CMP b. 
> 2. additional CMP on branch: if (phi != 0).
> 3. converting CMP result if there is phi = (INT) p0 if there is.
> 
> Thanks!
> Jiufu Guo
> 
> [gcc]
> 2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
> 
> 	PR tree-optimization/77820
> 	* tree-ssa-threadedge.c
> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
> 	function.
> 	(thread_across_edge): Add call to
> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
> 
> [gcc/testsuite]
> 2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
> 	    Lijia He  <helijia@linux.ibm.com>
> 
> 	PR tree-optimization/77820
> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
> 	* gcc.target/sh/pr51244-20.c: Update testcase.
Yes, this is OK for the trunk.  I'll commit it momentarily.

Jeff




> 
> 
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 +++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 ++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>  gcc/testsuite/gcc.target/sh/pr51244-20.c         |  2 +-
>  gcc/tree-ssa-threadedge.c                        | 70 +++++++++++++++++++++++-
>  7 files changed, 187 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> new file mode 100644
> index 0000000..5227c87
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, long x)
> +{
> +  _Bool t;
> +  if (x)
> +    {
> +      g (a + 1);
> +      t = a < b;
> +      c = d + x;
> +    }
> +  else
> +    {
> +      g (b + 1);
> +      a = c + d;
> +      t = c > d;
> +    }
> +
> +  if (t)
> +    g1 (c);
> +
> +  g (a);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> new file mode 100644
> index 0000000..eaf89bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  _Bool t;
> +  if (x)
> +    t = c < d;
> +  else
> +    t = a < b;
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> new file mode 100644
> index 0000000..d5a1e0b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (void);
> +void g1 (void);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  if (x)
> +    t = a < b;
> +  else if (d == x)
> +    t = c < b;
> +  else
> +    t = d > c;
> +
> +  if (t)
> +    {
> +      g1 ();
> +      g ();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> new file mode 100644
> index 0000000..53acabc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
> +
> +void g (int);
> +void g1 (int);
> +
> +void
> +f (long a, long b, long c, long d, int x)
> +{
> +  int t;
> +  _Bool l1 = 0, l2 = 0;
> +  if (x)
> +    {
> +      g (a);
> +      c = a + b;
> +      t = a < b;
> +      l1 = 1;
> +    }
> +  else
> +    {
> +      g1 (b);
> +      t = c > d;
> +      d = c + b;
> +      l2 = 1;
> +    }
> +
> +  if (t)
> +    {
> +      if (l1 | l2)
> +	g1 (c);
> +    }
> +  else
> +    {
> +      g (d);
> +      g1 (a + b);
> +    }
> +  g (c + d);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> index e9b4f26..fb171cd 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */
> +/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -fno-tree-dominator-opts -fno-tree-vrp -w" } */
>  
>  struct __sFILE
>  {
> diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c
> index c342163..be265cd 100644
> --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c
> +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c
> @@ -1,7 +1,7 @@
>  /* Check that the SH specific sh_treg_combine RTL optimization pass works as
>     expected.  */
>  /* { dg-do compile }  */
> -/* { dg-options "-O2" } */
> +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */
>  
>  /* { dg-final { scan-assembler-not "not\t" } } */
>  /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */
> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> index c3ea2d6..785227d 100644
> --- a/gcc/tree-ssa-threadedge.c
> +++ b/gcc/tree-ssa-threadedge.c
> @@ -1157,6 +1157,68 @@ thread_through_normal_block (edge e,
>    return 0;
>  }
>  
> +/* There are basic blocks look like:
> +   <P0>
> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
> +   goto <X>;
> +
> +   <P1>
> +   p1 = c CMP d
> +   goto <X>;
> +
> +   <X>
> +   # phi = PHI <p0 (P0), p1 (P1)>
> +   if (phi != 0) goto <Y>; else goto <Z>;
> +
> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
> +
> +   Return true if E is (P0,X) or (P1,X)  */
> +
> +bool
> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
> +{
> +  /* See if there is only one stmt which is gcond.  */
> +  gcond *gs;
> +  if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
> +    return false;
> +
> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
> +  tree cond = gimple_cond_lhs (gs);
> +  enum tree_code code = gimple_cond_code (gs);
> +  tree rhs = gimple_cond_rhs (gs);
> +  if (TREE_CODE (cond) != SSA_NAME
> +      || (code != NE_EXPR && code != EQ_EXPR)
> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
> +    return false;
> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
> +  if (phi == NULL || gimple_bb (phi) != e->dest)
> +    return false;
> +
> +  /* Check if phi's incoming value is CMP.  */
> +  gassign *def;
> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
> +  if (TREE_CODE (value) != SSA_NAME
> +      || !has_single_use (value)
> +      || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
> +    return false;
> +
> +  /* Or if it is (INT) (a CMP b).  */
> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
> +    {
> +      value = gimple_assign_rhs1 (def);
> +      if (TREE_CODE (value) != SSA_NAME
> +	  || !has_single_use (value)
> +	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
> +	return false;
> +    }
> +
> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
> +    return false;
> +
> +  return true;
> +}
> +
>  /* We are exiting E->src, see if E->dest ends with a conditional
>     jump which has a known value when reached via E.
>  
> @@ -1317,10 +1379,12 @@ thread_across_edge (gcond *dummy_cond,
>  
>  	/* If we were able to thread through a successor of E->dest, then
>  	   record the jump threading opportunity.  */
> -	if (found)
> +	if (found
> +	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
>  	  {
> -	    propagate_threaded_block_debug_into (path->last ()->e->dest,
> -						 taken_edge->dest);
> +	    if (taken_edge->dest != path->last ()->e->dest)
> +	      propagate_threaded_block_debug_into (path->last ()->e->dest,
> +						   taken_edge->dest);
>  	    register_jump_thread (path);
>  	  }
>  	else
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V4] A jump threading opportunity for condition branch
  2019-06-13 18:56                   ` Jeff Law
@ 2019-06-14 12:51                     ` Jiufu Guo
  2019-06-14 16:34                       ` Jeff Law
  0 siblings, 1 reply; 38+ messages in thread
From: Jiufu Guo @ 2019-06-14 12:51 UTC (permalink / raw)
  To: Jeff Law
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

Jeff Law <law@redhat.com> writes:

> On 6/3/19 11:28 PM, Jiufu Guo wrote:
>> 
>> Hi,
>> 
>> This patch implements a new opportunity of jump threading for PR77820.
>> In this optimization, conditional jumps are merged with unconditional
>> jump. And then moving CMP result to GPR is eliminated.
>> 
>> This version is based on the proposal of Richard, Jeff and Andrew on
>> previous versions, and refined to incorporate comments, such as accept
>> the path with single_succ_p (e->src).
>> Thanks for the reviews!
>> 
>> Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help
>> from Jeff) with no regressions (two cases are improved and updated
>> to keep original test coverage) and new testcases are added.
>> Is this ok for trunk?
>> 
>> Example of this opportunity looks like below:
>> 
>>   <P0>
>>   p0 = a CMP b
>>   goto <X>;
>> 
>>   <P1>
>>   p1 = c CMP d
>>   goto <X>;
>> 
>>   <X>
>>   # phi = PHI <p0 (P0), p1 (P1)>
>>   if (phi != 0) goto <Y>; else goto <Z>;
>> 
>> Could be transformed to:
>> 
>>   <P0>
>>   p0 = a CMP b
>>   if (p0 != 0) goto <Y>; else goto <Z>;
>> 
>>   <P1>
>>   p1 = c CMP d
>>   if (p1 != 0) goto <Y>; else goto <Z>;
>> 
>> 
>> This optimization eliminates:
>> 1. saving CMP result: p0 = a CMP b. 
>> 2. additional CMP on branch: if (phi != 0).
>> 3. converting CMP result if there is phi = (INT) p0 if there is.
>> 
>> Thanks!
>> Jiufu Guo
>> 
>> [gcc]
>> 2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
>> 	    Lijia He  <helijia@linux.ibm.com>
>> 
>> 	PR tree-optimization/77820
>> 	* tree-ssa-threadedge.c
>> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
>> 	function.
>> 	(thread_across_edge): Add call to
>> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>> 
>> [gcc/testsuite]
>> 2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
>> 	    Lijia He  <helijia@linux.ibm.com>
>> 
>> 	PR tree-optimization/77820
>> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
>> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
>> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>> 	* gcc.target/sh/pr51244-20.c: Update testcase.
> Yes, this is OK for the trunk.  I'll commit it momentarily.
Thanks, Jeff!  I now have svn access, so I can commit it myself next time.
>
> Jeff
>
>
>
>
>> 
>> 
>> ---
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 ++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 +++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 ++++++++++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>>  gcc/testsuite/gcc.target/sh/pr51244-20.c         |  2 +-
>>  gcc/tree-ssa-threadedge.c                        | 70 +++++++++++++++++++++++-
>>  7 files changed, 187 insertions(+), 5 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>> 
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>> new file mode 100644
>> index 0000000..5227c87
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>> @@ -0,0 +1,30 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (int);
>> +void g1 (int);
>> +
>> +void
>> +f (long a, long b, long c, long d, long x)
>> +{
>> +  _Bool t;
>> +  if (x)
>> +    {
>> +      g (a + 1);
>> +      t = a < b;
>> +      c = d + x;
>> +    }
>> +  else
>> +    {
>> +      g (b + 1);
>> +      a = c + d;
>> +      t = c > d;
>> +    }
>> +
>> +  if (t)
>> +    g1 (c);
>> +
>> +  g (a);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>> new file mode 100644
>> index 0000000..eaf89bb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>> @@ -0,0 +1,23 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (void);
>> +void g1 (void);
>> +
>> +void
>> +f (long a, long b, long c, long d, int x)
>> +{
>> +  _Bool t;
>> +  if (x)
>> +    t = c < d;
>> +  else
>> +    t = a < b;
>> +
>> +  if (t)
>> +    {
>> +      g1 ();
>> +      g ();
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>> new file mode 100644
>> index 0000000..d5a1e0b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>> @@ -0,0 +1,25 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (void);
>> +void g1 (void);
>> +
>> +void
>> +f (long a, long b, long c, long d, int x)
>> +{
>> +  int t;
>> +  if (x)
>> +    t = a < b;
>> +  else if (d == x)
>> +    t = c < b;
>> +  else
>> +    t = d > c;
>> +
>> +  if (t)
>> +    {
>> +      g1 ();
>> +      g ();
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>> new file mode 100644
>> index 0000000..53acabc
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>> @@ -0,0 +1,40 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>> +
>> +void g (int);
>> +void g1 (int);
>> +
>> +void
>> +f (long a, long b, long c, long d, int x)
>> +{
>> +  int t;
>> +  _Bool l1 = 0, l2 = 0;
>> +  if (x)
>> +    {
>> +      g (a);
>> +      c = a + b;
>> +      t = a < b;
>> +      l1 = 1;
>> +    }
>> +  else
>> +    {
>> +      g1 (b);
>> +      t = c > d;
>> +      d = c + b;
>> +      l2 = 1;
>> +    }
>> +
>> +  if (t)
>> +    {
>> +      if (l1 | l2)
>> +	g1 (c);
>> +    }
>> +  else
>> +    {
>> +      g (d);
>> +      g1 (a + b);
>> +    }
>> +  g (c + d);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>> index e9b4f26..fb171cd 100644
>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>> @@ -1,5 +1,5 @@
>>  /* { dg-do compile } */
>> -/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */
>> +/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -fno-tree-dominator-opts -fno-tree-vrp -w" } */
>>  
>>  struct __sFILE
>>  {
>> diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c
>> index c342163..be265cd 100644
>> --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c
>> +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c
>> @@ -1,7 +1,7 @@
>>  /* Check that the SH specific sh_treg_combine RTL optimization pass works as
>>     expected.  */
>>  /* { dg-do compile }  */
>> -/* { dg-options "-O2" } */
>> +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */
>>  
>>  /* { dg-final { scan-assembler-not "not\t" } } */
>>  /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */
>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>> index c3ea2d6..785227d 100644
>> --- a/gcc/tree-ssa-threadedge.c
>> +++ b/gcc/tree-ssa-threadedge.c
>> @@ -1157,6 +1157,68 @@ thread_through_normal_block (edge e,
>>    return 0;
>>  }
>>  
>> +/* There are basic blocks look like:
>> +   <P0>
>> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
>> +   goto <X>;
>> +
>> +   <P1>
>> +   p1 = c CMP d
>> +   goto <X>;
>> +
>> +   <X>
>> +   # phi = PHI <p0 (P0), p1 (P1)>
>> +   if (phi != 0) goto <Y>; else goto <Z>;
>> +
>> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
>> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
>> +
>> +   Return true if E is (P0,X) or (P1,X)  */
>> +
>> +bool
>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>> +{
>> +  /* See if there is only one stmt which is gcond.  */
>> +  gcond *gs;
>> +  if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
>> +    return false;
>> +
>> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>> +  tree cond = gimple_cond_lhs (gs);
>> +  enum tree_code code = gimple_cond_code (gs);
>> +  tree rhs = gimple_cond_rhs (gs);
>> +  if (TREE_CODE (cond) != SSA_NAME
>> +      || (code != NE_EXPR && code != EQ_EXPR)
>> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
>> +    return false;
>> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>> +  if (phi == NULL || gimple_bb (phi) != e->dest)
>> +    return false;
>> +
>> +  /* Check if phi's incoming value is CMP.  */
>> +  gassign *def;
>> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>> +  if (TREE_CODE (value) != SSA_NAME
>> +      || !has_single_use (value)
>> +      || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>> +    return false;
>> +
>> +  /* Or if it is (INT) (a CMP b).  */
>> +  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>> +    {
>> +      value = gimple_assign_rhs1 (def);
>> +      if (TREE_CODE (value) != SSA_NAME
>> +	  || !has_single_use (value)
>> +	  || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
>> +	return false;
>> +    }
>> +
>> +  if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>> +    return false;
>> +
>> +  return true;
>> +}
>> +
>>  /* We are exiting E->src, see if E->dest ends with a conditional
>>     jump which has a known value when reached via E.
>>  
>> @@ -1317,10 +1379,12 @@ thread_across_edge (gcond *dummy_cond,
>>  
>>  	/* If we were able to thread through a successor of E->dest, then
>>  	   record the jump threading opportunity.  */
>> -	if (found)
>> +	if (found
>> +	    || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e))
>>  	  {
>> -	    propagate_threaded_block_debug_into (path->last ()->e->dest,
>> -						 taken_edge->dest);
>> +	    if (taken_edge->dest != path->last ()->e->dest)
>> +	      propagate_threaded_block_debug_into (path->last ()->e->dest,
>> +						   taken_edge->dest);
>>  	    register_jump_thread (path);
>>  	  }
>>  	else
>> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH V4] A jump threading opportunity for condition branch
  2019-06-14 12:51                     ` Jiufu Guo
@ 2019-06-14 16:34                       ` Jeff Law
  0 siblings, 0 replies; 38+ messages in thread
From: Jeff Law @ 2019-06-14 16:34 UTC (permalink / raw)
  To: Jiufu Guo
  Cc: Richard Biener, gcc-patches, Jakub Jelinek, Daniel Berlin,
	segher, wschmidt

On 6/14/19 6:51 AM, Jiufu Guo wrote:
> Jeff Law <law@redhat.com> writes:
> 
>> On 6/3/19 11:28 PM, Jiufu Guo wrote:
>>>
>>> Hi,
>>>
>>> This patch implements a new opportunity of jump threading for PR77820.
>>> In this optimization, conditional jumps are merged with an unconditional
>>> jump, and the move of the CMP result to a GPR is then eliminated.
>>>
>>> This version is based on the proposal of Richard, Jeff and Andrew on
>>> previous versions, and refined to incorporate comments, such as accept
>>> the path with single_succ_p (e->src).
>>> Thanks for the reviews!
>>>
>>> Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help
>>> from Jeff) with no regressions (two cases are improved and updated
>>> to keep original test coverage) and new testcases are added.
>>> Is this ok for trunk?
>>>
>>> Example of this opportunity looks like below:
>>>
>>>   <P0>
>>>   p0 = a CMP b
>>>   goto <X>;
>>>
>>>   <P1>
>>>   p1 = c CMP d
>>>   goto <X>;
>>>
>>>   <X>
>>>   # phi = PHI <p0 (P0), p1 (P1)>
>>>   if (phi != 0) goto <Y>; else goto <Z>;
>>>
>>> Could be transformed to:
>>>
>>>   <P0>
>>>   p0 = a CMP b
>>>   if (p0 != 0) goto <Y>; else goto <Z>;
>>>
>>>   <P1>
>>>   p1 = c CMP d
>>>   if (p1 != 0) goto <Y>; else goto <Z>;
>>>
>>>
>>> This optimization eliminates:
>>> 1. saving CMP result: p0 = a CMP b. 
>>> 2. additional CMP on branch: if (phi != 0).
>>> 3. converting the CMP result when there is phi = (INT) p0.
>>>
>>> Thanks!
>>> Jiufu Guo
>>>
>>> [gcc]
>>> 2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>
>>> 	PR tree-optimization/77820
>>> 	* tree-ssa-threadedge.c
>>> 	(edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
>>> 	function.
>>> 	(thread_across_edge): Add call to
>>> 	edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>>>
>>> [gcc/testsuite]
>>> 2019-06-04  Jiufu Guo  <guojiufu@linux.ibm.com>
>>> 	    Lijia He  <helijia@linux.ibm.com>
>>>
>>> 	PR tree-optimization/77820
>>> 	* gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>>> 	* gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>>> 	* gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
>>> 	* gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
>>> 	* gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>>> 	* gcc.target/sh/pr51244-20.c: Update testcase.
>> Yes, this is OK for the trunk.  I'll commit it momentarily.
> Thanks Jeff! I got svn access, so I can commit next time.
Yea, Segher pointed that out to me in IRC.  Looking forward to your next
patchkit :-)

jeff

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2019-06-14 16:34 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-21 13:45 [PATCH] A jump threading opportunity for condition branch Jiufu Guo
2019-05-22 12:38 ` Richard Biener
2019-05-23 12:06   ` Jiufu Guo
2019-05-23 12:11     ` Richard Biener
2019-05-23 14:40       ` Jiufu Guo
2019-05-24 12:45         ` Richard Biener
2019-05-24 14:52           ` Jiufu Guo
2019-05-28 14:07           ` [PATCH V2] " Jiufu Guo
2019-05-29  1:51             ` Jiufu Guo
2019-05-29 12:40             ` Richard Biener
2019-05-29 19:47               ` Jeff Law
2019-05-30 15:09                 ` Jiufu Guo
2019-05-30 23:55                   ` Jeff Law
2019-05-31  7:34                     ` Richard Biener
2019-06-04  3:03                     ` Jiufu Guo
2019-05-30 15:34             ` Jeff Law
2019-06-03  2:18               ` [PATCH V3] " Jiufu Guo
2019-06-04  5:30                 ` [PATCH V4] " Jiufu Guo
2019-06-13 18:56                   ` Jeff Law
2019-06-14 12:51                     ` Jiufu Guo
2019-06-14 16:34                       ` Jeff Law
2019-05-29 20:26           ` [PATCH] " Jeff Law
2019-05-30  6:57             ` Richard Biener
2019-05-30  6:58               ` Jiufu Guo
2019-05-30 14:59                 ` Jeff Law
2019-05-30 15:03               ` Jeff Law
2019-05-29 20:22       ` Jeff Law
2019-05-30  6:40         ` Jiufu Guo
2019-05-30  6:44         ` Richard Biener
2019-05-30 20:17           ` Jeff Law
2019-05-31  7:30             ` Richard Biener
2019-05-31 15:28               ` Jeff Law
2019-06-04  5:19                 ` Jiufu Guo
2019-06-04  7:07                   ` Richard Biener
2019-06-07  0:05                 ` Jeff Law
2019-05-29 20:18     ` Jeff Law
2019-05-30  6:41       ` Richard Biener
2019-05-29 20:12 ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).