public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
@ 2021-05-18  7:42 Jakub Jelinek
  2021-05-18  7:55 ` Richard Biener
  2021-05-19  8:15 ` Christophe Lyon
  0 siblings, 2 replies; 8+ messages in thread
From: Jakub Jelinek @ 2021-05-18  7:42 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi!

As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
>= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
>= comparison is implemented as (res & 1) == res to choose the 0 and 1
cases from that.  As we optimize that only with -ffinite-math-only, the
2 case is assumed not to happen and my earlier match.pd change optimizes
(res & 1) == res into (res & ~1) == 0, so this patch pattern matches
that case and handles it like res >= 0.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-05-18  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/94589
	* tree-ssa-phiopt.c (spaceship_replacement): Pattern match
	phi result used in (res & ~1) == 0 comparison as res >= 0 as
	res == 2 would be UB with -ffinite-math-only.

	* g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.

--- gcc/tree-ssa-phiopt.c.jj	2021-05-06 14:05:03.254763364 +0200
+++ gcc/tree-ssa-phiopt.c	2021-05-17 14:51:55.648356364 +0200
@@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
 		       edge e0, edge e1, gphi *phi,
 		       tree arg0, tree arg1)
 {
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
-      || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
+  tree phires = PHI_RESULT (phi);
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
+      || TYPE_UNSIGNED (TREE_TYPE (phires))
       || !tree_fits_shwi_p (arg0)
       || !tree_fits_shwi_p (arg1)
       || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
@@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
 
   use_operand_p use_p;
   gimple *use_stmt;
-  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
     return false;
-  if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
+  if (!single_imm_use (phires, &use_p, &use_stmt))
     return false;
   enum tree_code cmp;
   tree lhs, rhs;
+  gimple *orig_use_stmt = use_stmt;
+  tree orig_use_lhs = NULL_TREE;
+  int prec = TYPE_PRECISION (TREE_TYPE (phires));
+  if (is_gimple_assign (use_stmt)
+      && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
+      && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
+      && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
+	  == wi::shifted_mask (1, prec - 1, false, prec)))
+    {
+      /* For partial_ordering result operator>= with unspec as second
+	 argument is (res & 1) == res, folded by match.pd into
+	 (res & ~1) == 0.  */
+      orig_use_lhs = gimple_assign_lhs (use_stmt);
+      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
+	return false;
+      if (EDGE_COUNT (phi_bb->preds) != 4)
+	return false;
+      if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
+	return false;
+    }
   if (gimple_code (use_stmt) == GIMPLE_COND)
     {
       cmp = gimple_cond_code (use_stmt);
@@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
     default:
       return false;
     }
-  if (lhs != PHI_RESULT (phi)
+  if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
       || !tree_fits_shwi_p (rhs)
       || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
     return false;
+  if (orig_use_lhs)
+    {
+      if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
+	return false;
+      /* As for -ffast-math we assume the 2 return to be
+	 impossible, canonicalize (res & ~1) == 0 into
+	 res >= 0 and (res & ~1) != 0 as res < 0.  */
+      cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
+    }
 
   if (!empty_block_p (middle_bb))
     return false;
@@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
 				? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
     return false;
 
-  /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1.  */
+  /* lhs1 one_cmp rhs1 results in phires of 1.  */
   enum tree_code one_cmp;
   if ((cmp1 == LT_EXPR)
       ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
@@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
       use_operand_p use_p;
       imm_use_iterator iter;
       bool has_debug_uses = false;
-      FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
+      FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
 	{
 	  gimple *use_stmt = USE_STMT (use_p);
+	  if (orig_use_lhs && use_stmt == orig_use_stmt)
+	    continue;
 	  gcc_assert (is_gimple_debug (use_stmt));
 	  has_debug_uses = true;
 	  break;
 	}
+      if (orig_use_lhs)
+	{
+	  if (!has_debug_uses)
+	    FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
+	      {
+		gimple *use_stmt = USE_STMT (use_p);
+		gcc_assert (is_gimple_debug (use_stmt));
+		has_debug_uses = true;
+	      }
+	  gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
+	  tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
+	  gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
+	  update_stmt (orig_use_stmt);
+	}
 
       if (has_debug_uses)
 	{
@@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
 	     Ignore the value of 2, because if NaNs aren't expected,
 	     all floating point numbers should be comparable.  */
 	  gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
-	  tree type = TREE_TYPE (PHI_RESULT (phi));
+	  tree type = TREE_TYPE (phires);
 	  tree temp1 = make_node (DEBUG_EXPR_DECL);
 	  DECL_ARTIFICIAL (temp1) = 1;
 	  TREE_TYPE (temp1) = type;
@@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
 	  t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
 	  g = gimple_build_debug_bind (temp2, t, phi);
 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
-	  replace_uses_by (PHI_RESULT (phi), temp2);
+	  replace_uses_by (phires, temp2);
+	  if (orig_use_lhs)
+	    replace_uses_by (orig_use_lhs, temp2);
 	}
     }
 
+  if (orig_use_lhs)
+    {
+      gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
+      gsi_remove (&gsi, true);
+    }
+
   gimple_stmt_iterator psi = gsi_for_stmt (phi);
   remove_phi_node (&psi, true);
 
--- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj	2021-05-06 10:15:23.712751382 +0200
+++ gcc/testsuite/g++.dg/opt/pr94589-2.C	2021-05-17 14:57:21.154800488 +0200
@@ -1,8 +1,8 @@
 // PR tree-optimization/94589
 // { dg-do compile { target c++20 } }
 // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
-// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
-// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
+// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
+// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
 
 #include <compare>
 

	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
  2021-05-18  7:42 [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589] Jakub Jelinek
@ 2021-05-18  7:55 ` Richard Biener
  2021-05-19  8:15 ` Christophe Lyon
  1 sibling, 0 replies; 8+ messages in thread
From: Richard Biener @ 2021-05-18  7:55 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Tue, 18 May 2021, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
> >= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
> >= comparison is implemented as (res & 1) == res to choose the 0 and 1
> cases from that.  As we optimize that only with -ffinite-math-only, the
> 2 case is assumed not to happen and my earlier match.pd change optimizes
> (res & 1) == res into (res & ~1) == 0, so this patch pattern matches
> that case and handles it like res >= 0.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2021-05-18  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/94589
> 	* tree-ssa-phiopt.c (spaceship_replacement): Pattern match
> 	phi result used in (res & ~1) == 0 comparison as res >= 0 as
> 	res == 2 would be UB with -ffinite-math-only.
> 
> 	* g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.
> 
> --- gcc/tree-ssa-phiopt.c.jj	2021-05-06 14:05:03.254763364 +0200
> +++ gcc/tree-ssa-phiopt.c	2021-05-17 14:51:55.648356364 +0200
> @@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
>  		       edge e0, edge e1, gphi *phi,
>  		       tree arg0, tree arg1)
>  {
> -  if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
> -      || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
> +  tree phires = PHI_RESULT (phi);
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
> +      || TYPE_UNSIGNED (TREE_TYPE (phires))
>        || !tree_fits_shwi_p (arg0)
>        || !tree_fits_shwi_p (arg1)
>        || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
> @@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
>  
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
> +  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
>      return false;
> -  if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
> +  if (!single_imm_use (phires, &use_p, &use_stmt))
>      return false;
>    enum tree_code cmp;
>    tree lhs, rhs;
> +  gimple *orig_use_stmt = use_stmt;
> +  tree orig_use_lhs = NULL_TREE;
> +  int prec = TYPE_PRECISION (TREE_TYPE (phires));
> +  if (is_gimple_assign (use_stmt)
> +      && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
> +      && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
> +      && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
> +	  == wi::shifted_mask (1, prec - 1, false, prec)))
> +    {
> +      /* For partial_ordering result operator>= with unspec as second
> +	 argument is (res & 1) == res, folded by match.pd into
> +	 (res & ~1) == 0.  */
> +      orig_use_lhs = gimple_assign_lhs (use_stmt);
> +      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> +	return false;
> +      if (EDGE_COUNT (phi_bb->preds) != 4)
> +	return false;
> +      if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
> +	return false;
> +    }
>    if (gimple_code (use_stmt) == GIMPLE_COND)
>      {
>        cmp = gimple_cond_code (use_stmt);
> @@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
>      default:
>        return false;
>      }
> -  if (lhs != PHI_RESULT (phi)
> +  if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
>        || !tree_fits_shwi_p (rhs)
>        || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
>      return false;
> +  if (orig_use_lhs)
> +    {
> +      if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
> +	return false;
> +      /* As for -ffast-math we assume the 2 return to be
> +	 impossible, canonicalize (res & ~1) == 0 into
> +	 res >= 0 and (res & ~1) != 0 as res < 0.  */
> +      cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
> +    }
>  
>    if (!empty_block_p (middle_bb))
>      return false;
> @@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
>  				? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
>      return false;
>  
> -  /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1.  */
> +  /* lhs1 one_cmp rhs1 results in phires of 1.  */
>    enum tree_code one_cmp;
>    if ((cmp1 == LT_EXPR)
>        ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
> @@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
>        use_operand_p use_p;
>        imm_use_iterator iter;
>        bool has_debug_uses = false;
> -      FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
> +      FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
>  	{
>  	  gimple *use_stmt = USE_STMT (use_p);
> +	  if (orig_use_lhs && use_stmt == orig_use_stmt)
> +	    continue;
>  	  gcc_assert (is_gimple_debug (use_stmt));
>  	  has_debug_uses = true;
>  	  break;
>  	}
> +      if (orig_use_lhs)
> +	{
> +	  if (!has_debug_uses)
> +	    FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
> +	      {
> +		gimple *use_stmt = USE_STMT (use_p);
> +		gcc_assert (is_gimple_debug (use_stmt));
> +		has_debug_uses = true;
> +	      }
> +	  gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +	  tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
> +	  gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
> +	  update_stmt (orig_use_stmt);
> +	}
>  
>        if (has_debug_uses)
>  	{
> @@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
>  	     Ignore the value of 2, because if NaNs aren't expected,
>  	     all floating point numbers should be comparable.  */
>  	  gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
> -	  tree type = TREE_TYPE (PHI_RESULT (phi));
> +	  tree type = TREE_TYPE (phires);
>  	  tree temp1 = make_node (DEBUG_EXPR_DECL);
>  	  DECL_ARTIFICIAL (temp1) = 1;
>  	  TREE_TYPE (temp1) = type;
> @@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
>  	  t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
>  	  g = gimple_build_debug_bind (temp2, t, phi);
>  	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> -	  replace_uses_by (PHI_RESULT (phi), temp2);
> +	  replace_uses_by (phires, temp2);
> +	  if (orig_use_lhs)
> +	    replace_uses_by (orig_use_lhs, temp2);
>  	}
>      }
>  
> +  if (orig_use_lhs)
> +    {
> +      gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +      gsi_remove (&gsi, true);
> +    }
> +
>    gimple_stmt_iterator psi = gsi_for_stmt (phi);
>    remove_phi_node (&psi, true);
>  
> --- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj	2021-05-06 10:15:23.712751382 +0200
> +++ gcc/testsuite/g++.dg/opt/pr94589-2.C	2021-05-17 14:57:21.154800488 +0200
> @@ -1,8 +1,8 @@
>  // PR tree-optimization/94589
>  // { dg-do compile { target c++20 } }
>  // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
> -// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
> -// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
> +// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
> +// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
>  
>  #include <compare>
>  
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
  2021-05-18  7:42 [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589] Jakub Jelinek
  2021-05-18  7:55 ` Richard Biener
@ 2021-05-19  8:15 ` Christophe Lyon
  2021-05-19  9:09   ` Jakub Jelinek
  1 sibling, 1 reply; 8+ messages in thread
From: Christophe Lyon @ 2021-05-19  8:15 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, gcc Patches

On Tue, 18 May 2021 at 09:42, Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi!
>
> As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
> >= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
> >= comparison is implemented as (res & 1) == res to choose the 0 and 1
> cases from that.  As we optimize that only with -ffinite-math-only, the
> 2 case is assumed not to happen and my earlier match.pd change optimizes
> (res & 1) == res into (res & ~1) == 0, so this patch pattern matches
> that case and handles it like res >= 0.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-05-18  Jakub Jelinek  <jakub@redhat.com>
>
>         PR tree-optimization/94589
>         * tree-ssa-phiopt.c (spaceship_replacement): Pattern match
>         phi result used in (res & ~1) == 0 comparison as res >= 0 as
>         res == 2 would be UB with -ffinite-math-only.
>
>         * g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.
>
> --- gcc/tree-ssa-phiopt.c.jj    2021-05-06 14:05:03.254763364 +0200
> +++ gcc/tree-ssa-phiopt.c       2021-05-17 14:51:55.648356364 +0200
> @@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
>                        edge e0, edge e1, gphi *phi,
>                        tree arg0, tree arg1)
>  {
> -  if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
> -      || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
> +  tree phires = PHI_RESULT (phi);
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
> +      || TYPE_UNSIGNED (TREE_TYPE (phires))
>        || !tree_fits_shwi_p (arg0)
>        || !tree_fits_shwi_p (arg1)
>        || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
> @@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
>
>    use_operand_p use_p;
>    gimple *use_stmt;
> -  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
> +  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
>      return false;
> -  if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
> +  if (!single_imm_use (phires, &use_p, &use_stmt))
>      return false;
>    enum tree_code cmp;
>    tree lhs, rhs;
> +  gimple *orig_use_stmt = use_stmt;
> +  tree orig_use_lhs = NULL_TREE;
> +  int prec = TYPE_PRECISION (TREE_TYPE (phires));
> +  if (is_gimple_assign (use_stmt)
> +      && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
> +      && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
> +      && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
> +         == wi::shifted_mask (1, prec - 1, false, prec)))
> +    {
> +      /* For partial_ordering result operator>= with unspec as second
> +        argument is (res & 1) == res, folded by match.pd into
> +        (res & ~1) == 0.  */
> +      orig_use_lhs = gimple_assign_lhs (use_stmt);
> +      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> +       return false;
> +      if (EDGE_COUNT (phi_bb->preds) != 4)
> +       return false;
> +      if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
> +       return false;
> +    }
>    if (gimple_code (use_stmt) == GIMPLE_COND)
>      {
>        cmp = gimple_cond_code (use_stmt);
> @@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
>      default:
>        return false;
>      }
> -  if (lhs != PHI_RESULT (phi)
> +  if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
>        || !tree_fits_shwi_p (rhs)
>        || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
>      return false;
> +  if (orig_use_lhs)
> +    {
> +      if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
> +       return false;
> +      /* As for -ffast-math we assume the 2 return to be
> +        impossible, canonicalize (res & ~1) == 0 into
> +        res >= 0 and (res & ~1) != 0 as res < 0.  */
> +      cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
> +    }
>
>    if (!empty_block_p (middle_bb))
>      return false;
> @@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
>                                 ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
>      return false;
>
> -  /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1.  */
> +  /* lhs1 one_cmp rhs1 results in phires of 1.  */
>    enum tree_code one_cmp;
>    if ((cmp1 == LT_EXPR)
>        ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
> @@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
>        use_operand_p use_p;
>        imm_use_iterator iter;
>        bool has_debug_uses = false;
> -      FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
> +      FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
>         {
>           gimple *use_stmt = USE_STMT (use_p);
> +         if (orig_use_lhs && use_stmt == orig_use_stmt)
> +           continue;
>           gcc_assert (is_gimple_debug (use_stmt));
>           has_debug_uses = true;
>           break;
>         }
> +      if (orig_use_lhs)
> +       {
> +         if (!has_debug_uses)
> +           FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
> +             {
> +               gimple *use_stmt = USE_STMT (use_p);
> +               gcc_assert (is_gimple_debug (use_stmt));
> +               has_debug_uses = true;
> +             }
> +         gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +         tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
> +         gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
> +         update_stmt (orig_use_stmt);
> +       }
>
>        if (has_debug_uses)
>         {
> @@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
>              Ignore the value of 2, because if NaNs aren't expected,
>              all floating point numbers should be comparable.  */
>           gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
> -         tree type = TREE_TYPE (PHI_RESULT (phi));
> +         tree type = TREE_TYPE (phires);
>           tree temp1 = make_node (DEBUG_EXPR_DECL);
>           DECL_ARTIFICIAL (temp1) = 1;
>           TREE_TYPE (temp1) = type;
> @@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
>           t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
>           g = gimple_build_debug_bind (temp2, t, phi);
>           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> -         replace_uses_by (PHI_RESULT (phi), temp2);
> +         replace_uses_by (phires, temp2);
> +         if (orig_use_lhs)
> +           replace_uses_by (orig_use_lhs, temp2);
>         }
>      }
>
> +  if (orig_use_lhs)
> +    {
> +      gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> +      gsi_remove (&gsi, true);
> +    }
> +
>    gimple_stmt_iterator psi = gsi_for_stmt (phi);
>    remove_phi_node (&psi, true);
>
> --- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj     2021-05-06 10:15:23.712751382 +0200
> +++ gcc/testsuite/g++.dg/opt/pr94589-2.C        2021-05-17 14:57:21.154800488 +0200
> @@ -1,8 +1,8 @@
>  // PR tree-optimization/94589
>  // { dg-do compile { target c++20 } }
>  // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
> -// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
> -// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
> +// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
> +// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
>

Hi Jakub,

After this update, the test fails on arm and aarch64: according to the
logs, the optimization is still performed 14 times.

Can you check?

Thanks

Christophe

>  #include <compare>
>
>
>         Jakub
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
  2021-05-19  8:15 ` Christophe Lyon
@ 2021-05-19  9:09   ` Jakub Jelinek
  2021-05-19  9:51     ` [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589] Jakub Jelinek
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-05-19  9:09 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc Patches, Richard Biener

On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> After this update, the test fails on arm and aarch64: according to the
> logs, the optimization is still performed 14 times.

Seems this is because
              if (change
                  && !flag_syntax_only
                  && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
                      == ZERO_EXTEND))
                {
                  tree uns = unsigned_type_for (TREE_TYPE (and0));
                  and0 = fold_convert_loc (loc, uns, and0);
                  and1 = fold_convert_loc (loc, uns, and1);
                }
in fold-const.c adds on these targets extra casts that prevent the
optimizations, instead of
      _1 = __v._M_value;
      _2 = (int) _1;
      _3 = _2 & 1;
      _4 = __v._M_value;
      _5 = (int) _4;
      D.8503 = _3 == _5;
on x86_64 it is:
      _1 = __v._M_value;
      _2 = (unsigned char) _1;
      _3 = (int) _2;
      _4 = _3 & 1;
      _5 = __v._M_value;
      _6 = (int) _5;
      D.10471 = _4 == _6;
Before fre1 it is
  __v$_M_value_9 = __v._M_value;
  _1 = __v$_M_value_9;
  _2 = (int) _1;
  _8 = _1 & 1;
  _3 = (int) _8;
  _4 = __v$_M_value_9;
  _5 = (int) _4;
  _7 = _4 == _8;
which fre1 using match.pd optimizes into
  __v$_M_value_9 = __v._M_value;
  _10 = __v$_M_value_9 & -2;
  _7 = _10 == 0;
but on aarch64 before fre1 we have:
  __v$_M_value_10 = __v._M_value;
  _1 = __v$_M_value_10;
  _2 = (unsigned char) _1;
  _3 = (int) _2;
  _9 = _2 & 1;
  _4 = (int) _9;
  _5 = __v$_M_value_10;
  _6 = (int) _5;
  _8 = _4 == _6;
which is not optimized.

	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
  2021-05-19  9:09   ` Jakub Jelinek
@ 2021-05-19  9:51     ` Jakub Jelinek
  2021-05-19 11:13       ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-05-19  9:51 UTC (permalink / raw)
  To: Christophe Lyon, Richard Biener, gcc Patches

On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > After this update, the test fails on arm and aarch64: according to the
> > logs, the optimization is still performed 14 times.
> 
> Seems this is because
>               if (change
>                   && !flag_syntax_only
>                   && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
>                       == ZERO_EXTEND))
>                 {
>                   tree uns = unsigned_type_for (TREE_TYPE (and0));
>                   and0 = fold_convert_loc (loc, uns, and0);
>                   and1 = fold_convert_loc (loc, uns, and1);
>                 }
> in fold-const.c adds on these targets extra casts that prevent the
> optimizations.

This patch seems to fix it (but I don't have an easy way to test on aarch64
or arm on the trunk and 11 branch would need numerous backports).

2021-05-19  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/94589
	* match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
	of integral conversions.

--- gcc/match.pd.jj	2021-05-15 10:10:28.000000000 +0200
+++ gcc/match.pd	2021-05-19 11:34:42.130624557 +0200
@@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
  (simplify
   (cmp:c (bit_and:c @0 @1) @0)
   (cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
+ (simplify
+  (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+       && INTEGRAL_TYPE_P (TREE_TYPE (@3))
+       && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
+       && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
+       && !wi::neg_p (wi::to_wide (@1)))
+   (cmp (bit_and @0 (convert (bit_not @1)))
+	{ build_zero_cst (TREE_TYPE (@0)); })))
 
  /* (X | Y) == Y becomes (X & ~Y) == 0.  */
  (simplify


	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
  2021-05-19  9:51     ` [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589] Jakub Jelinek
@ 2021-05-19 11:13       ` Richard Biener
  2021-05-19 11:29         ` Christophe Lyon
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-05-19 11:13 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Christophe Lyon, gcc Patches

On Wed, 19 May 2021, Jakub Jelinek wrote:

> On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > > After this update, the test fails on arm and aarch64: according to the
> > > logs, the optimization is still performed 14 times.
> > 
> > Seems this is because
> >               if (change
> >                   && !flag_syntax_only
> >                   && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
> >                       == ZERO_EXTEND))
> >                 {
> >                   tree uns = unsigned_type_for (TREE_TYPE (and0));
> >                   and0 = fold_convert_loc (loc, uns, and0);
> >                   and1 = fold_convert_loc (loc, uns, and1);
> >                 }
> > in fold-const.c adds on these targets extra casts that prevent the
> > optimizations.
> 
> This patch seems to fix it (but I don't have an easy way to test on aarch64
> or arm on the trunk and 11 branch would need numerous backports).

OK if somebody manages to test on arm/aarch64.

Richard.

> 2021-05-19  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/94589
> 	* match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
> 	of integral conversions.
> 
> --- gcc/match.pd.jj	2021-05-15 10:10:28.000000000 +0200
> +++ gcc/match.pd	2021-05-19 11:34:42.130624557 +0200
> @@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
>   (simplify
>    (cmp:c (bit_and:c @0 @1) @0)
>    (cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
> + (simplify
> +  (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> +       && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
> +       && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
> +       && !wi::neg_p (wi::to_wide (@1)))
> +   (cmp (bit_and @0 (convert (bit_not @1)))
> +	{ build_zero_cst (TREE_TYPE (@0)); })))
>  
>   /* (X | Y) == Y becomes (X & ~Y) == 0.  */
>   (simplify
> 
> 
> 	Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
  2021-05-19 11:13       ` Richard Biener
@ 2021-05-19 11:29         ` Christophe Lyon
  2021-05-19 12:19           ` Christophe Lyon
  0 siblings, 1 reply; 8+ messages in thread
From: Christophe Lyon @ 2021-05-19 11:29 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc Patches

On Wed, 19 May 2021 at 13:13, Richard Biener <rguenther@suse.de> wrote:
>
> On Wed, 19 May 2021, Jakub Jelinek wrote:
>
> > On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > > On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > > > After this update, the test fails on arm and aarch64: according to the
> > > > logs, the optimization is still performed 14 times.
> > >
> > > Seems this is because
> > >               if (change
> > >                   && !flag_syntax_only
> > >                   && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
> > >                       == ZERO_EXTEND))
> > >                 {
> > >                   tree uns = unsigned_type_for (TREE_TYPE (and0));
> > >                   and0 = fold_convert_loc (loc, uns, and0);
> > >                   and1 = fold_convert_loc (loc, uns, and1);
> > >                 }
> > > in fold-const.c adds on these targets extra casts that prevent the
> > > optimizations.
> >
> > This patch seems to fix it (but I don't have an easy way to test on aarch64
> > or arm on the trunk and 11 branch would need numerous backports).
>
> OK if somebody manages to test on arm/aarch64.
>
I confirm it fixes the problem on arm. (aarch64 in progress)

Thanks!

> Richard.
>
> > 2021-05-19  Jakub Jelinek  <jakub@redhat.com>
> >
> >       PR tree-optimization/94589
> >       * match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
> >       of integral conversions.
> >
> > --- gcc/match.pd.jj   2021-05-15 10:10:28.000000000 +0200
> > +++ gcc/match.pd      2021-05-19 11:34:42.130624557 +0200
> > @@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
> >   (simplify
> >    (cmp:c (bit_and:c @0 @1) @0)
> >    (cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
> > + (simplify
> > +  (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
> > +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > +       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> > +       && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> > +       && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
> > +       && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
> > +       && !wi::neg_p (wi::to_wide (@1)))
> > +   (cmp (bit_and @0 (convert (bit_not @1)))
> > +     { build_zero_cst (TREE_TYPE (@0)); })))
> >
> >   /* (X | Y) == Y becomes (X & ~Y) == 0.  */
> >   (simplify
> >
> >
> >       Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
  2021-05-19 11:29         ` Christophe Lyon
@ 2021-05-19 12:19           ` Christophe Lyon
  0 siblings, 0 replies; 8+ messages in thread
From: Christophe Lyon @ 2021-05-19 12:19 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, gcc Patches

On Wed, 19 May 2021 at 13:29, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
>
> On Wed, 19 May 2021 at 13:13, Richard Biener <rguenther@suse.de> wrote:
> >
> > On Wed, 19 May 2021, Jakub Jelinek wrote:
> >
> > > On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > > > On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > > > > After this update, the test fails on arm and aarch64: according to the
> > > > > logs, the optimization is still performed 14 times.
> > > >
> > > > Seems this is because
> > > >               if (change
> > > >                   && !flag_syntax_only
> > > >                   && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
> > > >                       == ZERO_EXTEND))
> > > >                 {
> > > >                   tree uns = unsigned_type_for (TREE_TYPE (and0));
> > > >                   and0 = fold_convert_loc (loc, uns, and0);
> > > >                   and1 = fold_convert_loc (loc, uns, and1);
> > > >                 }
> > > > in fold-const.c adds on these targets extra casts that prevent the
> > > > optimizations.
> > >
> > > This patch seems to fix it (but I don't have an easy way to test on aarch64
> > > or arm on the trunk and 11 branch would need numerous backports).
> >
> > OK if somebody manages to test on arm/aarch64.
> >
> I confirm it fixes the problem on arm. (aarch64 in progress)
>
And aarch64 is OK too.

> Thanks!
>
> > Richard.
> >
> > > 2021-05-19  Jakub Jelinek  <jakub@redhat.com>
> > >
> > >       PR tree-optimization/94589
> > >       * match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
> > >       of integral conversions.
> > >
> > > --- gcc/match.pd.jj   2021-05-15 10:10:28.000000000 +0200
> > > +++ gcc/match.pd      2021-05-19 11:34:42.130624557 +0200
> > > @@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
> > >   (simplify
> > >    (cmp:c (bit_and:c @0 @1) @0)
> > >    (cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
> > > + (simplify
> > > +  (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
> > > +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > > +       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> > > +       && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> > > +       && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
> > > +       && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
> > > +       && !wi::neg_p (wi::to_wide (@1)))
> > > +   (cmp (bit_and @0 (convert (bit_not @1)))
> > > +     { build_zero_cst (TREE_TYPE (@0)); })))
> > >
> > >   /* (X | Y) == Y becomes (X & ~Y) == 0.  */
> > >   (simplify
> > >
> > >
> > >       Jakub

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-05-19 12:20 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-18  7:42 [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589] Jakub Jelinek
2021-05-18  7:55 ` Richard Biener
2021-05-19  8:15 ` Christophe Lyon
2021-05-19  9:09   ` Jakub Jelinek
2021-05-19  9:51     ` [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589] Jakub Jelinek
2021-05-19 11:13       ` Richard Biener
2021-05-19 11:29         ` Christophe Lyon
2021-05-19 12:19           ` Christophe Lyon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).