* [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
@ 2021-05-18 7:42 Jakub Jelinek
2021-05-18 7:55 ` Richard Biener
2021-05-19 8:15 ` Christophe Lyon
0 siblings, 2 replies; 8+ messages in thread
From: Jakub Jelinek @ 2021-05-18 7:42 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches
Hi!
As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
>= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
>= comparison is implemented as (res & 1) == res to choose the 0 and 1
cases from that. As we optimize that only with -ffinite-math-only, the
2 case is assumed not to happen and my earlier match.pd change optimizes
(res & 1) == res into (res & ~1) == 0, so this patch pattern matches
that case and handles it like res >= 0.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2021-05-18 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/94589
* tree-ssa-phiopt.c (spaceship_replacement): Pattern match
phi result used in (res & ~1) == 0 comparison as res >= 0 as
res == 2 would be UB with -ffinite-math-only.
* g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.
--- gcc/tree-ssa-phiopt.c.jj 2021-05-06 14:05:03.254763364 +0200
+++ gcc/tree-ssa-phiopt.c 2021-05-17 14:51:55.648356364 +0200
@@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
edge e0, edge e1, gphi *phi,
tree arg0, tree arg1)
{
- if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
- || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
+ tree phires = PHI_RESULT (phi);
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
+ || TYPE_UNSIGNED (TREE_TYPE (phires))
|| !tree_fits_shwi_p (arg0)
|| !tree_fits_shwi_p (arg1)
|| !IN_RANGE (tree_to_shwi (arg0), -1, 2)
@@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
use_operand_p use_p;
gimple *use_stmt;
- if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
+ if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
return false;
- if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
+ if (!single_imm_use (phires, &use_p, &use_stmt))
return false;
enum tree_code cmp;
tree lhs, rhs;
+ gimple *orig_use_stmt = use_stmt;
+ tree orig_use_lhs = NULL_TREE;
+ int prec = TYPE_PRECISION (TREE_TYPE (phires));
+ if (is_gimple_assign (use_stmt)
+ && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
+ && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
+ && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
+ == wi::shifted_mask (1, prec - 1, false, prec)))
+ {
+ /* For partial_ordering result operator>= with unspec as second
+ argument is (res & 1) == res, folded by match.pd into
+ (res & ~1) == 0. */
+ orig_use_lhs = gimple_assign_lhs (use_stmt);
+ if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
+ return false;
+ if (EDGE_COUNT (phi_bb->preds) != 4)
+ return false;
+ if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
+ return false;
+ }
if (gimple_code (use_stmt) == GIMPLE_COND)
{
cmp = gimple_cond_code (use_stmt);
@@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
default:
return false;
}
- if (lhs != PHI_RESULT (phi)
+ if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
|| !tree_fits_shwi_p (rhs)
|| !IN_RANGE (tree_to_shwi (rhs), -1, 1))
return false;
+ if (orig_use_lhs)
+ {
+ if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
+ return false;
+ /* As for -ffast-math we assume the 2 return to be
+ impossible, canonicalize (res & ~1) == 0 into
+ res >= 0 and (res & ~1) != 0 as res < 0. */
+ cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
+ }
if (!empty_block_p (middle_bb))
return false;
@@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
return false;
- /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1. */
+ /* lhs1 one_cmp rhs1 results in phires of 1. */
enum tree_code one_cmp;
if ((cmp1 == LT_EXPR)
^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
@@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
use_operand_p use_p;
imm_use_iterator iter;
bool has_debug_uses = false;
- FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
+ FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
{
gimple *use_stmt = USE_STMT (use_p);
+ if (orig_use_lhs && use_stmt == orig_use_stmt)
+ continue;
gcc_assert (is_gimple_debug (use_stmt));
has_debug_uses = true;
break;
}
+ if (orig_use_lhs)
+ {
+ if (!has_debug_uses)
+ FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
+ {
+ gimple *use_stmt = USE_STMT (use_p);
+ gcc_assert (is_gimple_debug (use_stmt));
+ has_debug_uses = true;
+ }
+ gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
+ tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
+ gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
+ update_stmt (orig_use_stmt);
+ }
if (has_debug_uses)
{
@@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
Ignore the value of 2, because if NaNs aren't expected,
all floating point numbers should be comparable. */
gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
- tree type = TREE_TYPE (PHI_RESULT (phi));
+ tree type = TREE_TYPE (phires);
tree temp1 = make_node (DEBUG_EXPR_DECL);
DECL_ARTIFICIAL (temp1) = 1;
TREE_TYPE (temp1) = type;
@@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
g = gimple_build_debug_bind (temp2, t, phi);
gsi_insert_before (&gsi, g, GSI_SAME_STMT);
- replace_uses_by (PHI_RESULT (phi), temp2);
+ replace_uses_by (phires, temp2);
+ if (orig_use_lhs)
+ replace_uses_by (orig_use_lhs, temp2);
}
}
+ if (orig_use_lhs)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
+ gsi_remove (&gsi, true);
+ }
+
gimple_stmt_iterator psi = gsi_for_stmt (phi);
remove_phi_node (&psi, true);
--- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj 2021-05-06 10:15:23.712751382 +0200
+++ gcc/testsuite/g++.dg/opt/pr94589-2.C 2021-05-17 14:57:21.154800488 +0200
@@ -1,8 +1,8 @@
// PR tree-optimization/94589
// { dg-do compile { target c++20 } }
// { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
-// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
-// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
+// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
+// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
#include <compare>
Jakub
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
2021-05-18 7:42 [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589] Jakub Jelinek
@ 2021-05-18 7:55 ` Richard Biener
2021-05-19 8:15 ` Christophe Lyon
1 sibling, 0 replies; 8+ messages in thread
From: Richard Biener @ 2021-05-18 7:55 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Tue, 18 May 2021, Jakub Jelinek wrote:
> Hi!
>
> As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
> >= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
> >= comparison is implemented as (res & 1) == res to choose the 0 and 1
> cases from that. As we optimize that only with -ffinite-math-only, the
> 2 case is assumed not to happen and my earlier match.pd change optimizes
> (res & 1) == res into (res & ~1) == 0, so this patch pattern matches
> that case and handles it like res >= 0.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
Richard.
> 2021-05-18 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/94589
> * tree-ssa-phiopt.c (spaceship_replacement): Pattern match
> phi result used in (res & ~1) == 0 comparison as res >= 0 as
> res == 2 would be UB with -ffinite-math-only.
>
> * g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.
>
> --- gcc/tree-ssa-phiopt.c.jj 2021-05-06 14:05:03.254763364 +0200
> +++ gcc/tree-ssa-phiopt.c 2021-05-17 14:51:55.648356364 +0200
> @@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
> edge e0, edge e1, gphi *phi,
> tree arg0, tree arg1)
> {
> - if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
> - || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
> + tree phires = PHI_RESULT (phi);
> + if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
> + || TYPE_UNSIGNED (TREE_TYPE (phires))
> || !tree_fits_shwi_p (arg0)
> || !tree_fits_shwi_p (arg1)
> || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
> @@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
>
> use_operand_p use_p;
> gimple *use_stmt;
> - if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
> + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
> return false;
> - if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
> + if (!single_imm_use (phires, &use_p, &use_stmt))
> return false;
> enum tree_code cmp;
> tree lhs, rhs;
> + gimple *orig_use_stmt = use_stmt;
> + tree orig_use_lhs = NULL_TREE;
> + int prec = TYPE_PRECISION (TREE_TYPE (phires));
> + if (is_gimple_assign (use_stmt)
> + && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
> + && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
> + && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
> + == wi::shifted_mask (1, prec - 1, false, prec)))
> + {
> + /* For partial_ordering result operator>= with unspec as second
> + argument is (res & 1) == res, folded by match.pd into
> + (res & ~1) == 0. */
> + orig_use_lhs = gimple_assign_lhs (use_stmt);
> + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> + return false;
> + if (EDGE_COUNT (phi_bb->preds) != 4)
> + return false;
> + if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
> + return false;
> + }
> if (gimple_code (use_stmt) == GIMPLE_COND)
> {
> cmp = gimple_cond_code (use_stmt);
> @@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
> default:
> return false;
> }
> - if (lhs != PHI_RESULT (phi)
> + if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
> || !tree_fits_shwi_p (rhs)
> || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
> return false;
> + if (orig_use_lhs)
> + {
> + if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
> + return false;
> + /* As for -ffast-math we assume the 2 return to be
> + impossible, canonicalize (res & ~1) == 0 into
> + res >= 0 and (res & ~1) != 0 as res < 0. */
> + cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
> + }
>
> if (!empty_block_p (middle_bb))
> return false;
> @@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
> ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
> return false;
>
> - /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1. */
> + /* lhs1 one_cmp rhs1 results in phires of 1. */
> enum tree_code one_cmp;
> if ((cmp1 == LT_EXPR)
> ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
> @@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
> use_operand_p use_p;
> imm_use_iterator iter;
> bool has_debug_uses = false;
> - FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
> + FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
> {
> gimple *use_stmt = USE_STMT (use_p);
> + if (orig_use_lhs && use_stmt == orig_use_stmt)
> + continue;
> gcc_assert (is_gimple_debug (use_stmt));
> has_debug_uses = true;
> break;
> }
> + if (orig_use_lhs)
> + {
> + if (!has_debug_uses)
> + FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
> + {
> + gimple *use_stmt = USE_STMT (use_p);
> + gcc_assert (is_gimple_debug (use_stmt));
> + has_debug_uses = true;
> + }
> + gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> + tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
> + gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
> + update_stmt (orig_use_stmt);
> + }
>
> if (has_debug_uses)
> {
> @@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
> Ignore the value of 2, because if NaNs aren't expected,
> all floating point numbers should be comparable. */
> gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
> - tree type = TREE_TYPE (PHI_RESULT (phi));
> + tree type = TREE_TYPE (phires);
> tree temp1 = make_node (DEBUG_EXPR_DECL);
> DECL_ARTIFICIAL (temp1) = 1;
> TREE_TYPE (temp1) = type;
> @@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
> t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
> g = gimple_build_debug_bind (temp2, t, phi);
> gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> - replace_uses_by (PHI_RESULT (phi), temp2);
> + replace_uses_by (phires, temp2);
> + if (orig_use_lhs)
> + replace_uses_by (orig_use_lhs, temp2);
> }
> }
>
> + if (orig_use_lhs)
> + {
> + gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> + gsi_remove (&gsi, true);
> + }
> +
> gimple_stmt_iterator psi = gsi_for_stmt (phi);
> remove_phi_node (&psi, true);
>
> --- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj 2021-05-06 10:15:23.712751382 +0200
> +++ gcc/testsuite/g++.dg/opt/pr94589-2.C 2021-05-17 14:57:21.154800488 +0200
> @@ -1,8 +1,8 @@
> // PR tree-optimization/94589
> // { dg-do compile { target c++20 } }
> // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
> -// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
> -// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
> +// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
> +// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
>
> #include <compare>
>
>
> Jakub
>
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
2021-05-18 7:42 [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589] Jakub Jelinek
2021-05-18 7:55 ` Richard Biener
@ 2021-05-19 8:15 ` Christophe Lyon
2021-05-19 9:09 ` Jakub Jelinek
1 sibling, 1 reply; 8+ messages in thread
From: Christophe Lyon @ 2021-05-19 8:15 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Richard Biener, gcc Patches
On Tue, 18 May 2021 at 09:42, Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi!
>
> As mentioned earlier, spaceship_replacement didn't optimize partial_ordering
> >= 0 comparisons, because the possible values are -1, 0, 1, 2 and the
> >= comparison is implemented as (res & 1) == res to choose the 0 and 1
> cases from that. As we optimize that only with -ffinite-math-only, the
> 2 case is assumed not to happen and my earlier match.pd change optimizes
> (res & 1) == res into (res & ~1) == 0, so this patch pattern matches
> that case and handles it like res >= 0.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-05-18 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/94589
> * tree-ssa-phiopt.c (spaceship_replacement): Pattern match
> phi result used in (res & ~1) == 0 comparison as res >= 0 as
> res == 2 would be UB with -ffinite-math-only.
>
> * g++.dg/opt/pr94589-2.C: Adjust scan-tree-dump count from 14 to 12.
>
> --- gcc/tree-ssa-phiopt.c.jj 2021-05-06 14:05:03.254763364 +0200
> +++ gcc/tree-ssa-phiopt.c 2021-05-17 14:51:55.648356364 +0200
> @@ -1887,8 +1887,9 @@ spaceship_replacement (basic_block cond_
> edge e0, edge e1, gphi *phi,
> tree arg0, tree arg1)
> {
> - if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi)))
> - || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi)))
> + tree phires = PHI_RESULT (phi);
> + if (!INTEGRAL_TYPE_P (TREE_TYPE (phires))
> + || TYPE_UNSIGNED (TREE_TYPE (phires))
> || !tree_fits_shwi_p (arg0)
> || !tree_fits_shwi_p (arg1)
> || !IN_RANGE (tree_to_shwi (arg0), -1, 2)
> @@ -1902,12 +1903,32 @@ spaceship_replacement (basic_block cond_
>
> use_operand_p use_p;
> gimple *use_stmt;
> - if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
> + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (phires))
> return false;
> - if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt))
> + if (!single_imm_use (phires, &use_p, &use_stmt))
> return false;
> enum tree_code cmp;
> tree lhs, rhs;
> + gimple *orig_use_stmt = use_stmt;
> + tree orig_use_lhs = NULL_TREE;
> + int prec = TYPE_PRECISION (TREE_TYPE (phires));
> + if (is_gimple_assign (use_stmt)
> + && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
> + && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
> + && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
> + == wi::shifted_mask (1, prec - 1, false, prec)))
> + {
> + /* For partial_ordering result operator>= with unspec as second
> + argument is (res & 1) == res, folded by match.pd into
> + (res & ~1) == 0. */
> + orig_use_lhs = gimple_assign_lhs (use_stmt);
> + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
> + return false;
> + if (EDGE_COUNT (phi_bb->preds) != 4)
> + return false;
> + if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
> + return false;
> + }
> if (gimple_code (use_stmt) == GIMPLE_COND)
> {
> cmp = gimple_cond_code (use_stmt);
> @@ -1948,10 +1969,19 @@ spaceship_replacement (basic_block cond_
> default:
> return false;
> }
> - if (lhs != PHI_RESULT (phi)
> + if (lhs != (orig_use_lhs ? orig_use_lhs : phires)
> || !tree_fits_shwi_p (rhs)
> || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
> return false;
> + if (orig_use_lhs)
> + {
> + if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
> + return false;
> + /* As for -ffast-math we assume the 2 return to be
> + impossible, canonicalize (res & ~1) == 0 into
> + res >= 0 and (res & ~1) != 0 as res < 0. */
> + cmp = cmp == EQ_EXPR ? GE_EXPR : LT_EXPR;
> + }
>
> if (!empty_block_p (middle_bb))
> return false;
> @@ -2092,7 +2122,7 @@ spaceship_replacement (basic_block cond_
> ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0)
> return false;
>
> - /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1. */
> + /* lhs1 one_cmp rhs1 results in phires of 1. */
> enum tree_code one_cmp;
> if ((cmp1 == LT_EXPR)
> ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? arg1 : arg0)))
> @@ -2185,13 +2215,29 @@ spaceship_replacement (basic_block cond_
> use_operand_p use_p;
> imm_use_iterator iter;
> bool has_debug_uses = false;
> - FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi))
> + FOR_EACH_IMM_USE_FAST (use_p, iter, phires)
> {
> gimple *use_stmt = USE_STMT (use_p);
> + if (orig_use_lhs && use_stmt == orig_use_stmt)
> + continue;
> gcc_assert (is_gimple_debug (use_stmt));
> has_debug_uses = true;
> break;
> }
> + if (orig_use_lhs)
> + {
> + if (!has_debug_uses)
> + FOR_EACH_IMM_USE_FAST (use_p, iter, orig_use_lhs)
> + {
> + gimple *use_stmt = USE_STMT (use_p);
> + gcc_assert (is_gimple_debug (use_stmt));
> + has_debug_uses = true;
> + }
> + gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> + tree zero = build_zero_cst (TREE_TYPE (orig_use_lhs));
> + gimple_assign_set_rhs_with_ops (&gsi, INTEGER_CST, zero);
> + update_stmt (orig_use_stmt);
> + }
>
> if (has_debug_uses)
> {
> @@ -2203,7 +2249,7 @@ spaceship_replacement (basic_block cond_
> Ignore the value of 2, because if NaNs aren't expected,
> all floating point numbers should be comparable. */
> gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi));
> - tree type = TREE_TYPE (PHI_RESULT (phi));
> + tree type = TREE_TYPE (phires);
> tree temp1 = make_node (DEBUG_EXPR_DECL);
> DECL_ARTIFICIAL (temp1) = 1;
> TREE_TYPE (temp1) = type;
> @@ -2221,10 +2267,18 @@ spaceship_replacement (basic_block cond_
> t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1);
> g = gimple_build_debug_bind (temp2, t, phi);
> gsi_insert_before (&gsi, g, GSI_SAME_STMT);
> - replace_uses_by (PHI_RESULT (phi), temp2);
> + replace_uses_by (phires, temp2);
> + if (orig_use_lhs)
> + replace_uses_by (orig_use_lhs, temp2);
> }
> }
>
> + if (orig_use_lhs)
> + {
> + gimple_stmt_iterator gsi = gsi_for_stmt (orig_use_stmt);
> + gsi_remove (&gsi, true);
> + }
> +
> gimple_stmt_iterator psi = gsi_for_stmt (phi);
> remove_phi_node (&psi, true);
>
> --- gcc/testsuite/g++.dg/opt/pr94589-2.C.jj 2021-05-06 10:15:23.712751382 +0200
> +++ gcc/testsuite/g++.dg/opt/pr94589-2.C 2021-05-17 14:57:21.154800488 +0200
> @@ -1,8 +1,8 @@
> // PR tree-optimization/94589
> // { dg-do compile { target c++20 } }
> // { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" }
> -// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } }
> -// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } }
> +// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } }
> +// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 12 "optimized" } }
>
Hi Jakub,
After this update, the test fails on arm and aarch64: according to the
logs, the optimization is still performed 14 times.
Can you check?
Thanks
Christophe
> #include <compare>
>
>
> Jakub
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589]
2021-05-19 8:15 ` Christophe Lyon
@ 2021-05-19 9:09 ` Jakub Jelinek
2021-05-19 9:51 ` [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589] Jakub Jelinek
0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-05-19 9:09 UTC (permalink / raw)
To: Christophe Lyon; +Cc: gcc Patches, Richard Biener
On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> After this update, the test fails on arm and aarch64: according to the
> logs, the optimization is still performed 14 times.
Seems this is because
if (change
&& !flag_syntax_only
&& (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
== ZERO_EXTEND))
{
tree uns = unsigned_type_for (TREE_TYPE (and0));
and0 = fold_convert_loc (loc, uns, and0);
and1 = fold_convert_loc (loc, uns, and1);
}
in fold-const.c adds on these targets extra casts that prevent the
optimizations, instead of
_1 = __v._M_value;
_2 = (int) _1;
_3 = _2 & 1;
_4 = __v._M_value;
_5 = (int) _4;
D.8503 = _3 == _5;
on x86_64 it is:
_1 = __v._M_value;
_2 = (unsigned char) _1;
_3 = (int) _2;
_4 = _3 & 1;
_5 = __v._M_value;
_6 = (int) _5;
D.10471 = _4 == _6;
Before fre1 it is
__v$_M_value_9 = __v._M_value;
_1 = __v$_M_value_9;
_2 = (int) _1;
_8 = _1 & 1;
_3 = (int) _8;
_4 = __v$_M_value_9;
_5 = (int) _4;
_7 = _4 == _8;
which fre1 using match.pd optimizes into
__v$_M_value_9 = __v._M_value;
_10 = __v$_M_value_9 & -2;
_7 = _10 == 0;
but on aarch64 before fre1 we have:
__v$_M_value_10 = __v._M_value;
_1 = __v$_M_value_10;
_2 = (unsigned char) _1;
_3 = (int) _2;
_9 = _2 & 1;
_4 = (int) _9;
_5 = __v$_M_value_10;
_6 = (int) _5;
_8 = _4 == _6;
which is not optimized.
Jakub
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
2021-05-19 9:09 ` Jakub Jelinek
@ 2021-05-19 9:51 ` Jakub Jelinek
2021-05-19 11:13 ` Richard Biener
0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2021-05-19 9:51 UTC (permalink / raw)
To: Christophe Lyon, Richard Biener, gcc Patches
On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > After this update, the test fails on arm and aarch64: according to the
> > logs, the optimization is still performed 14 times.
>
> Seems this is because
> if (change
> && !flag_syntax_only
> && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
> == ZERO_EXTEND))
> {
> tree uns = unsigned_type_for (TREE_TYPE (and0));
> and0 = fold_convert_loc (loc, uns, and0);
> and1 = fold_convert_loc (loc, uns, and1);
> }
> in fold-const.c adds on these targets extra casts that prevent the
> optimizations.
This patch seems to fix it (but I don't have an easy way to test on aarch64
or arm on the trunk and 11 branch would need numerous backports).
2021-05-19 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/94589
* match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
of integral conversions.
--- gcc/match.pd.jj 2021-05-15 10:10:28.000000000 +0200
+++ gcc/match.pd 2021-05-19 11:34:42.130624557 +0200
@@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
(simplify
(cmp:c (bit_and:c @0 @1) @0)
(cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
+ (simplify
+ (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+ && INTEGRAL_TYPE_P (TREE_TYPE (@3))
+ && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
+ && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
+ && !wi::neg_p (wi::to_wide (@1)))
+ (cmp (bit_and @0 (convert (bit_not @1)))
+ { build_zero_cst (TREE_TYPE (@0)); })))
/* (X | Y) == Y becomes (X & ~Y) == 0. */
(simplify
Jakub
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
2021-05-19 9:51 ` [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589] Jakub Jelinek
@ 2021-05-19 11:13 ` Richard Biener
2021-05-19 11:29 ` Christophe Lyon
0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2021-05-19 11:13 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Christophe Lyon, gcc Patches
On Wed, 19 May 2021, Jakub Jelinek wrote:
> On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > > After this update, the test fails on arm and aarch64: according to the
> > > logs, the optimization is still performed 14 times.
> >
> > Seems this is because
> > if (change
> > && !flag_syntax_only
> > && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
> > == ZERO_EXTEND))
> > {
> > tree uns = unsigned_type_for (TREE_TYPE (and0));
> > and0 = fold_convert_loc (loc, uns, and0);
> > and1 = fold_convert_loc (loc, uns, and1);
> > }
> > in fold-const.c adds on these targets extra casts that prevent the
> > optimizations.
>
> This patch seems to fix it (but I don't have an easy way to test on aarch64
> or arm on the trunk and 11 branch would need numerous backports).
OK if somebody manages to test on arm/aarch64.
Richard.
> 2021-05-19 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/94589
> * match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
> of integral conversions.
>
> --- gcc/match.pd.jj 2021-05-15 10:10:28.000000000 +0200
> +++ gcc/match.pd 2021-05-19 11:34:42.130624557 +0200
> @@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
> (simplify
> (cmp:c (bit_and:c @0 @1) @0)
> (cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
> + (simplify
> + (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> + && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> + && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> + && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
> + && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
> + && !wi::neg_p (wi::to_wide (@1)))
> + (cmp (bit_and @0 (convert (bit_not @1)))
> + { build_zero_cst (TREE_TYPE (@0)); })))
>
> /* (X | Y) == Y becomes (X & ~Y) == 0. */
> (simplify
>
>
> Jakub
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
2021-05-19 11:13 ` Richard Biener
@ 2021-05-19 11:29 ` Christophe Lyon
2021-05-19 12:19 ` Christophe Lyon
0 siblings, 1 reply; 8+ messages in thread
From: Christophe Lyon @ 2021-05-19 11:29 UTC (permalink / raw)
To: Richard Biener; +Cc: Jakub Jelinek, gcc Patches
On Wed, 19 May 2021 at 13:13, Richard Biener <rguenther@suse.de> wrote:
>
> On Wed, 19 May 2021, Jakub Jelinek wrote:
>
> > On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > > On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > > > After this update, the test fails on arm and aarch64: according to the
> > > > logs, the optimization is still performed 14 times.
> > >
> > > Seems this is because
> > > if (change
> > > && !flag_syntax_only
> > > && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
> > > == ZERO_EXTEND))
> > > {
> > > tree uns = unsigned_type_for (TREE_TYPE (and0));
> > > and0 = fold_convert_loc (loc, uns, and0);
> > > and1 = fold_convert_loc (loc, uns, and1);
> > > }
> > > in fold-const.c adds on these targets extra casts that prevent the
> > > optimizations.
> >
> > This patch seems to fix it (but I don't have an easy way to test on aarch64
> > or arm on the trunk and 11 branch would need numerous backports).
>
> OK if somebody manages to test on arm/aarch64.
>
I confirm it fixes the problem on arm. (aarch64 in progress)
Thanks!
> Richard.
>
> > 2021-05-19 Jakub Jelinek <jakub@redhat.com>
> >
> > PR tree-optimization/94589
> > * match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
> > of integral conversions.
> >
> > --- gcc/match.pd.jj 2021-05-15 10:10:28.000000000 +0200
> > +++ gcc/match.pd 2021-05-19 11:34:42.130624557 +0200
> > @@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
> > (simplify
> > (cmp:c (bit_and:c @0 @1) @0)
> > (cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
> > + (simplify
> > + (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
> > + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > + && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> > + && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> > + && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
> > + && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
> > + && !wi::neg_p (wi::to_wide (@1)))
> > + (cmp (bit_and @0 (convert (bit_not @1)))
> > + { build_zero_cst (TREE_TYPE (@0)); })))
> >
> > /* (X | Y) == Y becomes (X & ~Y) == 0. */
> > (simplify
> >
> >
> > Jakub
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589]
2021-05-19 11:29 ` Christophe Lyon
@ 2021-05-19 12:19 ` Christophe Lyon
0 siblings, 0 replies; 8+ messages in thread
From: Christophe Lyon @ 2021-05-19 12:19 UTC (permalink / raw)
To: Richard Biener; +Cc: Jakub Jelinek, gcc Patches
On Wed, 19 May 2021 at 13:29, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
>
> On Wed, 19 May 2021 at 13:13, Richard Biener <rguenther@suse.de> wrote:
> >
> > On Wed, 19 May 2021, Jakub Jelinek wrote:
> >
> > > On Wed, May 19, 2021 at 11:09:19AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > > > On Wed, May 19, 2021 at 10:15:53AM +0200, Christophe Lyon via Gcc-patches wrote:
> > > > > After this update, the test fails on arm and aarch64: according to the
> > > > > logs, the optimization is still performed 14 times.
> > > >
> > > > Seems this is because
> > > > if (change
> > > > && !flag_syntax_only
> > > > && (load_extend_op (TYPE_MODE (TREE_TYPE (and0)))
> > > > == ZERO_EXTEND))
> > > > {
> > > > tree uns = unsigned_type_for (TREE_TYPE (and0));
> > > > and0 = fold_convert_loc (loc, uns, and0);
> > > > and1 = fold_convert_loc (loc, uns, and1);
> > > > }
> > > > in fold-const.c adds on these targets extra casts that prevent the
> > > > optimizations.
> > >
> > > This patch seems to fix it (but I don't have an easy way to test on aarch64
> > > or arm on the trunk and 11 branch would need numerous backports).
> >
> > OK if somebody manages to test on arm/aarch64.
> >
> I confirm it fixes the problem on arm. (aarch64 in progress)
>
And aarch64 is OK too.
> Thanks!
>
> > Richard.
> >
> > > 2021-05-19 Jakub Jelinek <jakub@redhat.com>
> > >
> > > PR tree-optimization/94589
> > > * match.pd ((X & Y) == X -> (X & ~Y) == 0): Simplify even in presence
> > > of integral conversions.
> > >
> > > --- gcc/match.pd.jj 2021-05-15 10:10:28.000000000 +0200
> > > +++ gcc/match.pd 2021-05-19 11:34:42.130624557 +0200
> > > @@ -4769,6 +4769,16 @@ (define_operator_list COND_TERNARY
> > > (simplify
> > > (cmp:c (bit_and:c @0 @1) @0)
> > > (cmp (bit_and @0 (bit_not! @1)) { build_zero_cst (TREE_TYPE (@0)); }))
> > > + (simplify
> > > + (cmp:c (convert@3 (bit_and (convert@2 @0) INTEGER_CST@1)) (convert @0))
> > > + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > > + && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> > > + && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> > > + && TYPE_PRECISION (TREE_TYPE (@2)) == TYPE_PRECISION (TREE_TYPE (@0))
> > > + && TYPE_PRECISION (TREE_TYPE (@3)) > TYPE_PRECISION (TREE_TYPE (@2))
> > > + && !wi::neg_p (wi::to_wide (@1)))
> > > + (cmp (bit_and @0 (convert (bit_not @1)))
> > > + { build_zero_cst (TREE_TYPE (@0)); })))
> > >
> > > /* (X | Y) == Y becomes (X & ~Y) == 0. */
> > > (simplify
> > >
> > >
> > > Jakub
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2021-05-19 12:20 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-18 7:42 [PATCH] phiopt: Optimize partial_ordering spaceship >= 0 -ffinite-math-only [PR94589] Jakub Jelinek
2021-05-18 7:55 ` Richard Biener
2021-05-19 8:15 ` Christophe Lyon
2021-05-19 9:09 ` Jakub Jelinek
2021-05-19 9:51 ` [PATCH] phiopt: Simplify (X & Y) == X -> (X & ~Y) == 0 even in presence of integral conversions [PR94589] Jakub Jelinek
2021-05-19 11:13 ` Richard Biener
2021-05-19 11:29 ` Christophe Lyon
2021-05-19 12:19 ` Christophe Lyon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).