* [PATCH] vect/ifcvt: Add vec_cond fallback and check for vector versioning.
@ 2023-11-07 7:33 Robin Dapp
2023-11-07 8:19 ` Richard Biener
0 siblings, 1 reply; 3+ messages in thread
From: Robin Dapp @ 2023-11-07 7:33 UTC (permalink / raw)
To: gcc-patches, Richard Biener; +Cc: rdapp.gcc
Hi,
this restricts tree-ifcvt to only create COND_OPs when we versioned the
loop for vectorization. Apart from that it re-creates a VEC_COND_EXPR
in vect_expand_fold_left if we emitted a COND_OP.
I'm still missing the "bail out" part for vect_expand_fold_left, though?
Bootstrap, testsuites are unchanged but so they were for the original
patch, so... :)
Regards
Robin
gcc/ChangeLog:
PR tree-optimization/112361
PR target/112359
PR middle-end/112406
* tree-if-conv.cc (convert_scalar_cond_reduction): Remember if
loop was versioned and only then create COND_OPs.
(predicate_scalar_phi): Do not create COND_OP when not
vectorizing.
* tree-vect-loop.cc (vect_expand_fold_left): Re-create
VEC_COND_EXPR.
(vectorize_fold_left_reduction): Pass mask and whether a cond_op
was used to vect_expand_fold_left.
gcc/testsuite/ChangeLog:
* gcc.dg/pr112359.c: New test.
---
gcc/testsuite/gcc.dg/pr112359.c | 14 +++++++++++
gcc/tree-if-conv.cc | 41 ++++++++++++++++++++++-----------
gcc/tree-vect-loop.cc | 22 ++++++++++++++++--
3 files changed, 62 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/pr112359.c
diff --git a/gcc/testsuite/gcc.dg/pr112359.c b/gcc/testsuite/gcc.dg/pr112359.c
new file mode 100644
index 00000000000..26c77457399
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr112359.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mavx512fp16 -ftree-loop-if-convert" } */
+
+int i, c;
+unsigned long long u;
+
+void
+foo (void)
+{
+ for (; i; i++)
+ if (c)
+ u |= i;
+}
+
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index a7a751b668c..0190cf2369e 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1845,12 +1845,15 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
res_2 = res_13 + _ifc__1;
Argument SWAP tells that arguments of conditional expression should be
swapped.
+ If LOOP_VERSIONED is true if we assume that we versioned the loop for
+ vectorization. In that case we can create a COND_OP.
Returns rhs of resulting PHI assignment. */
static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
tree cond, tree op0, tree op1, bool swap,
- bool has_nop, gimple* nop_reduc)
+ bool has_nop, gimple* nop_reduc,
+ bool loop_versioned)
{
gimple_stmt_iterator stmt_it;
gimple *new_assign;
@@ -1874,7 +1877,7 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
The COND_OP will have a neutral_op else value. */
internal_fn ifn;
ifn = get_conditional_internal_fn (reduction_op);
- if (ifn != IFN_LAST
+ if (loop_versioned && ifn != IFN_LAST
&& vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
&& !swap)
{
@@ -2129,11 +2132,13 @@ cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
The generated code is inserted at GSI that points to the top of
basic block's statement list.
If PHI node has more than two arguments a chain of conditional
- expression is produced. */
+ expression is produced.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
+predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
{
gimple *new_stmt = NULL, *reduc, *nop_reduc;
tree rhs, res, arg0, arg1, op0, op1, scev;
@@ -2213,7 +2218,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
/* Convert reduction stmt into vectorizable form. */
rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
true_bb != gimple_bb (reduc),
- has_nop, nop_reduc);
+ has_nop, nop_reduc,
+ loop_versioned);
redundant_ssa_names.safe_push (std::make_pair (res, rhs));
}
else
@@ -2311,7 +2317,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
/* Convert reduction stmt into vectorizable form. */
rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
- swap, has_nop, nop_reduc);
+ swap, has_nop, nop_reduc,
+ loop_versioned);
redundant_ssa_names.safe_push (std::make_pair (res, rhs));
}
new_stmt = gimple_build_assign (res, rhs);
@@ -2334,10 +2341,12 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
- LOOP->header block with conditional modify expressions. */
+ LOOP->header block with conditional modify expressions.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-predicate_all_scalar_phis (class loop *loop)
+predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
{
basic_block bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -2365,7 +2374,7 @@ predicate_all_scalar_phis (class loop *loop)
gsi_next (&phi_gsi);
else
{
- predicate_scalar_phi (phi, &gsi);
+ predicate_scalar_phi (phi, &gsi, loop_versioned);
remove_phi_node (&phi_gsi, false);
}
}
@@ -2887,10 +2896,12 @@ remove_conditions_and_labels (loop_p loop)
}
/* Combine all the basic blocks from LOOP into one or two super basic
- blocks. Replace PHI nodes with conditional modify expressions. */
+ blocks. Replace PHI nodes with conditional modify expressions.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-combine_blocks (class loop *loop)
+combine_blocks (class loop *loop, bool loop_versioned)
{
basic_block bb, exit_bb, merge_target_bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -2900,7 +2911,7 @@ combine_blocks (class loop *loop)
remove_conditions_and_labels (loop);
insert_gimplified_predicates (loop);
- predicate_all_scalar_phis (loop);
+ predicate_all_scalar_phis (loop, loop_versioned);
if (need_to_predicate || need_to_rewrite_undefined)
predicate_statements (loop);
@@ -3727,6 +3738,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
bitmap exit_bbs;
edge pe;
auto_vec<data_reference_p, 10> refs;
+ bool loop_versioned;
again:
rloop = NULL;
@@ -3736,6 +3748,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
need_to_predicate = false;
need_to_rewrite_undefined = false;
any_complicated_phi = false;
+ loop_versioned = false;
/* Apply more aggressive if-conversion when loop or its outer loop were
marked with simd pragma. When that's the case, we try to if-convert
@@ -3843,6 +3856,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
will re-use that for things like runtime alias versioning
whose condition can end up using those invariants. */
pe = single_pred_edge (gimple_bb (preds->last ()));
+
+ loop_versioned = true;
}
if (need_to_lower_bitfields)
@@ -3875,7 +3890,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
/* Now all statements are if-convertible. Combine all the basic
blocks into one huge basic block doing the if-conversion
on-the-fly. */
- combine_blocks (loop);
+ combine_blocks (loop, loop_versioned);
}
/* Perform local CSE, this esp. helps the vectorizer analysis if loads
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 362856a6507..d48685ae308 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6907,7 +6907,8 @@ merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
static tree
vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
- tree_code code, tree lhs, tree vector_rhs)
+ tree_code code, tree lhs, tree vector_rhs,
+ bool is_cond_op, tree mask)
{
tree vectype = TREE_TYPE (vector_rhs);
tree scalar_type = TREE_TYPE (vectype);
@@ -6915,6 +6916,22 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
+ /* If we created a COND_OP in ifcvt re-create a VEC_COND_EXPR to mask the
+ input here in order to be able to perform an unconditional element-wise
+ reduction of it. */
+ if (is_cond_op)
+ {
+ tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL,
+ "masked_vector_rhs");
+ tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE,
+ false);
+ tree vector_identity = build_vector_from_val (vectype, neutral_op);
+ gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR,
+ mask, vector_rhs, vector_identity);
+ gsi_insert_before (gsi, select, GSI_SAME_STMT);
+ vector_rhs = masked_vector_rhs;
+ }
+
for (unsigned HOST_WIDE_INT bit_offset = 0;
bit_offset < vec_size_in_bits;
bit_offset += element_bitsize)
@@ -7151,7 +7168,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
else
{
reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
- tree_code (code), reduc_var, def0);
+ tree_code (code), reduc_var, def0,
+ is_cond_op, mask);
new_stmt = SSA_NAME_DEF_STMT (reduc_var);
/* Remove the statement, so that we can use the same code paths
as for statements that we've just created. */
--
2.41.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] vect/ifcvt: Add vec_cond fallback and check for vector versioning.
2023-11-07 7:33 [PATCH] vect/ifcvt: Add vec_cond fallback and check for vector versioning Robin Dapp
@ 2023-11-07 8:19 ` Richard Biener
2023-11-07 14:18 ` Robin Dapp
0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2023-11-07 8:19 UTC (permalink / raw)
To: Robin Dapp; +Cc: gcc-patches
On Tue, 7 Nov 2023, Robin Dapp wrote:
> Hi,
>
> this restricts tree-ifcvt to only create COND_OPs when we versioned the
> loop for vectorization. Apart from that it re-creates a VEC_COND_EXPR
> in vect_expand_fold_left if we emitted a COND_OP.
>
> I'm still missing the "bail out" part for vect_expand_fold_left, though?
>
> Bootstrap, testsuites are unchanged but so they were for the original
> patch, so... :)
Looks good, maybe ...
> Regards
> Robin
>
> gcc/ChangeLog:
>
> PR tree-optimization/112361
> PR target/112359
> PR middle-end/112406
>
> * tree-if-conv.cc (convert_scalar_cond_reduction): Remember if
> loop was versioned and only then create COND_OPs.
> (predicate_scalar_phi): Do not create COND_OP when not
> vectorizing.
> * tree-vect-loop.cc (vect_expand_fold_left): Re-create
> VEC_COND_EXPR.
> (vectorize_fold_left_reduction): Pass mask and whether a cond_op
> was used to vect_expand_fold_left.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr112359.c: New test.
> ---
> gcc/testsuite/gcc.dg/pr112359.c | 14 +++++++++++
> gcc/tree-if-conv.cc | 41 ++++++++++++++++++++++-----------
> gcc/tree-vect-loop.cc | 22 ++++++++++++++++--
> 3 files changed, 62 insertions(+), 15 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/pr112359.c
>
> diff --git a/gcc/testsuite/gcc.dg/pr112359.c b/gcc/testsuite/gcc.dg/pr112359.c
> new file mode 100644
> index 00000000000..26c77457399
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr112359.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -mavx512fp16 -ftree-loop-if-convert" } */
> +
> +int i, c;
> +unsigned long long u;
> +
> +void
> +foo (void)
> +{
> + for (; i; i++)
> + if (c)
> + u |= i;
> +}
> +
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index a7a751b668c..0190cf2369e 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -1845,12 +1845,15 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
> res_2 = res_13 + _ifc__1;
> Argument SWAP tells that arguments of conditional expression should be
> swapped.
> + If LOOP_VERSIONED is true if we assume that we versioned the loop for
> + vectorization. In that case we can create a COND_OP.
> Returns rhs of resulting PHI assignment. */
>
> static tree
> convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
> tree cond, tree op0, tree op1, bool swap,
> - bool has_nop, gimple* nop_reduc)
> + bool has_nop, gimple* nop_reduc,
> + bool loop_versioned)
> {
> gimple_stmt_iterator stmt_it;
> gimple *new_assign;
> @@ -1874,7 +1877,7 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
> The COND_OP will have a neutral_op else value. */
> internal_fn ifn;
> ifn = get_conditional_internal_fn (reduction_op);
> - if (ifn != IFN_LAST
> + if (loop_versioned && ifn != IFN_LAST
> && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
> && !swap)
> {
> @@ -2129,11 +2132,13 @@ cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
> The generated code is inserted at GSI that points to the top of
> basic block's statement list.
> If PHI node has more than two arguments a chain of conditional
> - expression is produced. */
> + expression is produced.
> + LOOP_VERSIONED should be true if we know that the loop was versioned for
> + vectorization. */
>
>
> static void
> -predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
> +predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
> {
> gimple *new_stmt = NULL, *reduc, *nop_reduc;
> tree rhs, res, arg0, arg1, op0, op1, scev;
> @@ -2213,7 +2218,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
> /* Convert reduction stmt into vectorizable form. */
> rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
> true_bb != gimple_bb (reduc),
> - has_nop, nop_reduc);
> + has_nop, nop_reduc,
> + loop_versioned);
> redundant_ssa_names.safe_push (std::make_pair (res, rhs));
> }
> else
> @@ -2311,7 +2317,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
> {
> /* Convert reduction stmt into vectorizable form. */
> rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
> - swap, has_nop, nop_reduc);
> + swap, has_nop, nop_reduc,
> + loop_versioned);
> redundant_ssa_names.safe_push (std::make_pair (res, rhs));
> }
> new_stmt = gimple_build_assign (res, rhs);
> @@ -2334,10 +2341,12 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
> }
>
> /* Replaces in LOOP all the scalar phi nodes other than those in the
> - LOOP->header block with conditional modify expressions. */
> + LOOP->header block with conditional modify expressions.
> + LOOP_VERSIONED should be true if we know that the loop was versioned for
> + vectorization. */
>
> static void
> -predicate_all_scalar_phis (class loop *loop)
> +predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
> {
> basic_block bb;
> unsigned int orig_loop_num_nodes = loop->num_nodes;
> @@ -2365,7 +2374,7 @@ predicate_all_scalar_phis (class loop *loop)
> gsi_next (&phi_gsi);
> else
> {
> - predicate_scalar_phi (phi, &gsi);
> + predicate_scalar_phi (phi, &gsi, loop_versioned);
> remove_phi_node (&phi_gsi, false);
> }
> }
> @@ -2887,10 +2896,12 @@ remove_conditions_and_labels (loop_p loop)
> }
>
> /* Combine all the basic blocks from LOOP into one or two super basic
> - blocks. Replace PHI nodes with conditional modify expressions. */
> + blocks. Replace PHI nodes with conditional modify expressions.
> + LOOP_VERSIONED should be true if we know that the loop was versioned for
> + vectorization. */
>
> static void
> -combine_blocks (class loop *loop)
> +combine_blocks (class loop *loop, bool loop_versioned)
> {
> basic_block bb, exit_bb, merge_target_bb;
> unsigned int orig_loop_num_nodes = loop->num_nodes;
> @@ -2900,7 +2911,7 @@ combine_blocks (class loop *loop)
>
> remove_conditions_and_labels (loop);
> insert_gimplified_predicates (loop);
> - predicate_all_scalar_phis (loop);
> + predicate_all_scalar_phis (loop, loop_versioned);
>
> if (need_to_predicate || need_to_rewrite_undefined)
> predicate_statements (loop);
> @@ -3727,6 +3738,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
> bitmap exit_bbs;
> edge pe;
> auto_vec<data_reference_p, 10> refs;
> + bool loop_versioned;
>
> again:
> rloop = NULL;
> @@ -3736,6 +3748,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
> need_to_predicate = false;
> need_to_rewrite_undefined = false;
> any_complicated_phi = false;
> + loop_versioned = false;
>
> /* Apply more aggressive if-conversion when loop or its outer loop were
> marked with simd pragma. When that's the case, we try to if-convert
> @@ -3843,6 +3856,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
> will re-use that for things like runtime alias versioning
> whose condition can end up using those invariants. */
> pe = single_pred_edge (gimple_bb (preds->last ()));
> +
> + loop_versioned = true;
> }
>
> if (need_to_lower_bitfields)
> @@ -3875,7 +3890,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
> /* Now all statements are if-convertible. Combine all the basic
> blocks into one huge basic block doing the if-conversion
> on-the-fly. */
> - combine_blocks (loop);
> + combine_blocks (loop, loop_versioned);
> }
>
> /* Perform local CSE, this esp. helps the vectorizer analysis if loads
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 362856a6507..d48685ae308 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -6907,7 +6907,8 @@ merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
>
> static tree
> vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
> - tree_code code, tree lhs, tree vector_rhs)
> + tree_code code, tree lhs, tree vector_rhs,
> + bool is_cond_op, tree mask)
isn't is_cond_op implied by mask != NULL? That said, if we ever end
up here with a non-cond op but a loop mask we effectively want the
same behavior so I think eliding is_cond_op and instead checking
mask != NULL_TREE below is more future proof.
OK with that change.
I think we already have the "bail out" part for the case of applying
loop masking here:
else if (reduction_type == FOLD_LEFT_REDUCTION
&& internal_fn_mask_index (reduc_fn) == -1
&& FLOAT_TYPE_P (vectype_in)
&& HONOR_SIGN_DEPENDENT_ROUNDING (vectype_in))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't operate on partial vectors because"
" signed zeros cannot be preserved.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
we'd have to do a hard-fail for the case the reduction op is
conditional with a similar condition (the above is too late for a
hard fail I think).
Thanks,
Richard.
> {
> tree vectype = TREE_TYPE (vector_rhs);
> tree scalar_type = TREE_TYPE (vectype);
> @@ -6915,6 +6916,22 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
> unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
> unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
>
> + /* If we created a COND_OP in ifcvt re-create a VEC_COND_EXPR to mask the
> + input here in order to be able to perform an unconditional element-wise
> + reduction of it. */
> + if (is_cond_op)
> + {
> + tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL,
> + "masked_vector_rhs");
> + tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE,
> + false);
> + tree vector_identity = build_vector_from_val (vectype, neutral_op);
> + gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR,
> + mask, vector_rhs, vector_identity);
> + gsi_insert_before (gsi, select, GSI_SAME_STMT);
> + vector_rhs = masked_vector_rhs;
> + }
> +
> for (unsigned HOST_WIDE_INT bit_offset = 0;
> bit_offset < vec_size_in_bits;
> bit_offset += element_bitsize)
> @@ -7151,7 +7168,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
> else
> {
> reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
> - tree_code (code), reduc_var, def0);
> + tree_code (code), reduc_var, def0,
> + is_cond_op, mask);
> new_stmt = SSA_NAME_DEF_STMT (reduc_var);
> /* Remove the statement, so that we can use the same code paths
> as for statements that we've just created. */
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] vect/ifcvt: Add vec_cond fallback and check for vector versioning.
2023-11-07 8:19 ` Richard Biener
@ 2023-11-07 14:18 ` Robin Dapp
0 siblings, 0 replies; 3+ messages in thread
From: Robin Dapp @ 2023-11-07 14:18 UTC (permalink / raw)
To: Richard Biener; +Cc: rdapp.gcc, gcc-patches
> isn't is_cond_op implied by mask != NULL? That said, if we ever end
> up here with a non-cond op but a loop mask we effectively want the
> same behavior so I think eliding is_cond_op and instead checking
> mask != NULL_TREE below is more future proof.
>
> OK with that change.
Thanks, attached v2 with the is_cond_op removed and the comment
adjusted. Also made the test specific to x86 and aarch64.
Going to commit it later.
Regards
Robin
gcc/ChangeLog:
PR tree-optimization/112361
PR target/112359
PR middle-end/112406
* tree-if-conv.cc (convert_scalar_cond_reduction): Remember if
loop was versioned and only then create COND_OPs.
(predicate_scalar_phi): Do not create COND_OP when not
vectorizing.
* tree-vect-loop.cc (vect_expand_fold_left): Re-create
VEC_COND_EXPR.
(vectorize_fold_left_reduction): Pass mask to
vect_expand_fold_left.
gcc/testsuite/ChangeLog:
* gcc.dg/pr112359.c: New test.
---
gcc/testsuite/gcc.dg/pr112359.c | 15 ++++++++++++
gcc/tree-if-conv.cc | 41 ++++++++++++++++++++++-----------
gcc/tree-vect-loop.cc | 22 ++++++++++++++++--
3 files changed, 63 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/pr112359.c
diff --git a/gcc/testsuite/gcc.dg/pr112359.c b/gcc/testsuite/gcc.dg/pr112359.c
new file mode 100644
index 00000000000..38d588f3958
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr112359.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target aarch64*-*-* i?86-*-* x86_64-*-* } } */
+/* { dg-additional-options "-O -std=gnu99 -mavx512fp16 -ftree-loop-if-convert" { target i?86-*-* x86_64-*-* } } */
+/* { dg-additional-options "-O -std=gnu99 -march=armv8.4-a+sve -ftree-loop-if-convert" { target aarch64*-*-* } } */
+
+int i, c;
+unsigned long long u;
+
+void
+foo (void)
+{
+ for (; i; i++)
+ if (c)
+ u |= i;
+}
+
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index a7a751b668c..0190cf2369e 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1845,12 +1845,15 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
res_2 = res_13 + _ifc__1;
Argument SWAP tells that arguments of conditional expression should be
swapped.
+ If LOOP_VERSIONED is true if we assume that we versioned the loop for
+ vectorization. In that case we can create a COND_OP.
Returns rhs of resulting PHI assignment. */
static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
tree cond, tree op0, tree op1, bool swap,
- bool has_nop, gimple* nop_reduc)
+ bool has_nop, gimple* nop_reduc,
+ bool loop_versioned)
{
gimple_stmt_iterator stmt_it;
gimple *new_assign;
@@ -1874,7 +1877,7 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
The COND_OP will have a neutral_op else value. */
internal_fn ifn;
ifn = get_conditional_internal_fn (reduction_op);
- if (ifn != IFN_LAST
+ if (loop_versioned && ifn != IFN_LAST
&& vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
&& !swap)
{
@@ -2129,11 +2132,13 @@ cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
The generated code is inserted at GSI that points to the top of
basic block's statement list.
If PHI node has more than two arguments a chain of conditional
- expression is produced. */
+ expression is produced.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
+predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
{
gimple *new_stmt = NULL, *reduc, *nop_reduc;
tree rhs, res, arg0, arg1, op0, op1, scev;
@@ -2213,7 +2218,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
/* Convert reduction stmt into vectorizable form. */
rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
true_bb != gimple_bb (reduc),
- has_nop, nop_reduc);
+ has_nop, nop_reduc,
+ loop_versioned);
redundant_ssa_names.safe_push (std::make_pair (res, rhs));
}
else
@@ -2311,7 +2317,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
/* Convert reduction stmt into vectorizable form. */
rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
- swap, has_nop, nop_reduc);
+ swap, has_nop, nop_reduc,
+ loop_versioned);
redundant_ssa_names.safe_push (std::make_pair (res, rhs));
}
new_stmt = gimple_build_assign (res, rhs);
@@ -2334,10 +2341,12 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
}
/* Replaces in LOOP all the scalar phi nodes other than those in the
- LOOP->header block with conditional modify expressions. */
+ LOOP->header block with conditional modify expressions.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-predicate_all_scalar_phis (class loop *loop)
+predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
{
basic_block bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -2365,7 +2374,7 @@ predicate_all_scalar_phis (class loop *loop)
gsi_next (&phi_gsi);
else
{
- predicate_scalar_phi (phi, &gsi);
+ predicate_scalar_phi (phi, &gsi, loop_versioned);
remove_phi_node (&phi_gsi, false);
}
}
@@ -2887,10 +2896,12 @@ remove_conditions_and_labels (loop_p loop)
}
/* Combine all the basic blocks from LOOP into one or two super basic
- blocks. Replace PHI nodes with conditional modify expressions. */
+ blocks. Replace PHI nodes with conditional modify expressions.
+ LOOP_VERSIONED should be true if we know that the loop was versioned for
+ vectorization. */
static void
-combine_blocks (class loop *loop)
+combine_blocks (class loop *loop, bool loop_versioned)
{
basic_block bb, exit_bb, merge_target_bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -2900,7 +2911,7 @@ combine_blocks (class loop *loop)
remove_conditions_and_labels (loop);
insert_gimplified_predicates (loop);
- predicate_all_scalar_phis (loop);
+ predicate_all_scalar_phis (loop, loop_versioned);
if (need_to_predicate || need_to_rewrite_undefined)
predicate_statements (loop);
@@ -3727,6 +3738,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
bitmap exit_bbs;
edge pe;
auto_vec<data_reference_p, 10> refs;
+ bool loop_versioned;
again:
rloop = NULL;
@@ -3736,6 +3748,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
need_to_predicate = false;
need_to_rewrite_undefined = false;
any_complicated_phi = false;
+ loop_versioned = false;
/* Apply more aggressive if-conversion when loop or its outer loop were
marked with simd pragma. When that's the case, we try to if-convert
@@ -3843,6 +3856,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
will re-use that for things like runtime alias versioning
whose condition can end up using those invariants. */
pe = single_pred_edge (gimple_bb (preds->last ()));
+
+ loop_versioned = true;
}
if (need_to_lower_bitfields)
@@ -3875,7 +3890,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
/* Now all statements are if-convertible. Combine all the basic
blocks into one huge basic block doing the if-conversion
on-the-fly. */
- combine_blocks (loop);
+ combine_blocks (loop, loop_versioned);
}
/* Perform local CSE, this esp. helps the vectorizer analysis if loads
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5213aa0169c..a544bc9b059 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6908,11 +6908,13 @@ merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
/* Successively apply CODE to each element of VECTOR_RHS, in left-to-right
order, starting with LHS. Insert the extraction statements before GSI and
associate the new scalar SSA names with variable SCALAR_DEST.
+ If MASK is nonzero mask the input and then operate on it unconditionally.
Return the SSA name for the result. */
static tree
vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
- tree_code code, tree lhs, tree vector_rhs)
+ tree_code code, tree lhs, tree vector_rhs,
+ tree mask)
{
tree vectype = TREE_TYPE (vector_rhs);
tree scalar_type = TREE_TYPE (vectype);
@@ -6920,6 +6922,21 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
+ /* Re-create a VEC_COND_EXPR to mask the input here in order to be able
+ to perform an unconditional element-wise reduction of it. */
+ if (mask)
+ {
+ tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL,
+ "masked_vector_rhs");
+ tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE,
+ false);
+ tree vector_identity = build_vector_from_val (vectype, neutral_op);
+ gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR,
+ mask, vector_rhs, vector_identity);
+ gsi_insert_before (gsi, select, GSI_SAME_STMT);
+ vector_rhs = masked_vector_rhs;
+ }
+
for (unsigned HOST_WIDE_INT bit_offset = 0;
bit_offset < vec_size_in_bits;
bit_offset += element_bitsize)
@@ -7156,7 +7173,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
else
{
reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
- tree_code (code), reduc_var, def0);
+ tree_code (code), reduc_var, def0,
+ mask);
new_stmt = SSA_NAME_DEF_STMT (reduc_var);
/* Remove the statement, so that we can use the same code paths
as for statements that we've just created. */
--
2.41.0
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-11-07 14:18 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-07 7:33 [PATCH] vect/ifcvt: Add vec_cond fallback and check for vector versioning Robin Dapp
2023-11-07 8:19 ` Richard Biener
2023-11-07 14:18 ` Robin Dapp
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).