public inbox for gcc-patches@gcc.gnu.org
From: Robin Dapp <rdapp.gcc@gmail.com>
To: Richard Biener <rguenther@suse.de>
Cc: rdapp.gcc@gmail.com, gcc-patches <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH] vect/ifcvt: Add vec_cond fallback and check for vector versioning.
Date: Tue, 7 Nov 2023 15:18:38 +0100
Message-ID: <2ab39472-10b9-400d-8661-6015751b59f1@gmail.com>
In-Reply-To: <nycvar.YFH.7.77.849.2311070814500.8772@jbgna.fhfr.qr>

> isn't is_cond_op implied by mask != NULL?  That said, if we ever end
> up here with a non-cond op but a loop mask we effectively want the
> same behavior so I think eliding is_cond_op and instead checking
> mask != NULL_TREE below is more future proof.
> 
> OK with that change.

Thanks, attached is v2 with is_cond_op removed and the comment
adjusted.  I also made the test specific to x86 and aarch64.
Going to commit it later.
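
For reference, with the versioned loop the if-converted reduction now
uses the conditional internal function with the neutral value (0 for |)
as else operand, schematically (SSA names made up):

  u_6 = .COND_IOR (_cond_2, u_5, i_4, 0);

while without versioning we keep emitting the VEC_COND-style sequence
as before:

  _ifc__1 = _cond_2 ? i_4 : 0;
  u_6 = u_5 | _ifc__1;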

Regards
 Robin

gcc/ChangeLog:

	PR tree-optimization/112361
	PR target/112359
	PR middle-end/112406

	* tree-if-conv.cc (convert_scalar_cond_reduction): Remember if
	loop was versioned and only then create COND_OPs.
	(predicate_scalar_phi): Do not create COND_OP when not
	vectorizing.
	* tree-vect-loop.cc (vect_expand_fold_left): Re-create
	VEC_COND_EXPR.
	(vectorize_fold_left_reduction): Pass mask to
	vect_expand_fold_left.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr112359.c: New test.
---
 gcc/testsuite/gcc.dg/pr112359.c | 15 ++++++++++++
 gcc/tree-if-conv.cc             | 41 ++++++++++++++++++++++-----------
 gcc/tree-vect-loop.cc           | 22 ++++++++++++++++--
 3 files changed, 63 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr112359.c
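
On the vectorizer side, for a masked in-order reduction
vect_expand_fold_left now re-creates the selection that if-conversion
no longer emits, masking the input with the neutral value before the
element-wise expansion.  For the testcase that is roughly (SSA names
made up, element count depends on the vectorization factor):

  masked_vector_rhs_1 = VEC_COND_EXPR <loop_mask_2, vect__3, { 0, ... }>;
  _4 = BIT_FIELD_REF <masked_vector_rhs_1, 64, 0>;
  u_5 = u_prev_6 | _4;
  _7 = BIT_FIELD_REF <masked_vector_rhs_1, 64, 64>;
  u_8 = u_5 | _7;
  ...

so the scalar fold-left reduction itself can proceed unconditionally.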

diff --git a/gcc/testsuite/gcc.dg/pr112359.c b/gcc/testsuite/gcc.dg/pr112359.c
new file mode 100644
index 00000000000..38d588f3958
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr112359.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { aarch64*-*-* i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O -std=gnu99 -mavx512fp16 -ftree-loop-if-convert" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O -std=gnu99 -march=armv8.4-a+sve -ftree-loop-if-convert" { target aarch64*-*-* } } */
+
+int i, c;
+unsigned long long u;
+
+void
+foo (void)
+{
+  for (; i; i++)
+    if (c)
+      u |= i;
+}
+
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index a7a751b668c..0190cf2369e 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1845,12 +1845,15 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
     res_2 = res_13 + _ifc__1;
   Argument SWAP tells that arguments of conditional expression should be
   swapped.
+  LOOP_VERSIONED is true if we know that the loop was versioned for
+  vectorization.  In that case we can create a COND_OP.
   Returns rhs of resulting PHI assignment.  */
 
 static tree
 convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
 			       tree cond, tree op0, tree op1, bool swap,
-			       bool has_nop, gimple* nop_reduc)
+			       bool has_nop, gimple* nop_reduc,
+			       bool loop_versioned)
 {
   gimple_stmt_iterator stmt_it;
   gimple *new_assign;
@@ -1874,7 +1877,7 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
      The COND_OP will have a neutral_op else value.  */
   internal_fn ifn;
   ifn = get_conditional_internal_fn (reduction_op);
-  if (ifn != IFN_LAST
+  if (loop_versioned && ifn != IFN_LAST
       && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
       && !swap)
     {
@@ -2129,11 +2132,13 @@ cmp_arg_entry (const void *p1, const void *p2, void * /* data.  */)
    The generated code is inserted at GSI that points to the top of
    basic block's statement list.
    If PHI node has more than two arguments a chain of conditional
-   expression is produced.  */
+   expression is produced.
+   LOOP_VERSIONED should be true if we know that the loop was versioned for
+   vectorization.  */
 
 
 static void
-predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
+predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
 {
   gimple *new_stmt = NULL, *reduc, *nop_reduc;
   tree rhs, res, arg0, arg1, op0, op1, scev;
@@ -2213,7 +2218,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 	  /* Convert reduction stmt into vectorizable form.  */
 	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
 					       true_bb != gimple_bb (reduc),
-					       has_nop, nop_reduc);
+					       has_nop, nop_reduc,
+					       loop_versioned);
 	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
 	}
       else
@@ -2311,7 +2317,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 	{
 	  /* Convert reduction stmt into vectorizable form.  */
 	  rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
-					       swap, has_nop, nop_reduc);
+					       swap, has_nop, nop_reduc,
+					       loop_versioned);
 	  redundant_ssa_names.safe_push (std::make_pair (res, rhs));
 	}
       new_stmt = gimple_build_assign (res, rhs);
@@ -2334,10 +2341,12 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
 }
 
 /* Replaces in LOOP all the scalar phi nodes other than those in the
-   LOOP->header block with conditional modify expressions.  */
+   LOOP->header block with conditional modify expressions.
+   LOOP_VERSIONED should be true if we know that the loop was versioned for
+   vectorization.  */
 
 static void
-predicate_all_scalar_phis (class loop *loop)
+predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
 {
   basic_block bb;
   unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -2365,7 +2374,7 @@ predicate_all_scalar_phis (class loop *loop)
 	    gsi_next (&phi_gsi);
 	  else
 	    {
-	      predicate_scalar_phi (phi, &gsi);
+	      predicate_scalar_phi (phi, &gsi, loop_versioned);
 	      remove_phi_node (&phi_gsi, false);
 	    }
 	}
@@ -2887,10 +2896,12 @@ remove_conditions_and_labels (loop_p loop)
 }
 
 /* Combine all the basic blocks from LOOP into one or two super basic
-   blocks.  Replace PHI nodes with conditional modify expressions.  */
+   blocks.  Replace PHI nodes with conditional modify expressions.
+   LOOP_VERSIONED should be true if we know that the loop was versioned for
+   vectorization.  */
 
 static void
-combine_blocks (class loop *loop)
+combine_blocks (class loop *loop, bool loop_versioned)
 {
   basic_block bb, exit_bb, merge_target_bb;
   unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -2900,7 +2911,7 @@ combine_blocks (class loop *loop)
 
   remove_conditions_and_labels (loop);
   insert_gimplified_predicates (loop);
-  predicate_all_scalar_phis (loop);
+  predicate_all_scalar_phis (loop, loop_versioned);
 
   if (need_to_predicate || need_to_rewrite_undefined)
     predicate_statements (loop);
@@ -3727,6 +3738,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
   bitmap exit_bbs;
   edge pe;
   auto_vec<data_reference_p, 10> refs;
+  bool loop_versioned;
 
  again:
   rloop = NULL;
@@ -3736,6 +3748,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
   need_to_predicate = false;
   need_to_rewrite_undefined = false;
   any_complicated_phi = false;
+  loop_versioned = false;
 
   /* Apply more aggressive if-conversion when loop or its outer loop were
      marked with simd pragma.  When that's the case, we try to if-convert
@@ -3843,6 +3856,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
 	   will re-use that for things like runtime alias versioning
 	   whose condition can end up using those invariants.  */
 	pe = single_pred_edge (gimple_bb (preds->last ()));
+
+      loop_versioned = true;
     }
 
   if (need_to_lower_bitfields)
@@ -3875,7 +3890,7 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
       /* Now all statements are if-convertible.  Combine all the basic
 	 blocks into one huge basic block doing the if-conversion
 	 on-the-fly.  */
-      combine_blocks (loop);
+      combine_blocks (loop, loop_versioned);
     }
 
   /* Perform local CSE, this esp. helps the vectorizer analysis if loads
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5213aa0169c..a544bc9b059 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6908,11 +6908,13 @@ merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
 /* Successively apply CODE to each element of VECTOR_RHS, in left-to-right
    order, starting with LHS.  Insert the extraction statements before GSI and
    associate the new scalar SSA names with variable SCALAR_DEST.
+   If MASK is nonzero, mask the input and then operate on it unconditionally.
    Return the SSA name for the result.  */
 
 static tree
 vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
-		       tree_code code, tree lhs, tree vector_rhs)
+		       tree_code code, tree lhs, tree vector_rhs,
+		       tree mask)
 {
   tree vectype = TREE_TYPE (vector_rhs);
   tree scalar_type = TREE_TYPE (vectype);
@@ -6920,6 +6922,21 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
   unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
   unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
 
+  /* Re-create a VEC_COND_EXPR to mask the input here in order to be able
+     to perform an unconditional element-wise reduction of it.  */
+  if (mask)
+    {
+      tree masked_vector_rhs = make_temp_ssa_name (vectype, NULL,
+						   "masked_vector_rhs");
+      tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE,
+						  false);
+      tree vector_identity = build_vector_from_val (vectype, neutral_op);
+      gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR,
+					     mask, vector_rhs, vector_identity);
+      gsi_insert_before (gsi, select, GSI_SAME_STMT);
+      vector_rhs = masked_vector_rhs;
+    }
+
   for (unsigned HOST_WIDE_INT bit_offset = 0;
        bit_offset < vec_size_in_bits;
        bit_offset += element_bitsize)
@@ -7156,7 +7173,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       else
 	{
 	  reduc_var = vect_expand_fold_left (gsi, scalar_dest_var,
-					     tree_code (code), reduc_var, def0);
+					     tree_code (code), reduc_var, def0,
+					     mask);
 	  new_stmt = SSA_NAME_DEF_STMT (reduc_var);
 	  /* Remove the statement, so that we can use the same code paths
 	     as for statements that we've just created.  */
-- 
2.41.0


Thread overview: 3+ messages
2023-11-07  7:33 Robin Dapp
2023-11-07  8:19 ` Richard Biener
2023-11-07 14:18   ` Robin Dapp [this message]
