public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] VECT: Apply LEN_FOLD_EXTRACT_LAST into loop vectorizer
@ 2023-08-24  2:08 Juzhe-Zhong
  2023-08-24  6:39 ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: Juzhe-Zhong @ 2023-08-24  2:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, rguenther, Juzhe-Zhong

Hi.

This patch applies LEN_FOLD_EXTRACT_LAST in the loop vectorizer.

Consider the following case:
#include <stdint.h>

#define N 32

/* Simple condition reduction: return the index of the last element among
   the first N entries of A that is less than MIN_V, or 66 if no element
   satisfies the comparison.  */

int __attribute__ ((noinline, noclone))
condition_reduction (int *a, int min_v)
{
  int last = 66; /* High start value; returned unchanged if nothing matches.  */

  /* Every match overwrites LAST, so after the loop it holds the highest
     matching index.  */
  for (int i = 0; i < N; i++)
    if (a[i] < min_v)
      last = i;

  return last;
}

With this patch, we can generate the following IR:

  _44 = .SELECT_VL (ivtmp_42, POLY_INT_CST [4, 4]);
  _34 = vect_vec_iv_.5_33 + { POLY_INT_CST [4, 4], ... };
  ivtmp_36 = _44 * 4;
  vect__4.8_39 = .MASK_LEN_LOAD (vectp_a.6_37, 32B, { -1, ... }, _44, 0);

  mask__11.9_41 = vect__4.8_39 < vect_cst__40;
  last_5 = .LEN_FOLD_EXTRACT_LAST (last_14, mask__11.9_41, vect_vec_iv_.5_33, _44, 0);
  ...

gcc/ChangeLog:

        * tree-vect-loop.cc (vectorizable_reduction): Apply LEN_FOLD_EXTRACT_LAST.
        * tree-vect-stmts.cc (vectorizable_condition): Ditto.

---
 gcc/tree-vect-loop.cc  |  7 ++++--
 gcc/tree-vect-stmts.cc | 52 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 1cd6c291377..ebee8037e02 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7494,8 +7494,11 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 	}
 
       if (reduc_chain_length == 1
-	  && direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
-					     vectype_in, OPTIMIZE_FOR_SPEED))
+	  && (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, vectype_in,
+					      OPTIMIZE_FOR_SPEED)
+	      || direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
+						 vectype_in,
+						 OPTIMIZE_FOR_SPEED)))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 413a88750d6..be9f3a280bd 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11740,8 +11740,17 @@ vectorizable_condition (vec_info *vinfo,
 	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
 	{
 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
-	    vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
-				   ncopies * vec_num, vectype, NULL);
+	    {
+	      if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
+						  vectype, OPTIMIZE_FOR_SPEED))
+		vect_record_loop_len (loop_vinfo,
+				      &LOOP_VINFO_LENS (loop_vinfo),
+				      ncopies * vec_num, vectype, 1);
+	      else
+		vect_record_loop_mask (loop_vinfo,
+				       &LOOP_VINFO_MASKS (loop_vinfo),
+				       ncopies * vec_num, vectype, NULL);
+	    }
 	  /* Extra inactive lanes should be safe for vect_nested_cycle.  */
 	  else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
 	    {
@@ -11772,7 +11781,13 @@ vectorizable_condition (vec_info *vinfo,
      mask to the condition, or to its inverse.  */
 
   vec_loop_masks *masks = NULL;
-  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+  vec_loop_lens *lens = NULL;
+  if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+    {
+      if (reduction_type == EXTRACT_LAST_REDUCTION)
+	lens = &LOOP_VINFO_LENS (loop_vinfo);
+    }
+  else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
     {
       if (reduction_type == EXTRACT_LAST_REDUCTION)
 	masks = &LOOP_VINFO_MASKS (loop_vinfo);
@@ -11910,7 +11925,8 @@ vectorizable_condition (vec_info *vinfo,
       /* Force vec_compare to be an SSA_NAME rather than a comparison,
 	 in cases where that's necessary.  */
 
-      if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
+      tree len = NULL_TREE, bias = NULL_TREE;
+      if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
 	{
 	  if (!is_gimple_val (vec_compare))
 	    {
@@ -11931,6 +11947,23 @@ vectorizable_condition (vec_info *vinfo,
 	      vec_compare = vec_compare_name;
 	    }
 
+	  if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
+					      vectype, OPTIMIZE_FOR_SPEED))
+	    {
+	      if (lens)
+		{
+		  len = vect_get_loop_len (loop_vinfo, gsi, lens,
+					   vec_num * ncopies, vectype, i, 1);
+		  signed char biasval
+		    = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+		  bias = build_int_cst (intQI_type_node, biasval);
+		}
+	      else
+		{
+		  len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+		  bias = build_int_cst (intQI_type_node, 0);
+		}
+	    }
 	  if (masks)
 	    {
 	      tree loop_mask
@@ -11950,9 +11983,14 @@ vectorizable_condition (vec_info *vinfo,
 	{
 	  gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
 	  tree lhs = gimple_get_lhs (old_stmt);
-	  new_stmt = gimple_build_call_internal
-	      (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
-	       vec_then_clause);
+	  if (len)
+	    new_stmt = gimple_build_call_internal
+	        (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
+	         vec_then_clause, len, bias);
+	  else
+	    new_stmt = gimple_build_call_internal
+	        (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
+	         vec_then_clause);
 	  gimple_call_set_lhs (new_stmt, lhs);
 	  SSA_NAME_DEF_STMT (lhs) = new_stmt;
 	  if (old_stmt == gsi_stmt (*gsi))
-- 
2.36.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-08-24  9:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-24  2:08 [PATCH] VECT: Apply LEN_FOLD_EXTRACT_LAST into loop vectorizer Juzhe-Zhong
2023-08-24  6:39 ` Richard Biener
2023-08-24  9:03   ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).