public inbox for gcc-cvs@sourceware.org
* [gcc(refs/vendors/riscv/heads/gcc-13-with-riscv-opts)] VECT: Support floating-point in-order reduction for length loop control
@ 2023-07-25 18:34 Jeff Law
From: Jeff Law @ 2023-07-25 18:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1c694aac243b3621f82409fce4545d4337e75e06

commit 1c694aac243b3621f82409fce4545d4337e75e06
Author: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Date:   Sun Jul 23 12:23:33 2023 +0800

    VECT: Support floating-point in-order reduction for length loop control
    
    Hi, Richard and Richi.
    
    This patch supports floating-point in-order reduction for length loop control.
    
    Consider the following case:
    
    float foo (float *__restrict a, int n)
    {
      float result = 1.0;
      for (int i = 0; i < n; i++)
       result += a[i];
      return result;
    }
    
    When compiled **without** -ffast-math on ARM SVE, we end up with:
    
    loop_mask = WHILE_ULT
    result = MASK_FOLD_LEFT_PLUS (...loop_mask...)
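
    As a rough scalar model of what MASK_FOLD_LEFT_PLUS computes (purely
    illustrative; the helper name and C signature below are not part of GCC),
    the masked in-order reduction adds the active lanes one by one, keeping
    the association that strict FP semantics require:

    /* Illustrative model of MASK_FOLD_LEFT_PLUS (init, vec, mask).  */
    float
    mask_fold_left_plus_model (float init, const float *vec,
                               const _Bool *mask, int nunits)
    {
      float result = init;
      for (int i = 0; i < nunits; i++)
        if (mask[i])
          result += vec[i];   /* strictly in order, no reassociation  */
      return result;
    }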
    
    For RVV, we use length loop control instead of a mask.
    
    So, with this patch, we expect to see:
    
    loop_len = SELECT_VL
    result = MASK_LEN_FOLD_LEFT_PLUS (...loop_len...)
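
    The length-controlled variant behaves like the masked model above, except
    that only the first len + bias elements are active (again a rough,
    illustrative sketch, not GCC's actual expansion of the internal function):

    /* Illustrative model of MASK_LEN_FOLD_LEFT_PLUS (init, vec, mask, len, bias).  */
    float
    mask_len_fold_left_plus_model (float init, const float *vec,
                                   const _Bool *mask, int len, int bias)
    {
      float result = init;
      for (int i = 0; i < len + bias; i++)
        if (mask[i])
          result += vec[i];   /* still strictly in order  */
      return result;
    }

    In the patch below the mask operand is built as an all-ones constant, so
    in practice the length alone decides how many elements participate.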
    
    gcc/ChangeLog:
    
            * tree-vect-loop.cc (get_masked_reduction_fn): Add mask_len_fold_left_plus.
            (vectorize_fold_left_reduction): Ditto.
            (vectorizable_reduction): Ditto.
            (vect_transform_reduction): Ditto.
    
    (cherry picked from commit 42b17d00c33dd9994dee973c001d85dc09b79b24)

Diff:
---
 gcc/tree-vect-loop.cc | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index cd0160a17c8..e4851f15543 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6550,11 +6550,13 @@ static internal_fn
 get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in)
 {
   internal_fn mask_reduc_fn;
+  internal_fn mask_len_reduc_fn;
 
   switch (reduc_fn)
     {
     case IFN_FOLD_LEFT_PLUS:
       mask_reduc_fn = IFN_MASK_FOLD_LEFT_PLUS;
+      mask_len_reduc_fn = IFN_MASK_LEN_FOLD_LEFT_PLUS;
       break;
 
     default:
@@ -6564,6 +6566,9 @@ get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in)
   if (direct_internal_fn_supported_p (mask_reduc_fn, vectype_in,
 				      OPTIMIZE_FOR_SPEED))
     return mask_reduc_fn;
+  if (direct_internal_fn_supported_p (mask_len_reduc_fn, vectype_in,
+				      OPTIMIZE_FOR_SPEED))
+    return mask_len_reduc_fn;
   return IFN_LAST;
 }
 
@@ -6584,7 +6589,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
 			       gimple *reduc_def_stmt,
 			       tree_code code, internal_fn reduc_fn,
 			       tree ops[3], tree vectype_in,
-			       int reduc_index, vec_loop_masks *masks)
+			       int reduc_index, vec_loop_masks *masks,
+			       vec_loop_lens *lens)
 {
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
@@ -6646,8 +6652,18 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
     {
       gimple *new_stmt;
       tree mask = NULL_TREE;
+      tree len = NULL_TREE;
+      tree bias = NULL_TREE;
       if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
 	mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
+      if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+	{
+	  len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
+				   i, 1);
+	  signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+	  bias = build_int_cst (intQI_type_node, biasval);
+	  mask = build_minus_one_cst (truth_type_for (vectype_in));
+	}
 
       /* Handle MINUS by adding the negative.  */
       if (reduc_fn != IFN_LAST && code == MINUS_EXPR)
@@ -6667,7 +6683,10 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
 	 the preceding operation.  */
       if (reduc_fn != IFN_LAST || (mask && mask_reduc_fn != IFN_LAST))
 	{
-	  if (mask && mask_reduc_fn != IFN_LAST)
+	  if (mask && len && mask_reduc_fn == IFN_MASK_LEN_FOLD_LEFT_PLUS)
+	    new_stmt = gimple_build_call_internal (mask_reduc_fn, 5, reduc_var,
+						   def0, mask, len, bias);
+	  else if (mask && mask_reduc_fn == IFN_MASK_FOLD_LEFT_PLUS)
 	    new_stmt = gimple_build_call_internal (mask_reduc_fn, 3, reduc_var,
 						   def0, mask);
 	  else
@@ -7729,6 +7748,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
     {
       vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
       internal_fn cond_fn = get_conditional_internal_fn (op.code, op.type);
 
       if (reduction_type != FOLD_LEFT_REDUCTION
@@ -7756,8 +7776,17 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 	}
       else
-	vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
-			       vectype_in, NULL);
+	{
+	  internal_fn mask_reduc_fn
+	    = get_masked_reduction_fn (reduc_fn, vectype_in);
+
+	  if (mask_reduc_fn == IFN_MASK_LEN_FOLD_LEFT_PLUS)
+	    vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num,
+				  vectype_in, 1);
+	  else
+	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+				   vectype_in, NULL);
+	}
     }
   return true;
 }
@@ -7887,6 +7916,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
   code_helper code = canonicalize_code (op.code, op.type);
   internal_fn cond_fn = get_conditional_internal_fn (code, op.type);
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
   bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
 
   /* Transform.  */
@@ -7912,7 +7942,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
       gcc_assert (code.is_tree_code ());
       return vectorize_fold_left_reduction
 	  (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi,
-	   tree_code (code), reduc_fn, op.ops, vectype_in, reduc_index, masks);
+	   tree_code (code), reduc_fn, op.ops, vectype_in, reduc_index, masks,
+	   lens);
     }
 
   bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
