public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "juzhe.zhong@rivai.ai" <juzhe.zhong@rivai.ai>
To: 钟居哲 <juzhe.zhong@rivai.ai>, gcc-patches <gcc-patches@gcc.gnu.org>
Cc: richard.sandiford <richard.sandiford@arm.com>,
	 rguenther <rguenther@suse.de>
Subject: Re: [PATCH V12] VECT: Add decrement IV iteration loop control by variable amount support
Date: Tue, 23 May 2023 20:32:06 +0800	[thread overview]
Message-ID: <E4D088BE999664EA+20230523203206317497176@rivai.ai> (raw)
In-Reply-To: <20230522083814.1647787-1-juzhe.zhong@rivai.ai>

[-- Attachment #1: Type: text/plain, Size: 10223 bytes --]

Bootstrap on X86 passed.
Ok for trunk?

Thanks.


juzhe.zhong@rivai.ai
 
From: juzhe.zhong
Date: 2023-05-22 16:38
To: gcc-patches
CC: richard.sandiford; rguenther; Ju-Zhe Zhong
Subject: [PATCH V12] VECT: Add decrement IV iteration loop control by variable amount support
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
 
gcc/ChangeLog:
 
        * tree-vect-loop-manip.cc (vect_adjust_loop_lens_control): New function.
        (vect_set_loop_controls_directly): Add decrement IV support.
        (vect_set_loop_condition_partial_vectors): Ditto.
        * tree-vect-loop.cc: Ditto.
        * tree-vectorizer.h (LOOP_VINFO_USING_DECREMENTING_IV_P): New macro.
 
---
gcc/tree-vect-loop-manip.cc | 184 +++++++++++++++++++++++++++++++++++-
gcc/tree-vect-loop.cc       |  10 ++
gcc/tree-vectorizer.h       |   8 ++
3 files changed, 199 insertions(+), 3 deletions(-)
 
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index ff6159e08d5..94b38d1e0fb 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -385,6 +385,66 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
   return false;
}
+/* Try to use adjust loop lens for non-SLP multiple-rgroups.
+
+     _36 = MIN_EXPR <ivtmp_34, VF>;
+
+     First length (MIN (X, VF/N)):
+       loop_len_15 = MIN_EXPR <_36, VF/N>;
+
+     Second length:
+       tmp = _36 - loop_len_15;
+       loop_len_16 = MIN (tmp, VF/N);
+
+     Third length:
+       tmp2 = tmp - loop_len_16;
+       loop_len_17 = MIN (tmp2, VF/N);
+
+     Last length:
+       loop_len_18 = tmp2 - loop_len_17;
+*/
+
+static void
+vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
+        rgroup_controls *dest_rgm,
+        rgroup_controls *src_rgm, tree step)
+{
+  tree ctrl_type = dest_rgm->type;
+  poly_uint64 nitems_per_ctrl
+    = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
+  tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
+
+  for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
+    {
+      if (!step)
+ step = src_rgm->controls[i / dest_rgm->controls.length ()];
+      tree ctrl = dest_rgm->controls[i];
+      if (i == 0)
+ {
+   /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N].  */
+   gassign *assign
+     = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+   gimple_seq_add_stmt (seq, assign);
+ }
+      else if (i == dest_rgm->controls.length () - 1)
+ {
+   /* Last iteration: Remain capped to the range [0, VF/N].  */
+   gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
+ dest_rgm->controls[i - 1]);
+   gimple_seq_add_stmt (seq, assign);
+ }
+      else
+ {
+   /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N].  */
+   step = gimple_build (seq, MINUS_EXPR, iv_type, step,
+        dest_rgm->controls[i - 1]);
+   gassign *assign
+     = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
+   gimple_seq_add_stmt (seq, assign);
+ }
+    }
+}
+
/* Helper for vect_set_loop_condition_partial_vectors.  Generate definitions
    for all the rgroup controls in RGC and return a control that is nonzero
    when the loop needs to iterate.  Add any new preheader statements to
@@ -468,9 +528,78 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
-  create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
-      loop, &incr_gsi, insert_after, &index_before_incr,
-      &index_after_incr);
+  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+    {
+      nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
+      tree step = make_ssa_name (iv_type);
+      /* Create decrement IV.  */
+      create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
+ insert_after, &index_before_incr, &index_after_incr);
+      tree temp = gimple_build (header_seq, MIN_EXPR, iv_type,
+ index_before_incr, nitems_step);
+      gimple_seq_add_stmt (header_seq, gimple_build_assign (step, temp));
+
+      if (rgc->max_nscalars_per_iter == 1)
+ {
+   /* single rgroup:
+      ...
+      _10 = (unsigned long) count_12(D);
+      ...
+      # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
+      _36 = MIN_EXPR <ivtmp_9, POLY_INT_CST [4, 4]>;
+      ...
+      vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
+      ...
+      ivtmp_35 = ivtmp_9 - _36;
+      ...
+      if (ivtmp_35 != 0)
+        goto <bb 4>; [83.33%]
+      else
+        goto <bb 5>; [16.67%]
+   */
+   gassign *assign = gimple_build_assign (rgc->controls[0], step);
+   gimple_seq_add_stmt (header_seq, assign);
+ }
+      else
+ {
+   /* Multiple rgroup (SLP):
+      ...
+      _38 = (unsigned long) bnd.7_29;
+      _39 = _38 * 2;
+      ...
+      # ivtmp_41 = PHI <ivtmp_42(6), _39(5)>
+      ...
+      _43 = MIN_EXPR <ivtmp_41, 32>;
+      loop_len_26 = MIN_EXPR <_43, 16>;
+      loop_len_25 = _43 - loop_len_26;
+      ...
+      .LEN_STORE (_6, 8B, loop_len_26, ...);
+      ...
+      .LEN_STORE (_25, 8B, loop_len_25, ...);
+      _33 = loop_len_26 / 2;
+      ...
+      .LEN_STORE (_8, 16B, _33, ...);
+      _36 = loop_len_25 / 2;
+      ...
+      .LEN_STORE (_15, 16B, _36, ...);
+      ivtmp_42 = ivtmp_41 - _43;
+      ...
+      if (ivtmp_42 != 0)
+        goto <bb 4>; [83.33%]
+      else
+       goto <bb 5>; [16.67%]
+   */
+   vect_adjust_loop_lens_control (iv_type, header_seq, rgc, NULL, step);
+ }
+      return index_after_incr;
+    }
+  else
+    {
+      /* Create increment IV.  */
+      create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
+ loop, &incr_gsi, insert_after, &index_before_incr,
+ &index_after_incr);
+    }
   tree zero_index = build_int_cst (compare_type, 0);
   tree test_index, test_limit, first_limit;
@@ -753,6 +882,55 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
      continue;
  }
+ if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+     && rgc->max_nscalars_per_iter == 1
+     && rgc != &LOOP_VINFO_LENS (loop_vinfo)[0])
+   {
+     /* Multiple rgroup (non-SLP):
+       ...
+       _38 = (unsigned long) n_12(D);
+       ...
+       # ivtmp_38 = PHI <ivtmp_39(3), 100(2)>
+       ...
+       _40 = MIN_EXPR <ivtmp_38, POLY_INT_CST [8, 8]>;
+       loop_len_21 = MIN_EXPR <_40, POLY_INT_CST [2, 2]>;
+       _41 = _40 - loop_len_21;
+       loop_len_20 = MIN_EXPR <_41, POLY_INT_CST [2, 2]>;
+       _42 = _40 - loop_len_20;
+       loop_len_19 = MIN_EXPR <_42, POLY_INT_CST [2, 2]>;
+       _43 = _40 - loop_len_19;
+       loop_len_16 = MIN_EXPR <_43, POLY_INT_CST [2, 2]>;
+       ...
+       vect__4.8_15 = .LEN_LOAD (_6, 64B, loop_len_21, 0);
+       ...
+       vect__4.9_8 = .LEN_LOAD (_13, 64B, loop_len_20, 0);
+       ...
+       vect__4.10_28 = .LEN_LOAD (_46, 64B, loop_len_19, 0);
+       ...
+       vect__4.11_30 = .LEN_LOAD (_49, 64B, loop_len_16, 0);
+       vect__7.13_31 = VEC_PACK_TRUNC_EXPR <vect__4.8_15, vect__4.9_8>;
+       vect__7.13_32 = VEC_PACK_TRUNC_EXPR <...>;
+       vect__7.12_33 = VEC_PACK_TRUNC_EXPR <...>;
+       ...
+       .LEN_STORE (_14, 16B, _40, vect__7.12_33, 0);
+       ivtmp_39 = ivtmp_38 - _40;
+       ...
+       if (ivtmp_39 != 0)
+ goto <bb 3>; [92.31%]
+       else
+ goto <bb 4>; [7.69%]
+     */
+     rgroup_controls *sub_rgc
+       = &(*controls)[nmasks / rgc->controls.length () - 1];
+     if (!sub_rgc->controls.is_empty ())
+       {
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ vect_adjust_loop_lens_control (iv_type, &header_seq, rgc,
+        sub_rgc, NULL_TREE);
+ continue;
+       }
+   }
+
/* See whether zero-based IV would ever generate all-false masks
   or zero length before wrapping around.  */
bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index cf10132b0bf..53ac4d2a6c2 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2725,6 +2725,16 @@ start_over:
       && !vect_verify_loop_lens (loop_vinfo))
     LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+  /* If we're vectorizing an loop that uses length "controls" and
+     can iterate more than once, we apply decrementing IV approach
+     in loop control.  */
+  if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+      && !LOOP_VINFO_LENS (loop_vinfo).is_empty ()
+      && !(LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+    && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+    LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true;
+
   /* If we're vectorizing an epilogue loop, the vectorized loop either needs
      to be able to handle fewer than VF scalars, or needs to have a lower VF
      than the main loop.  */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 02d2ad6fba1..fba09b9ffd3 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -818,6 +818,13 @@ public:
      the vector loop can handle fewer than VF scalars.  */
   bool using_partial_vectors_p;
+  /* True if we've decided to use a decrementing loop control IV that counts
+     scalars. This can be done for any loop that:
+
+ (a) uses length "controls"; and
+ (b) can iterate more than once.  */
+  bool using_decrementing_iv_p;
+
   /* True if we've decided to use partially-populated vectors for the
      epilogue of loop.  */
   bool epil_using_partial_vectors_p;
@@ -890,6 +897,7 @@ public:
#define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
#define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
+#define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
#define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L)                             \
   (L)->epil_using_partial_vectors_p
#define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias
-- 
2.36.3
 

  reply	other threads:[~2023-05-23 12:32 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-22  8:38 juzhe.zhong
2023-05-23 12:32 ` juzhe.zhong [this message]
2023-05-24 11:23 ` Richard Sandiford
2023-05-24 11:52   ` 钟居哲
2023-05-24 12:41     ` Richard Sandiford
2023-05-24 12:51       ` Richard Biener
2023-05-24 13:27         ` 钟居哲
2023-05-24 13:26       ` 钟居哲
2023-05-24 14:01         ` Richard Sandiford
2023-05-24 14:10           ` 钟居哲
2023-05-24 14:57             ` Richard Sandiford
2023-05-24 14:58               ` 钟居哲
     [not found]           ` <2023052422100415521814@rivai.ai>
2023-05-24 14:11             ` 钟居哲
2023-05-24 14:22             ` 钟居哲
2023-05-24 14:35           ` 钟居哲
2023-05-24 12:19   ` 钟居哲

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=E4D088BE999664EA+20230523203206317497176@rivai.ai \
    --to=juzhe.zhong@rivai.ai \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=rguenther@suse.de \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).