public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Robin Dapp <rdapp@linux.ibm.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>,
	linkw@linux.ibm.com, richard.sandiford@arm.com
Subject: Re: [PATCH] vect: Add bias parameter for partial vectorization
Date: Tue, 2 Nov 2021 21:16:29 +0100	[thread overview]
Message-ID: <b0d4f375-726b-8bfc-41cf-27dd18ba3fef@linux.ibm.com> (raw)
In-Reply-To: <mptlf2cgtol.fsf@arm.com>

[-- Attachment #1: Type: text/plain, Size: 588 bytes --]

Hi,

thanks for the helpful comments. The attached v2 addresses the following
points from them:

 - Save the bias in loop_vinfo and set it once in vect_verify_loop_lens.
 - Add code to handle the bias in vect_set_loop_controls_directly.
 - Adjust costing.
 - Add comments for the new len_load parameter as well as change wording.
 - Include the rs6000 change directly.

I'm not entirely satisfied with the bias code for the loop controls,
mainly because of side effects I might not have considered.  The test
suites show no new regressions and bootstrap succeeded, though.

Regards
 Robin

[-- Attachment #2: vll-v2.patch --]
[-- Type: text/x-patch, Size: 13864 bytes --]

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bf033e31c1c..dc2756f83e9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5637,7 +5637,8 @@
 (define_expand "len_load_v16qi"
   [(match_operand:V16QI 0 "vlogical_operand")
    (match_operand:V16QI 1 "memory_operand")
-   (match_operand:QI 2 "gpc_reg_operand")]
+   (match_operand:QI 2 "gpc_reg_operand")
+   (match_operand:QI 3 "zero_constant")]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
   rtx mem = XEXP (operands[1], 0);
@@ -5651,6 +5652,7 @@
   [(match_operand:V16QI 0 "memory_operand")
    (match_operand:V16QI 1 "vlogical_operand")
    (match_operand:QI 2 "gpc_reg_operand")
+   (match_operand:QI 3 "zero_constant")
   ]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 2b41cb7fb7b..265c76f1609 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5213,7 +5213,10 @@ which must be a vector mode.  Operand 2 has whichever integer mode the
 target prefers.  If operand 2 exceeds the number of elements in mode
 @var{m}, the behavior is undefined.  If the target prefers the length
 to be measured in bytes rather than elements, it should only implement
-this pattern for vectors of @code{QI} elements.
+this pattern for vectors of @code{QI} elements.  Operand 3 specifies
+a bias predicate that determines whether a length of zero is permitted
+or not.  If permitted, the predicate should only allow a zero immediate,
+otherwhise it should only allow an immediate value of -1.
 
 This pattern is not allowed to @code{FAIL}.
 
@@ -5226,7 +5229,10 @@ a vector mode.  Operand 2 has whichever integer mode the target prefers.
 If operand 2 exceeds the number of elements in mode @var{m}, the behavior
 is undefined.  If the target prefers the length to be measured in bytes
 rather than elements, it should only implement this pattern for vectors
-of @code{QI} elements.
+of @code{QI} elements.  Operand 3 specifies a bias predicate that
+determines whether a length of zero is permitted or not.  If permitted,
+the predicate should only allow a zero constant, otherwhise it should
+only allow an immediate value of -1.
 
 This pattern is not allowed to @code{FAIL}.
 
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 8312d08aab2..90fdc440248 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
 static void
 expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 {
-  class expand_operand ops[3];
-  tree type, lhs, rhs, maskt;
-  rtx mem, target, mask;
+  class expand_operand ops[4];
+  tree type, lhs, rhs, maskt, biast;
+  rtx mem, target, mask, bias;
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
@@ -2723,11 +2723,20 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   create_output_operand (&ops[0], target, TYPE_MODE (type));
   create_fixed_operand (&ops[1], mem);
   if (optab == len_load_optab)
-    create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
-				 TYPE_UNSIGNED (TREE_TYPE (maskt)));
+    {
+      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
+				   TYPE_UNSIGNED (TREE_TYPE (maskt)));
+      biast = gimple_call_arg (stmt, 3);
+      bias = expand_normal (biast);
+      create_input_operand (&ops[3], bias, QImode);
+      expand_insn (icode, 4, ops);
+    }
   else
+    {
     create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-  expand_insn (icode, 3, ops);
+    expand_insn (icode, 3, ops);
+    }
+
   if (!rtx_equal_p (target, ops[0].value))
     emit_move_insn (target, ops[0].value);
 }
@@ -2741,9 +2750,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 static void
 expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 {
-  class expand_operand ops[3];
-  tree type, lhs, rhs, maskt;
-  rtx mem, reg, mask;
+  class expand_operand ops[4];
+  tree type, lhs, rhs, maskt, biast;
+  rtx mem, reg, mask, bias;
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
@@ -2766,11 +2775,19 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   create_fixed_operand (&ops[0], mem);
   create_input_operand (&ops[1], reg, TYPE_MODE (type));
   if (optab == len_store_optab)
-    create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
-				 TYPE_UNSIGNED (TREE_TYPE (maskt)));
+    {
+      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
+				   TYPE_UNSIGNED (TREE_TYPE (maskt)));
+      biast = gimple_call_arg (stmt, 4);
+      bias = expand_normal (biast);
+      create_input_operand (&ops[3], bias, QImode);
+      expand_insn (icode, 4, ops);
+    }
   else
-    create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-  expand_insn (icode, 3, ops);
+    {
+      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
+      expand_insn (icode, 3, ops);
+    }
 }
 
 #define expand_mask_store_optab_fn expand_partial_store_optab_fn
@@ -4172,6 +4189,29 @@ internal_check_ptrs_fn_supported_p (internal_fn ifn, tree type,
 	  && insn_operand_matches (icode, 4, GEN_INT (align)));
 }
 
+/* Return the supported bias for the len_load IFN.  For now we only support
+   the biases of 0 and -1 (in case 0 is not an allowable length for len_load).
+   If none of the biases match what the backend provides, return
+   VECT_PARTIAL_BIAS_UNSUPPORTED.  */
+
+signed char
+internal_len_load_bias (internal_fn ifn, machine_mode mode)
+{
+  optab optab = direct_internal_fn_optab (ifn);
+  insn_code icode = direct_optab_handler (optab, mode);
+
+  if (icode != CODE_FOR_nothing)
+    {
+      /* For now we only support biases of 0 or -1.  Try both of them.  */
+      if (insn_operand_matches (icode, 3, GEN_INT (0)))
+	return 0;
+      if (insn_operand_matches (icode, 3, GEN_INT (-1)))
+	return -1;
+    }
+
+  return VECT_PARTIAL_BIAS_UNSUPPORTED;
+}
+
 /* Expand STMT as though it were a call to internal function FN.  */
 
 void
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 19d0f849a5a..c057df1327a 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -227,6 +227,10 @@ extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
 						    tree, tree, int);
 extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
 						poly_uint64, unsigned int);
+#define VECT_PARTIAL_BIAS_UNSUPPORTED 127
+
+extern signed char internal_len_load_bias (internal_fn ifn,
+					   machine_mode);
 
 extern void expand_addsub_overflow (location_t, tree_code, tree, tree, tree,
 				    bool, bool, bool, bool, tree *);
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 4988c93fdb6..79cf5029faf 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -436,7 +436,14 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
   tree length_limit = NULL_TREE;
   /* For length, we need length_limit to ensure length in range.  */
   if (!use_masks_p)
-    length_limit = build_int_cst (compare_type, nitems_per_ctrl);
+    {
+      int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_BIAS (loop_vinfo);
+      if (partial_load_bias == 0)
+	length_limit = build_int_cst (compare_type, nitems_per_ctrl);
+      else
+	length_limit = build_int_cst (compare_type,
+				      nitems_per_ctrl + partial_load_bias);
+    }
 
   /* Calculate the maximum number of item values that the rgroup
      handles in total, the number that it handles for each iteration
@@ -560,8 +567,12 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
     {
       /* Previous controls will cover BIAS items.  This control covers the
 	 next batch.  */
+      tree bias_tree;
       poly_uint64 bias = nitems_per_ctrl * i;
-      tree bias_tree = build_int_cst (compare_type, bias);
+      int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_BIAS (loop_vinfo);
+      if (partial_load_bias != 0)
+	bias -= partial_load_bias;
+      bias_tree = build_int_cst (compare_type, bias);
 
       /* See whether the first iteration of the vector loop is known
 	 to have a full control.  */
@@ -574,7 +585,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
 	 TEST_LIMIT, prefer to use the same 0-based IV for each control
 	 and adjust the bound down by BIAS.  */
       tree this_test_limit = test_limit;
-      if (i != 0)
+      if (i != 0 || (i == 0 && partial_load_bias != 0))
 	{
 	  this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
 					  compare_type, this_test_limit,
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index e94356d76e9..705439cb380 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1163,6 +1163,21 @@ vect_verify_loop_lens (loop_vec_info loop_vinfo)
   if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
     return false;
 
+  machine_mode len_load_mode = get_len_load_store_mode
+    (loop_vinfo->vector_mode, false).require ();
+
+  signed char partial_load_bias = internal_len_load_bias
+    (IFN_LEN_LOAD, len_load_mode);
+
+  LOOP_VINFO_PARTIAL_LOAD_BIAS (loop_vinfo) = partial_load_bias;
+
+  /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
+     len_loads with a length of zero.  In order to avoid that we prohibit
+     more than one loop length here.  */
+  if (partial_load_bias == -1
+      && LOOP_VINFO_LENS (loop_vinfo).length () > 1)
+      return false;
+
   unsigned int max_nitems_per_iter = 1;
   unsigned int i;
   rgroup_controls *rgl;
@@ -4125,6 +4140,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	 here.  */
 
       bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
+      signed char partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_BIAS (loop_vinfo);
       bool need_iterate_p
 	= (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)
 	   && !vect_known_niters_smaller_than_vf (loop_vinfo));
@@ -4157,6 +4173,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	       for each since start index is zero.  */
 	    prologue_stmts += num_vectors;
 
+	    /* If we have a non-zero partial load bias, we need one MINUS
+	       and a MAX to adjust the load length.  */
+	    if (partial_load_bias != 0)
+	      prologue_stmts += 2;
+
 	    /* Each may need two MINs and one MINUS to update lengths in body
 	       for next iteration.  */
 	    if (need_iterate_p)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 17849b575b7..57ecafb7965 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -8289,9 +8289,16 @@ vectorizable_store (vec_info *vinfo,
 						   gsi);
 		      vec_oprnd = var;
 		    }
+
+		  /* Check which bias value to use.  */
+		  char biasval = internal_len_load_bias
+		    (IFN_LEN_STORE, new_vmode);
+
+		  tree bias = build_int_cst (intQI_type_node, biasval);
 		  gcall *call
-		    = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
-						  ptr, final_len, vec_oprnd);
+		    = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
+						  ptr, final_len, vec_oprnd,
+						  bias);
 		  gimple_call_set_nothrow (call, true);
 		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
 		  new_stmt = call;
@@ -9588,24 +9595,32 @@ vectorizable_load (vec_info *vinfo,
 					       vec_num * j + i);
 			tree ptr = build_int_cst (ref_type,
 						  align * BITS_PER_UNIT);
+
+			machine_mode vmode = TYPE_MODE (vectype);
+			opt_machine_mode new_ovmode
+			  = get_len_load_store_mode (vmode, true);
+			machine_mode new_vmode = new_ovmode.require ();
+			tree qi_type = unsigned_intQI_type_node;
+			tree new_vtype
+			  = build_vector_type_for_mode (qi_type, new_vmode);
+
+			/* Check which bias value to use.  */
+			char biasval = internal_len_load_bias
+			  (IFN_LEN_LOAD, new_vmode);
+
+			tree bias = build_int_cst (intQI_type_node, biasval);
+
 			gcall *call
-			  = gimple_build_call_internal (IFN_LEN_LOAD, 3,
+			  = gimple_build_call_internal (IFN_LEN_LOAD, 4,
 							dataref_ptr, ptr,
-							final_len);
+							final_len, bias);
 			gimple_call_set_nothrow (call, true);
 			new_stmt = call;
 			data_ref = NULL_TREE;
 
 			/* Need conversion if it's wrapped with VnQI.  */
-			machine_mode vmode = TYPE_MODE (vectype);
-			opt_machine_mode new_ovmode
-			  = get_len_load_store_mode (vmode, true);
-			machine_mode new_vmode = new_ovmode.require ();
 			if (vmode != new_vmode)
 			  {
-			    tree qi_type = unsigned_intQI_type_node;
-			    tree new_vtype
-			      = build_vector_type_for_mode (qi_type, new_vmode);
 			    tree var = vect_get_new_ssa_name (new_vtype,
 							      vect_simple_var);
 			    gimple_set_lhs (call, var);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index c4c5678e7f1..b59f5f6643c 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -749,6 +749,11 @@ public:
      epilogue of loop.  */
   bool epil_using_partial_vectors_p;
 
+  /* The bias for len_load and len_store.  For now, only 0 and -1 are
+     supported.  -1 must be used when a backend does not support
+     len_load/len_store with a length of zero.  */
+  signed char partial_load_bias;
+
   /* When we have grouped data accesses with gaps, we may introduce invalid
      memory accesses.  We peel the last iteration of the loop to prevent
      this.  */
@@ -814,6 +819,7 @@ public:
 #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
 #define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L)                             \
   (L)->epil_using_partial_vectors_p
+#define LOOP_VINFO_PARTIAL_LOAD_BIAS(L)    (L)->partial_load_bias
 #define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
 #define LOOP_VINFO_MASKS(L)                (L)->masks

  reply	other threads:[~2021-11-02 20:16 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-28 14:44 Robin Dapp
2021-10-29  3:10 ` Kewen.Lin
2021-10-29  7:46 ` Richard Sandiford
2021-11-02 20:16   ` Robin Dapp [this message]
2021-11-04  3:59     ` Kewen.Lin
2021-11-12  9:56       ` Robin Dapp
2021-11-22  6:55         ` Kewen.Lin
2021-12-07 11:52           ` Robin Dapp
2021-12-10 19:21             ` Richard Sandiford
2021-12-13 18:51               ` Robin Dapp
2021-12-14 11:47                 ` Richard Sandiford
2022-01-10 12:53               ` Robin Dapp
2022-01-10 14:21                 ` Richard Sandiford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b0d4f375-726b-8bfc-41cf-27dd18ba3fef@linux.ibm.com \
    --to=rdapp@linux.ibm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=linkw@linux.ibm.com \
    --cc=richard.sandiford@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).