public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* PATCH-1v3, expand] Enable vector mode for compare_by_pieces [PR111449]
@ 2023-10-13  9:32 HAO CHEN GUI
  2023-10-16 21:51 ` Richard Sandiford
  0 siblings, 1 reply; 2+ messages in thread
From: HAO CHEN GUI @ 2023-10-13  9:32 UTC (permalink / raw)
  To: gcc-patches
  Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner, richard.sandiford

Hi,
  Vector mode instructions are efficient for compare on some targets.
This patch enables vector mode for compare_by_pieces. Currently,
vector mode is enabled for compare, set and clear. Helper function
"qi_vector_p" decides if vector mode is enabled for certain by pieces
operation. optabs_checking checks if optabs are available for the
mode and certain by pieces operations. Both of them are called in
fixed_size_mode finding functions. A member is added to class
op_by_pieces_d in order to record the type of by pieces operations.

  The test case is in the second patch which is rs6000 specific.

  Compared to last version, the main change is to create two helper
functions and call them in mode finding function.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions.

Thanks
Gui Haochen

ChangeLog
Expand: Enable vector mode for pieces compares

Vector mode compare instructions are efficient for equality compare on
rs6000. This patch refactors the codes of pieces operation to enable
vector mode for compare.

gcc/
	PR target/111449
	* expr.cc (qi_vector_p): New function to indicate if vector mode
	is enabled for certain by pieces operations.
	(optabs_checking): New function to check if optabs are available
	for certain by pieces operations.
	(widest_fixed_size_mode_for_size): Replace the second argument
	with the type of by pieces operations.  Call qi_vector_p to check
	if vector mode is enable.  Call optabs_checking to check if optabs
	are available for the candidate vector mode.
	(by_pieces_ninsns): Pass the type of by pieces operation to
	widest_fixed_size_mode_for_size.
	(class op_by_pieces_d): Add a protected member m_op to record the
	type of by pieces operations.  Declare member function
	fixed_size_mode widest_fixed_size_mode_for_size.
	(op_by_pieces_d::op_by_pieces_d): Change last argument to the type
	of by pieces operations, initialize m_op with it.  Call non-member
	function widest_fixed_size_mode_for_size.
	(op_by_pieces_d::get_usable_mode): Call member function
	widest_fixed_size_mode_for_size.
	(op_by_pieces_d::smallest_fixed_size_mode_for_size): Call
	qi_vector_p to check if vector mode is enable.  Call
	optabs_checking to check if optabs are available for the candidate
	vector mode.
	(op_by_pieces_d::run): Call member function
	widest_fixed_size_mode_for_size.
	(op_by_pieces_d::widest_fixed_size_mode_for_size): Implement.
	(move_by_pieces_d::move_by_pieces_d): Set m_op to MOVE_BY_PIECES.
	(store_by_pieces_d::store_by_pieces_d): Set m_op with the op.
	(can_store_by_pieces): Pass the type of by pieces operations to
	widest_fixed_size_mode_for_size.
	(clear_by_pieces): Initialize class store_by_pieces_d with
	CLEAR_BY_PIECES.
	(compare_by_pieces_d::compare_by_pieces_d): Set m_op to
	COMPARE_BY_PIECES.

patch.diff
diff --git a/gcc/expr.cc b/gcc/expr.cc
index d87346dc07f..8ec3f5465a9 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -988,18 +988,43 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
   return align;
 }

-/* Return the widest QI vector, if QI_MODE is true, or integer mode
-   that is narrower than SIZE bytes.  */
+/* Return true if vector mode is enabled for the op.  */
+static bool
+qi_vector_p (by_pieces_operation op)
+{
+  return (op == COMPARE_BY_PIECES
+	  || op == SET_BY_PIECES
+	  || op == CLEAR_BY_PIECES);
+}
+
+/* Return true if optabs are available for the mode and by pieces
+   operations.  */
+static bool
+optabs_checking (fixed_size_mode mode, by_pieces_operation op)
+{
+  if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES)
+      && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing)
+    return true;
+  else if (op == COMPARE_BY_PIECES
+	   && optab_handler (mov_optab, mode) != CODE_FOR_nothing
+	   && can_compare_p (EQ, mode, ccp_jump))
+    return true;
+
+  return false;
+}
+
+/* Return the widest QI vector, if vector mode is enabled for the op,
+   or integer mode that is narrower than SIZE bytes.  */

 static fixed_size_mode
-widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
+widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op)
 {
   fixed_size_mode result = NARROWEST_INT_MODE;

   gcc_checking_assert (size > 1);

   /* Use QI vector only if size is wider than a WORD.  */
-  if (qi_vector && size > UNITS_PER_WORD)
+  if (qi_vector_p (op) && size > UNITS_PER_WORD)
     {
       machine_mode mode;
       fixed_size_mode candidate;
@@ -1009,8 +1034,7 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
 	  {
 	    if (GET_MODE_SIZE (candidate) >= size)
 	      break;
-	    if (optab_handler (vec_duplicate_optab, candidate)
-		!= CODE_FOR_nothing)
+	    if (optabs_checking (candidate, op))
 	      result = candidate;
 	  }

@@ -1061,8 +1085,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
     {
       /* NB: Round up L and ALIGN to the widest integer mode for
 	 MAX_SIZE.  */
-      mode = widest_fixed_size_mode_for_size (max_size,
-					      op == SET_BY_PIECES);
+      mode = widest_fixed_size_mode_for_size (max_size, op);
       if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
 	{
 	  unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
@@ -1076,8 +1099,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,

   while (max_size > 1 && l > 0)
     {
-      mode = widest_fixed_size_mode_for_size (max_size,
-					      op == SET_BY_PIECES);
+      mode = widest_fixed_size_mode_for_size (max_size, op);
       enum insn_code icode;

       unsigned int modesize = GET_MODE_SIZE (mode);
@@ -1319,6 +1341,8 @@ class op_by_pieces_d
   bool m_overlap_op_by_pieces;
   /* True if QI vector mode can be used.  */
   bool m_qi_vector_mode;
+  /* The types of by pieces operations.  */
+  by_pieces_operation m_op;

   /* Virtual functions, overriden by derived classes for the specific
      operation.  */
@@ -1327,11 +1351,12 @@ class op_by_pieces_d
   virtual void finish_mode (machine_mode)
   {
   }
+  fixed_size_mode widest_fixed_size_mode_for_size (unsigned int);

  public:
   op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
 		  void *, unsigned HOST_WIDE_INT, unsigned int, bool,
-		  bool = false);
+		  by_pieces_operation);
   void run ();
 };

@@ -1349,11 +1374,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
 				void *from_cfn_data,
 				unsigned HOST_WIDE_INT len,
 				unsigned int align, bool push,
-				bool qi_vector_mode)
+				by_pieces_operation op)
   : m_to (to, to_load, NULL, NULL),
     m_from (from, from_load, from_cfn, from_cfn_data),
     m_len (len), m_max_size (max_pieces + 1),
-    m_push (push), m_qi_vector_mode (qi_vector_mode)
+    m_push (push), m_op (op)
 {
   int toi = m_to.get_addr_inc ();
   int fromi = m_from.get_addr_inc ();
@@ -1375,8 +1400,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
     {
       /* Find the mode of the largest comparison.  */
       fixed_size_mode mode
-	= widest_fixed_size_mode_for_size (m_max_size,
-					   m_qi_vector_mode);
+	= ::widest_fixed_size_mode_for_size (m_max_size, m_op);

       m_from.decide_autoinc (mode, m_reverse, len);
       m_to.decide_autoinc (mode, m_reverse, len);
@@ -1401,7 +1425,7 @@ op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len)
       if (len >= size && prepare_mode (mode, m_align))
 	break;
       /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
-      mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode);
+      mode = widest_fixed_size_mode_for_size (size);
     }
   while (1);
   return mode;
@@ -1414,7 +1438,7 @@ fixed_size_mode
 op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
 {
   /* Use QI vector only for > size of WORD.  */
-  if (m_qi_vector_mode && size > UNITS_PER_WORD)
+  if (qi_vector_p (m_op) && size > UNITS_PER_WORD)
     {
       machine_mode mode;
       fixed_size_mode candidate;
@@ -1427,8 +1451,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
 	      break;

 	    if (GET_MODE_SIZE (candidate) >= size
-		&& (optab_handler (vec_duplicate_optab, candidate)
-		    != CODE_FOR_nothing))
+		&& optabs_checking (candidate, m_op))
 	      return candidate;
 	  }
     }
@@ -1451,7 +1474,7 @@ op_by_pieces_d::run ()

   /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1.  */
   fixed_size_mode mode
-    = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode);
+    = widest_fixed_size_mode_for_size (m_max_size);
   mode = get_usable_mode (mode, length);

   by_pieces_prev to_prev = { nullptr, mode };
@@ -1516,14 +1539,19 @@ op_by_pieces_d::run ()
       else
 	{
 	  /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
-	  mode = widest_fixed_size_mode_for_size (size,
-						  m_qi_vector_mode);
+	  mode = widest_fixed_size_mode_for_size (size);
 	  mode = get_usable_mode (mode, length);
 	}
     }
   while (1);
 }

+fixed_size_mode
+op_by_pieces_d::widest_fixed_size_mode_for_size (unsigned int size)
+{
+  return ::widest_fixed_size_mode_for_size (size, m_op);
+}
+
 /* Derived class from op_by_pieces_d, providing support for block move
    operations.  */

@@ -1543,7 +1571,7 @@ class move_by_pieces_d : public op_by_pieces_d
   move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
 		    unsigned int align)
     : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL,
-		      NULL, len, align, PUSHG_P (to))
+		      NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES)
   {
   }
   rtx finish_retmode (memop_ret);
@@ -1632,15 +1660,16 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
 class store_by_pieces_d : public op_by_pieces_d
 {
   insn_gen_fn m_gen_fun;
+
   void generate (rtx, rtx, machine_mode) final override;
   bool prepare_mode (machine_mode, unsigned int) final override;

  public:
   store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
 		     unsigned HOST_WIDE_INT len, unsigned int align,
-		     bool qi_vector_mode)
+		     by_pieces_operation op)
     : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn,
-		      cfn_data, len, align, false, qi_vector_mode)
+		      cfn_data, len, align, false, op)
   {
   }
   rtx finish_retmode (memop_ret);
@@ -1730,7 +1759,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
       while (max_size > 1 && l > 0)
 	{
 	  fixed_size_mode mode
-	    = widest_fixed_size_mode_for_size (max_size, memsetp);
+	    = widest_fixed_size_mode_for_size (max_size,
+		memsetp ? SET_BY_PIECES : STORE_BY_PIECES);

 	  icode = optab_handler (mov_optab, mode);
 	  if (icode != CODE_FOR_nothing
@@ -1793,7 +1823,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
 		 optimize_insn_for_speed_p ()));

   store_by_pieces_d data (to, constfun, constfundata, len, align,
-			  memsetp);
+			  memsetp ? SET_BY_PIECES : STORE_BY_PIECES);
   data.run ();

   if (retmode != RETURN_BEGIN)
@@ -1814,7 +1844,7 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
   /* Use builtin_memset_read_str to support vector mode broadcast.  */
   char c = 0;
   store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align,
-			  true);
+			  CLEAR_BY_PIECES);
   data.run ();
 }

@@ -1832,12 +1862,13 @@ class compare_by_pieces_d : public op_by_pieces_d
   void generate (rtx, rtx, machine_mode) final override;
   bool prepare_mode (machine_mode, unsigned int) final override;
   void finish_mode (machine_mode) final override;
+
  public:
   compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
 		       void *op1_cfn_data, HOST_WIDE_INT len, int align,
 		       rtx_code_label *fail_label)
     : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn,
-		      op1_cfn_data, len, align, false)
+		      op1_cfn_data, len, align, false, COMPARE_BY_PIECES)
   {
     m_fail_label = fail_label;
   }
@@ -1943,6 +1974,7 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,

   return target;
 }
+
 \f
 /* Emit code to move a block Y to a block X.  This may be done with
    string-move instructions, with multiple scalar move instructions,

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: PATCH-1v3, expand] Enable vector mode for compare_by_pieces [PR111449]
  2023-10-13  9:32 PATCH-1v3, expand] Enable vector mode for compare_by_pieces [PR111449] HAO CHEN GUI
@ 2023-10-16 21:51 ` Richard Sandiford
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Sandiford @ 2023-10-16 21:51 UTC (permalink / raw)
  To: HAO CHEN GUI
  Cc: gcc-patches, Segher Boessenkool, David, Kewen.Lin, Peter Bergner

Thanks for the update.  The comments below are mostly asking for
cosmetic changes.

HAO CHEN GUI <guihaoc@linux.ibm.com> writes:
> Hi,
>   Vector mode instructions are efficient for compare on some targets.
> This patch enables vector mode for compare_by_pieces. Currently,
> vector mode is enabled for compare, set and clear. Helper function
> "qi_vector_p" decides if vector mode is enabled for certain by pieces
> operation. optabs_checking checks if optabs are available for the
> mode and certain by pieces operations. Both of them are called in
> fixed_size_mode finding functions. A member is added to class
> op_by_pieces_d in order to record the type of by pieces operations.
>
>   The test case is in the second patch which is rs6000 specific.
>
>   Compared to last version, the main change is to create two helper
> functions and call them in mode finding function.
>
>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
> regressions.
>
> Thanks
> Gui Haochen
>
> ChangeLog
> Expand: Enable vector mode for pieces compares
>
> Vector mode compare instructions are efficient for equality compare on
> rs6000. This patch refactors the codes of pieces operation to enable
> vector mode for compare.
>
> gcc/
> 	PR target/111449
> 	* expr.cc (qi_vector_p): New function to indicate if vector mode
> 	is enabled for certain by pieces operations.
> 	(optabs_checking): New function to check if optabs are available
> 	for certain by pieces operations.
> 	(widest_fixed_size_mode_for_size): Replace the second argument
> 	with the type of by pieces operations.  Call qi_vector_p to check
> 	if vector mode is enable.  Call optabs_checking to check if optabs
> 	are available for the candidate vector mode.
> 	(by_pieces_ninsns): Pass the type of by pieces operation to
> 	widest_fixed_size_mode_for_size.
> 	(class op_by_pieces_d): Add a protected member m_op to record the
> 	type of by pieces operations.  Declare member function
> 	fixed_size_mode widest_fixed_size_mode_for_size.
> 	(op_by_pieces_d::op_by_pieces_d): Change last argument to the type
> 	of by pieces operations, initialize m_op with it.  Call non-member
> 	function widest_fixed_size_mode_for_size.
> 	(op_by_pieces_d::get_usable_mode): Call member function
> 	widest_fixed_size_mode_for_size.
> 	(op_by_pieces_d::smallest_fixed_size_mode_for_size): Call
> 	qi_vector_p to check if vector mode is enable.  Call
> 	optabs_checking to check if optabs are available for the candidate
> 	vector mode.
> 	(op_by_pieces_d::run): Call member function
> 	widest_fixed_size_mode_for_size.
> 	(op_by_pieces_d::widest_fixed_size_mode_for_size): Implement.
> 	(move_by_pieces_d::move_by_pieces_d): Set m_op to MOVE_BY_PIECES.
> 	(store_by_pieces_d::store_by_pieces_d): Set m_op with the op.
> 	(can_store_by_pieces): Pass the type of by pieces operations to
> 	widest_fixed_size_mode_for_size.
> 	(clear_by_pieces): Initialize class store_by_pieces_d with
> 	CLEAR_BY_PIECES.
> 	(compare_by_pieces_d::compare_by_pieces_d): Set m_op to
> 	COMPARE_BY_PIECES.
>
> patch.diff
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index d87346dc07f..8ec3f5465a9 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -988,18 +988,43 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
>    return align;
>  }
>
> -/* Return the widest QI vector, if QI_MODE is true, or integer mode
> -   that is narrower than SIZE bytes.  */
> +/* Return true if vector mode is enabled for the op.  */

Maybe:

/* Return true if we know how to implement OP using vectors of bytes.  */

"enabled" would normally imply target support.

> +static bool
> +qi_vector_p (by_pieces_operation op)

And maybe call the function "can_use_qi_vectors"

> +{
> +  return (op == COMPARE_BY_PIECES
> +	  || op == SET_BY_PIECES
> +	  || op == CLEAR_BY_PIECES);
> +}
> +
> +/* Return true if optabs are available for the mode and by pieces
> +   operations.  */

Maybe:

/* Return true if the target supports operation OP using vector mode MODE.  */

> +static bool
> +optabs_checking (fixed_size_mode mode, by_pieces_operation op)

And maybe call the function "qi_vector_mode_supported_p".

> +{
> +  if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES)
> +      && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing)
> +    return true;
> +  else if (op == COMPARE_BY_PIECES
> +	   && optab_handler (mov_optab, mode) != CODE_FOR_nothing
> +	   && can_compare_p (EQ, mode, ccp_jump))
> +    return true;
> +
> +  return false;
> +}
> +
> +/* Return the widest QI vector, if vector mode is enabled for the op,
> +   or integer mode that is narrower than SIZE bytes.  */

Maybe:

/* Return the widest mode that can be used to perform part of
   an operation OP on SIZE bytes.  Try to use QI vector modes
   where possible.  */
>
>  static fixed_size_mode
> -widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
> +widest_fixed_size_mode_for_size (unsigned int size, by_pieces_operation op)
>  {
>    fixed_size_mode result = NARROWEST_INT_MODE;
>
>    gcc_checking_assert (size > 1);
>
>    /* Use QI vector only if size is wider than a WORD.  */
> -  if (qi_vector && size > UNITS_PER_WORD)
> +  if (qi_vector_p (op) && size > UNITS_PER_WORD)
>      {
>        machine_mode mode;
>        fixed_size_mode candidate;
> @@ -1009,8 +1034,7 @@ widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
>  	  {
>  	    if (GET_MODE_SIZE (candidate) >= size)
>  	      break;
> -	    if (optab_handler (vec_duplicate_optab, candidate)
> -		!= CODE_FOR_nothing)
> +	    if (optabs_checking (candidate, op))
>  	      result = candidate;
>  	  }
>
> @@ -1061,8 +1085,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
>      {
>        /* NB: Round up L and ALIGN to the widest integer mode for
>  	 MAX_SIZE.  */
> -      mode = widest_fixed_size_mode_for_size (max_size,
> -					      op == SET_BY_PIECES);
> +      mode = widest_fixed_size_mode_for_size (max_size, op);
>        if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
>  	{
>  	  unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
> @@ -1076,8 +1099,7 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
>
>    while (max_size > 1 && l > 0)
>      {
> -      mode = widest_fixed_size_mode_for_size (max_size,
> -					      op == SET_BY_PIECES);
> +      mode = widest_fixed_size_mode_for_size (max_size, op);
>        enum insn_code icode;
>
>        unsigned int modesize = GET_MODE_SIZE (mode);
> @@ -1319,6 +1341,8 @@ class op_by_pieces_d
>    bool m_overlap_op_by_pieces;
>    /* True if QI vector mode can be used.  */
>    bool m_qi_vector_mode;

We should be able to remove this field.

> +  /* The types of by pieces operations.  */

Maybe: "The type of operation that we're performing."

> +  by_pieces_operation m_op;
>
>    /* Virtual functions, overriden by derived classes for the specific
>       operation.  */
> @@ -1327,11 +1351,12 @@ class op_by_pieces_d
>    virtual void finish_mode (machine_mode)
>    {
>    }
> +  fixed_size_mode widest_fixed_size_mode_for_size (unsigned int);

This function should no longer be needed.

>   public:
>    op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
>  		  void *, unsigned HOST_WIDE_INT, unsigned int, bool,
> -		  bool = false);
> +		  by_pieces_operation);
>    void run ();
>  };
>
> @@ -1349,11 +1374,11 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
>  				void *from_cfn_data,
>  				unsigned HOST_WIDE_INT len,
>  				unsigned int align, bool push,
> -				bool qi_vector_mode)
> +				by_pieces_operation op)
>    : m_to (to, to_load, NULL, NULL),
>      m_from (from, from_load, from_cfn, from_cfn_data),
>      m_len (len), m_max_size (max_pieces + 1),
> -    m_push (push), m_qi_vector_mode (qi_vector_mode)
> +    m_push (push), m_op (op)
>  {
>    int toi = m_to.get_addr_inc ();
>    int fromi = m_from.get_addr_inc ();
> @@ -1375,8 +1400,7 @@ op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
>      {
>        /* Find the mode of the largest comparison.  */
>        fixed_size_mode mode
> -	= widest_fixed_size_mode_for_size (m_max_size,
> -					   m_qi_vector_mode);
> +	= ::widest_fixed_size_mode_for_size (m_max_size, m_op);
>
>        m_from.decide_autoinc (mode, m_reverse, len);
>        m_to.decide_autoinc (mode, m_reverse, len);
> @@ -1401,7 +1425,7 @@ op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len)
>        if (len >= size && prepare_mode (mode, m_align))
>  	break;
>        /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
> -      mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode);
> +      mode = widest_fixed_size_mode_for_size (size);

It looks like this could be:

  mode = widest_fixed_size_mode_for_size (size, m_op);

>      }
>    while (1);
>    return mode;
> @@ -1414,7 +1438,7 @@ fixed_size_mode
>  op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
>  {
>    /* Use QI vector only for > size of WORD.  */
> -  if (m_qi_vector_mode && size > UNITS_PER_WORD)
> +  if (qi_vector_p (m_op) && size > UNITS_PER_WORD)
>      {
>        machine_mode mode;
>        fixed_size_mode candidate;
> @@ -1427,8 +1451,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
>  	      break;
>
>  	    if (GET_MODE_SIZE (candidate) >= size
> -		&& (optab_handler (vec_duplicate_optab, candidate)
> -		    != CODE_FOR_nothing))
> +		&& optabs_checking (candidate, m_op))
>  	      return candidate;
>  	  }
>      }
> @@ -1451,7 +1474,7 @@ op_by_pieces_d::run ()
>
>    /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1.  */
>    fixed_size_mode mode
> -    = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode);
> +    = widest_fixed_size_mode_for_size (m_max_size);

Same here.

>    mode = get_usable_mode (mode, length);
>
>    by_pieces_prev to_prev = { nullptr, mode };
> @@ -1516,14 +1539,19 @@ op_by_pieces_d::run ()
>        else
>  	{
>  	  /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
> -	  mode = widest_fixed_size_mode_for_size (size,
> -						  m_qi_vector_mode);
> +	  mode = widest_fixed_size_mode_for_size (size);

Here too.

>  	  mode = get_usable_mode (mode, length);
>  	}
>      }
>    while (1);
>  }
>
> +fixed_size_mode
> +op_by_pieces_d::widest_fixed_size_mode_for_size (unsigned int size)
> +{
> +  return ::widest_fixed_size_mode_for_size (size, m_op);
> +}
> +
>  /* Derived class from op_by_pieces_d, providing support for block move
>     operations.  */
>
> @@ -1543,7 +1571,7 @@ class move_by_pieces_d : public op_by_pieces_d
>    move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
>  		    unsigned int align)
>      : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL,
> -		      NULL, len, align, PUSHG_P (to))
> +		      NULL, len, align, PUSHG_P (to), MOVE_BY_PIECES)
>    {
>    }
>    rtx finish_retmode (memop_ret);
> @@ -1632,15 +1660,16 @@ move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
>  class store_by_pieces_d : public op_by_pieces_d
>  {
>    insn_gen_fn m_gen_fun;
> +
>    void generate (rtx, rtx, machine_mode) final override;
>    bool prepare_mode (machine_mode, unsigned int) final override;
>
>   public:
>    store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
>  		     unsigned HOST_WIDE_INT len, unsigned int align,
> -		     bool qi_vector_mode)
> +		     by_pieces_operation op)
>      : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn,
> -		      cfn_data, len, align, false, qi_vector_mode)
> +		      cfn_data, len, align, false, op)
>    {
>    }
>    rtx finish_retmode (memop_ret);
> @@ -1730,7 +1759,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
>        while (max_size > 1 && l > 0)
>  	{
>  	  fixed_size_mode mode
> -	    = widest_fixed_size_mode_for_size (max_size, memsetp);
> +	    = widest_fixed_size_mode_for_size (max_size,
> +		memsetp ? SET_BY_PIECES : STORE_BY_PIECES);

Stricly speaking, this doesn't follow the coding convention.  Maybe:

	  auto op = memsetp ? SET_BY_PIECES : STORE_BY_PIECES;
	  auto mode = widest_fixed_size_mode_for_size (max_size, op);

Looks good otherwise.  Pretty sure the next review iteration
will be the last. :)

Richard

>
>  	  icode = optab_handler (mov_optab, mode);
>  	  if (icode != CODE_FOR_nothing
> @@ -1793,7 +1823,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
>  		 optimize_insn_for_speed_p ()));
>
>    store_by_pieces_d data (to, constfun, constfundata, len, align,
> -			  memsetp);
> +			  memsetp ? SET_BY_PIECES : STORE_BY_PIECES);
>    data.run ();
>
>    if (retmode != RETURN_BEGIN)
> @@ -1814,7 +1844,7 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
>    /* Use builtin_memset_read_str to support vector mode broadcast.  */
>    char c = 0;
>    store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align,
> -			  true);
> +			  CLEAR_BY_PIECES);
>    data.run ();
>  }
>
> @@ -1832,12 +1862,13 @@ class compare_by_pieces_d : public op_by_pieces_d
>    void generate (rtx, rtx, machine_mode) final override;
>    bool prepare_mode (machine_mode, unsigned int) final override;
>    void finish_mode (machine_mode) final override;
> +
>   public:
>    compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
>  		       void *op1_cfn_data, HOST_WIDE_INT len, int align,
>  		       rtx_code_label *fail_label)
>      : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn,
> -		      op1_cfn_data, len, align, false)
> +		      op1_cfn_data, len, align, false, COMPARE_BY_PIECES)
>    {
>      m_fail_label = fail_label;
>    }
> @@ -1943,6 +1974,7 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,
>
>    return target;
>  }
> +
>  \f
>  /* Emit code to move a block Y to a block X.  This may be done with
>     string-move instructions, with multiple scalar move instructions,

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-10-16 21:51 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-13  9:32 PATCH-1v3, expand] Enable vector mode for compare_by_pieces [PR111449] HAO CHEN GUI
2023-10-16 21:51 ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).