public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Sandiford <richard.sandiford@arm.com>
To: Richard Biener <rguenther@suse.de>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] tree-optimization/111115 - SLP of masked stores
Date: Thu, 24 Aug 2023 10:38:21 +0100	[thread overview]
Message-ID: <mpt7cpkaigi.fsf@arm.com> (raw)
In-Reply-To: <d8d44ad7-52b0-4d07-ad19-84acf702f19b@DBAEUR03FT055.eop-EUR03.prod.protection.outlook.com> (Richard Biener's message of "Wed, 23 Aug 2023 13:24:17 +0000 (UTC)")

Richard Biener <rguenther@suse.de> writes:
> The following adds the capability to do SLP on .MASK_STORE; I do not
> plan to add interleaving support.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?

LGTM, thanks.

Richard

> Thanks,
> Richard.
>
> 	PR tree-optimization/111115
> gcc/
> 	* tree-vectorizer.h (vect_slp_child_index_for_operand): New.
> 	* tree-vect-data-refs.cc (can_group_stmts_p): Also group
> 	.MASK_STORE.
> 	* tree-vect-slp.cc (arg3_arg2_map): New.
> 	(vect_get_operand_map): Handle IFN_MASK_STORE.
> 	(vect_slp_child_index_for_operand): New function.
> 	(vect_build_slp_tree_1): Handle statements with no LHS,
> 	masked store ifns.
> 	(vect_remove_slp_scalar_calls): Likewise.
> 	* tree-vect-stmts.cc (vect_check_store_rhs): Lookup the
> 	SLP child corresponding to the ifn value index.
> 	(vectorizable_store): Likewise for the mask index.  Support
> 	masked stores.
> 	(vectorizable_load): Lookup the SLP child corresponding to the
> 	ifn mask index.
>
> gcc/testsuite/
> 	* lib/target-supports.exp (check_effective_target_vect_masked_store):
> 	Supported with check_avx_available.
> 	* gcc.dg/vect/slp-mask-store-1.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c | 39 +++++++++++++++++
>  gcc/testsuite/lib/target-supports.exp        |  3 +-
>  gcc/tree-vect-data-refs.cc                   |  3 +-
>  gcc/tree-vect-slp.cc                         | 46 +++++++++++++++++---
>  gcc/tree-vect-stmts.cc                       | 23 +++++-----
>  gcc/tree-vectorizer.h                        |  1 +
>  6 files changed, 94 insertions(+), 21 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
> new file mode 100644
> index 00000000000..50b7066778e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-mavx2" { target avx2 } } */
> +
> +#include "tree-vect.h"
> +
> +void __attribute__((noipa))
> +foo (unsigned * __restrict x, int * __restrict flag)
> +{
> +  for (int i = 0; i < 32; ++i)
> +    {
> +      if (flag[2*i+0])
> +        x[2*i+0] = x[2*i+0] + 3;
> +      if (flag[2*i+1])
> +        x[2*i+1] = x[2*i+1] + 177;
> +    }
> +}
> +
> +unsigned x[16];
> +int flag[32] = { 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
> +                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
> +unsigned res[16] = { 3, 177, 0, 0, 0, 177, 3, 0, 3, 177, 0, 0, 0, 177, 3, 0 };
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +
> +  foo (x, flag);
> +
> +  if (__builtin_memcmp (x, res, sizeof (x)) != 0)
> +    abort ();
> +  for (int i = 0; i < 32; ++i)
> +    if (flag[i] != 0 && flag[i] != 1)
> +      abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { vect_masked_store && vect_masked_load } } } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index d4623ee6b45..d353cc0aaf0 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -8400,7 +8400,8 @@ proc check_effective_target_vect_masked_load { } {
>  # Return 1 if the target supports vector masked stores.
>  
>  proc check_effective_target_vect_masked_store { } {
> -    return [expr { [check_effective_target_aarch64_sve]
> +    return [expr { [check_avx_available]
> +		   || [check_effective_target_aarch64_sve]
>  		   || [istarget amdgcn*-*-*] }]
>  }
>  
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 3e9a284666c..a2caf6cb1c7 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -3048,8 +3048,7 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
>  	 like those created by build_mask_conversion.  */
>        tree mask1 = gimple_call_arg (call1, 2);
>        tree mask2 = gimple_call_arg (call2, 2);
> -      if (!operand_equal_p (mask1, mask2, 0)
> -          && (ifn == IFN_MASK_STORE || !allow_slp_p))
> +      if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
>  	{
>  	  mask1 = strip_conversion (mask1);
>  	  if (!mask1)
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index b5f9333fc22..cc799b6ebcd 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -503,6 +503,7 @@ static const int cond_expr_maps[3][5] = {
>  static const int arg1_map[] = { 1, 1 };
>  static const int arg2_map[] = { 1, 2 };
>  static const int arg1_arg4_map[] = { 2, 1, 4 };
> +static const int arg3_arg2_map[] = { 2, 3, 2 };
>  static const int op1_op0_map[] = { 2, 1, 0 };
>  
>  /* For most SLP statements, there is a one-to-one mapping between
> @@ -543,6 +544,9 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
>  	  case IFN_MASK_GATHER_LOAD:
>  	    return arg1_arg4_map;
>  
> +	  case IFN_MASK_STORE:
> +	    return arg3_arg2_map;
> +
>  	  default:
>  	    break;
>  	  }
> @@ -550,6 +554,20 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
>    return nullptr;
>  }
>  
> +/* Return the SLP node child index for operand OP of STMT.  */
> +
> +int
> +vect_slp_child_index_for_operand (const gimple *stmt, int op)
> +{
> +  const int *opmap = vect_get_operand_map (stmt);
> +  if (!opmap)
> +    return op;
> +  for (int i = 1; i < 1 + opmap[0]; ++i)
> +    if (opmap[i] == op)
> +      return i - 1;
> +  gcc_unreachable ();
> +}
> +
>  /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
>     they are of a valid type and that they match the defs of the first stmt of
>     the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
> @@ -1003,8 +1021,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
>            return false;
>          }
>  
> +      gcall *call_stmt = dyn_cast <gcall *> (stmt);
>        lhs = gimple_get_lhs (stmt);
> -      if (lhs == NULL_TREE)
> +      if (lhs == NULL_TREE
> +	  && (!call_stmt
> +	      || !gimple_call_internal_p (stmt)
> +	      || !internal_store_fn_p (gimple_call_internal_fn (stmt))))
>  	{
>  	  if (dump_enabled_p ())
>  	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -1041,7 +1063,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
>  
>        gcc_assert (vectype);
>  
> -      gcall *call_stmt = dyn_cast <gcall *> (stmt);
>        if (call_stmt)
>  	{
>  	  combined_fn cfn = gimple_call_combined_fn (call_stmt);
> @@ -1054,6 +1075,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
>  	      || cfn == CFN_GATHER_LOAD
>  	      || cfn == CFN_MASK_GATHER_LOAD)
>  	    load_p = true;
> +	  else if (cfn == CFN_MASK_STORE)
> +	    rhs_code = CFN_MASK_STORE;
>  	  else if ((internal_fn_p (cfn)
>  		    && !vectorizable_internal_fn_p (as_internal_fn (cfn)))
>  		   || gimple_call_tail_p (call_stmt)
> @@ -1212,7 +1235,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
>  	      continue;
>  	    }
>  
> -	  if (call_stmt && first_stmt_code != CFN_MASK_LOAD)
> +	  if (call_stmt
> +	      && first_stmt_code != CFN_MASK_LOAD
> +	      && first_stmt_code != CFN_MASK_STORE)
>  	    {
>  	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
>  				       call_stmt))
> @@ -1266,9 +1291,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
>        /* Grouped store or load.  */
>        if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
>  	{
> -	  if (REFERENCE_CLASS_P (lhs))
> +	  if (!load_p)
>  	    {
>  	      /* Store.  */
> +	      gcc_assert (rhs_code == CFN_MASK_STORE
> +			  || REFERENCE_CLASS_P (lhs));
>  	      ;
>  	    }
>  	  else
> @@ -9090,10 +9117,17 @@ vect_remove_slp_scalar_calls (vec_info *vinfo,
>  	  || !PURE_SLP_STMT (stmt_info))
>  	continue;
>        lhs = gimple_call_lhs (stmt);
> -      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
> +      if (lhs)
> +	new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
> +      else
> +	{
> +	  new_stmt = gimple_build_nop ();
> +	  unlink_stmt_vdef (stmt_info->stmt);
> +	}
>        gsi = gsi_for_stmt (stmt);
>        vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
> -      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
> +      if (lhs)
> +	SSA_NAME_DEF_STMT (lhs) = new_stmt;
>      }
>  }
>  
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 413a88750d6..31b73b08e62 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -2629,12 +2629,14 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
>        return false;
>      }
>  
> -  unsigned op_no = 0;
> +  int op_no = 0;
>    if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
>      {
>        if (gimple_call_internal_p (call)
>  	  && internal_store_fn_p (gimple_call_internal_fn (call)))
>  	op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
> +      if (slp_node)
> +	op_no = vect_slp_child_index_for_operand (call, op_no);
>      }
>  
>    enum vect_def_type rhs_dt;
> @@ -8244,15 +8246,9 @@ vectorizable_store (vec_info *vinfo,
>        if (!internal_store_fn_p (ifn))
>  	return false;
>  
> -      if (slp_node != NULL)
> -	{
> -	  if (dump_enabled_p ())
> -	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -			     "SLP of masked stores not supported.\n");
> -	  return false;
> -	}
> -
>        int mask_index = internal_fn_mask_index (ifn);
> +      if (mask_index >= 0 && slp_node)
> +	mask_index = vect_slp_child_index_for_operand (call, mask_index);
>        if (mask_index >= 0
>  	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
>  				      &mask, NULL, &mask_dt, &mask_vectype))
> @@ -9093,8 +9089,10 @@ vectorizable_store (vec_info *vinfo,
>  	    {
>  	      /* Get vectorized arguments for SLP_NODE.  */
>  	      vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
> -				 &vec_oprnds);
> +				 &vec_oprnds, mask, &vec_masks);
>  	      vec_oprnd = vec_oprnds[0];
> +	      if (mask)
> +		vec_mask = vec_masks[0];
>  	    }
>  	  else
>  	    {
> @@ -9191,6 +9189,8 @@ vectorizable_store (vec_info *vinfo,
>  	    final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
>  					     vec_num * ncopies, vectype,
>  					     vec_num * j + i);
> +	  if (slp && vec_mask)
> +	    vec_mask = vec_masks[i];
>  	  if (vec_mask)
>  	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
>  					   vec_mask, gsi);
> @@ -9575,9 +9575,8 @@ vectorizable_load (vec_info *vinfo,
>  	return false;
>  
>        mask_index = internal_fn_mask_index (ifn);
> -      /* ??? For SLP the mask operand is always last.  */
>        if (mask_index >= 0 && slp_node)
> -	mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
> +	mask_index = vect_slp_child_index_for_operand (call, mask_index);
>        if (mask_index >= 0
>  	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
>  				      &mask, NULL, &mask_dt, &mask_vectype))
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 53a3d78d545..f1d0cd79961 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2429,6 +2429,7 @@ extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
>  extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
>  extern void vect_free_slp_tree (slp_tree);
>  extern bool compatible_calls_p (gcall *, gcall *);
> +extern int vect_slp_child_index_for_operand (const gimple *, int op);
>  
>  /* In tree-vect-patterns.cc.  */
>  extern void

       reply	other threads:[~2023-08-24  9:38 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <d8d44ad7-52b0-4d07-ad19-84acf702f19b@DBAEUR03FT055.eop-EUR03.prod.protection.outlook.com>
2023-08-24  9:38 ` Richard Sandiford [this message]
2023-08-24 12:18   ` Robin Dapp
2023-08-24 12:43     ` Richard Biener
2023-08-23 13:24 Richard Biener

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=mpt7cpkaigi.fsf@arm.com \
    --to=richard.sandiford@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=rguenther@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).