public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Biener <rguenther@suse.de>
To: Tamar Christina <tamar.christina@arm.com>
Cc: gcc-patches@gcc.gnu.org, nd@arm.com, ook@ucw.cz
Subject: Re: [PATCH 1/2] middle-end : Initial scaffolding and definitions for SLP patttern matches
Date: Mon, 16 Nov 2020 14:16:24 +0100 (CET)	[thread overview]
Message-ID: <nycvar.YFH.7.76.2011161339140.10073@p653.nepu.fhfr.qr> (raw)
In-Reply-To: <patch-13785-tamar@arm.com>

On Sat, 14 Nov 2020, Tamar Christina wrote:

> Hi All,
> 
> This patch adds the pre-requisites and general scaffolding for supporting doing
> SLP pattern matching.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

Comments below.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* tree-vect-loop.c (vect_dissolve_slp_only_patterns): New.
> 	(vect_dissolve_slp_only_groups): Call here.
> 	* tree-vect-slp.c (vect_free_slp_tree, vect_create_new_slp_node): Export
> 	from file.
> 	(vect_build_slp_tree_2): Set vectype for externals.
> 	(vect_print_slp_tree): Print SLP only patterns.
> 	(optimize_load_redistribution_1, optimize_load_redistribution,
> 	vect_match_slp_patterns_2, vect_match_slp_patterns): New.
> 	(vect_analyze_slp): Call matcher.
> 	* tree-vectorizer.c (vec_info::add_pattern_stmt): Save relevancy.
> 	* tree-vectorizer.h (STMT_VINFO_SAVED_RELEVANT, vect_pop_relevancy,
> 	vect_dissolve_pattern_relevancy, vect_save_relevancy,
> 	vect_push_relevancy, vect_free_slp_tree, enum _complex_operation,
> 	class vect_pattern): New.
> 
> --- inline copy of patch --
> 
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 39b7319e8253c351a4f6fbdd8c154330f08f2b1b..791d9c6cb0649862a84fd3c80efc89fefedbb085 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -1979,6 +1979,61 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
>    return opt_result::success ();
>  }
>  
> +/* For every SLP only pattern created by the pattern matched rooted in ROOT
> +   restore the relevancy of the original statements over those of the pattern
> +   and destroy the pattern relationship.  This restores the SLP tree to a state
> +   where it can be used when SLP build is cancelled or re-tried.  */
> +
> +static void
> +vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo,
> +				 hash_set<slp_tree> *visited, slp_tree root)
> +{
> +  if (!root || visited->add (root))
> +    return;
> +
> +  unsigned int i;
> +  slp_tree node;
> +  stmt_vec_info related_stmt_info;
> +  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (root);
> +
> +  if (stmt_info && STMT_VINFO_SLP_VECT_ONLY (stmt_info))
> +    {
> +      vect_pop_relevancy (stmt_info);
> +      if ((related_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info)) != NULL)
> +	{
> +	  if (dump_enabled_p ())
> +	    dump_printf_loc (MSG_NOTE, vect_location,
> +			     "dissolving relevancy of %G",
> +			     STMT_VINFO_STMT (stmt_info));
> +	  vect_dissolve_pattern_relevancy (related_stmt_info);
> +	}
> +    }
> +
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
> +    vect_dissolve_slp_only_patterns (loop_vinfo, visited, node);
> +}
> +
> +/* Lookup any SLP Only Pattern statements created by the SLP pattern matcher in
> +   all slp_instances in LOOP_VINFO and undo the relevancy of statements such
> +   that the original SLP tree before the pattern matching is used.  */
> +
> +static void
> +vect_dissolve_slp_only_patterns (loop_vec_info loop_vinfo)
> +{
> +
> +  unsigned int i;
> +  hash_set<slp_tree> visited;
> +
> +  DUMP_VECT_SCOPE ("vect_dissolve_slp_only_patterns");
> +
> +  /* Unmark any SLP only patterns as relevant and restore the STMT_INFO of the
> +     related instruction.  */
> +  slp_instance instance;
> +  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
> +    vect_dissolve_slp_only_patterns (loop_vinfo, &visited,
> +				     SLP_INSTANCE_TREE (instance));
> +}
> +
>  /* Look for SLP-only access groups and turn each individual access into its own
>     group.  */
>  static void
> @@ -2583,6 +2638,9 @@ again:
>    /* Ensure that "ok" is false (with an opt_problem if dumping is enabled).  */
>    gcc_assert (!ok);
>  
> +  /* Dissolve any SLP patterns created by the SLP pattern matcher.  */
> +  vect_dissolve_slp_only_patterns (loop_vinfo);
> +
>    /* Try again with SLP forced off but if we didn't do any SLP there is
>       no point in re-trying.  */
>    if (!slp)
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 0c065e835ad13ad32d222e2590e05ef56849c411..3be565a2e566e09a9e42d6c77ba402b9499b06b6 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -105,7 +105,7 @@ _slp_tree::~_slp_tree ()
>  
>  /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
>  
> -static void
> +void
>  vect_free_slp_tree (slp_tree node)
>  {
>    int i;
> @@ -148,7 +148,7 @@ vect_free_slp_instance (slp_instance instance)
>  
>  /* Create an SLP node for SCALAR_STMTS.  */
>  
> -slp_tree
> +static slp_tree
>  vect_create_new_slp_node (slp_tree node,
>  			  vec<stmt_vec_info> scalar_stmts, unsigned nops)
>  {
> @@ -165,7 +165,7 @@ vect_create_new_slp_node (slp_tree node,
>  
>  /* Create an SLP node for SCALAR_STMTS.  */
>  
> -static slp_tree
> +slp_tree
>  vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
>  {
>    return vect_create_new_slp_node (new _slp_tree, scalar_stmts, nops);
> @@ -1646,6 +1646,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
>  	{
>  	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
>  	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
> +	  SLP_TREE_VECTYPE (invnode) = vectype;

This is wrong - the vector type of invariants is determined by
the consuming SLP stmts in vectorizable_*

>  	  oprnd_info->ops = vNULL;
>  	  children.safe_push (invnode);
>  	  continue;
> @@ -1929,6 +1930,13 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
>  	dump_printf (dump_kind, " %u", j);
>        dump_printf (dump_kind, " }\n");
>      }
> +  if (SLP_TREE_REPRESENTATIVE (node)
> +      && STMT_VINFO_SLP_VECT_ONLY (SLP_TREE_REPRESENTATIVE (node)))
> +    {
> +      dump_printf_loc (metadata, user_loc, "\tSLP Only pattern:\n");
> +      dump_printf_loc (dump_kind, user_loc, "\t %G",
> +		       STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
> +    }
>    if (SLP_TREE_LANE_PERMUTATION (node).exists ())
>      {
>        dump_printf_loc (metadata, user_loc, "\tlane permutation {");
> @@ -2174,6 +2182,219 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
>    return exact_div (common_multiple (nunits, group_size), group_size);
>  }
>  
> +/* Helper function of optimize_load_redistribution that performs the operation
> +   recursively.  */
> +
> +bool optimize_load_redistribution_1 (hash_set<slp_tree> *loads,
> +				     scalar_stmts_to_slp_tree_map_t *bst_map,
> +				     hash_set<slp_tree> *visited,
> +				     slp_tree parent, unsigned idx,
> +				     slp_tree root)
> +{
> +  if (visited->contains (root))
> +    return true;
> +  visited->add (root);

    if (visited->add (root))
      return true;

> +
> +  slp_tree node;
> +  unsigned i;
> +  stmt_vec_info dr_stmt = NULL;
> +
> +  /* For now, we don't know anything about externals so do not do anything.  */
> +  if (SLP_TREE_DEF_TYPE (root) == vect_external_def
> +      || SLP_TREE_DEF_TYPE (root) == vect_constant_def)
> +    return false;
> +
> +  if (gimple_assign_load_p (STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (root))))

The appropriate check is STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE 
(root)) && DR_IS_READ (STMT_VINFO_DATA_REF (...))

let's not mix gimple & SLP checks when not necessary.

> +    loads->add (root);
> +
> +  if (SLP_TREE_CODE (root) == VEC_PERM_EXPR

else if

> +      && SLP_TREE_LANE_PERMUTATION (root).exists ()
> +      && !SLP_TREE_SCALAR_STMTS (root).exists ())

why do !SLP_TREE_SCALAR_STMTS matter?

> +    {
> +

extra vertical space

> +      /* First convert this node into a load node and add it to the leaves
> +         list and flatten the permute from a lane to a load one.  If it's
> +         unneeded it will be elided later.  */
> +      auto_vec<stmt_vec_info> stmts;
> +      stmts.create (SLP_TREE_LANES (root));
> +      load_permutation_t load_perm;
> +      load_perm.create (SLP_TREE_LANES (root));
> +      lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);
> +      for (unsigned j = 0; j < lane_perm.length (); j++)
> +        {
> +          std::pair<unsigned, unsigned> perm = lane_perm[j];
> +	  /* This isn't strictly needed, but this function is a temporary
> +	     one for specifically pattern matching, so don't want it to
> +	     optimize things the remainder of the pipeline will.  */
> +	  if (perm.first != j)
> +	    goto next;
> +          node = SLP_TREE_CHILDREN (root)[perm.first];
> +	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
> +	   {
> +	      load_perm.release ();
> +	      return false;

you're possibly prematurely exiting the DFS walk on a two-operator
permute.  Ah, guess that's what the above check was for?  I guess
it's better to pre-check all children of the VEC_PERM node
to be proper, two(?) leafs with load permutation.

> +	   }
> +
> +	  stmt_vec_info rep_stmt = SLP_TREE_REPRESENTATIVE (node);
> +	  if (!STMT_VINFO_GROUPED_ACCESS (rep_stmt))
> +	    goto next;

I think for a node with a load permutation this never triggers.

> +
> +	  if (!dr_stmt)
> +	    dr_stmt = DR_GROUP_FIRST_ELEMENT (rep_stmt);
> +
> +	  if (dr_stmt != DR_GROUP_FIRST_ELEMENT (rep_stmt))
> +	    goto next;

So all of the above could be done on the children w/o allocating
and the rest on the loop iterating over the lanes.

> +	  stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
> +          load_perm.safe_push (SLP_TREE_LOAD_PERMUTATION (node)[perm.second]);
> +        }
> +
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_NOTE, vect_location,
> +			 "converting loads on permute node %p\n", root);
> +
> +      slp_tree *value = bst_map->get (stmts);
> +      if (value)
> +	node = *value;
> +      else
> +	{
> +	  /* One last iteration to free the nodes.  */
> +	  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
> +	    {
> +	      /* If we are the only reference to the node, remove the vertex.
> +		 We don't have to modify the graph since vertices lead the
> +		 graph traversal.  */
> +	      vect_free_slp_tree (node);
> +	    }

You should do this unconditionally (also if the bst_map lookup
succeeded), but after bumping the refcount for the use in this
node.

> +
> +	  vec<stmt_vec_info> stmts_cpy = stmts.copy ();
> +	  node = vect_create_new_slp_node (stmts_cpy.copy (), 0);
> +	  bst_map->put (stmts_cpy, node);
> +	}
> +      SLP_TREE_CHILDREN (parent)[idx] = node;

hmm, what is 'parent' - shouldn't this be 'root'?  Ah, I see what
you are doing.  I think you should do what optimize_slp does,
namely replace 'root' in-place so you do not have to adjust
parents (or even care for the case of multiple parents refering to
'root').  I see that doesn't easily work when attempting to CSE so
the actual CSE needs to happen below (*)

> +      SLP_TREE_REF_COUNT (node)++;
> +      SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (root);
> +      SLP_TREE_LOAD_PERMUTATION (node) = load_perm;
> +      loads->add (node);

Note with the load_lane check delayed we no longer need
to vect_gather_slp_loads so early so I suggest to simply
remove the existing early call and do it after
pattern matching instead.

> +      //do this up the recursive call.
> +      //vect_free_slp_tree (root);
> +
> +      return true;
> +    }
> +
> +next:
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
> +    optimize_load_redistribution_1 (loads, bst_map, visited, root, i, node);

(*) so here after recursing (and maybe returning that 'node' was changed)
try to CSE 'node' like

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
    if (optimize_load_redistribution_1 (loads, bst_map, visited, root, i, 
node))
      {
        if (slp_tree *value = bst_map->get (stmts))
          {
            SLP_TREE_CHILDREN (root)[i] = value;
            SLP_TREE_REFCNT (value)++;
            slp_tree_free (root);
          }
      }

or so.  Note that the lane configuration of the result of you
transform doesn't change, we're merely propagating the lanes
from the permuted loads to the VEC_PERM node (where we "missed" them)
and then we're doing a re-lookup to do the CSE.  Then of course
there's the part turning a VEC_PERM node to a load itself.

So I wonder if we can make sure SLP_TREE_SCALAR_STMTS (the output
lane config) for the VEC_PERM node is set appropriately by
pattern matching?  Or if we should make this a two-step thing,
propagate the lane setup and CSE and then turn VEC_PERMs into
permuted loads?

It might make sense to split out this optimization from the
patch.

> +
> +  return true;
> +}
> +
> +/* Temporary workaround for loads not being CSEd during SLP build.  This
> +   function will traverse the SLP tree rooted in ROOT for INSTANCE and find
> +   VEC_PERM nodes that blend vectors from multiple nodes that all read from the
> +   same DR such that the final operation is equal to a permuted load.  Such
> +   NODES are then directly converted into LOADS themselves.  The nodes are
> +   CSEd using BST_MAP.
> +
> +   Finally the LOADS in INSTANCE are updated with the current set of loads.  */
> +
> +bool optimize_load_redistribution (slp_instance instance,
> +				   scalar_stmts_to_slp_tree_map_t *bst_map,
> +				   slp_tree root)
> +{
> +  slp_tree node;
> +  unsigned i;
> +  hash_set<slp_tree> visited;
> +  hash_set<slp_tree> loads;
> +
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node)
> +    optimize_load_redistribution_1 (&loads, bst_map, &visited, root, i, node);
> +
> +  SLP_INSTANCE_LOADS (instance).truncate (0);
> +  for (hash_set<slp_tree>::iterator it = loads.begin ();
> +       it != loads.end (); ++it)
> +    SLP_INSTANCE_LOADS (instance).safe_push (*it);
> +
> +  return true;
> +}
> +
> +/* Helper function of vect_match_slp_patterns.
> +
> +   Attempts to match patterns against the slp tree rooted in REF_NODE using
> +   VINFO.  Patterns are matched in post-order traversal.
> +
> +   If matching is successful the value in REF_NODE is updated and returned, if
> +   not then it is returned unchanged.  */
> +
> +static bool
> +vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo,
> +			   slp_tree_to_load_perm_map_t *perm_cache,
> +			   hash_set<slp_tree> *visited)
> +{
> +  unsigned i;
> +  slp_tree node = *ref_node;
> +  bool found_p = false, found_rec_p = false;
> +  if (!node || visited->add (node))
> +    return false;
> +
> +  slp_tree child;
> +  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +    found_rec_p |= vect_match_slp_patterns_2 (&SLP_TREE_CHILDREN (node)[i],
> +					      vinfo, perm_cache, visited);
> +
> +  for (unsigned x = 0; x < num__slp_patterns; x++)
> +    {
> +      vect_pattern *pattern = slp_patterns[x] (ref_node);
> +      found_p = pattern->recognize (perm_cache, vinfo);
> +      delete pattern;
> +      found_rec_p = found_p | found_rec_p;
> +    }
> +
> +  return found_rec_p;
> +}
> +
> +/* Applies pattern matching to the given SLP tree rooted in REF_NODE using
> +   vec_info VINFO.
> +
> +   The modified tree is returned.  Patterns are tried in order and multiple
> +   patterns may match.  */
> +
> +static bool
> +vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
> +			 hash_set<slp_tree> *visited,
> +			 slp_tree_to_load_perm_map_t *perm_cache,
> +			 scalar_stmts_to_slp_tree_map_t *bst_map)
> +{
> +  DUMP_VECT_SCOPE ("vect_match_slp_patterns");
> +  slp_tree *ref_node = &SLP_INSTANCE_TREE (instance);
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +		     "Analyzing SLP tree %p for patterns\n",
> +		     SLP_INSTANCE_TREE (instance));
> +
> +  bool found_p
> +    = vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
> +
> +  if (found_p)
> +    {
> +      optimize_load_redistribution (instance, bst_map, *ref_node);
> +
> +      if (dump_enabled_p ())
> +	{
> +	  dump_printf_loc (MSG_NOTE, vect_location,
> +			   "Pattern matched SLP tree\n");
> +	  vect_print_slp_graph (MSG_NOTE, vect_location, *ref_node);
> +	}
> +    }
> +
> +  return found_p;
> +}
> +
> +/* Analyze an SLP instance starting from a group of grouped stores.  Call
> +   vect_build_slp_tree to build a tree of packed stmts if possible.
> +   Return FALSE if it's impossible to SLP any stmt in the loop.  */
> +
>  static bool
>  vect_analyze_slp_instance (vec_info *vinfo,
>  			   scalar_stmts_to_slp_tree_map_t *bst_map,
> @@ -2540,6 +2761,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
>  {
>    unsigned int i;
>    stmt_vec_info first_element;
> +  slp_instance instance;
>  
>    DUMP_VECT_SCOPE ("vect_analyze_slp");
>  
> @@ -2586,6 +2808,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
>  				   slp_inst_kind_reduc_group, max_tree_size);
>      }
>  
> +  hash_set<slp_tree> visited_patterns;
> +  slp_tree_to_load_perm_map_t perm_cache;
> +  /* See if any patterns can be found in the SLP tree.  */
> +  FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (vinfo), i, instance)
> +    vect_match_slp_patterns (instance, vinfo, &visited_patterns, &perm_cache,
> +			     bst_map);
> +
>    /* The map keeps a reference on SLP nodes built, release that.  */
>    for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
>         it != bst_map->end (); ++it)
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index fa211b95c0e54be1d51ed949d7a06c31b7b50802..3b49ce22f7aae0465dbd0b24cbf48ae054c31d22 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -27,6 +27,7 @@ typedef class _stmt_vec_info *stmt_vec_info;
>  #include "tree-hash-traits.h"
>  #include "target.h"
>  #include "alloc-pool.h"
> +#include "internal-fn.h"
>  
>  
>  /* Used for naming of new temporaries.  */
> @@ -1118,6 +1119,11 @@ public:
>       indicates whether the stmt needs to be vectorized.  */
>    enum vect_relevant relevant;
>  
> +  /* During SLP vectorization we may need to change the relevancy of a statement
> +     but restore it during dissolving of SLP nodes.  This field contains a copy
> +     of the original relevancy analysis.  */
> +  enum vect_relevant saved_relevant;
> +
>    /* For loads if this is a gather, for stores if this is a scatter.  */
>    bool gather_scatter_p;
>  
> @@ -1240,6 +1246,7 @@ struct gather_scatter_info {
>  #define STMT_VINFO_TYPE(S)                 (S)->type
>  #define STMT_VINFO_STMT(S)                 (S)->stmt
>  #define STMT_VINFO_RELEVANT(S)             (S)->relevant
> +#define STMT_VINFO_SAVED_RELEVANT(S)       (S)->saved_relevant
>  #define STMT_VINFO_LIVE_P(S)               (S)->live
>  #define STMT_VINFO_VECTYPE(S)              (S)->vectype
>  #define STMT_VINFO_VEC_STMTS(S)            (S)->vec_stmts
> @@ -1368,6 +1375,46 @@ vect_orig_stmt (stmt_vec_info stmt_info)
>    return stmt_info;
>  }
>  
> +/* If restore the saved relevancy information STMT_INFO from the copy made
> +   during SLP pattern detection.  */
> +
> +static inline void
> +vect_pop_relevancy (stmt_vec_info stmt_info)
> +{
> +  STMT_VINFO_RELEVANT (stmt_info) = STMT_VINFO_SAVED_RELEVANT (stmt_info);
> +}

Calling the single-level "stack" a stack is a bit odd.  Since
you already have vect_save_relevancy () just call ig
vect_restore_relevancy ()?

> +
> +/* Restores the saved relevancy of STMT_INFO and marks it as not being inside a
> +   pattern.  Lastly the SLP_TYPE is set to loop_vect.  */
> +
> +static inline void
> +vect_dissolve_pattern_relevancy (stmt_vec_info stmt_info)
> +{
> +  vect_pop_relevancy (stmt_info);
> +  STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
> +  STMT_SLP_TYPE (stmt_info) = loop_vect;
> +}
> +
> +/* Save the current relevancy of STMT_INFO such that it can be restored by
> +   vect_pop_relevancy.  */
> +
> +static inline void
> +vect_save_relevancy (stmt_vec_info stmt_info)
> +{
> +  STMT_VINFO_SAVED_RELEVANT (stmt_info)
> +    = STMT_VINFO_RELEVANT (stmt_info);
> +}
> +
> +/* Save the current relevancy of STMT_INFO before changing it to REL.  */
> +
> +static inline void
> +vect_push_relevancy (stmt_vec_info stmt_info, enum vect_relevant rel)
> +{
> +  vect_save_relevancy (stmt_info);
> +  STMT_VINFO_RELEVANT (stmt_info) = rel;
> +}

See above - maybe combine save and push by doing

vect_set_slp_relevancy (...)

and

vect_restore_loop_relevancy (...)

rather than the four separate functions.

It would be nice if the vect_dissolve_slp_only_patterns work
could be done fully from the IL walk we do here:

  /* Reset SLP type to loop_vect on all stmts.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator si = gsi_start_phis (bb);
           !gsi_end_p (si); gsi_next (&si))
        {
...

where we also reset STMT_VINFO_SLP_TYPE.

> +
>  /* Return the later statement between STMT1_INFO and STMT2_INFO.  */
>  
>  static inline stmt_vec_info
> @@ -1993,6 +2040,7 @@ extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
>  extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
>  extern bool vect_update_shared_vectype (stmt_vec_info, tree);
>  extern slp_tree vect_create_new_slp_node (vec<stmt_vec_info>, unsigned);
> +extern void vect_free_slp_tree (slp_tree);
>  
>  /* In tree-vect-patterns.c.  */
>  extern void
> @@ -2009,4 +2057,108 @@ void vect_free_loop_info_assumptions (class loop *);
>  gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
>  bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
>  
> +/* SLP Pattern matcher types, tree-vect-slp-patterns.c.  */
> +
> +/* Forward declaration of possible two operands operation that can be matched
> +   by the complex numbers pattern matchers.  */
> +enum _complex_operation : unsigned;
> +
> +/* Cache from nodes to the load permutation they represent.  */
> +typedef hash_map <slp_tree, load_permutation_t >
> +  slp_tree_to_load_perm_map_t;
> +
> +/* Vector pattern matcher base class.  All SLP pattern matchers must inherit
> +   from this type.  */
> +
> +class vect_pattern
> +{
> +  protected:
> +    /* The number of arguments that the IFN requires.  */
> +    unsigned m_num_args;
> +
> +    /* The internal function that will be used when a pattern is created.  */
> +    internal_fn m_ifn;
> +
> +    /* The current node being inspected.  */
> +    slp_tree *m_node;
> +
> +    /* The list of operands to be the children for the node produced when the
> +       internal function is created.  */
> +    vec<slp_tree> m_ops;
> +
> +    /* Default constructor where NODE is the root of the tree to inspect.  */
> +    vect_pattern (slp_tree *node)
> +    {
> +      this->m_ifn = IFN_LAST;
> +      this->m_node = node;
> +      this->m_ops.create (0);
> +    }
> +
> +  public:
> +    /* Attempt to recognize a pattern, validate and update the tree rooted in
> +       M_NODE.  */
> +    virtual bool recognize (slp_tree_to_load_perm_map_t *, vec_info *);
> +
> +    /* Only perform the pattern creation part of the matcher.  This creates and
> +       returns the new pattern statement.  */
> +    virtual gcall *build (vec_info *) = 0;
> +
> +    /* Performs a check to see if the matched IFN is supported by the current
> +       target.  */
> +    virtual bool is_optab_supported_p (tree vectype, optimization_type opt_type)
> +    {
> +      if (!vectype)
> +        return false;
> +
> +      return direct_internal_fn_supported_p (this->m_ifn, vectype, opt_type);
> +    }
> +
> +    /* Create a new instance of the pattern matcher class of the given type.  */
> +    static vect_pattern* create (slp_tree *);
> +
> +    /* Match but do not perform any additional operations on the SLP tree.  */
> +    virtual bool matches (slp_tree_to_load_perm_map_t *) = 0;
> +
> +    /* Match but use for the first operation the supplied COMPLEX_OPERATION.  No
> +       additional operations or modification of the SLP tree are performed.  */
> +    virtual bool matches (enum _complex_operation,
> +			  slp_tree_to_load_perm_map_t *, vec<slp_tree>)
> +    {
> +      return false;
> +    }
> +
> +    /* Friendly name of the operation the pattern matches.  */
> +    virtual const char* get_name () = 0;
> +
> +    /* Default destructor.  */
> +    virtual ~vect_pattern ()
> +    {
> +	this->m_ops.release ();
> +    }
> +
> +    /* Check to see if the matched tree is valid for the operation the matcher
> +       wants.  If the operation is valid then the tree is reshaped in the final
> +       format that build () requires.  */
> +    virtual bool validate_p (slp_tree_to_load_perm_map_t *)
> +    {
> +      return true;
> +    }
> +
> +    /* Return the matched internal function.  If no match was done this is set
> +       to LAST_IFN.  */
> +    virtual internal_fn get_ifn ()
> +    {
> +      return this->m_ifn;
> +    }
> +};
> +
> +/* Function pointer to create a new pattern matcher from a generic type.  */
> +typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree *);
> +
> +/* List of supported pattern matchers.  */
> +extern vect_pattern_decl_t slp_patterns[];
> +
> +/* Number of supported pattern matchers.  */
> +extern size_t num__slp_patterns;
> +
>  #endif  /* GCC_TREE_VECTORIZER_H  */
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index d81774b242569262a51b7be02815acd6d1a6bfd0..2a6ddd685922f6b60ae1305974335fb863a2af39 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -535,6 +535,8 @@ vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
>    stmt_vec_info res = new_stmt_vec_info (stmt);
>    set_vinfo_for_stmt (stmt, res, false);
>    STMT_VINFO_RELATED_STMT (res) = stmt_info;
> +  vect_save_relevancy (stmt_info);
> +  vect_push_relevancy (res, STMT_VINFO_RELEVANT (stmt_info));

Hmmm, that looks like an odd place to do this.  I suspect it's not
the "final" modification of either relevancy?

Can we get rid of this hunk somehow?

Thanks,
Richard.

>    return res;
>  }
>  
> 
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend

  parent reply	other threads:[~2020-11-16 13:16 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-14 14:37 Tamar Christina
2020-11-14 14:39 ` [PATCH 2/2] middle-end : Add support for complex pattern detection for Add, Mul, FMA and FMS Tamar Christina
2020-11-16 15:11   ` Richard Biener
2020-11-16 15:54     ` Tamar Christina
2020-11-17  7:50       ` Richard Biener
2020-11-16 13:16 ` Richard Biener [this message]
2020-11-16 13:33   ` [PATCH 1/2] middle-end : Initial scaffolding and definitions for SLP patttern matches Richard Biener

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=nycvar.YFH.7.76.2011161339140.10073@p653.nepu.fhfr.qr \
    --to=rguenther@suse.de \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=nd@arm.com \
    --cc=ook@ucw.cz \
    --cc=tamar.christina@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).