public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Philipp Tomsich <ptomsich@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/vendors/vrull/heads/slp-improvements)] Build SLP tree for nodes with shared statements
Date: Tue, 28 Nov 2023 13:35:00 +0000 (GMT)	[thread overview]
Message-ID: <20231128133500.38A593836E8E@sourceware.org> (raw)

https://gcc.gnu.org/g:c8eb943687266e13cdd95286518cdeed2508237a

commit c8eb943687266e13cdd95286518cdeed2508237a
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date:   Mon Oct 30 17:11:34 2023 +0100

    Build SLP tree for nodes with shared statements
    
    When an SLP tree has multiple occurances of the same statement in a
    vector node (e.g. {A, B, A, B, ...} it can be that this duplication
    hurts the efficacy of the tree building procedures, compared to what
    can be achieved if we used a ordered node with no duplicates (i.e.,
    {A, B, ...}) and then did a permute.
    
    This is especially true when the two_operators conditions are met,
    where nodes are duplicated and if they have duplicates there is an
    'exponential explosion' of nodes that end up as splat vectors.
    
    This is a first iteration that improves these cases by matching some
    patterns and deduplicates + permutes them appropriately. This
    currently targets the vectorization of the first loop in x264's
    pixel_satd functions.
    
    See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98138
    
    Ref #342

Diff:
---
 gcc/tree-vect-slp.cc | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4a09b3c2aca..681f72c43fe 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-vectorizer.h"
 #include "langhooks.h"
 #include "gimple-walk.h"
+#include "gimple-pretty-print.h"
 #include "dbgcnt.h"
 #include "tree-vector-builder.h"
 #include "vec-perm-indices.h"
@@ -1831,6 +1832,109 @@ vect_slp_build_two_operator_nodes (slp_tree perm, tree vectype,
   SLP_TREE_CHILDREN (perm).quick_push (child2);
 }
 
+static int
+try_rearrange_oprnd_info (vec<slp_oprnd_info> &oprnds_info, unsigned group_size)
+{
+  unsigned i;
+  slp_oprnd_info oprnd_info;
+
+  /* A more generalized version is WIP but this is generic enough anyway.  */
+  if (oprnds_info.length() != 2 || group_size < 4)
+    return 0;
+
+  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
+    for (unsigned int j = 0; j < group_size; j += 1)
+      if (!oprnd_info->def_stmts[j])
+	return 0;
+
+  int pattern = 0;
+  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
+    for (unsigned int j = 0; j < group_size; j += 4)
+      {
+	int cur_pattern = 0;
+	/* Check for an ABAB... pattern.  */
+	if ((oprnd_info->def_stmts[j] == oprnd_info->def_stmts[j + 2])
+	    && (oprnd_info->def_stmts[j + 1] == oprnd_info->def_stmts[j + 3]))
+	  cur_pattern = 1;
+	/* Check for an AABB... pattern.  */
+	else if ((oprnd_info->def_stmts[j] == oprnd_info->def_stmts[j + 1])
+		 && (oprnd_info->def_stmts[j + 2] == oprnd_info->def_stmts[j + 3]))
+	  cur_pattern = 2;
+	/* Check for an ABBA... pattern.  */
+	else if ((oprnd_info->def_stmts[j] == oprnd_info->def_stmts[j + 3])
+		 && (oprnd_info->def_stmts[j + 1] == oprnd_info->def_stmts[j + 2]))
+	  cur_pattern = 3;
+	/* Unrecognised pattern.  */
+	else
+	  return 0;
+
+	if (pattern == 0)
+	  pattern = cur_pattern;
+	/* Multiple patterns detected.  */
+	else if (cur_pattern != pattern)
+	  return 0;
+      }
+
+  gcc_checking_assert (pattern);
+
+  if (dump_enabled_p ())
+    {
+      if (pattern == 1)
+	dump_printf (MSG_NOTE, "ABAB");
+      else if (pattern == 2)
+	dump_printf (MSG_NOTE, "AABB");
+      else if (pattern == 3)
+	dump_printf (MSG_NOTE, "ABBA");
+      dump_printf (MSG_NOTE, " pattern detected.\n");
+    }
+
+  if (pattern == 1)
+    for (unsigned int j = 0; j < group_size; j += 4)
+      {
+	oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j];
+	oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j];
+	oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+1];
+	oprnds_info[0]->ops[j+3] = oprnds_info[1]->ops[j+1];
+      }
+  else if (pattern == 2)
+    for (unsigned int j = 0; j < group_size; j += 4)
+      {
+	/* The ordering here is at least to some extent arbitrary.
+	   A generilized version needs to use some explicit ordering.  */
+	oprnds_info[0]->def_stmts[j+1] = oprnds_info[1]->def_stmts[j];
+	oprnds_info[0]->ops[j+1] = oprnds_info[1]->ops[j];
+	oprnds_info[0]->def_stmts[j+2] = oprnds_info[0]->def_stmts[j+2];
+	oprnds_info[0]->ops[j+2] = oprnds_info[0]->ops[j+2];
+	oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+2];
+	oprnds_info[0]->ops[j+3] = oprnds_info[1]->ops[j+2];
+      }
+  else if (pattern == 3)
+    /* No need to handle that for now.  */
+    return 0;
+
+  if (dump_enabled_p ())
+    {
+      dump_printf (MSG_NOTE, "Recurse with:\n");
+      for (unsigned int j = 0; j < group_size; j++)
+	{
+	  dump_printf (MSG_NOTE, "  ");
+	  print_gimple_stmt (dump_file, oprnds_info[0]->def_stmts[j]->stmt, 0);
+	}
+    }
+
+  /* Since we've merged the two nodes in one, make the second one a copy of the first.
+     An improvement could be to truncate to one just node, but that needs refactoring
+     in vect_build_slp_tree_2 which we can avoid for now. The nodes are going to be
+     cached anyway.  */
+  for (unsigned int j = 0; j < group_size; j++)
+    {
+      oprnds_info[1]->def_stmts[j] = oprnds_info[0]->def_stmts[j];
+      oprnds_info[1]->ops[j] = oprnds_info[0]->ops[j];
+    }
+
+  return pattern;
+}
+
 /* Recursively build an SLP tree starting from NODE.
    Fail (and return a value not equal to zero) if def-stmts are not
    isomorphic, require data permutation or are of unsupported types of
@@ -2392,6 +2496,8 @@ out:
 
   stmt_info = stmts[0];
 
+  int rearrange_pattern = try_rearrange_oprnd_info (oprnds_info, group_size);
+
   /* Create SLP_TREE nodes for the definition node/s.  */
   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
     {
@@ -2629,6 +2735,83 @@ fail:
   *tree_size += this_tree_size + 1;
   *max_nunits = this_max_nunits;
 
+  /* If we applied any rearrangmenets then we need to reconstruct the original
+     elements with an additional permutation layer.  */
+  if (rearrange_pattern)
+    {
+      slp_tree one =  new _slp_tree;
+      slp_tree two = new _slp_tree;
+      SLP_TREE_DEF_TYPE (one) = vect_internal_def;
+      SLP_TREE_DEF_TYPE (two) = vect_internal_def;
+      SLP_TREE_VECTYPE (one) = vectype;
+      SLP_TREE_VECTYPE (two) = vectype;
+      SLP_TREE_CHILDREN (one).safe_splice (children);
+      SLP_TREE_CHILDREN (two).safe_splice (children);
+
+      if (rearrange_pattern == 1)
+	{
+	  SLP_TREE_CODE (one) = VEC_PERM_EXPR;
+	  SLP_TREE_CODE (two) = VEC_PERM_EXPR;
+	  unsigned int h = group_size / 2;
+	  SLP_TREE_REPRESENTATIVE (one) = stmts[0];
+	  SLP_TREE_REPRESENTATIVE (two) = stmts[h];
+
+	  for (unsigned int j = 0; j < h; j += 2)
+	    {
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
+	    }
+	  for (unsigned int j = 0; j < h; j += 2)
+	    {
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+	    }
+	}
+      else if (rearrange_pattern == 2)
+	{
+	  SLP_TREE_CODE (one) = VEC_PERM_EXPR;
+	  SLP_TREE_CODE (two) = VEC_PERM_EXPR;
+	  unsigned int h = group_size / 2;
+	  SLP_TREE_REPRESENTATIVE (one) = stmts[0];
+	  SLP_TREE_REPRESENTATIVE (two) = stmts[h];
+
+	  for (unsigned int j = 0; j < h; j += 2)
+	    {
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
+	    }
+	  for (unsigned int j = 0; j < h; j += 2)
+	    {
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+	    }
+	}
+
+      slp_tree child;
+      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two), i, child)
+       SLP_TREE_REF_COUNT (child)++;
+
+      node = vect_create_new_slp_node (node, stmts, 2);
+      SLP_TREE_VECTYPE (node) = vectype;
+      SLP_TREE_CHILDREN (node).quick_push (one);
+      SLP_TREE_CHILDREN (node).quick_push (two);
+
+      SLP_TREE_LANES (one) = stmts.length ();
+      SLP_TREE_LANES (two) = stmts.length ();
+
+      children.truncate(0);
+      children.safe_splice(SLP_TREE_CHILDREN (node));
+    }
+
+
   if (two_operators)
     {
       /* ???  We'd likely want to either cache in bst_map sth like

             reply	other threads:[~2023-11-28 13:35 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-28 13:35 Philipp Tomsich [this message]
2024-01-17 19:13 Philipp Tomsich
2024-01-23 20:57 Philipp Tomsich
2024-02-27 13:37 Philipp Tomsich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231128133500.38A593836E8E@sourceware.org \
    --to=ptomsich@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).