public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Philipp Tomsich <ptomsich@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix packing/unpacking of rearranged SLP nodes.
Date: Tue, 23 Jan 2024 20:59:09 +0000 (GMT)	[thread overview]
Message-ID: <20240123205909.98101385802B@sourceware.org> (raw)

https://gcc.gnu.org/g:dc3dc6fb7d804704d0044eb4d5a79214db3c8dab

commit dc3dc6fb7d804704d0044eb4d5a79214db3c8dab
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date:   Thu Jan 18 13:13:33 2024 +0100

    Fix packing/unpacking of rearranged SLP nodes.
    
    Under some circumstances, the rearrangement of SLP nodes could pack and unpack
    nodes in a way that wasn't equivalent to the original code. Apart from
    addressing the actual bug, comments that indicate the node packing/unpacking
    schemes are added. Also, as a better canonicalization measure all packed
    representations now use the same sequence (A1, B1, C1, D1, A2, B2, C2, D2, ...).
    
    This fixes both a bug that miscompiled x264 on x86-64 (#356) and other bugs that
    had not occurred yet (some cases where group_size was larger than 4).

Diff:
---
 gcc/tree-vect-slp.cc | 84 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 48 insertions(+), 36 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 16bc447b0f0..f7970bafd45 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1907,6 +1907,9 @@ try_rearrange_oprnd_info (vec<slp_oprnd_info> &oprnds_info, unsigned group_size)
   if (pattern == SLP_OPRND_PATTERN_ABAB || pattern == SLP_OPRND_PATTERN_ABBA)
     for (unsigned int j = 0; j < group_size; j += 4)
       {
+	/* Given oprnd[0] -> A1, B1, A1, B1, A2, B2, A2, B2, ...
+	   Given oprnd[1] -> C1, D1, C1, D1, C2, D2, C2, D2, ...
+	   Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...  */
 	oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j];
 	oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j];
 	oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+1];
@@ -1915,12 +1918,16 @@ try_rearrange_oprnd_info (vec<slp_oprnd_info> &oprnds_info, unsigned group_size)
   else if (pattern == SLP_OPRND_PATTERN_AABB)
     for (unsigned int j = 0; j < group_size; j += 4)
       {
+	/* Given oprnd[0] -> A1, A1, B1, B1, A2, A2, B2, B2, ...
+	   Given oprnd[1] -> C1, C1, D1, D1, C2, C2, D2, D2, ...
+	   Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...  */
+
 	/* The ordering here is at least to some extent arbitrary.
 	   A generilized version needs to use some explicit ordering.  */
-	oprnds_info[0]->def_stmts[j+1] = oprnds_info[1]->def_stmts[j];
-	oprnds_info[0]->ops[j+1] = oprnds_info[1]->ops[j];
-	oprnds_info[0]->def_stmts[j+2] = oprnds_info[0]->def_stmts[j+2];
-	oprnds_info[0]->ops[j+2] = oprnds_info[0]->ops[j+2];
+	oprnds_info[0]->def_stmts[j+1] = oprnds_info[0]->def_stmts[j+2];
+	oprnds_info[0]->ops[j+1] = oprnds_info[0]->ops[j+2];
+	oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j];
+	oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j];
 	oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+2];
 	oprnds_info[0]->ops[j+3] = oprnds_info[1]->ops[j+2];
       }
@@ -2786,59 +2793,64 @@ fail:
 
       SLP_TREE_CODE (one) = VEC_PERM_EXPR;
       SLP_TREE_CODE (two) = VEC_PERM_EXPR;
-      unsigned int h = group_size / 2;
       SLP_TREE_REPRESENTATIVE (one) = stmts[0];
-      SLP_TREE_REPRESENTATIVE (two) = stmts[h];
+      SLP_TREE_REPRESENTATIVE (two) = stmts[2];
 
       if (rearrange_pattern == SLP_OPRND_PATTERN_ABAB)
 	{
-	  for (unsigned int j = 0; j < h; j += 2)
+	   /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+	      Create node "one" -> A1, B1, A1, B1, A2, B2, A2, B2, ...
+	      Create node "two" -> C1, D1, C1, D1, C2, D2, C2, D2, ...  */
+
+	  for (unsigned int j = 0; j < group_size; j += 4)
 	    {
-	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
 	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
-	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
 	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
-	    }
-	  for (unsigned int j = 0; j < h; j += 2)
-	    {
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
 	    }
 	}
       else if (rearrange_pattern == SLP_OPRND_PATTERN_AABB)
 	{
-	  for (unsigned int j = 0; j < h; j += 2)
+	   /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+	      Create node "one" -> A1, A1, B1, B1, A2, A2, B2, B2, ...
+	      Create node "two" -> C1, C1, D1, D1, C2, C2, D2, D2, ...  */
+
+	  for (unsigned int j = 0; j < group_size; j += 4)
 	    {
-	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
-	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
 	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
 	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
-	    }
-	  for (unsigned int j = 0; j < h; j += 2)
-	    {
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
 	    }
 	}
       else if (rearrange_pattern == SLP_OPRND_PATTERN_ABBA)
 	{
-	  for (unsigned int j = 0; j < h; j += 2)
+	   /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+	      Create node "one" -> A1, B1, B1, A1, A2, B2, B2, A2, ...
+	      Create node "two" -> C1, D1, D1, C1, C2, D2, D2, C2, ...  */
+
+	  for (unsigned int j = 0; j < group_size; j += 4)
 	    {
-	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
 	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
 	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
-	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
-	    }
-	  for (unsigned int j = 0; j < h; j += 2)
-	    {
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
-	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
+	      SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
+
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+	      SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
 	    }
 	}

             reply	other threads:[~2024-01-23 20:59 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-23 20:59 Philipp Tomsich [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-01-18 19:37 Philipp Tomsich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240123205909.98101385802B@sourceware.org \
    --to=ptomsich@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).