public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix packing/unpacking of rearranged SLP nodes.
@ 2024-01-18 19:37 Philipp Tomsich
0 siblings, 0 replies; 2+ messages in thread
From: Philipp Tomsich @ 2024-01-18 19:37 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:66fde29d6d9a12ea89d44039aa85de6d2a6ae6b5
commit 66fde29d6d9a12ea89d44039aa85de6d2a6ae6b5
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date: Thu Jan 18 13:13:33 2024 +0100
Fix packing/unpacking of rearranged SLP nodes.
Under some circumstances, the rearrangement of SLP nodes could pack and unpack
nodes in a way that wasn't equivalent to the original code. Apart from
addressing the actual bug, comments that indicate the node packing/unpacking
schemes are added. Also, as a better canonicalization measure, all packed
representations now use the same sequence (A1, B1, C1, D1, A2, B2, C2, D2, ...).
This fixes both a bug that miscompiled x264 on x86-64 (#356) and other bugs that
hadn't occurred yet (some cases where group_size was larger than 4).
Diff:
---
gcc/tree-vect-slp.cc | 84 ++++++++++++++++++++++++++++++----------------------
1 file changed, 48 insertions(+), 36 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e5842f022f1..3b2ca3f1b33 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1907,6 +1907,9 @@ try_rearrange_oprnd_info (vec<slp_oprnd_info> &oprnds_info, unsigned group_size)
if (pattern == SLP_OPRND_PATTERN_ABAB || pattern == SLP_OPRND_PATTERN_ABBA)
for (unsigned int j = 0; j < group_size; j += 4)
{
+ /* Given oprnd[0] -> A1, B1, A1, B1, A2, B2, A2, B2, ...
+ Given oprnd[1] -> C1, D1, C1, D1, C2, D2, C2, D2, ...
+ Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... */
oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j];
oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j];
oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+1];
@@ -1915,12 +1918,16 @@ try_rearrange_oprnd_info (vec<slp_oprnd_info> &oprnds_info, unsigned group_size)
else if (pattern == SLP_OPRND_PATTERN_AABB)
for (unsigned int j = 0; j < group_size; j += 4)
{
+ /* Given oprnd[0] -> A1, A1, B1, B1, A2, A2, B2, B2, ...
+ Given oprnd[1] -> C1, C1, D1, D1, C2, C2, D2, D2, ...
+ Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... */
+
/* The ordering here is at least to some extent arbitrary.
A generilized version needs to use some explicit ordering. */
- oprnds_info[0]->def_stmts[j+1] = oprnds_info[1]->def_stmts[j];
- oprnds_info[0]->ops[j+1] = oprnds_info[1]->ops[j];
- oprnds_info[0]->def_stmts[j+2] = oprnds_info[0]->def_stmts[j+2];
- oprnds_info[0]->ops[j+2] = oprnds_info[0]->ops[j+2];
+ oprnds_info[0]->def_stmts[j+1] = oprnds_info[0]->def_stmts[j+2];
+ oprnds_info[0]->ops[j+1] = oprnds_info[0]->ops[j+2];
+ oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j];
+ oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j];
oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+2];
oprnds_info[0]->ops[j+3] = oprnds_info[1]->ops[j+2];
}
@@ -2786,59 +2793,64 @@ fail:
SLP_TREE_CODE (one) = VEC_PERM_EXPR;
SLP_TREE_CODE (two) = VEC_PERM_EXPR;
- unsigned int h = group_size / 2;
SLP_TREE_REPRESENTATIVE (one) = stmts[0];
- SLP_TREE_REPRESENTATIVE (two) = stmts[h];
+ SLP_TREE_REPRESENTATIVE (two) = stmts[2];
if (rearrange_pattern == SLP_OPRND_PATTERN_ABAB)
{
- for (unsigned int j = 0; j < h; j += 2)
+ /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+ Create node "one" -> A1, B1, A1, B1, A2, B2, A2, B2, ...
+ Create node "two" -> C1, D1, C1, D1, C2, D2, C2, D2, ... */
+
+ for (unsigned int j = 0; j < group_size; j += 4)
{
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- }
- for (unsigned int j = 0; j < h; j += 2)
- {
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
}
}
else if (rearrange_pattern == SLP_OPRND_PATTERN_AABB)
{
- for (unsigned int j = 0; j < h; j += 2)
+ /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+ Create node "one" -> A1, A1, B1, B1, A2, A2, B2, B2, ...
+ Create node "two" -> C1, C1, D1, D1, C2, C2, D2, D2, ... */
+
+ for (unsigned int j = 0; j < group_size; j += 4)
{
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- }
- for (unsigned int j = 0; j < h; j += 2)
- {
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
}
}
else if (rearrange_pattern == SLP_OPRND_PATTERN_ABBA)
{
- for (unsigned int j = 0; j < h; j += 2)
+ /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+ Create node "one" -> A1, B1, B1, A1, A2, B2, B2, A2, ...
+ Create node "two" -> C1, D1, D1, C1, C2, D2, D2, C2, ... */
+
+ for (unsigned int j = 0; j < group_size; j += 4)
{
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
- }
- for (unsigned int j = 0; j < h; j += 2)
- {
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
+
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
}
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix packing/unpacking of rearranged SLP nodes.
@ 2024-01-23 20:59 Philipp Tomsich
0 siblings, 0 replies; 2+ messages in thread
From: Philipp Tomsich @ 2024-01-23 20:59 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:dc3dc6fb7d804704d0044eb4d5a79214db3c8dab
commit dc3dc6fb7d804704d0044eb4d5a79214db3c8dab
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date: Thu Jan 18 13:13:33 2024 +0100
Fix packing/unpacking of rearranged SLP nodes.
Under some circumstances, the rearrangement of SLP nodes could pack and unpack
nodes in a way that wasn't equivalent to the original code. Apart from
addressing the actual bug, comments that indicate the node packing/unpacking
schemes are added. Also, as a better canonicalization measure, all packed
representations now use the same sequence (A1, B1, C1, D1, A2, B2, C2, D2, ...).
This fixes both a bug that miscompiled x264 on x86-64 (#356) and other bugs that
hadn't occurred yet (some cases where group_size was larger than 4).
Diff:
---
gcc/tree-vect-slp.cc | 84 ++++++++++++++++++++++++++++++----------------------
1 file changed, 48 insertions(+), 36 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 16bc447b0f0..f7970bafd45 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1907,6 +1907,9 @@ try_rearrange_oprnd_info (vec<slp_oprnd_info> &oprnds_info, unsigned group_size)
if (pattern == SLP_OPRND_PATTERN_ABAB || pattern == SLP_OPRND_PATTERN_ABBA)
for (unsigned int j = 0; j < group_size; j += 4)
{
+ /* Given oprnd[0] -> A1, B1, A1, B1, A2, B2, A2, B2, ...
+ Given oprnd[1] -> C1, D1, C1, D1, C2, D2, C2, D2, ...
+ Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... */
oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j];
oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j];
oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+1];
@@ -1915,12 +1918,16 @@ try_rearrange_oprnd_info (vec<slp_oprnd_info> &oprnds_info, unsigned group_size)
else if (pattern == SLP_OPRND_PATTERN_AABB)
for (unsigned int j = 0; j < group_size; j += 4)
{
+ /* Given oprnd[0] -> A1, A1, B1, B1, A2, A2, B2, B2, ...
+ Given oprnd[1] -> C1, C1, D1, D1, C2, C2, D2, D2, ...
+ Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... */
+
/* The ordering here is at least to some extent arbitrary.
A generilized version needs to use some explicit ordering. */
- oprnds_info[0]->def_stmts[j+1] = oprnds_info[1]->def_stmts[j];
- oprnds_info[0]->ops[j+1] = oprnds_info[1]->ops[j];
- oprnds_info[0]->def_stmts[j+2] = oprnds_info[0]->def_stmts[j+2];
- oprnds_info[0]->ops[j+2] = oprnds_info[0]->ops[j+2];
+ oprnds_info[0]->def_stmts[j+1] = oprnds_info[0]->def_stmts[j+2];
+ oprnds_info[0]->ops[j+1] = oprnds_info[0]->ops[j+2];
+ oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j];
+ oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j];
oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+2];
oprnds_info[0]->ops[j+3] = oprnds_info[1]->ops[j+2];
}
@@ -2786,59 +2793,64 @@ fail:
SLP_TREE_CODE (one) = VEC_PERM_EXPR;
SLP_TREE_CODE (two) = VEC_PERM_EXPR;
- unsigned int h = group_size / 2;
SLP_TREE_REPRESENTATIVE (one) = stmts[0];
- SLP_TREE_REPRESENTATIVE (two) = stmts[h];
+ SLP_TREE_REPRESENTATIVE (two) = stmts[2];
if (rearrange_pattern == SLP_OPRND_PATTERN_ABAB)
{
- for (unsigned int j = 0; j < h; j += 2)
+ /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+ Create node "one" -> A1, B1, A1, B1, A2, B2, A2, B2, ...
+ Create node "two" -> C1, D1, C1, D1, C2, D2, C2, D2, ... */
+
+ for (unsigned int j = 0; j < group_size; j += 4)
{
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- }
- for (unsigned int j = 0; j < h; j += 2)
- {
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
}
}
else if (rearrange_pattern == SLP_OPRND_PATTERN_AABB)
{
- for (unsigned int j = 0; j < h; j += 2)
+ /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+ Create node "one" -> A1, A1, B1, B1, A2, A2, B2, B2, ...
+ Create node "two" -> C1, C1, D1, D1, C2, C2, D2, D2, ... */
+
+ for (unsigned int j = 0; j < group_size; j += 4)
{
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- }
- for (unsigned int j = 0; j < h; j += 2)
- {
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
+
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
}
}
else if (rearrange_pattern == SLP_OPRND_PATTERN_ABBA)
{
- for (unsigned int j = 0; j < h; j += 2)
+ /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ...
+ Create node "one" -> A1, B1, B1, A1, A2, B2, B2, A2, ...
+ Create node "two" -> C1, D1, D1, C1, C2, D2, D2, C2, ... */
+
+ for (unsigned int j = 0; j < group_size; j += 4)
{
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1));
- SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j));
- }
- for (unsigned int j = 0; j < h; j += 2)
- {
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1));
- SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j));
+ SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0));
+
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3));
+ SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2));
}
}
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-01-23 20:59 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-18 19:37 [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix packing/unpacking of rearranged SLP nodes Philipp Tomsich
2024-01-23 20:59 Philipp Tomsich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).