From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1923) id 166C13858D33; Thu, 18 Jan 2024 19:37:19 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 166C13858D33 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1705606640; bh=z4qU4IwAi0gJiUYtHDEDm31iNs229+dJLYLEOQV2d+w=; h=From:To:Subject:Date:From; b=PKXAG8AjTRMcndajlUqbI0cAg+81PfnTibJQNpxTU5PG7XparDi1glbASAHIA/T5l DYEWYamOXCIoON5JiHh/eH/NMewc1QH1X7nMvGaomgxiJ0WgZHp+6SYSV3sygVzwPQ 73FG5CrL1UgvpjSCAs66muxMlW3vrsmhVUYyeQ+M= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Philipp Tomsich To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix packing/unpacking of rearranged SLP nodes. X-Act-Checkin: gcc X-Git-Author: Manolis Tsamis X-Git-Refname: refs/vendors/vrull/heads/slp-improvements X-Git-Oldrev: 95f071f025a7f27a66e1a78a921a32ae078e3864 X-Git-Newrev: 66fde29d6d9a12ea89d44039aa85de6d2a6ae6b5 Message-Id: <20240118193720.166C13858D33@sourceware.org> Date: Thu, 18 Jan 2024 19:37:19 +0000 (GMT) List-Id: https://gcc.gnu.org/g:66fde29d6d9a12ea89d44039aa85de6d2a6ae6b5 commit 66fde29d6d9a12ea89d44039aa85de6d2a6ae6b5 Author: Manolis Tsamis Date: Thu Jan 18 13:13:33 2024 +0100 Fix packing/unpacking of rearranged SLP nodes. Under some circumstances, the rearrangement of SLP nodes could pack and unpack nodes in a way that wasn't equivalent to the original code. Apart from addressing the actual bug, comments that indicate the node packing/unpacking schemes are added. Also, as a better canonicalization measure, all packed representations now use the same sequence (A1, B1, C1, D1, A2, B2, C2, D2, ...). This fixes both a bug that miscompiled x264 on x86-64 (#356) and other bugs that hadn't occurred yet (some cases where group_size was larger than 4). 
Diff: --- gcc/tree-vect-slp.cc | 84 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index e5842f022f1..3b2ca3f1b33 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1907,6 +1907,9 @@ try_rearrange_oprnd_info (vec &oprnds_info, unsigned group_size) if (pattern == SLP_OPRND_PATTERN_ABAB || pattern == SLP_OPRND_PATTERN_ABBA) for (unsigned int j = 0; j < group_size; j += 4) { + /* Given oprnd[0] -> A1, B1, A1, B1, A2, B2, A2, B2, ... + Given oprnd[1] -> C1, D1, C1, D1, C2, D2, C2, D2, ... + Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... */ oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j]; oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j]; oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+1]; @@ -1915,12 +1918,16 @@ try_rearrange_oprnd_info (vec &oprnds_info, unsigned group_size) else if (pattern == SLP_OPRND_PATTERN_AABB) for (unsigned int j = 0; j < group_size; j += 4) { + /* Given oprnd[0] -> A1, A1, B1, B1, A2, A2, B2, B2, ... + Given oprnd[1] -> C1, C1, D1, D1, C2, C2, D2, D2, ... + Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... */ + /* The ordering here is at least to some extent arbitrary. A generilized version needs to use some explicit ordering. 
*/ - oprnds_info[0]->def_stmts[j+1] = oprnds_info[1]->def_stmts[j]; - oprnds_info[0]->ops[j+1] = oprnds_info[1]->ops[j]; - oprnds_info[0]->def_stmts[j+2] = oprnds_info[0]->def_stmts[j+2]; - oprnds_info[0]->ops[j+2] = oprnds_info[0]->ops[j+2]; + oprnds_info[0]->def_stmts[j+1] = oprnds_info[0]->def_stmts[j+2]; + oprnds_info[0]->ops[j+1] = oprnds_info[0]->ops[j+2]; + oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j]; + oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j]; oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+2]; oprnds_info[0]->ops[j+3] = oprnds_info[1]->ops[j+2]; } @@ -2786,59 +2793,64 @@ fail: SLP_TREE_CODE (one) = VEC_PERM_EXPR; SLP_TREE_CODE (two) = VEC_PERM_EXPR; - unsigned int h = group_size / 2; SLP_TREE_REPRESENTATIVE (one) = stmts[0]; - SLP_TREE_REPRESENTATIVE (two) = stmts[h]; + SLP_TREE_REPRESENTATIVE (two) = stmts[2]; if (rearrange_pattern == SLP_OPRND_PATTERN_ABAB) { - for (unsigned int j = 0; j < h; j += 2) + /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... + Create node "one" -> A1, B1, A1, B1, A2, B2, A2, B2, ... + Create node "two" -> C1, D1, C1, D1, C2, D2, C2, D2, ... 
*/ + + for (unsigned int j = 0; j < group_size; j += 4) { - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); - } - for (unsigned int j = 0; j < h; j += 2) - { - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1)); + + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); } } else if (rearrange_pattern == SLP_OPRND_PATTERN_AABB) { - for (unsigned int j = 0; j < h; j += 2) + /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... + Create node "one" -> A1, A1, B1, B1, A2, A2, B2, B2, ... + Create node "two" -> C1, C1, D1, D1, C2, C2, D2, D2, ... 
*/ + + for (unsigned int j = 0; j < group_size; j += 4) { - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j)); - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); - } - for (unsigned int j = 0; j < h; j += 2) - { - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1)); + + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); } } else if (rearrange_pattern == SLP_OPRND_PATTERN_ABBA) { - for (unsigned int j = 0; j < h; j += 2) + /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... + Create node "one" -> A1, B1, B1, A1, A2, B2, B2, A2, ... + Create node "two" -> C1, D1, D1, C1, C2, D2, D2, C2, ... 
*/ + + for (unsigned int j = 0; j < group_size; j += 4) { - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j)); - } - for (unsigned int j = 0; j < h; j += 2) - { - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j + 1)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, h + j)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); + + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); } }