From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1923) id AF4A438582B9; Tue, 23 Jan 2024 20:59:14 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org AF4A438582B9 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1706043554; bh=/okEUJc9BlPoX1ntk62+07ZW/GIyLRfu0DKKLYIYEnU=; h=From:To:Subject:Date:From; b=cfAUekqltoq3ZiSHdIA4BBW08v184W0Su7hh6nu4meSG6DKiny4pE5OYK45PNrVqi +Va8bNLGPxzje3dW1Lc03+BXp9URKjcPdVENteK+fTfw19YqEZOxJ0aBM8JBf42zTc CEkeQK2LGFhaO60A+IbKADbmIFgaTqTWKDpi1ggI= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Philipp Tomsich To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix (perf regression): revert to previous ordering X-Act-Checkin: gcc X-Git-Author: Manolis Tsamis X-Git-Refname: refs/vendors/vrull/heads/slp-improvements X-Git-Oldrev: dc3dc6fb7d804704d0044eb4d5a79214db3c8dab X-Git-Newrev: d8a63369bf3cb2a8f6450162683b51a388021312 Message-Id: <20240123205914.AF4A438582B9@sourceware.org> Date: Tue, 23 Jan 2024 20:59:14 +0000 (GMT) List-Id: https://gcc.gnu.org/g:d8a63369bf3cb2a8f6450162683b51a388021312 commit d8a63369bf3cb2a8f6450162683b51a388021312 Author: Manolis Tsamis Date: Tue Jan 23 17:58:05 2024 +0100 Fix (perf regression): revert to previous ordering The 'more natural' ABCD ordering (as opposed to the ACBD order) of all nodes from the refactored implementation resulted in an x264 regression, where the number of tbl instructions increased from 26 to 286 in total. This changes the order back to ACBD, reducing the number of tbl instructions back to 26 and resolving the observed performance regression from the fix for #356. 
Ref #356 Signed-off-by: Philipp Tomsich Diff: --- gcc/tree-vect-slp.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index f7970bafd45..6b5edf1cebc 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1920,14 +1920,14 @@ try_rearrange_oprnd_info (vec &oprnds_info, unsigned group_size) { /* Given oprnd[0] -> A1, A1, B1, B1, A2, A2, B2, B2, ... Given oprnd[1] -> C1, C1, D1, D1, C2, C2, D2, D2, ... - Create a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... */ + Create a single node -> A1, C1, B1, D1, A2, C2, B2, D2, ... */ /* The ordering here is at least to some extent arbitrary. A generilized version needs to use some explicit ordering. */ - oprnds_info[0]->def_stmts[j+1] = oprnds_info[0]->def_stmts[j+2]; - oprnds_info[0]->ops[j+1] = oprnds_info[0]->ops[j+2]; - oprnds_info[0]->def_stmts[j+2] = oprnds_info[1]->def_stmts[j]; - oprnds_info[0]->ops[j+2] = oprnds_info[1]->ops[j]; + oprnds_info[0]->def_stmts[j+1] = oprnds_info[1]->def_stmts[j]; + oprnds_info[0]->ops[j+1] = oprnds_info[1]->ops[j]; + oprnds_info[0]->def_stmts[j+2] = oprnds_info[0]->def_stmts[j+2]; + oprnds_info[0]->ops[j+2] = oprnds_info[0]->ops[j+2]; oprnds_info[0]->def_stmts[j+3] = oprnds_info[1]->def_stmts[j+2]; oprnds_info[0]->ops[j+3] = oprnds_info[1]->ops[j+2]; } @@ -2817,7 +2817,7 @@ fail: } else if (rearrange_pattern == SLP_OPRND_PATTERN_AABB) { - /* Given a single node -> A1, B1, C1, D1, A2, B2, C2, D2, ... + /* Given a single node -> A1, C1, B1, D1, A2, C2, B2, D2, ... Create node "one" -> A1, A1, B1, B1, A2, A2, B2, B2, ... Create node "two" -> C1, C1, D1, D1, C2, C2, D2, D2, ... 
*/ @@ -2825,11 +2825,11 @@ fail: { SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 0)); - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); - SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 1)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 2)); + SLP_TREE_LANE_PERMUTATION(one).safe_push (std::make_pair (0, j + 2)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); - SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 2)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 1)); + SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 1)); SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); SLP_TREE_LANE_PERMUTATION(two).safe_push (std::make_pair (0, j + 3)); }