public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix statement ordering for vect_slp_optimize_permutes (#346)
@ 2024-01-23 20:58 Philipp Tomsich
  0 siblings, 0 replies; 2+ messages in thread
From: Philipp Tomsich @ 2024-01-23 20:58 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:636cdb17256940e2de18810a8e47e7deafa172ff

commit 636cdb17256940e2de18810a8e47e7deafa172ff
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date:   Fri Dec 1 12:33:51 2023 +0100

    Fix statement ordering for vect_slp_optimize_permutes (#346)
    
    There were cases where the code in vect_slp_optimize_permutes would
    create dependencies on later statements, as in this case:
    
      c1_7 = VEC_PERM_EXPR <_1, _3, { 0, 1, 4, 5 }>;
      # VUSE <.MEM_5(D)>
      _2 = MEM[(veciD.4395 *)a_6(D) + 16B];
      c2_8 = VEC_PERM_EXPR <_2, _4, { 2, 3, 6, 7 }>;
      # VUSE <.MEM_5(D)>
      _3 = MEM[(veciD.4395 *)a_6(D) + 32B];
      # VUSE <.MEM_5(D)>
      _4 = MEM[(veciD.4395 *)a_6(D) + 48B];
    
    This happened because we modify gimple arguments in-place without
    guaranteeing anything about their initial order. Since we know the
    uses of the statements we alter, we can freely rearrange them so the
    resulting gimple code is correct.

Diff:
---
 gcc/tree-vect-slp.cc | 48 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4f4cb38f185..724e53a4cfd 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8246,18 +8246,18 @@ get_tree_def (tree name, bool single_use_only)
 
      src1_perm = VEC_PERM_EXPR <SRC1, SRC1, CONST_VEC>
      src2_perm = VEC_PERM_EXPR <SRC2, SRC2, CONST_VEC>
-     bop_1 = src1_perm BINOP1 src2_perm
-     bop_2 = src1_perm BINOP2 src2_perm
-     STMT = VEC_PERM_EXPR <bop_1, bop_2, CONST_VEC>
+     bop1 = src1_perm BINOP1 src2_perm
+     bop2 = src1_perm BINOP2 src2_perm
+     STMT = VEC_PERM_EXPR <bop1, bop2, CONST_VEC>
 
-   and src1_perm, src2_perm, bop_1, bop_2 are not used outside of STMT.
+   and src1_perm, src2_perm, bop1, bop2 are not used outside of STMT.
    Return the first two permute statements and the binops through the
    corresponding pointer arguments.  */
 
 static bool
-recognise_perm_binop_perm_pattern (gassign *stmt, enum tree_code *binop1,
-				   enum tree_code *binop2, gassign **perm1_out,
-				   gassign **perm2_out)
+recognise_perm_binop_perm_pattern (gassign *stmt,
+				   gassign **bop1_out, gassign **bop2_out,
+				   gassign **perm1_out, gassign **perm2_out)
 {
   enum tree_code code = gimple_assign_rhs_code (stmt);
 
@@ -8293,8 +8293,8 @@ recognise_perm_binop_perm_pattern (gassign *stmt, enum tree_code *binop1,
       || !VECTOR_CST_NELTS (gimple_assign_rhs3 (perm2)).is_constant ())
     return false;
 
-  *binop1 = gimple_assign_rhs_code (bop1);
-  *binop2 = gimple_assign_rhs_code (bop2);
+  *bop1_out = bop1;
+  *bop2_out = bop2;
   *perm1_out = perm1;
   *perm2_out = perm2;
 
@@ -8314,6 +8314,7 @@ vect_slp_optimize_permutes (function *fun)
   FOR_EACH_BB_FN (bb, fun)
     for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
       {
+	gimple_stmt_iterator gsi_stmt1 = gsi;
 	gassign *stmt1 = dyn_cast <gassign *> (gsi_stmt (gsi));
 	gsi_next (&gsi);
 
@@ -8329,18 +8330,21 @@ vect_slp_optimize_permutes (function *fun)
 	   vector and merge two of them into one. This case can arise from
 	   TWO_OPERATOR SLP patterns because the final permute uses only half
 	   of each input vector.  */
-	enum tree_code binop1_1, binop1_2, binop2_1, binop2_2;
+	gassign *bop1_1, *bop1_2, *bop2_1, *bop2_2;
 	gassign *src1_1, *src1_2, *src2_1, *src2_2;
 
-	if (!recognise_perm_binop_perm_pattern(stmt1, &binop1_1, &binop1_2,
+	if (!recognise_perm_binop_perm_pattern(stmt1, &bop1_1, &bop1_2,
 					       &src1_1, &src1_2))
 	  continue;
 
-	if (!recognise_perm_binop_perm_pattern(stmt2, &binop2_1, &binop2_2,
+	if (!recognise_perm_binop_perm_pattern(stmt2, &bop2_1, &bop2_2,
 					       &src2_1, &src2_2))
 	  continue;
 
-	if (binop1_1 != binop2_1 || binop1_2 != binop2_2)
+	if (gimple_assign_rhs_code (bop1_1) != gimple_assign_rhs_code (bop2_1))
+	  continue;
+
+	if (gimple_assign_rhs_code (bop1_2) != gimple_assign_rhs_code (bop2_2))
 	  continue;
 
 	tree mask1 = gimple_assign_rhs3 (stmt1);
@@ -8437,6 +8441,24 @@ vect_slp_optimize_permutes (function *fun)
 	update_stmt (stmt2);
 	update_stmt (src1_1);
 	update_stmt (src1_2);
+
+	/* We need to move the updated statements because otherwise they may
+	   come before some variable that they depend on.  Since we know that
+	   they don't have uses outside the pattern, we can remove them and
+	   add them back in order.  */
+	gimple_stmt_iterator gsi_rm = gsi_for_stmt (bop1_1);
+	gsi_remove (&gsi_rm, false);
+	gsi_rm = gsi_for_stmt (bop1_2);
+	gsi_remove (&gsi_rm, false);
+	gsi_rm = gsi_for_stmt (src1_1);
+	gsi_remove (&gsi_rm, false);
+	gsi_rm = gsi_for_stmt (src1_2);
+	gsi_remove (&gsi_rm, false);
+
+	gsi_insert_before (&gsi_stmt1, src1_1, GSI_SAME_STMT);
+	gsi_insert_before (&gsi_stmt1, src1_2, GSI_SAME_STMT);
+	gsi_insert_before (&gsi_stmt1, bop1_1, GSI_SAME_STMT);
+	gsi_insert_before (&gsi_stmt1, bop1_2, GSI_SAME_STMT);
       }
 }

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix statement ordering for vect_slp_optimize_permutes (#346)
@ 2024-01-17 19:15 Philipp Tomsich
  0 siblings, 0 replies; 2+ messages in thread
From: Philipp Tomsich @ 2024-01-17 19:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d670ca49c4bb9c894d94de876d592aff34812229

commit d670ca49c4bb9c894d94de876d592aff34812229
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date:   Fri Dec 1 12:33:51 2023 +0100

    Fix statement ordering for vect_slp_optimize_permutes (#346)
    
    There were cases where the code in vect_slp_optimize_permutes would
    create dependencies on later statements, as in this case:
    
      c1_7 = VEC_PERM_EXPR <_1, _3, { 0, 1, 4, 5 }>;
      # VUSE <.MEM_5(D)>
      _2 = MEM[(veciD.4395 *)a_6(D) + 16B];
      c2_8 = VEC_PERM_EXPR <_2, _4, { 2, 3, 6, 7 }>;
      # VUSE <.MEM_5(D)>
      _3 = MEM[(veciD.4395 *)a_6(D) + 32B];
      # VUSE <.MEM_5(D)>
      _4 = MEM[(veciD.4395 *)a_6(D) + 48B];
    
    This happened because we modify gimple arguments in-place without
    guaranteeing anything about their initial order. Since we know the
    uses of the statements we alter, we can freely rearrange them so the
    resulting gimple code is correct.

Diff:
---
 gcc/tree-vect-slp.cc | 48 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 30ac656a0af..8f2993b84f1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8117,18 +8117,18 @@ get_tree_def (tree name, bool single_use_only)
 
      src1_perm = VEC_PERM_EXPR <SRC1, SRC1, CONST_VEC>
      src2_perm = VEC_PERM_EXPR <SRC2, SRC2, CONST_VEC>
-     bop_1 = src1_perm BINOP1 src2_perm
-     bop_2 = src1_perm BINOP2 src2_perm
-     STMT = VEC_PERM_EXPR <bop_1, bop_2, CONST_VEC>
+     bop1 = src1_perm BINOP1 src2_perm
+     bop2 = src1_perm BINOP2 src2_perm
+     STMT = VEC_PERM_EXPR <bop1, bop2, CONST_VEC>
 
-   and src1_perm, src2_perm, bop_1, bop_2 are not used outside of STMT.
+   and src1_perm, src2_perm, bop1, bop2 are not used outside of STMT.
    Return the first two permute statements and the binops through the
    corresponding pointer arguments.  */
 
 static bool
-recognise_perm_binop_perm_pattern (gassign *stmt, enum tree_code *binop1,
-				   enum tree_code *binop2, gassign **perm1_out,
-				   gassign **perm2_out)
+recognise_perm_binop_perm_pattern (gassign *stmt,
+				   gassign **bop1_out, gassign **bop2_out,
+				   gassign **perm1_out, gassign **perm2_out)
 {
   enum tree_code code = gimple_assign_rhs_code (stmt);
 
@@ -8164,8 +8164,8 @@ recognise_perm_binop_perm_pattern (gassign *stmt, enum tree_code *binop1,
       || !VECTOR_CST_NELTS (gimple_assign_rhs3 (perm2)).is_constant ())
     return false;
 
-  *binop1 = gimple_assign_rhs_code (bop1);
-  *binop2 = gimple_assign_rhs_code (bop2);
+  *bop1_out = bop1;
+  *bop2_out = bop2;
   *perm1_out = perm1;
   *perm2_out = perm2;
 
@@ -8185,6 +8185,7 @@ vect_slp_optimize_permutes (function *fun)
   FOR_EACH_BB_FN (bb, fun)
     for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
       {
+	gimple_stmt_iterator gsi_stmt1 = gsi;
 	gassign *stmt1 = dyn_cast <gassign *> (gsi_stmt (gsi));
 	gsi_next (&gsi);
 
@@ -8200,18 +8201,21 @@ vect_slp_optimize_permutes (function *fun)
 	   vector and merge two of them into one. This case can arise from
 	   TWO_OPERATOR SLP patterns because the final permute uses only half
 	   of each input vector.  */
-	enum tree_code binop1_1, binop1_2, binop2_1, binop2_2;
+	gassign *bop1_1, *bop1_2, *bop2_1, *bop2_2;
 	gassign *src1_1, *src1_2, *src2_1, *src2_2;
 
-	if (!recognise_perm_binop_perm_pattern(stmt1, &binop1_1, &binop1_2,
+	if (!recognise_perm_binop_perm_pattern(stmt1, &bop1_1, &bop1_2,
 					       &src1_1, &src1_2))
 	  continue;
 
-	if (!recognise_perm_binop_perm_pattern(stmt2, &binop2_1, &binop2_2,
+	if (!recognise_perm_binop_perm_pattern(stmt2, &bop2_1, &bop2_2,
 					       &src2_1, &src2_2))
 	  continue;
 
-	if (binop1_1 != binop2_1 || binop1_2 != binop2_2)
+	if (gimple_assign_rhs_code (bop1_1) != gimple_assign_rhs_code (bop2_1))
+	  continue;
+
+	if (gimple_assign_rhs_code (bop1_2) != gimple_assign_rhs_code (bop2_2))
 	  continue;
 
 	tree mask1 = gimple_assign_rhs3 (stmt1);
@@ -8308,6 +8312,24 @@ vect_slp_optimize_permutes (function *fun)
 	update_stmt (stmt2);
 	update_stmt (src1_1);
 	update_stmt (src1_2);
+
+	/* We need to move the updated statements because otherwise they may
+	   come before some variable that they depend on.  Since we know that
+	   they don't have uses outside the pattern, we can remove them and
+	   add them back in order.  */
+	gimple_stmt_iterator gsi_rm = gsi_for_stmt (bop1_1);
+	gsi_remove (&gsi_rm, false);
+	gsi_rm = gsi_for_stmt (bop1_2);
+	gsi_remove (&gsi_rm, false);
+	gsi_rm = gsi_for_stmt (src1_1);
+	gsi_remove (&gsi_rm, false);
+	gsi_rm = gsi_for_stmt (src1_2);
+	gsi_remove (&gsi_rm, false);
+
+	gsi_insert_before (&gsi_stmt1, src1_1, GSI_SAME_STMT);
+	gsi_insert_before (&gsi_stmt1, src1_2, GSI_SAME_STMT);
+	gsi_insert_before (&gsi_stmt1, bop1_1, GSI_SAME_STMT);
+	gsi_insert_before (&gsi_stmt1, bop1_2, GSI_SAME_STMT);
       }
 }

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-01-23 20:58 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-23 20:58 [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix statement ordering for vect_slp_optimize_permutes (#346) Philipp Tomsich
  -- strict thread matches above, loose matches on Subject: below --
2024-01-17 19:15 Philipp Tomsich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).