public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix issues and improve vect_slp_optimize_permutes.
@ 2024-01-23 20:57 Philipp Tomsich
0 siblings, 0 replies; 3+ messages in thread
From: Philipp Tomsich @ 2024-01-23 20:57 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:042460234612e9817e26d6f1c1dd41aa144a4c20
commit 042460234612e9817e26d6f1c1dd41aa144a4c20
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date: Thu Nov 23 11:28:44 2023 +0100
Fix issues and improve vect_slp_optimize_permutes.
* Don't use tree_to_vec_perm_builder to get the elements of the
permute constant vectors. This function uses
vector_cst_encoded_nelts internally which may return fewer elements
than VECTOR_CST_NELTS and in turn fewer elements than what is
observed on the GIMPLE statement. Because we iterate based on NELTS
this would cause memory corruption issues. The resulting code is
also cleaner and more performant.
* Call update_stmt after rewriting permutes because otherwise we may
get SSA imm_use list issues.
Ref #346
Diff:
---
gcc/tree-vect-slp.cc | 39 +++++++++++++++++++++++----------------
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 68b745f8445..8dd9d85d9b8 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8385,33 +8385,36 @@ vect_slp_optimize_permutes (function *fun)
continue;
vec_perm_builder sel (nelts, nelts, 1);
- vec_perm_builder sel1_1, sel1_2, sel2_1, sel2_2;
- if (!tree_to_vec_perm_builder (&sel1_1, gimple_assign_rhs3 (src1_1))
- || !tree_to_vec_perm_builder (&sel1_2, gimple_assign_rhs3 (src1_2))
- || !tree_to_vec_perm_builder (&sel2_1, gimple_assign_rhs3 (src2_1))
- || !tree_to_vec_perm_builder (&sel2_2, gimple_assign_rhs3 (src2_2)))
- continue;
+ vec_perm_builder sel_1 (nelts, nelts, 1);
+ vec_perm_builder sel_2 (nelts, nelts, 1);
/* Rewrite the permutations based on MASK1_LANES/MASK2_LANES. */
for (i = 0; i < nelts; i++)
{
+ /* Calculate new mask for STMT2. */
tree val = VECTOR_CST_ELT (mask2, i);
- unsigned HOST_WIDE_INT j
+ unsigned HOST_WIDE_INT lane
= TREE_INT_CST_LOW (val) & (2 * nelts - 1);
- unsigned off = (j < nelts) ? 0 : nelts;
- unsigned HOST_WIDE_INT new_lane = *mask2_lanes.get (j - off) + off;
+ unsigned off = (lane < nelts) ? 0 : nelts;
+ unsigned HOST_WIDE_INT new_lane
+ = *mask2_lanes.get (lane - off) + off;
sel.quick_push (new_lane);
- if (!mask1_lanes.get (i))
- {
- sel1_1[i] = nelts + sel2_1[i];
- sel1_2[i] = nelts + sel2_2[i];
- }
+ /* Calculate new masks for SRC1_1 and SRC1_2. */
+ bool use_src1 = mask1_lanes.get (i);
+ tree mask1 = gimple_assign_rhs3 (use_src1 ? src1_1 : src2_1);
+ tree mask2 = gimple_assign_rhs3 (use_src1 ? src1_2 : src2_2);
+ unsigned HOST_WIDE_INT lane1
+ = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask1, i)) & (nelts - 1);
+ unsigned HOST_WIDE_INT lane2
+ = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask2, i)) & (nelts - 1);
+ sel_1.quick_push (lane1 + (use_src1 ? 0 : nelts));
+ sel_2.quick_push (lane2 + (use_src1 ? 0 : nelts));
}
vec_perm_indices indices (sel, 2, nelts);
- vec_perm_indices indices1_1 (sel1_1, 2, nelts);
- vec_perm_indices indices1_2 (sel1_2, 2, nelts);
+ vec_perm_indices indices1_1 (sel_1, 2, nelts);
+ vec_perm_indices indices1_2 (sel_2, 2, nelts);
tree vectype = TREE_TYPE (gimple_assign_lhs (stmt2));
if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), indices)
@@ -8429,6 +8432,10 @@ vect_slp_optimize_permutes (function *fun)
gimple_assign_set_rhs3 (src1_1, vect_gen_perm_mask_checked (vectype, indices1_1));
gimple_assign_set_rhs3 (src1_2, vect_gen_perm_mask_checked (vectype, indices1_2));
+
+ update_stmt (stmt2);
+ update_stmt (src1_1);
+ update_stmt (src1_2);
}
}
^ permalink raw reply [flat|nested] 3+ messages in thread
* [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix issues and improve vect_slp_optimize_permutes.
@ 2024-01-17 19:14 Philipp Tomsich
0 siblings, 0 replies; 3+ messages in thread
From: Philipp Tomsich @ 2024-01-17 19:14 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:6c6e798decb95bfd002bb24abb134a22b9a78e48
commit 6c6e798decb95bfd002bb24abb134a22b9a78e48
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date: Thu Nov 23 11:28:44 2023 +0100
Fix issues and improve vect_slp_optimize_permutes.
* Don't use tree_to_vec_perm_builder to get the elements of the
permute constant vectors. This function uses
vector_cst_encoded_nelts internally which may return fewer elements
than VECTOR_CST_NELTS and in turn fewer elements than what is
observed on the GIMPLE statement. Because we iterate based on NELTS
this would cause memory corruption issues. The resulting code is
also cleaner and more performant.
* Call update_stmt after rewriting permutes because otherwise we may
get SSA imm_use list issues.
Ref #346
Diff:
---
gcc/tree-vect-slp.cc | 39 +++++++++++++++++++++++----------------
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 58543629756..20acfba887d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8256,33 +8256,36 @@ vect_slp_optimize_permutes (function *fun)
continue;
vec_perm_builder sel (nelts, nelts, 1);
- vec_perm_builder sel1_1, sel1_2, sel2_1, sel2_2;
- if (!tree_to_vec_perm_builder (&sel1_1, gimple_assign_rhs3 (src1_1))
- || !tree_to_vec_perm_builder (&sel1_2, gimple_assign_rhs3 (src1_2))
- || !tree_to_vec_perm_builder (&sel2_1, gimple_assign_rhs3 (src2_1))
- || !tree_to_vec_perm_builder (&sel2_2, gimple_assign_rhs3 (src2_2)))
- continue;
+ vec_perm_builder sel_1 (nelts, nelts, 1);
+ vec_perm_builder sel_2 (nelts, nelts, 1);
/* Rewrite the permutations based on MASK1_LANES/MASK2_LANES. */
for (i = 0; i < nelts; i++)
{
+ /* Calculate new mask for STMT2. */
tree val = VECTOR_CST_ELT (mask2, i);
- unsigned HOST_WIDE_INT j
+ unsigned HOST_WIDE_INT lane
= TREE_INT_CST_LOW (val) & (2 * nelts - 1);
- unsigned off = (j < nelts) ? 0 : nelts;
- unsigned HOST_WIDE_INT new_lane = *mask2_lanes.get (j - off) + off;
+ unsigned off = (lane < nelts) ? 0 : nelts;
+ unsigned HOST_WIDE_INT new_lane
+ = *mask2_lanes.get (lane - off) + off;
sel.quick_push (new_lane);
- if (!mask1_lanes.get (i))
- {
- sel1_1[i] = nelts + sel2_1[i];
- sel1_2[i] = nelts + sel2_2[i];
- }
+ /* Calculate new masks for SRC1_1 and SRC1_2. */
+ bool use_src1 = mask1_lanes.get (i);
+ tree mask1 = gimple_assign_rhs3 (use_src1 ? src1_1 : src2_1);
+ tree mask2 = gimple_assign_rhs3 (use_src1 ? src1_2 : src2_2);
+ unsigned HOST_WIDE_INT lane1
+ = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask1, i)) & (nelts - 1);
+ unsigned HOST_WIDE_INT lane2
+ = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask2, i)) & (nelts - 1);
+ sel_1.quick_push (lane1 + (use_src1 ? 0 : nelts));
+ sel_2.quick_push (lane2 + (use_src1 ? 0 : nelts));
}
vec_perm_indices indices (sel, 2, nelts);
- vec_perm_indices indices1_1 (sel1_1, 2, nelts);
- vec_perm_indices indices1_2 (sel1_2, 2, nelts);
+ vec_perm_indices indices1_1 (sel_1, 2, nelts);
+ vec_perm_indices indices1_2 (sel_2, 2, nelts);
tree vectype = TREE_TYPE (gimple_assign_lhs (stmt2));
if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), indices)
@@ -8300,6 +8303,10 @@ vect_slp_optimize_permutes (function *fun)
gimple_assign_set_rhs3 (src1_1, vect_gen_perm_mask_checked (vectype, indices1_1));
gimple_assign_set_rhs3 (src1_2, vect_gen_perm_mask_checked (vectype, indices1_2));
+
+ update_stmt (stmt2);
+ update_stmt (src1_1);
+ update_stmt (src1_2);
}
}
^ permalink raw reply [flat|nested] 3+ messages in thread
* [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix issues and improve vect_slp_optimize_permutes.
@ 2023-11-28 13:35 Philipp Tomsich
0 siblings, 0 replies; 3+ messages in thread
From: Philipp Tomsich @ 2023-11-28 13:35 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:e8fccdfe15994b6a9713c44844596b73869fee94
commit e8fccdfe15994b6a9713c44844596b73869fee94
Author: Manolis Tsamis <manolis.tsamis@vrull.eu>
Date: Thu Nov 23 11:28:44 2023 +0100
Fix issues and improve vect_slp_optimize_permutes.
* Don't use tree_to_vec_perm_builder to get the elements of the
permute constant vectors. This function uses
vector_cst_encoded_nelts internally which may return fewer elements
than VECTOR_CST_NELTS and in turn fewer elements than what is
observed on the GIMPLE statement. Because we iterate based on NELTS
this would cause memory corruption issues. The resulting code is
also cleaner and more performant.
* Call update_stmt after rewriting permutes because otherwise we may
get SSA imm_use list issues.
Ref #346
Diff:
---
gcc/tree-vect-slp.cc | 39 +++++++++++++++++++++++----------------
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 267cc712595..ae936ca96dd 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8237,33 +8237,36 @@ vect_slp_optimize_permutes (function *fun)
continue;
vec_perm_builder sel (nelts, nelts, 1);
- vec_perm_builder sel1_1, sel1_2, sel2_1, sel2_2;
- if (!tree_to_vec_perm_builder (&sel1_1, gimple_assign_rhs3 (src1_1))
- || !tree_to_vec_perm_builder (&sel1_2, gimple_assign_rhs3 (src1_2))
- || !tree_to_vec_perm_builder (&sel2_1, gimple_assign_rhs3 (src2_1))
- || !tree_to_vec_perm_builder (&sel2_2, gimple_assign_rhs3 (src2_2)))
- continue;
+ vec_perm_builder sel_1 (nelts, nelts, 1);
+ vec_perm_builder sel_2 (nelts, nelts, 1);
/* Rewrite the permutations based on MASK1_LANES/MASK2_LANES. */
for (i = 0; i < nelts; i++)
{
+ /* Calculate new mask for STMT2. */
tree val = VECTOR_CST_ELT (mask2, i);
- unsigned HOST_WIDE_INT j
+ unsigned HOST_WIDE_INT lane
= TREE_INT_CST_LOW (val) & (2 * nelts - 1);
- unsigned off = (j < nelts) ? 0 : nelts;
- unsigned HOST_WIDE_INT new_lane = *mask2_lanes.get (j - off) + off;
+ unsigned off = (lane < nelts) ? 0 : nelts;
+ unsigned HOST_WIDE_INT new_lane
+ = *mask2_lanes.get (lane - off) + off;
sel.quick_push (new_lane);
- if (!mask1_lanes.get (i))
- {
- sel1_1[i] = nelts + sel2_1[i];
- sel1_2[i] = nelts + sel2_2[i];
- }
+ /* Calculate new masks for SRC1_1 and SRC1_2. */
+ bool use_src1 = mask1_lanes.get (i);
+ tree mask1 = gimple_assign_rhs3 (use_src1 ? src1_1 : src2_1);
+ tree mask2 = gimple_assign_rhs3 (use_src1 ? src1_2 : src2_2);
+ unsigned HOST_WIDE_INT lane1
+ = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask1, i)) & (nelts - 1);
+ unsigned HOST_WIDE_INT lane2
+ = TREE_INT_CST_LOW (VECTOR_CST_ELT (mask2, i)) & (nelts - 1);
+ sel_1.quick_push (lane1 + (use_src1 ? 0 : nelts));
+ sel_2.quick_push (lane2 + (use_src1 ? 0 : nelts));
}
vec_perm_indices indices (sel, 2, nelts);
- vec_perm_indices indices1_1 (sel1_1, 2, nelts);
- vec_perm_indices indices1_2 (sel1_2, 2, nelts);
+ vec_perm_indices indices1_1 (sel_1, 2, nelts);
+ vec_perm_indices indices1_2 (sel_2, 2, nelts);
tree vectype = TREE_TYPE (gimple_assign_lhs (stmt2));
if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), indices)
@@ -8281,6 +8284,10 @@ vect_slp_optimize_permutes (function *fun)
gimple_assign_set_rhs3 (src1_1, vect_gen_perm_mask_checked (vectype, indices1_1));
gimple_assign_set_rhs3 (src1_2, vect_gen_perm_mask_checked (vectype, indices1_2));
+
+ update_stmt (stmt2);
+ update_stmt (src1_1);
+ update_stmt (src1_2);
}
}
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-01-23 20:57 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-23 20:57 [gcc(refs/vendors/vrull/heads/slp-improvements)] Fix issues and improve vect_slp_optimize_permutes Philipp Tomsich
-- strict thread matches above, loose matches on Subject: below --
2024-01-17 19:14 Philipp Tomsich
2023-11-28 13:35 Philipp Tomsich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).