public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-1151] vect: Enhance cost evaluation in vect_transform_slp_perm_load_1
@ 2023-05-24  5:06 Kewen Lin
  0 siblings, 0 replies; only message in thread
From: Kewen Lin @ 2023-05-24  5:06 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e55c134ebeef2fa23ad5f4d8afa36b5949b2852c

commit r14-1151-ge55c134ebeef2fa23ad5f4d8afa36b5949b2852c
Author: Kewen Lin <linkw@linux.ibm.com>
Date:   Wed May 24 00:05:01 2023 -0500

    vect: Enhance cost evaluation in vect_transform_slp_perm_load_1
    
    Following Richi's suggestion in [1], I'm working on deferring
    cost evaluation next to the transformation.  This patch
    enhances function vect_transform_slp_perm_load_1, which
    could under-cost vector permutations: the costing doesn't
    consider nvectors_per_build, which makes it inconsistent
    with the transformation part.
    
    Basically it changes the below
    
      if (index == count)
        {
           if (!noop_p)
             {
               // A ...
               // ++*n_perms;
    
               if (!analyze_only)
                 {
                    // B1 ...
                    // B2 ...
                    for ...
                       // B3 building VEC_PERM_EXPR
                 }
             }
           else if (!analyze_only)
             {
                // no B2 since there are no further uses here.
                for ...
                  // B4 building nothing
             }
            // B5 ...
        }
    
    to:
    
      if (index == count)
        {
           if (!noop_p)
             {
               // A ...
    
               if (!analyze_only)
                 // B1 ...
    
               // B2 ... (trivial computations during analyze_only or not)
    
               for ...
                 {
                    // now n_perms is consistent with building VEC_PERM_EXPR
                    // ++*n_perms;
                    if (analyze_only)
                       continue;
                    // B3 building VEC_PERM_EXPR
                 }
             }
           else if (!analyze_only)
             {
                // no B2 since there are no further uses here.
                for ...
                  // B4 building nothing
             }
            // B5 ...
        }
    
    [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563624.html
    
    gcc/ChangeLog:
    
            * tree-vect-slp.cc (vect_transform_slp_perm_load_1): Adjust the
            calculation on n_perms by considering nvectors_per_build.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c: New test.

Diff:
---
 .../gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c | 23 ++++++++
 gcc/tree-vect-slp.cc                               | 66 +++++++++++-----------
 2 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
new file mode 100644
index 00000000000..e5c4dceddfb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* Specify power9 to ensure the vectorization is profitable
+   and test point stands, otherwise it could be not profitable
+   to vectorize.  */
+/* { dg-additional-options "-mdejagnu-cpu=power9 -mpower9-vector" } */
+
+/* Verify we cost the exact count for required vec_perm.  */
+
+int x[1024], y[1024];
+
+void
+foo ()
+{
+  for (int i = 0; i < 512; ++i)
+    {
+      x[2 * i] = y[1023 - (2 * i)];
+      x[2 * i + 1] = y[1023 - (2 * i + 1)];
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "2 times vec_perm" 1 "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index a6f277c5e21..ab89a82f1b3 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8124,12 +8124,12 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
 
   mode = TYPE_MODE (vectype);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
   /* Initialize the vect stmts of NODE to properly insert the generated
      stmts later.  */
   if (! analyze_only)
-    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
-	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
+    for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); i < nstmts; i++)
       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
 
   /* Generate permutation masks for every NODE. Number of masks for each NODE
@@ -8170,7 +8170,10 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
 	 (b) the permutes only need a single vector input.  */
       mask.new_vector (nunits, group_size, 3);
       nelts_to_build = mask.encoded_nelts ();
-      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
+      /* It's possible to obtain zero nstmts during analyze_only, so make
+	 it at least one to ensure the later computation for n_perms
+	 proceed.  */
+      nvectors_per_build = nstmts > 0 ? nstmts : 1;
       in_nlanes = DR_GROUP_SIZE (stmt_info) * 3;
     }
   else
@@ -8261,40 +8264,39 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
 		  return false;
 		}
 
-	      ++*n_perms;
-
+	      tree mask_vec = NULL_TREE;
 	      if (!analyze_only)
-		{
-		  tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
+		mask_vec = vect_gen_perm_mask_checked (vectype, indices);
 
-		  if (second_vec_index == -1)
-		    second_vec_index = first_vec_index;
+	      if (second_vec_index == -1)
+		second_vec_index = first_vec_index;
 
-		  for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
+	      for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
+		{
+		  ++*n_perms;
+		  if (analyze_only)
+		    continue;
+		  /* Generate the permute statement if necessary.  */
+		  tree first_vec = dr_chain[first_vec_index + ri];
+		  tree second_vec = dr_chain[second_vec_index + ri];
+		  gassign *stmt = as_a<gassign *> (stmt_info->stmt);
+		  tree perm_dest
+		    = vect_create_destination_var (gimple_assign_lhs (stmt),
+						   vectype);
+		  perm_dest = make_ssa_name (perm_dest);
+		  gimple *perm_stmt
+		    = gimple_build_assign (perm_dest, VEC_PERM_EXPR, first_vec,
+					   second_vec, mask_vec);
+		  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
+					       gsi);
+		  if (dce_chain)
 		    {
-		      /* Generate the permute statement if necessary.  */
-		      tree first_vec = dr_chain[first_vec_index + ri];
-		      tree second_vec = dr_chain[second_vec_index + ri];
-		      gassign *stmt = as_a<gassign *> (stmt_info->stmt);
-		      tree perm_dest
-			= vect_create_destination_var (gimple_assign_lhs (stmt),
-						       vectype);
-		      perm_dest = make_ssa_name (perm_dest);
-		      gimple *perm_stmt
-			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
-					       first_vec, second_vec, mask_vec);
-		      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
-						   gsi);
-		      if (dce_chain)
-			{
-			  bitmap_set_bit (used_defs, first_vec_index + ri);
-			  bitmap_set_bit (used_defs, second_vec_index + ri);
-			}
-
-		      /* Store the vector statement in NODE.  */
-		      SLP_TREE_VEC_STMTS (node) [vect_stmts_counter++]
-			= perm_stmt;
+		      bitmap_set_bit (used_defs, first_vec_index + ri);
+		      bitmap_set_bit (used_defs, second_vec_index + ri);
 		    }
+
+		  /* Store the vector statement in NODE.  */
+		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
 		}
 	    }
 	  else if (!analyze_only)

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-24  5:06 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-24  5:06 [gcc r14-1151] vect: Enhance cost evaluation in vect_transform_slp_perm_load_1 Kewen Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).