public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-2832] tree-optimization/101801 - rework generic vector vectorization more
@ 2021-08-10  8:12 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2021-08-10  8:12 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:19d1a529fa9f78e7ec7be38d423c90e00cec8f8c

commit r12-2832-g19d1a529fa9f78e7ec7be38d423c90e00cec8f8c
Author: Richard Biener <rguenther@suse.de>
Date:   Mon Aug 9 11:42:47 2021 +0200

    tree-optimization/101801 - rework generic vector vectorization more
    
    This builds on top of the vect_worthwhile_without_simd_p refactoring
    done earlier.  It was wrong in dropping the apparent double checks
    for operation support since the optab check can happen with an
    integer vector emulation mode and thus succeed but vector lowering
    might not actually support the operation on word_mode.
    
    The following patch adds a vect_emulated_vector_p helper and
    re-instantiates the check where it was previously.  It also adds
    appropriate costing of the scalar stmts emitted by vector lowering
    to vectorizable_operation which should be the only place such
    operations are synthesized.  I've also cared for the case where
    the vector mode is supported but the operation is not (though
    I think this will be unlikely given we're talking about plus, minus
    and negate).
    
    This fixes the observed FAIL of gcc.dg/tree-ssa/gen-vect-11b.c
    with -m32 where we end up vectorizing a multiplication that ends up
    being torn down to scalars again by vector lowering.
    
    I'm not super happy about all the other places where we are now,
    and were previously, feeding scalar modes to optab checks when we
    want to know whether we can vectorize something, but well.
    
    2021-08-09  Richard Biener  <rguenther@suse.de>
    
            PR tree-optimization/101801
            PR tree-optimization/101819
            * tree-vectorizer.h (vect_emulated_vector_p): Declare.
            * tree-vect-loop.c (vect_emulated_vector_p): New function.
            (vectorizable_reduction): Re-instantiate a check for emulated
            operations.
            * tree-vect-stmts.c (vectorizable_shift): Likewise.
            (vectorizable_operation): Likewise.  Cost emulated vector
            operations according to the scalar sequence synthesized by
            vector lowering.

Diff:
---
 gcc/tree-vect-loop.c  | 18 ++++++++++++++++++
 gcc/tree-vect-stmts.c | 45 +++++++++++++++++++++++++++++++++++++--------
 gcc/tree-vectorizer.h |  1 +
 3 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 37c7daa7f9e..995d143dbbd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7234,6 +7234,14 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
 	      dump_printf (MSG_NOTE, "proceeding using word mode.\n");
         }
 
+      if (vect_emulated_vector_p (vectype_in)
+	  && !vect_can_vectorize_without_simd_p (code))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf (MSG_NOTE, "using word mode not possible.\n");
+	  return false;
+	}
+
       /* lane-reducing operations have to go through vect_transform_reduction.
          For the other cases try without the single cycle optimization.  */
       if (!ok)
@@ -7936,6 +7944,16 @@ vectorizable_phi (vec_info *,
   return true;
 }
 
+/* Return true if VECTYPE represents a vector that requires lowering
+   by the vector lowering pass.  */
+
+bool
+vect_emulated_vector_p (tree vectype)
+{
+  return (!VECTOR_MODE_P (TYPE_MODE (vectype))
+	  && (!VECTOR_BOOLEAN_TYPE_P (vectype)
+	      || TYPE_PRECISION (TREE_TYPE (vectype)) != 1));
+}
 
 /* Return true if we can emulate CODE on an integer mode representation
    of a vector.  */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 5b94d41e292..5a5a4dab3f2 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5682,15 +5682,11 @@ vectorizable_shift (vec_info *vinfo,
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                          "op not supported by target.\n");
-      /* Check only during analysis.  */
-      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
-	  || (!vec_stmt
-	      && !vect_can_vectorize_without_simd_p (code)))
-        return false;
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "proceeding using word mode.\n");
+      return false;
     }
+  /* vector lowering cannot optimize vector shifts using word arithmetic.  */
+  if (vect_emulated_vector_p (vectype))
+    return false;
 
   if (!vec_stmt) /* transformation not required.  */
     {
@@ -6076,6 +6072,7 @@ vectorizable_operation (vec_info *vinfo,
 			  != CODE_FOR_nothing);
     }
 
+  bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
   if (!target_support_p)
     {
       if (dump_enabled_p ())
@@ -6088,6 +6085,15 @@ vectorizable_operation (vec_info *vinfo,
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
                          "proceeding using word mode.\n");
+      using_emulated_vectors_p = true;
+    }
+
+  if (using_emulated_vectors_p
+      && !vect_can_vectorize_without_simd_p (code))
+    {
+      if (dump_enabled_p ())
+	dump_printf (MSG_NOTE, "using word mode not possible.\n");
+      return false;
     }
 
   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
@@ -6134,6 +6140,29 @@ vectorizable_operation (vec_info *vinfo,
       DUMP_VECT_SCOPE ("vectorizable_operation");
       vect_model_simple_cost (vinfo, stmt_info,
 			      ncopies, dt, ndts, slp_node, cost_vec);
+      if (using_emulated_vectors_p)
+	{
+	  /* The above vect_model_simple_cost call handles constants
+	     in the prologue and (mis-)costs one of the stmts as
+	     vector stmt.  See tree-vect-generic.c:do_plus_minus/do_negate
+	     for the actual lowering that will be applied.  */
+	  unsigned n
+	    = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
+	  switch (code)
+	    {
+	    case PLUS_EXPR:
+	      n *= 5;
+	      break;
+	    case MINUS_EXPR:
+	      n *= 6;
+	      break;
+	    case NEGATE_EXPR:
+	      n *= 4;
+	      break;
+	    default:;
+	    }
+	  record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
+	}
       return true;
     }
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index de0ecf86478..9c2c29d61fa 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2061,6 +2061,7 @@ extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
 				 gimple **, slp_tree);
 extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
 			      stmt_vector_for_cost *);
+extern bool vect_emulated_vector_p (tree);
 extern bool vect_can_vectorize_without_simd_p (tree_code);
 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
 					stmt_vector_for_cost *,


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-08-10  8:12 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-10  8:12 [gcc r12-2832] tree-optimization/101801 - rework generic vector vectorization more Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).