public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Support constants and externals in BB reduction vectorization
@ 2023-08-15 12:10 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-08-15 12:10 UTC (permalink / raw)
  To: gcc-patches

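The following supports vectorizing BB reductions involving a
constant or an invariant.  Such operands are not made part of the
vectorized lanes; they are recorded as remaining defs of the
reduction and combined with the reduced scalar in the epilogue.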

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	* tree-vectorizer.h (_slp_instance::remain_stmts): Change
	to ...
	(_slp_instance::remain_defs): ... this.
	(SLP_INSTANCE_REMAIN_STMTS): Rename to ...
	(SLP_INSTANCE_REMAIN_DEFS): ... this.
	(slp_root::remain): New.
	(slp_root::slp_root): Adjust.
	* tree-vect-slp.cc (vect_free_slp_instance): Adjust.
	(vect_build_slp_instance): Get extra remain parameter,
	adjust former handling of a cut off stmt.
	(vect_analyze_slp_instance): Adjust.
	(vect_analyze_slp): Likewise.
	(_bb_vec_info::~_bb_vec_info): Likewise.
	(vectorizable_bb_reduc_epilogue): Dump something if we fail.
	(vect_slp_check_for_constructors): Handle non-internal
	defs as remain defs of a reduction.
	(vectorize_slp_instance_root_stmt): Adjust.

	* gcc.dg/vect/bb-slp-75.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-75.c | 25 +++++++++++
 gcc/tree-vect-slp.cc                  | 60 ++++++++++++++++++---------
 gcc/tree-vectorizer.h                 |  9 ++--
 3 files changed, 71 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-75.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-75.c b/gcc/testsuite/gcc.dg/vect/bb-slp-75.c
new file mode 100644
index 00000000000..1abac136f72
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-75.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-additional-options "-ffast-math" } */
+/* { dg-additional-options "-msse2 -mfpmath=sse" { target { x86_64-*-* i?86-*-* } } } */
+
+float x[4];
+
+float test1 (float a)
+{
+  return x[0] + x[2] + x[1] + x[3] + a;
+}
+
+float test2 (void)
+{
+  return x[3] + x[2] + x[1] + 1.f + x[0];
+}
+
+float test3 (float a)
+{
+  return x[0] + a + x[2] + x[1] + x[3] + 1.f;
+}
+
+/* We currently require a .REDUC_PLUS direct internal function but do not
+   have a dejagnu target for this.  */
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 3 "slp2" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 41997d5a546..cf91b21cf7d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -209,7 +209,7 @@ vect_free_slp_instance (slp_instance instance)
   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
   SLP_INSTANCE_LOADS (instance).release ();
   SLP_INSTANCE_ROOT_STMTS (instance).release ();
-  SLP_INSTANCE_REMAIN_STMTS (instance).release ();
+  SLP_INSTANCE_REMAIN_DEFS (instance).release ();
   instance->subgraph_entries.release ();
   instance->cost_vec.release ();
   free (instance);
@@ -3115,6 +3115,7 @@ vect_build_slp_instance (vec_info *vinfo,
 			 slp_instance_kind kind,
 			 vec<stmt_vec_info> &scalar_stmts,
 			 vec<stmt_vec_info> &root_stmt_infos,
+			 vec<tree> &remain,
 			 unsigned max_tree_size, unsigned *limit,
 			 scalar_stmts_to_slp_tree_map_t *bst_map,
 			 /* ???  We need stmt_info for group splitting.  */
@@ -3134,10 +3135,9 @@ vect_build_slp_instance (vec_info *vinfo,
      ???  Selecting the optimal set of lanes to vectorize would be nice
      but SLP build for all lanes will fail quickly because we think
      we're going to need unrolling.  */
-  auto_vec<stmt_vec_info> remain;
   if (kind == slp_inst_kind_bb_reduc
       && (scalar_stmts.length () & 1))
-    remain.safe_push (scalar_stmts.pop ());
+    remain.safe_insert (0, gimple_get_lhs (scalar_stmts.pop ()->stmt));
 
   /* Build the tree for the SLP instance.  */
   unsigned int group_size = scalar_stmts.length ();
@@ -3186,10 +3186,7 @@ vect_build_slp_instance (vec_info *vinfo,
 	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
 	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
 	  SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
-	  if (!remain.is_empty ())
-	    SLP_INSTANCE_REMAIN_STMTS (new_instance) = remain.copy ();
-	  else
-	    SLP_INSTANCE_REMAIN_STMTS (new_instance) = vNULL;
+	  SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
 	  SLP_INSTANCE_KIND (new_instance) = kind;
 	  new_instance->reduc_phis = NULL;
 	  new_instance->cost_vec = vNULL;
@@ -3469,6 +3466,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
     gcc_unreachable ();
 
   vec<stmt_vec_info> roots = vNULL;
+  vec<tree> remain = vNULL;
   if (kind == slp_inst_kind_ctor)
     {
       roots.create (1);
@@ -3476,7 +3474,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
     }
   /* Build the tree for the SLP instance.  */
   bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts,
-				      roots,
+				      roots, remain,
 				      max_tree_size, limit, bst_map,
 				      kind == slp_inst_kind_store
 				      ? stmt_info : NULL);
@@ -3521,10 +3519,12 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 	  if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind,
 				       bb_vinfo->roots[i].stmts,
 				       bb_vinfo->roots[i].roots,
+				       bb_vinfo->roots[i].remain,
 				       max_tree_size, &limit, bst_map, NULL))
 	    {
 	      bb_vinfo->roots[i].stmts = vNULL;
 	      bb_vinfo->roots[i].roots = vNULL;
+	      bb_vinfo->roots[i].remain = vNULL;
 	    }
 	}
     }
@@ -5955,6 +5955,7 @@ _bb_vec_info::~_bb_vec_info ()
     {
       roots[i].stmts.release ();
       roots[i].roots.release ();
+      roots[i].remain.release ();
     }
   roots.release ();
 }
@@ -6405,7 +6406,13 @@ vectorizable_bb_reduc_epilogue (slp_instance instance,
       || !direct_internal_fn_supported_p (reduc_fn, vectype, OPTIMIZE_FOR_BOTH)
       || !useless_type_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
 				     TREE_TYPE (vectype)))
-    return false;
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "not vectorized: basic block reduction epilogue "
+			 "operation unsupported.\n");
+      return false;
+    }
 
   /* There's no way to cost a horizontal vector reduction via REDUC_FN so
      cost log2 vector operations plus shuffles and one extraction.  */
@@ -7262,22 +7269,37 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
 		 but record those to be handled in the epilogue.  */
 	      /* ???  For now do not allow mixing ops or externs/constants.  */
 	      bool invalid = false;
+	      unsigned remain_cnt = 0;
 	      for (unsigned i = 0; i < chain.length (); ++i)
-		if (chain[i].dt != vect_internal_def
-		    || chain[i].code != code)
-		  invalid = true;
-	      if (!invalid)
+		{
+		  if (chain[i].code != code)
+		    {
+		      invalid = true;
+		      break;
+		    }
+		  if (chain[i].dt != vect_internal_def)
+		    remain_cnt++;
+		}
+	      if (!invalid && chain.length () - remain_cnt > 1)
 		{
 		  vec<stmt_vec_info> stmts;
+		  vec<tree> remain = vNULL;
 		  stmts.create (chain.length ());
+		  if (remain_cnt > 0)
+		    remain.create (remain_cnt);
 		  for (unsigned i = 0; i < chain.length (); ++i)
-		    stmts.quick_push (bb_vinfo->lookup_def (chain[i].op));
+		    {
+		      if (chain[i].dt == vect_internal_def)
+			stmts.quick_push (bb_vinfo->lookup_def (chain[i].op));
+		      else
+			remain.quick_push (chain[i].op);
+		    }
 		  vec<stmt_vec_info> roots;
 		  roots.create (chain_stmts.length ());
 		  for (unsigned i = 0; i < chain_stmts.length (); ++i)
 		    roots.quick_push (bb_vinfo->lookup_stmt (chain_stmts[i]));
 		  bb_vinfo->roots.safe_push (slp_root (slp_inst_kind_bb_reduc,
-						       stmts, roots));
+						       stmts, roots, remain));
 		}
 	    }
 	}
@@ -9160,16 +9182,16 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
 	gcc_unreachable ();
       tree scalar_def = gimple_build (&epilogue, as_combined_fn (reduc_fn),
 				      TREE_TYPE (TREE_TYPE (vec_def)), vec_def);
-      if (!SLP_INSTANCE_REMAIN_STMTS (instance).is_empty ())
+      if (!SLP_INSTANCE_REMAIN_DEFS (instance).is_empty ())
 	{
 	  tree rem_def = NULL_TREE;
-	  for (auto rem : SLP_INSTANCE_REMAIN_STMTS (instance))
+	  for (auto def : SLP_INSTANCE_REMAIN_DEFS (instance))
 	    if (!rem_def)
-	      rem_def = gimple_get_lhs (rem->stmt);
+	      rem_def = def;
 	    else
 	      rem_def = gimple_build (&epilogue, reduc_code,
 				      TREE_TYPE (scalar_def),
-				      rem_def, gimple_get_lhs (rem->stmt));
+				      rem_def, def);
 	  scalar_def = gimple_build (&epilogue, reduc_code,
 				     TREE_TYPE (scalar_def),
 				     scalar_def, rem_def);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5987a327332..1de144988c8 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -259,7 +259,7 @@ public:
 
   /* For slp_inst_kind_bb_reduc the defs that were not vectorized, NULL
      otherwise.  */
-  vec<stmt_vec_info> remain_stmts;
+  vec<tree> remain_defs;
 
   /* The unrolling factor required to vectorized this SLP instance.  */
   poly_uint64 unrolling_factor;
@@ -289,7 +289,7 @@ public:
 #define SLP_INSTANCE_UNROLLING_FACTOR(S)         (S)->unrolling_factor
 #define SLP_INSTANCE_LOADS(S)                    (S)->loads
 #define SLP_INSTANCE_ROOT_STMTS(S)               (S)->root_stmts
-#define SLP_INSTANCE_REMAIN_STMTS(S)             (S)->remain_stmts
+#define SLP_INSTANCE_REMAIN_DEFS(S)              (S)->remain_defs
 #define SLP_INSTANCE_KIND(S)                     (S)->kind
 
 #define SLP_TREE_CHILDREN(S)                     (S)->children
@@ -1027,11 +1027,12 @@ loop_vec_info_for_loop (class loop *loop)
 struct slp_root
 {
   slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
-	    vec<stmt_vec_info> roots_)
-    : kind(kind_), stmts(stmts_), roots(roots_) {}
+	    vec<stmt_vec_info> roots_, vec<tree> remain_ = vNULL)
+    : kind(kind_), stmts(stmts_), roots(roots_), remain(remain_) {}
   slp_instance_kind kind;
   vec<stmt_vec_info> stmts;
   vec<stmt_vec_info> roots;
+  vec<tree> remain;
 };
 
 typedef class _bb_vec_info : public vec_info
-- 
2.35.3

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-15 12:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-15 12:10 [PATCH] Support constants and externals in BB reduction vectorization Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).