public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-322] tree-optimization/108752 - vectorize emulated vectors in lowered form
@ 2023-04-28  9:07 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2023-04-28  9:07 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:821ef93976e750c118d42a2ad33b96dbd1b9f3a5

commit r14-322-g821ef93976e750c118d42a2ad33b96dbd1b9f3a5
Author: Richard Biener <rguenther@suse.de>
Date:   Fri Feb 10 13:09:10 2023 +0100

    tree-optimization/108752 - vectorize emulated vectors in lowered form
    
    The following makes sure to emit operations lowered to bit operations
    when vectorizing using emulated vectors.  This avoids relying on
    the vector lowering pass adhering to the exact same cost considerations
    as the vectorizer.
    
            PR tree-optimization/108752
            * tree-vect-generic.cc (build_replicated_const): Rename
            to build_replicated_int_cst and move to tree.{h,cc}.
            (do_plus_minus): Adjust.
            (do_negate): Likewise.
            * tree-vect-stmts.cc (vectorizable_operation): Emit emulated
            arithmetic vector operations in lowered form.
            * tree.h (build_replicated_int_cst): Declare.
            * tree.cc (build_replicated_int_cst): Moved from
            tree-vect-generic.cc build_replicated_const.

Diff:
---
 gcc/tree-vect-generic.cc |  37 ++---------------
 gcc/tree-vect-stmts.cc   | 106 ++++++++++++++++++++++++++++++++++++++++-------
 gcc/tree.cc              |  30 ++++++++++++++
 gcc/tree.h               |   1 +
 4 files changed, 125 insertions(+), 49 deletions(-)

diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index 445da53292e..59115b2e162 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -103,35 +103,6 @@ subparts_gt (tree type1, tree type2)
   return known_gt (n1, n2);
 }
 
-/* Build a constant of type TYPE, made of VALUE's bits replicated
-   every WIDTH bits to fit TYPE's precision.  */
-static tree
-build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
-{
-  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1) 
-    / HOST_BITS_PER_WIDE_INT;
-  unsigned HOST_WIDE_INT low, mask;
-  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
-  int i;
-
-  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
-
-  if (width == HOST_BITS_PER_WIDE_INT)
-    low = value;
-  else
-    {
-      mask = ((HOST_WIDE_INT)1 << width) - 1;
-      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
-    }
-
-  for (i = 0; i < n; i++)
-    a[i] = low;
-
-  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
-  return wide_int_to_tree
-    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
-}
-
 static GTY(()) tree vector_inner_type;
 static GTY(()) tree vector_last_type;
 static GTY(()) int vector_last_nunits;
@@ -255,8 +226,8 @@ do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
   tree low_bits, high_bits, a_low, b_low, result_low, signs;
 
   max = GET_MODE_MASK (TYPE_MODE (inner_type));
-  low_bits = build_replicated_const (word_type, width, max >> 1);
-  high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
+  low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+  high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
 
   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
@@ -289,8 +260,8 @@ do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
   tree low_bits, high_bits, b_low, result_low, signs;
 
   max = GET_MODE_MASK (TYPE_MODE (inner_type));
-  low_bits = build_replicated_const (word_type, width, max >> 1);
-  high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
+  low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+  high_bits = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
 
   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
 
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 272839a658c..dc2dc2cfa7e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6134,7 +6134,6 @@ vectorizable_shift (vec_info *vinfo,
   return true;
 }
 
-
 /* Function vectorizable_operation.
 
    Check if STMT_INFO performs a binary, unary or ternary operation that can
@@ -6405,20 +6404,6 @@ vectorizable_operation (vec_info *vinfo,
       return false;
     }
 
-  /* ???  We should instead expand the operations here, instead of
-     relying on vector lowering which has this hard cap on the number
-     of vector elements below it performs elementwise operations.  */
-  if (using_emulated_vectors_p
-      && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
-      && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
-	  || maybe_lt (nunits_out, 4U)))
-    {
-      if (dump_enabled_p ())
-	dump_printf (MSG_NOTE, "not using word mode for +- and less than "
-		     "four vector elements\n");
-      return false;
-    }
-
   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6581,7 +6566,96 @@ vectorizable_operation (vec_info *vinfo,
       vop1 = ((op_type == binary_op || op_type == ternary_op)
 	      ? vec_oprnds1[i] : NULL_TREE);
       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
-      if (masked_loop_p && mask_out_inactive)
+      if (using_emulated_vectors_p
+	  && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
+	{
+	  /* Lower the operation.  This follows vector lowering.  */
+	  unsigned int width = vector_element_bits (vectype);
+	  tree inner_type = TREE_TYPE (vectype);
+	  tree word_type
+	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
+	  HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
+	  tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
+	  tree high_bits
+	    = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
+	  tree wvop0 = make_ssa_name (word_type);
+	  new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
+					  build1 (VIEW_CONVERT_EXPR,
+						  word_type, vop0));
+	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	  tree result_low, signs;
+	  if (code == PLUS_EXPR || code == MINUS_EXPR)
+	    {
+	      tree wvop1 = make_ssa_name (word_type);
+	      new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
+					      build1 (VIEW_CONVERT_EXPR,
+						      word_type, vop1));
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      signs = make_ssa_name (word_type);
+	      new_stmt = gimple_build_assign (signs,
+					      BIT_XOR_EXPR, wvop0, wvop1);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      tree b_low = make_ssa_name (word_type);
+	      new_stmt = gimple_build_assign (b_low,
+					      BIT_AND_EXPR, wvop1, low_bits);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      tree a_low = make_ssa_name (word_type);
+	      if (code == PLUS_EXPR)
+		new_stmt = gimple_build_assign (a_low,
+						BIT_AND_EXPR, wvop0, low_bits);
+	      else
+		new_stmt = gimple_build_assign (a_low,
+						BIT_IOR_EXPR, wvop0, high_bits);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      if (code == MINUS_EXPR)
+		{
+		  new_stmt = gimple_build_assign (NULL_TREE,
+						  BIT_NOT_EXPR, signs);
+		  signs = make_ssa_name (word_type);
+		  gimple_assign_set_lhs (new_stmt, signs);
+		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+		}
+	      new_stmt = gimple_build_assign (NULL_TREE,
+					      BIT_AND_EXPR, signs, high_bits);
+	      signs = make_ssa_name (word_type);
+	      gimple_assign_set_lhs (new_stmt, signs);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      result_low = make_ssa_name (word_type);
+	      new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	    }
+	  else
+	    {
+	      tree a_low = make_ssa_name (word_type);
+	      new_stmt = gimple_build_assign (a_low,
+					      BIT_AND_EXPR, wvop0, low_bits);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      signs = make_ssa_name (word_type);
+	      new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      new_stmt = gimple_build_assign (NULL_TREE,
+					      BIT_AND_EXPR, signs, high_bits);
+	      signs = make_ssa_name (word_type);
+	      gimple_assign_set_lhs (new_stmt, signs);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	      result_low = make_ssa_name (word_type);
+	      new_stmt = gimple_build_assign (result_low,
+					      MINUS_EXPR, high_bits, a_low);
+	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	    }
+	  new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
+					  signs);
+	  result_low = make_ssa_name (word_type);
+	  gimple_assign_set_lhs (new_stmt, result_low);
+	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	  new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
+					  build1 (VIEW_CONVERT_EXPR,
+						  vectype, result_low));
+	  result_low = make_ssa_name (vectype);
+	  gimple_assign_set_lhs (new_stmt, result_low);
+	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+	}
+      else if (masked_loop_p && mask_out_inactive)
 	{
 	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
 					  vectype, i);
diff --git a/gcc/tree.cc b/gcc/tree.cc
index ead4248b8e5..7e6de288886 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -2667,6 +2667,36 @@ build_zero_cst (tree type)
     }
 }
 
+/* Build a constant of integer type TYPE, made of VALUE's bits replicated
+   every WIDTH bits to fit TYPE's precision.  */
+
+tree
+build_replicated_int_cst (tree type, unsigned int width, HOST_WIDE_INT value)
+{
+  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
+    / HOST_BITS_PER_WIDE_INT;
+  unsigned HOST_WIDE_INT low, mask;
+  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
+  int i;
+
+  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
+
+  if (width == HOST_BITS_PER_WIDE_INT)
+    low = value;
+  else
+    {
+      mask = ((HOST_WIDE_INT)1 << width) - 1;
+      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
+    }
+
+  for (i = 0; i < n; i++)
+    a[i] = low;
+
+  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
+  return wide_int_to_tree
+    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
+}
+
 /* If floating-point type TYPE has an IEEE-style sign bit, return an
    unsigned constant in which only the sign bit is set.  Return null
    otherwise.  */
diff --git a/gcc/tree.h b/gcc/tree.h
index dc94c17db76..0b72663e6a1 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4685,6 +4685,7 @@ extern tree build_one_cst (tree);
 extern tree build_minus_one_cst (tree);
 extern tree build_all_ones_cst (tree);
 extern tree build_zero_cst (tree);
+extern tree build_replicated_int_cst (tree, unsigned, HOST_WIDE_INT);
 extern tree sign_mask_for (tree);
 extern tree build_string (unsigned, const char * = NULL);
 extern tree build_poly_int_cst (tree, const poly_wide_int_ref &);

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-04-28  9:07 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-28  9:07 [gcc r14-322] tree-optimization/108752 - vectorize emulated vectors in lowered form Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).