public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Fix PR43432, vectorize backwards stepping loads
@ 2010-09-07 13:11 Michael Matz
  2010-09-08  7:51 ` Ira Rosen
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Matz @ 2010-09-07 13:11 UTC (permalink / raw)
  To: gcc-patches

Hi,

this implements handling of consecutive loads whose indices run backwards.  
I'm handling the easy cases only, namely when no explicit realignment 
instructions need to be generated and when we need to emit only one load 
per original load (i.e. no multiple type width in the loop).

This all requires that we be able to reverse the elements of a vector, 
hence two new helper functions for that are added.

Reversed stores aren't handled but should be reasonably easy to extend.

Regstrapped on x86_64-linux.  I've had to add x86_64-*-* to 
target-supports.exp/vect_perm recognition which in turn makes 
vect/slp-perm-8.c and vect/slp-perm-9.c fail then.  That's most probably 
because while x86_64 supports permutation for 4 and 8-sized elements it 
doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c).  I'm going 
to investigate this but seek feedback on the patch itself already now.


Ciao,
Michael.
-- 
	PR tree-optimization/43432
	* tree-vect-data-refs.c (vect_analyze_data_ref_access):
	Accept backwards consecutive accesses.
	(vect_create_data_ref_ptr): If step is negative generate
	decreasing IVs.
	* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
	(perm_mask_for_reverse, reverse_vec_elements): New functions.
	(vectorizable_load): Handle loads with negative steps when easily
	possible.

testsuite/
	PR tree-optimization/43432
	* gcc.dg/vect/pr43432.c: New test.

Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 163773)
+++ tree-vect-data-refs.c	(working copy)
@@ -2281,7 +2281,9 @@ vect_analyze_data_ref_access (struct dat
     }
 
   /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+      || (dr_step < 0
+	  && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
     {
       /* Mark that it is not interleaving.  */
       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2953,6 +2955,7 @@ vect_create_data_ref_ptr (gimple stmt, s
   tree vptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  bool negative;
   tree indx_before_incr, indx_after_incr;
   gimple incr;
   tree step;
@@ -2985,6 +2988,7 @@ vect_create_data_ref_ptr (gimple stmt, s
     *inv_p = true;
   else
     *inv_p = false;
+  negative = tree_int_cst_compare (step, size_zero_node) < 0;
 
   /* Create an expression for the first address accessed by this load
      in LOOP.  */
@@ -3144,6 +3148,8 @@ vect_create_data_ref_ptr (gimple stmt, s
 	 LOOP is zero. In this case the step here is also zero.  */
       if (*inv_p)
 	step = size_zero_node;
+      else if (negative)
+	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 163773)
+++ tree-vect-stmts.c	(working copy)
@@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "negative step for store.");
+      return false;
+    }
+
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;
@@ -3400,6 +3407,68 @@ vectorizable_store (gimple stmt, gimple_
   return true;
 }
 
+/* Given a vector type VECTYPE returns a builtin DECL to be used
+   for vector permutation and stores a mask into *MASK that implements
+   reversal of the vector elements.  If that is impossible to do
+   returns NULL (and *MASK is unchanged).  */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+  tree builtin_decl;
+  tree mask_element_type, mask_type;
+  tree mask_vec = NULL;
+  int i;
+  int nunits;
+  if (!targetm.vectorize.builtin_vec_perm)
+    return NULL;
+
+  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+                                                     &mask_element_type);
+  if (!builtin_decl || !mask_element_type)
+    return NULL;
+
+  mask_type = get_vectype_for_scalar_type (mask_element_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+    return NULL;
+
+  for (i = 0; i < nunits; i++)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+  mask_vec = build_vector (mask_type, mask_vec);
+
+  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+    return NULL;
+  if (mask)
+    *mask = mask_vec;
+  return builtin_decl;
+}
+
+/* Given a vector variable X, that was generated for the scalar LHS of
+   STMT, generate instructions to reverse the vector elements of X,
+   insert them a *GSI and return the permuted vector variable.  */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+  tree vectype = TREE_TYPE (x);
+  tree mask_vec, builtin_decl;
+  tree perm_dest, data_ref;
+  gimple perm_stmt;
+
+  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+  /* Generate the permute statement.  */
+  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+  data_ref = make_ssa_name (perm_dest, perm_stmt);
+  gimple_call_set_lhs (perm_stmt, data_ref);
+  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+  return data_ref;
+}
+
 /* vectorizable_load.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3442,6 +3511,7 @@ vectorizable_load (gimple stmt, gimple_s
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
+  bool negative;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;
@@ -3504,6 +3574,14 @@ vectorizable_load (gimple stmt, gimple_s
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  if (negative && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "multiple types with negative step.");
+      return false;
+    }
+
   scalar_type = TREE_TYPE (DR_REF (dr));
   mode = TYPE_MODE (vectype);
 
@@ -3538,6 +3616,25 @@ vectorizable_load (gimple stmt, gimple_s
 	return false;
     }
 
+  if (negative)
+    {
+      gcc_assert (!strided_load);
+      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+      if (alignment_support_scheme != dr_aligned
+	  && alignment_support_scheme != dr_unaligned_supported)
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "negative step but alignment required.");
+	  return false;
+	}
+      if (!perm_mask_for_reverse (vectype, NULL))
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "negative step and reversing not supported.");
+	  return false;
+	}
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3712,6 +3809,9 @@ vectorizable_load (gimple stmt, gimple_s
   else
     at_loop = loop;
 
+  if (negative)
+    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
@@ -3885,6 +3985,12 @@ vectorizable_load (gimple stmt, gimple_s
 		gcc_unreachable (); /* FORNOW. */
 	    }
 
+	  if (negative)
+	    {
+	      new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
+	    }
+
 	  /* Collect vector loads and later create their permutation in
 	     vect_transform_strided_load ().  */
           if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c	(revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-09-24 11:05 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-07 13:11 Fix PR43432, vectorize backwards stepping loads Michael Matz
2010-09-08  7:51 ` Ira Rosen
2010-09-09 15:01   ` Michael Matz
2010-09-09 15:08     ` Paolo Carlini
2010-09-09 15:22       ` Michael Matz
2010-09-09 16:39         ` Paolo Carlini
2010-09-12 11:23     ` Ira Rosen
2010-09-17 16:55     ` H.J. Lu
2010-09-18 16:59       ` H.J. Lu
2010-09-24 15:31         ` H.J. Lu
2010-09-24 17:58           ` Ira Rosen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).