public inbox for gcc-patches@gcc.gnu.org
* Vectorize stores with unknown stride
@ 2015-05-06 15:37 Michael Matz
  2015-05-07 14:05 ` Richard Biener
  2015-05-07 14:34 ` Alan Lawrence
  0 siblings, 2 replies; 5+ messages in thread
From: Michael Matz @ 2015-05-06 15:37 UTC (permalink / raw)
  To: gcc-patches

Hi,

I've been sitting on this for quite some time already and always missed 
stage 1.  This implements support for vectorizing strided stores with an 
unknown but loop-invariant stride, like:

void
sumit (float * __restrict dest,
       float * __restrict src, float * __restrict src2,
       int stride, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dest[i*stride] = src[i] + src2[i];
}

I use the same scheme as for strided loads, i.e. such a store is expanded 
into N separate scalar stores (so alignment can also be ignored for 
them).
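
As a sketch (hand-written to show the shape of the generated code, not 
actual vectorizer output; the 4-wide vector type and the name sumit_shape 
are purely illustrative):

typedef float v4sf __attribute__ ((vector_size (16)));

void
sumit_shape (float * __restrict dest,
	     float * __restrict src, float * __restrict src2,
	     int stride, int n)
{
  int j;
  for (j = 0; j + 4 <= n; j += 4)
    {
      /* One vector add as usual (the loads are written as plain casts
	 for brevity).  */
      v4sf sum = *(v4sf *) &src[j] + *(v4sf *) &src2[j];
      /* The store becomes N element-sized scalar stores, which is why
	 vector alignment never enters the picture.  */
      dest[(j + 0) * stride] = sum[0];
      dest[(j + 1) * stride] = sum[1];
      dest[(j + 2) * stride] = sum[2];
      dest[(j + 3) * stride] = sum[3];
    }
  for (; j < n; j++)		/* scalar epilogue */
    dest[j * stride] = src[j] + src2[j];
}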

This doesn't yet fix PR65962, because that one uses a _constant_ step.  
That makes us try a grouped access, which isn't supported for stores when 
there's a gap (which there is).  Unfortunately vect_analyze_group_access 
actively changes some vect info even before detecting that it's not a 
grouped access, so some rollback would have to be implemented; I decided 
to defer that to a follow-up.
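
For reference, the situation there looks roughly like this (an 
illustrative loop, not the exact testcase from the PR):

void
fill (float * __restrict dest, float * __restrict src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* The constant step makes the vectorizer analyze this as a grouped
       access; only one of every four slots is written, i.e. there is a
       gap, which grouped stores don't support.  */
    dest[i * 4] = src[i];
}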

Regstrapped on x86-64-linux, no regressions (I had to adjust two 
Fortran testcases where one more loop is vectorized now).  Okay for trunk?


Ciao,
Michael.
	* tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
	to strided_p.
	(STMT_VINFO_STRIDE_LOAD_P): Rename to ...
	(STMT_VINFO_STRIDED_P): ... this.
	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
	(vect_verify_datarefs_alignment): Likewise.
	(vect_enhance_data_refs_alignment): Likewise.
	(vect_analyze_data_ref_access): Likewise.
	(vect_analyze_data_refs): Accept strided stores.
	* tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
	(vect_model_load_cost): Adjust for macro rename.
	(vectorizable_mask_load_store): Likewise.
	(vectorizable_load): Likewise.
	(vectorizable_store): Open code strided stores.

testsuite/
	* gcc.dg/vect/vect-strided-store.c: New test.
	* gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
	* gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.

diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 96afc7a..6d8f17e 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -665,7 +665,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 
   /* Strided loads perform only component accesses, misalignment information
      is irrelevant for them.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  if (STMT_VINFO_STRIDED_P (stmt_info))
     return true;
 
   misalign = DR_INIT (dr);
@@ -942,7 +942,7 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 
       /* Strided loads perform only component accesses, alignment is
 	 irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      if (STMT_VINFO_STRIDED_P (stmt_info))
 	continue;
 
       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
@@ -1409,7 +1409,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
       /* Strided loads perform only component accesses, alignment is
 	 irrelevant for them.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+      if (STMT_VINFO_STRIDED_P (stmt_info))
 	continue;
 
       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
@@ -1701,7 +1701,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
 	  /* Strided loads perform only component accesses, alignment is
 	     irrelevant for them.  */
-	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+	  if (STMT_VINFO_STRIDED_P (stmt_info))
 	    continue;
 
 	  save_misalignment = DR_MISALIGNMENT (dr);
@@ -1821,7 +1821,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
 	  /* Strided loads perform only component accesses, alignment is
 	     irrelevant for them.  */
-	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+	  if (STMT_VINFO_STRIDED_P (stmt_info))
 	    continue;
 
 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
@@ -2368,7 +2368,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
 
   /* Assume this is a DR handled by non-constant strided load case.  */
   if (TREE_CODE (step) != INTEGER_CST)
-    return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
+    return STMT_VINFO_STRIDED_P (stmt_info);
 
   /* Not consecutive access - check if it's a part of interleaving group.  */
   return vect_analyze_group_access (dr);
@@ -3764,8 +3764,7 @@ again:
       else if (loop_vinfo
 	       && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
 	{
-	  if (nested_in_vect_loop_p (loop, stmt)
-	      || !DR_IS_READ (dr))
+	  if (nested_in_vect_loop_p (loop, stmt))
 	    {
 	      if (dump_enabled_p ())
 		{
@@ -3777,7 +3776,7 @@ again:
 		}
 	      return false;
 	    }
-	  STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
+	  STMT_VINFO_STRIDED_P (stmt_info) = true;
 	}
     }
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2ce6d4d..d268eb0 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1014,7 +1014,19 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
     }
 
   /* Costs of the stores.  */
-  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    {
+      /* N scalar stores plus extracting the elements.  */
+      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+      inside_cost += record_stmt_cost (body_cost_vec,
+				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+				       scalar_store, stmt_info, 0, vect_body);
+      inside_cost += record_stmt_cost (body_cost_vec,
+				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+				       vec_to_scalar, stmt_info, 0, vect_body);
+    }
+  else
+    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -1127,7 +1139,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
     }
 
   /* The loads themselves.  */
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  if (STMT_VINFO_STRIDED_P (stmt_info))
     {
       /* N scalar loads plus gathering them into a vector.  */
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -1820,7 +1832,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     return false;
 
-  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  if (STMT_VINFO_STRIDED_P (stmt_info))
     return false;
 
   if (STMT_VINFO_GATHER_P (stmt_info))
@@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   tree dataref_ptr = NULL_TREE;
   tree dataref_offset = NULL_TREE;
   gimple ptr_incr = NULL;
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
   int ncopies;
   int j;
   gimple next_stmt, first_stmt = NULL;
@@ -5100,38 +5112,42 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  negative = 
-    tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
-			  ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
-			  size_zero_node) < 0;
-  if (negative && ncopies > 1)
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "multiple types with negative step.\n");
-      return false;
-    }
-
-  if (negative)
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    ;
+  else
     {
-      gcc_assert (!grouped_store);
-      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
-      if (alignment_support_scheme != dr_aligned
-	  && alignment_support_scheme != dr_unaligned_supported)
+      negative = 
+	  tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
+				? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
+				size_zero_node) < 0;
+      if (negative && ncopies > 1)
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "negative step but alignment required.\n");
+			     "multiple types with negative step.\n");
 	  return false;
 	}
-      if (dt != vect_constant_def 
-	  && dt != vect_external_def
-	  && !perm_mask_for_reverse (vectype))
+      if (negative)
 	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "negative step and reversing not supported.\n");
-	  return false;
+	  gcc_assert (!grouped_store);
+	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+	  if (alignment_support_scheme != dr_aligned
+	      && alignment_support_scheme != dr_unaligned_supported)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "negative step but alignment required.\n");
+	      return false;
+	    }
+	  if (dt != vect_constant_def 
+	      && dt != vect_external_def
+	      && !perm_mask_for_reverse (vectype))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "negative step and reversing not supported.\n");
+	      return false;
+	    }
 	}
     }
 
@@ -5230,6 +5246,113 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     dump_printf_loc (MSG_NOTE, vect_location,
                      "transform store. ncopies = %d\n", ncopies);
 
+  if (STMT_VINFO_STRIDED_P (stmt_info))
+    {
+      gimple_stmt_iterator incr_gsi;
+      bool insert_after;
+      gimple incr;
+      tree offvar;
+      tree ivstep;
+      tree running_off;
+      gimple_seq stmts = NULL;
+      tree stride_base, stride_step, alias_off;
+      tree vec_oprnd;
+
+      gcc_assert (!nested_in_vect_loop_p (loop, stmt));
+
+      stride_base
+	= fold_build_pointer_plus
+	    (unshare_expr (DR_BASE_ADDRESS (dr)),
+	     size_binop (PLUS_EXPR,
+			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
+			 convert_to_ptrofftype (DR_INIT (dr))));
+      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+
+      /* For a store with loop-invariant (but other than power-of-2)
+         stride (i.e. not a grouped access) like so:
+
+	   for (i = 0; i < n; i += stride)
+	     array[i] = ...;
+
+	 we generate a new induction variable and new stores from
+	 the components of the (vectorized) rhs:
+
+	   for (j = 0; ; j += VF*stride)
+	     vectemp = ...;
+	     tmp1 = vectemp[0];
+	     array[j] = tmp1;
+	     tmp2 = vectemp[1];
+	     array[j + stride] = tmp2;
+	     ...
+         */
+
+      ivstep = stride_step;
+      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+			    build_int_cst (TREE_TYPE (ivstep),
+					   ncopies * nunits));
+
+      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+
+      create_iv (stride_base, ivstep, NULL,
+		 loop, &incr_gsi, insert_after,
+		 &offvar, NULL);
+      incr = gsi_stmt (incr_gsi);
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+
+      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+      if (stmts)
+	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+
+      prev_stmt_info = NULL;
+      running_off = offvar;
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+      for (j = 0; j < ncopies; j++)
+	{
+	  /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+	     and first_stmt == stmt.  */
+	  if (j == 0)
+	    vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
+	  else
+	    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+
+	  for (i = 0; i < nunits; i++)
+	    {
+	      tree newref, newoff;
+	      gimple incr, assign;
+	      tree size = TYPE_SIZE (elem_type);
+	      /* Extract the i'th component.  */
+	      tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+				      size);
+	      tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
+				       size, pos);
+
+	      elem = force_gimple_operand_gsi (gsi, elem, true,
+					       NULL_TREE, true,
+					       GSI_SAME_STMT);
+
+	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
+			       running_off, alias_off);
+
+	      /* And store it to *running_off.  */
+	      assign = gimple_build_assign (newref, elem);
+	      vect_finish_stmt_generation (stmt, assign, gsi);
+
+	      newoff = copy_ssa_name (running_off, NULL);
+	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+					  running_off, stride_step);
+	      vect_finish_stmt_generation (stmt, incr, gsi);
+
+	      running_off = newoff;
+	      if (j == 0 && i == 0)
+		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+	      else
+		STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+	      prev_stmt_info = vinfo_for_stmt (assign);
+	    }
+	}
+      return true;
+    }
+
   dr_chain.create (group_size);
   oprnds.create (group_size);
 
@@ -5846,7 +5969,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	  return false;
 	}
     }
-  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  else if (STMT_VINFO_STRIDED_P (stmt_info))
     ;
   else
     {
@@ -6079,7 +6202,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	}
       return true;
     }
-  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+  else if (STMT_VINFO_STRIDED_P (stmt_info))
     {
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 0796cc1..d231626 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -643,7 +643,9 @@ typedef struct _stmt_vec_info {
 
   /* For loads only, true if this is a gather load.  */
   bool gather_p;
-  bool stride_load_p;
+
+  /* True if this is an access with loop-invariant stride.  */
+  bool strided_p;
 
   /* For both loads and stores.  */
   bool simd_lane_access_p;
@@ -661,7 +663,7 @@ typedef struct _stmt_vec_info {
 #define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
 #define STMT_VINFO_DATA_REF(S)             (S)->data_ref_info
 #define STMT_VINFO_GATHER_P(S)		   (S)->gather_p
-#define STMT_VINFO_STRIDE_LOAD_P(S)	   (S)->stride_load_p
+#define STMT_VINFO_STRIDED_P(S)	   	   (S)->strided_p
 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
 
 #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_base_address
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
index b17ac9c..d5f5d40 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
@@ -14,5 +14,5 @@ subroutine to_product_of(self,a,b,a1,a2)
   end do
 end subroutine
 
-! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } }
+! { dg-final { scan-tree-dump "vectorized 2 loops" "vect" } }
 ! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
index 1d13cea..625be83 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
@@ -29,5 +29,5 @@
       return
       end function trs2a2
 
-! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } }
+! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  } }
 ! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
new file mode 100644
index 0000000..32bcff9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
@@ -0,0 +1,30 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+void __attribute__((noinline))
+sumit (float * __restrict dest,
+       float * __restrict src, float * __restrict src2,
+       int stride, int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    dest[i*stride] = src[i] + src2[i];
+}
+
+int main()
+{
+  int i;
+  float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  float dest[8];
+  check_vect ();
+  sumit (dest, src, src, 1, 8);
+  for (i = 0; i < 8; i++)
+    if (2*src[i] != dest[i])
+      abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */


* Re: Vectorize stores with unknown stride
  2015-05-06 15:37 Vectorize stores with unknown stride Michael Matz
@ 2015-05-07 14:05 ` Richard Biener
  2015-05-07 14:34 ` Alan Lawrence
  1 sibling, 0 replies; 5+ messages in thread
From: Richard Biener @ 2015-05-07 14:05 UTC (permalink / raw)
  To: Michael Matz; +Cc: GCC Patches

On Wed, May 6, 2015 at 5:37 PM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> [...]
>
> Regstrapped on x86-64-linux, no regressions (I had to adjust two
> Fortran testcases where one more loop is vectorized now).  Okay for trunk?

Ok.

Thanks,
Richard.


* Re: Vectorize stores with unknown stride
  2015-05-06 15:37 Vectorize stores with unknown stride Michael Matz
  2015-05-07 14:05 ` Richard Biener
@ 2015-05-07 14:34 ` Alan Lawrence
  2015-05-07 16:07   ` Michael Matz
  1 sibling, 1 reply; 5+ messages in thread
From: Alan Lawrence @ 2015-05-07 14:34 UTC (permalink / raw)
  To: Michael Matz; +Cc: gcc-patches

(Below are all minor/style points only, no reason for patch not to go in.)

Michael Matz wrote:
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index 96afc7a..6d8f17e 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -665,7 +665,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
> 
>    /* Strided loads perform only component accesses, misalignment information
>       is irrelevant for them.  */
> -  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> +  if (STMT_VINFO_STRIDED_P (stmt_info))
>      return true;
 >    misalign = DR_INIT (dr);

Also update comment? (5 identical cases)

> @@ -2368,7 +2368,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
> 
>    /* Assume this is a DR handled by non-constant strided load case.  */
>    if (TREE_CODE (step) != INTEGER_CST)
> -    return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
> +    return STMT_VINFO_STRIDED_P (stmt_info);

Also update comment?

> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 2ce6d4d..d268eb0 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
>    tree dataref_ptr = NULL_TREE;
>    tree dataref_offset = NULL_TREE;
>    gimple ptr_incr = NULL;
> -  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
> +  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);

Unrelated? (though I don't object to it!)

> @@ -5100,38 +5112,42 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
> 
> -  negative =
> -    tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
> -                         ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
> -                         size_zero_node) < 0;
> -  if (negative && ncopies > 1)
> -    {
> -      if (dump_enabled_p ())
> -        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                        "multiple types with negative step.\n");
> -      return false;
> -    }
> -
> -  if (negative)
> +  if (STMT_VINFO_STRIDED_P (stmt_info))
> +    ;
> +  else

This is not a long chain of ifs, so is there a reason not to have
if (!STMT_VINFO_STRIDED_P (stmt_info))
?

> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 0796cc1..d231626 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -661,7 +663,7 @@ typedef struct _stmt_vec_info {
>  #define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
>  #define STMT_VINFO_DATA_REF(S)             (S)->data_ref_info
>  #define STMT_VINFO_GATHER_P(S)            (S)->gather_p
> -#define STMT_VINFO_STRIDE_LOAD_P(S)       (S)->stride_load_p
> +#define STMT_VINFO_STRIDED_P(S)                   (S)->strided_p

Spacing (?)

>  #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
> 
>  #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_base_address
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
> new file mode 100644
> index 0000000..32bcff9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
> @@ -0,0 +1,30 @@
> +/* { dg-require-effective-target vect_float } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +void __attribute__((noinline))
> +sumit (float * __restrict dest,
> +       float * __restrict src, float * __restrict src2,
> +       int stride, int n)
> +{
> +  int i;
> +  for (i = 0; i < n; i++)
> +    dest[i*stride] = src[i] + src2[i];
> +}
> +
> +int main()
> +{
> +  int i;
> +  float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
> +  float dest[8];
> +  check_vect ();
> +  sumit (dest, src, src, 1, 8);
> +  for (i = 0; i < 8; i++)
> +    if (2*src[i] != dest[i])
> +      abort ();
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */

While I appreciate the vectorizer doesn't know the invariant stride is going to 
be '1' when the loop executes...IMVHO I'd feel more reassured if we passed in 
stride>1 at runtime ;). In fact I'd feel more reassured still, if we did the 
thing with a few different values of stride...

Cheers, Alan


* Re: Vectorize stores with unknown stride
  2015-05-07 14:34 ` Alan Lawrence
@ 2015-05-07 16:07   ` Michael Matz
  2015-05-07 17:13     ` Alan Lawrence
  0 siblings, 1 reply; 5+ messages in thread
From: Michael Matz @ 2015-05-07 16:07 UTC (permalink / raw)
  To: Alan Lawrence; +Cc: gcc-patches

On Thu, 7 May 2015, Alan Lawrence wrote:

> Also update comment? (5 identical cases)
> 
> Also update comment?

Obviously a good idea, thanks :)  (s/loads/accesses/ for the commit)

> > @@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator
> > *gsi, gimple *vec_stmt,
> >    tree dataref_ptr = NULL_TREE;
> >    tree dataref_offset = NULL_TREE;
> >    gimple ptr_incr = NULL;
> > -  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
> > +  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
> 
> Unrelated? (though I don't object to it!)

No, the patch introduces for the first time in this function a loop over 
an unsigned i with nunits as bound, so the compare would warn if I weren't 
to change either the type of i (with a cascade of more such changes) or 
nunits (with only this one place).  The whole file could use an overhaul 
of signedness (after all, nunits in all functions will always be 
non-negative), but as a separate patch.
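
A minimal hand-written illustration of the warning being avoided (assumes 
-Wsign-compare, which the bootstrap's -Werror makes fatal; the function f 
and the constant are made up):

void
f (void)
{
  int nunits = 4;		/* the old, signed type of nunits */
  unsigned int i;
  /* warning: comparison between signed and unsigned integer
     expressions [-Wsign-compare] */
  for (i = 0; i < nunits; i++)
    ;
}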

> > +  if (STMT_VINFO_STRIDED_P (stmt_info))
> > +    ;
> > +  else
> 
> This is not a long chain of ifs, so is there a reason not to have
> if (!STMT_VINFO_STRIDED_P (stmt_info))
> ?

Err, right.  Probably I had some code in there initially; as it stands 
now it looks a bit mannered :)

> > -#define STMT_VINFO_STRIDE_LOAD_P(S)       (S)->stride_load_p
> > +#define STMT_VINFO_STRIDED_P(S)                   (S)->strided_p
> 
> Spacing (?)

No, it's using correct tabs, indentation by the diff '+' prefix deceives 
us.  (Your mailer must be playing games, I've sent it out with TABs 
intact).

> > +  float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
> > +  float dest[8];
> > +  check_vect ();
> > +  sumit (dest, src, src, 1, 8);
> > +  for (i = 0; i < 8; i++)
> > +    if (2*src[i] != dest[i])
> > +      abort ();
> > +  return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> > +/* { dg-final { cleanup-tree-dump "vect" } } */
> 
> While I appreciate the vectorizer doesn't know the invariant stride is going
> to be '1' when the loop executes...IMVHO I'd feel more reassured if we passed
> in stride>1 at runtime ;). In fact I'd feel more reassured still, if we did
> the thing with a few different values of stride...

Sensible.  I'll use the test case below (hey!  even stride zero! :) )

Regstrapping again with the changes as per above.  Committing tomorrow.  
Many thanks for the review.


Ciao,
Michael.
/* { dg-require-effective-target vect_float } */

#include <stdarg.h>
#include "tree-vect.h"

void __attribute__((noinline))
sumit (float * __restrict dest,
       float * __restrict src, float * __restrict src2,
       int stride, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dest[i*stride] = src[i] + src2[i];
}

int main()
{
  int i, stride;
  float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
  float dest[64];
  check_vect ();
  for (stride = 0; stride < 8; stride++)
    {
      sumit (dest, src, src, stride, 8);
      if (!stride && dest[0] != 16)
	abort();
      else if (stride)
	for (i = 0; i < 8; i++)
	  if (2*src[i] != dest[i*stride])
	    abort ();
    }
  return 0;
}

/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */


* Re: Vectorize stores with unknown stride
  2015-05-07 16:07   ` Michael Matz
@ 2015-05-07 17:13     ` Alan Lawrence
  0 siblings, 0 replies; 5+ messages in thread
From: Alan Lawrence @ 2015-05-07 17:13 UTC (permalink / raw)
  To: Michael Matz; +Cc: gcc-patches

NP, and sorry for the spurious comments, hadn't spotted you were using nunits. I 
like the testcase, thanks :).

A.

Michael Matz wrote:
> [...]
