* Vectorize stores with unknown stride
@ 2015-05-06 15:37 Michael Matz
2015-05-07 14:05 ` Richard Biener
2015-05-07 14:34 ` Alan Lawrence
0 siblings, 2 replies; 5+ messages in thread
From: Michael Matz @ 2015-05-06 15:37 UTC (permalink / raw)
To: gcc-patches
Hi,
I've been sitting on this for quite some time already and always missed stage
1. This implements support for vectorizing strided stores with unknown
but loop invariant stride, like:
sumit (float * __restrict dest,
float * __restrict src, float * __restrict src2,
int stride, int n)
{
int i;
for (i = 0; i < n; i++)
dest[i*stride] = src[i] + src2[i];
}
I use the same scheme as for strided loads, i.e. expanding such a store
into N separate scalar stores (so alignment can also be ignored for such
ones).
This doesn't yet fix PR65962, because that one uses a _constant_ step.
That makes us try a grouped access, which isn't supported for stores when
there's a gap (which there is). Unfortunately vect_analyze_group_access
actively changes some vect info even before detecting that it's not a
grouped access, so there must be some rollback implemented; I decided to
defer this to a follow up.
Regstrapped on x86-64-linux, no regressions (I had to adjust two
fortran testcases where one more loop is vectorized now). Okay for trunk?
Ciao,
Michael.
* tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
to strided_p.
(STMT_VINFO_STRIDE_LOAD_P): Rename to ...
(STMT_VINFO_STRIDED_P): ... this.
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
(vect_verify_datarefs_alignment): Likewise.
(vect_enhance_data_refs_alignment): Likewise.
(vect_analyze_data_ref_access): Likewise.
(vect_analyze_data_refs): Accept strided stores.
* tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
(vect_model_load_cost): Adjust for macro rename.
(vectorizable_mask_load_store): Likewise.
(vectorizable_load): Likewise.
(vectorizable_store): Open code strided stores.
testsuite/
* gcc.dg/vect/vect-strided-store.c: New test.
* gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
* gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 96afc7a..6d8f17e 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -665,7 +665,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
/* Strided loads perform only component accesses, misalignment information
is irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info))
return true;
misalign = DR_INIT (dr);
@@ -942,7 +942,7 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
@@ -1409,7 +1409,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
@@ -1701,7 +1701,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info))
continue;
save_misalignment = DR_MISALIGNMENT (dr);
@@ -1821,7 +1821,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
@@ -2368,7 +2368,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
/* Assume this is a DR handled by non-constant strided load case. */
if (TREE_CODE (step) != INTEGER_CST)
- return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
+ return STMT_VINFO_STRIDED_P (stmt_info);
/* Not consecutive access - check if it's a part of interleaving group. */
return vect_analyze_group_access (dr);
@@ -3764,8 +3764,7 @@ again:
else if (loop_vinfo
&& TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
{
- if (nested_in_vect_loop_p (loop, stmt)
- || !DR_IS_READ (dr))
+ if (nested_in_vect_loop_p (loop, stmt))
{
if (dump_enabled_p ())
{
@@ -3777,7 +3776,7 @@ again:
}
return false;
}
- STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
+ STMT_VINFO_STRIDED_P (stmt_info) = true;
}
}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2ce6d4d..d268eb0 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1014,7 +1014,19 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
}
/* Costs of the stores. */
- vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
+ if (STMT_VINFO_STRIDED_P (stmt_info))
+ {
+ /* N scalar stores plus extracting the elements. */
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ inside_cost += record_stmt_cost (body_cost_vec,
+ ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+ scalar_store, stmt_info, 0, vect_body);
+ inside_cost += record_stmt_cost (body_cost_vec,
+ ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+ vec_to_scalar, stmt_info, 0, vect_body);
+ }
+ else
+ vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1127,7 +1139,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
}
/* The loads themselves. */
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info))
{
/* N scalar loads plus gathering them into a vector. */
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -1820,7 +1832,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
return false;
- if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ if (STMT_VINFO_STRIDED_P (stmt_info))
return false;
if (STMT_VINFO_GATHER_P (stmt_info))
@@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tree dataref_ptr = NULL_TREE;
tree dataref_offset = NULL_TREE;
gimple ptr_incr = NULL;
- int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
int j;
gimple next_stmt, first_stmt = NULL;
@@ -5100,38 +5112,42 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
- negative =
- tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
- ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
- size_zero_node) < 0;
- if (negative && ncopies > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types with negative step.\n");
- return false;
- }
-
- if (negative)
+ if (STMT_VINFO_STRIDED_P (stmt_info))
+ ;
+ else
{
- gcc_assert (!grouped_store);
- alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
- if (alignment_support_scheme != dr_aligned
- && alignment_support_scheme != dr_unaligned_supported)
+ negative =
+ tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
+ ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
+ size_zero_node) < 0;
+ if (negative && ncopies > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "negative step but alignment required.\n");
+ "multiple types with negative step.\n");
return false;
}
- if (dt != vect_constant_def
- && dt != vect_external_def
- && !perm_mask_for_reverse (vectype))
+ if (negative)
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "negative step and reversing not supported.\n");
- return false;
+ gcc_assert (!grouped_store);
+ alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+ if (alignment_support_scheme != dr_aligned
+ && alignment_support_scheme != dr_unaligned_supported)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "negative step but alignment required.\n");
+ return false;
+ }
+ if (dt != vect_constant_def
+ && dt != vect_external_def
+ && !perm_mask_for_reverse (vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "negative step and reversing not supported.\n");
+ return false;
+ }
}
}
@@ -5230,6 +5246,113 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
dump_printf_loc (MSG_NOTE, vect_location,
"transform store. ncopies = %d\n", ncopies);
+ if (STMT_VINFO_STRIDED_P (stmt_info))
+ {
+ gimple_stmt_iterator incr_gsi;
+ bool insert_after;
+ gimple incr;
+ tree offvar;
+ tree ivstep;
+ tree running_off;
+ gimple_seq stmts = NULL;
+ tree stride_base, stride_step, alias_off;
+ tree vec_oprnd;
+
+ gcc_assert (!nested_in_vect_loop_p (loop, stmt));
+
+ stride_base
+ = fold_build_pointer_plus
+ (unshare_expr (DR_BASE_ADDRESS (dr)),
+ size_binop (PLUS_EXPR,
+ convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
+ convert_to_ptrofftype (DR_INIT(dr))));
+ stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+
+ /* For a store with loop-invariant (but other than power-of-2)
+ stride (i.e. not a grouped access) like so:
+
+ for (i = 0; i < n; i += stride)
+ array[i] = ...;
+
+ we generate a new induction variable and new stores from
+ the components of the (vectorized) rhs:
+
+ for (j = 0; ; j += VF*stride)
+ vectemp = ...;
+ tmp1 = vectemp[0];
+ array[j] = tmp1;
+ tmp2 = vectemp[1];
+ array[j + stride] = tmp2;
+ ...
+ */
+
+ ivstep = stride_step;
+ ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+ build_int_cst (TREE_TYPE (ivstep),
+ ncopies * nunits));
+
+ standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+
+ create_iv (stride_base, ivstep, NULL,
+ loop, &incr_gsi, insert_after,
+ &offvar, NULL);
+ incr = gsi_stmt (incr_gsi);
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+
+ stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+ if (stmts)
+ gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+
+ prev_stmt_info = NULL;
+ running_off = offvar;
+ alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+ for (j = 0; j < ncopies; j++)
+ {
+ /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+ and first_stmt == stmt. */
+ if (j == 0)
+ vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
+ else
+ vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+
+ for (i = 0; i < nunits; i++)
+ {
+ tree newref, newoff;
+ gimple incr, assign;
+ tree size = TYPE_SIZE (elem_type);
+ /* Extract the i'th component. */
+ tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+ size);
+ tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
+ size, pos);
+
+ elem = force_gimple_operand_gsi (gsi, elem, true,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+
+ newref = build2 (MEM_REF, TREE_TYPE (vectype),
+ running_off, alias_off);
+
+ /* And store it to *running_off. */
+ assign = gimple_build_assign (newref, elem);
+ vect_finish_stmt_generation (stmt, assign, gsi);
+
+ newoff = copy_ssa_name (running_off, NULL);
+ incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+ running_off, stride_step);
+ vect_finish_stmt_generation (stmt, incr, gsi);
+
+ running_off = newoff;
+ if (j == 0 && i == 0)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+ prev_stmt_info = vinfo_for_stmt (assign);
+ }
+ }
+ return true;
+ }
+
dr_chain.create (group_size);
oprnds.create (group_size);
@@ -5846,7 +5969,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
return false;
}
}
- else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ else if (STMT_VINFO_STRIDED_P (stmt_info))
;
else
{
@@ -6079,7 +6202,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
return true;
}
- else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ else if (STMT_VINFO_STRIDED_P (stmt_info))
{
gimple_stmt_iterator incr_gsi;
bool insert_after;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 0796cc1..d231626 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -643,7 +643,9 @@ typedef struct _stmt_vec_info {
/* For loads only, true if this is a gather load. */
bool gather_p;
- bool stride_load_p;
+
+ /* True if this is an access with loop-invariant stride. */
+ bool strided_p;
/* For both loads and stores. */
bool simd_lane_access_p;
@@ -661,7 +663,7 @@ typedef struct _stmt_vec_info {
#define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
#define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
#define STMT_VINFO_GATHER_P(S) (S)->gather_p
-#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
+#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
index b17ac9c..d5f5d40 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
@@ -14,5 +14,5 @@ subroutine to_product_of(self,a,b,a1,a2)
end do
end subroutine
-! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } }
+! { dg-final { scan-tree-dump "vectorized 2 loops" "vect" } }
! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
index 1d13cea..625be83 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
@@ -29,5 +29,5 @@
return
end function trs2a2
-! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
+! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } }
! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
new file mode 100644
index 0000000..32bcff9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
@@ -0,0 +1,30 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+void __attribute__((noinline))
+sumit (float * __restrict dest,
+ float * __restrict src, float * __restrict src2,
+ int stride, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ dest[i*stride] = src[i] + src2[i];
+}
+
+int main()
+{
+ int i;
+ float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ float dest[8];
+ check_vect ();
+ sumit (dest, src, src, 1, 8);
+ for (i = 0; i < 8; i++)
+ if (2*src[i] != dest[i])
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Vectorize stores with unknown stride
2015-05-06 15:37 Vectorize stores with unknown stride Michael Matz
@ 2015-05-07 14:05 ` Richard Biener
2015-05-07 14:34 ` Alan Lawrence
1 sibling, 0 replies; 5+ messages in thread
From: Richard Biener @ 2015-05-07 14:05 UTC (permalink / raw)
To: Michael Matz; +Cc: GCC Patches
On Wed, May 6, 2015 at 5:37 PM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> I'm sitting on this since quite some time already and always missed stage
> 1. This implements support for vectorizing strided stores with unknown
> but loop invariant stride, like:
>
> sumit (float * __restrict dest,
> float * __restrict src, float * __restrict src2,
> int stride, int n)
> {
> int i;
> for (i = 0; i < n; i++)
> dest[i*stride] = src[i] + src2[i];
> }
>
> I use the same scheme like for strided loads, i.e. expanding such store
> with N separate scalar stores (so alignment could also be ignored for such
> ones).
>
> This doesn't yet fix PR65962, because that one uses a _constant_ step.
> That makes us try a grouped access, which isn't supported for stores when
> there's a gap (which there is). Unfortunately vect_analyze_group_access
> actively changes some vect info even before detecting that it's not a
> grouped access, so there must be some rollback implemented; I decided to
> defer this to a follow up.
>
> Regstrapped on x86-64-linux, no regressions (I had to adjust two
> fortran testcases where one more loop is vectorized now). Okay for trunk?
Ok.
Thanks,
Richard.
>
> Ciao,
> Michael.
> * tree-vectorizer.h (struct _stmt_vec_info): Rename stride_load_p
> to strided_p.
> (STMT_VINFO_STRIDE_LOAD_P): Rename to ...
> (STMT_VINFO_STRIDED_P): ... this.
> * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Adjust.
> (vect_verify_datarefs_alignment): Likewise.
> (vect_enhance_data_refs_alignment): Likewise.
> (vect_analyze_data_ref_access): Likewise.
> (vect_analyze_data_refs): Accept strided stores.
> * tree-vect-stmts.c (vect_model_store_cost): Count strided stores.
> (vect_model_load_cost): Adjust for macro rename.
> (vectorizable_mask_load_store): Likewise.
> (vectorizable_load): Likewise.
> (vectorizable_store): Open code strided stores.
>
> testsuite/
> * gcc.dg/vect/vect-strided-store.c: New test.
> * gfortran.dg/vect/fast-math-pr37021.f90: Adjust.
> * gfortran.dg/vect/fast-math-rnflow-trs2a2.f90: Adjust.
>
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index 96afc7a..6d8f17e 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -665,7 +665,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
>
> /* Strided loads perform only component accesses, misalignment information
> is irrelevant for them. */
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> return true;
>
> misalign = DR_INIT (dr);
> @@ -942,7 +942,7 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
>
> /* Strided loads perform only component accesses, alignment is
> irrelevant for them. */
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> continue;
>
> supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
> @@ -1409,7 +1409,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
>
> /* Strided loads perform only component accesses, alignment is
> irrelevant for them. */
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> continue;
>
> supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
> @@ -1701,7 +1701,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
>
> /* Strided loads perform only component accesses, alignment is
> irrelevant for them. */
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> continue;
>
> save_misalignment = DR_MISALIGNMENT (dr);
> @@ -1821,7 +1821,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
>
> /* Strided loads perform only component accesses, alignment is
> irrelevant for them. */
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> continue;
>
> supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
> @@ -2368,7 +2368,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
>
> /* Assume this is a DR handled by non-constant strided load case. */
> if (TREE_CODE (step) != INTEGER_CST)
> - return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
> + return STMT_VINFO_STRIDED_P (stmt_info);
>
> /* Not consecutive access - check if it's a part of interleaving group. */
> return vect_analyze_group_access (dr);
> @@ -3764,8 +3764,7 @@ again:
> else if (loop_vinfo
> && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
> {
> - if (nested_in_vect_loop_p (loop, stmt)
> - || !DR_IS_READ (dr))
> + if (nested_in_vect_loop_p (loop, stmt))
> {
> if (dump_enabled_p ())
> {
> @@ -3777,7 +3776,7 @@ again:
> }
> return false;
> }
> - STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
> + STMT_VINFO_STRIDED_P (stmt_info) = true;
> }
> }
>
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 2ce6d4d..d268eb0 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1014,7 +1014,19 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
> }
>
> /* Costs of the stores. */
> - vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> + {
> + /* N scalar stores plus extracting the elements. */
> + tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> + inside_cost += record_stmt_cost (body_cost_vec,
> + ncopies * TYPE_VECTOR_SUBPARTS (vectype),
> + scalar_store, stmt_info, 0, vect_body);
> + inside_cost += record_stmt_cost (body_cost_vec,
> + ncopies * TYPE_VECTOR_SUBPARTS (vectype),
> + vec_to_scalar, stmt_info, 0, vect_body);
> + }
> + else
> + vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
>
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> @@ -1127,7 +1139,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
> }
>
> /* The loads themselves. */
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> {
> /* N scalar loads plus gathering them into a vector. */
> tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> @@ -1820,7 +1832,7 @@ vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
> if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> return false;
>
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> return false;
>
> if (STMT_VINFO_GATHER_P (stmt_info))
> @@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> tree dataref_ptr = NULL_TREE;
> tree dataref_offset = NULL_TREE;
> gimple ptr_incr = NULL;
> - int nunits = TYPE_VECTOR_SUBPARTS (vectype);
> + unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
> int ncopies;
> int j;
> gimple next_stmt, first_stmt = NULL;
> @@ -5100,38 +5112,42 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> if (!STMT_VINFO_DATA_REF (stmt_info))
> return false;
>
> - negative =
> - tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
> - ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
> - size_zero_node) < 0;
> - if (negative && ncopies > 1)
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "multiple types with negative step.\n");
> - return false;
> - }
> -
> - if (negative)
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> + ;
> + else
> {
> - gcc_assert (!grouped_store);
> - alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
> - if (alignment_support_scheme != dr_aligned
> - && alignment_support_scheme != dr_unaligned_supported)
> + negative =
> + tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
> + ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
> + size_zero_node) < 0;
> + if (negative && ncopies > 1)
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "negative step but alignment required.\n");
> + "multiple types with negative step.\n");
> return false;
> }
> - if (dt != vect_constant_def
> - && dt != vect_external_def
> - && !perm_mask_for_reverse (vectype))
> + if (negative)
> {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "negative step and reversing not supported.\n");
> - return false;
> + gcc_assert (!grouped_store);
> + alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
> + if (alignment_support_scheme != dr_aligned
> + && alignment_support_scheme != dr_unaligned_supported)
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "negative step but alignment required.\n");
> + return false;
> + }
> + if (dt != vect_constant_def
> + && dt != vect_external_def
> + && !perm_mask_for_reverse (vectype))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "negative step and reversing not supported.\n");
> + return false;
> + }
> }
> }
>
> @@ -5230,6 +5246,113 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> dump_printf_loc (MSG_NOTE, vect_location,
> "transform store. ncopies = %d\n", ncopies);
>
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> + {
> + gimple_stmt_iterator incr_gsi;
> + bool insert_after;
> + gimple incr;
> + tree offvar;
> + tree ivstep;
> + tree running_off;
> + gimple_seq stmts = NULL;
> + tree stride_base, stride_step, alias_off;
> + tree vec_oprnd;
> +
> + gcc_assert (!nested_in_vect_loop_p (loop, stmt));
> +
> + stride_base
> + = fold_build_pointer_plus
> + (unshare_expr (DR_BASE_ADDRESS (dr)),
> + size_binop (PLUS_EXPR,
> + convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
> + convert_to_ptrofftype (DR_INIT(dr))));
> + stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
> +
> + /* For a store with loop-invariant (but other than power-of-2)
> + stride (i.e. not a grouped access) like so:
> +
> + for (i = 0; i < n; i += stride)
> + array[i] = ...;
> +
> + we generate a new induction variable and new stores from
> + the components of the (vectorized) rhs:
> +
> + for (j = 0; ; j += VF*stride)
> + vectemp = ...;
> + tmp1 = vectemp[0];
> + array[j] = tmp1;
> + tmp2 = vectemp[1];
> + array[j + stride] = tmp2;
> + ...
> + */
> +
> + ivstep = stride_step;
> + ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
> + build_int_cst (TREE_TYPE (ivstep),
> + ncopies * nunits));
> +
> + standard_iv_increment_position (loop, &incr_gsi, &insert_after);
> +
> + create_iv (stride_base, ivstep, NULL,
> + loop, &incr_gsi, insert_after,
> + &offvar, NULL);
> + incr = gsi_stmt (incr_gsi);
> + set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
> +
> + stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
> + if (stmts)
> + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
> +
> + prev_stmt_info = NULL;
> + running_off = offvar;
> + alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
> + for (j = 0; j < ncopies; j++)
> + {
> + /* We've set op and dt above, from gimple_assign_rhs1(stmt),
> + and first_stmt == stmt. */
> + if (j == 0)
> + vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
> + else
> + vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
> +
> + for (i = 0; i < nunits; i++)
> + {
> + tree newref, newoff;
> + gimple incr, assign;
> + tree size = TYPE_SIZE (elem_type);
> + /* Extract the i'th component. */
> + tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
> + size);
> + tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
> + size, pos);
> +
> + elem = force_gimple_operand_gsi (gsi, elem, true,
> + NULL_TREE, true,
> + GSI_SAME_STMT);
> +
> + newref = build2 (MEM_REF, TREE_TYPE (vectype),
> + running_off, alias_off);
> +
> + /* And store it to *running_off. */
> + assign = gimple_build_assign (newref, elem);
> + vect_finish_stmt_generation (stmt, assign, gsi);
> +
> + newoff = copy_ssa_name (running_off, NULL);
> + incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
> + running_off, stride_step);
> + vect_finish_stmt_generation (stmt, incr, gsi);
> +
> + running_off = newoff;
> + if (j == 0 && i == 0)
> + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
> + else
> + STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
> + prev_stmt_info = vinfo_for_stmt (assign);
> + }
> + }
> + return true;
> + }
> +
> dr_chain.create (group_size);
> oprnds.create (group_size);
>
> @@ -5846,7 +5969,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> return false;
> }
> }
> - else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + else if (STMT_VINFO_STRIDED_P (stmt_info))
> ;
> else
> {
> @@ -6079,7 +6202,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> }
> return true;
> }
> - else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + else if (STMT_VINFO_STRIDED_P (stmt_info))
> {
> gimple_stmt_iterator incr_gsi;
> bool insert_after;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 0796cc1..d231626 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -643,7 +643,9 @@ typedef struct _stmt_vec_info {
>
> /* For loads only, true if this is a gather load. */
> bool gather_p;
> - bool stride_load_p;
> +
> + /* True if this is an access with loop-invariant stride. */
> + bool strided_p;
>
> /* For both loads and stores. */
> bool simd_lane_access_p;
> @@ -661,7 +663,7 @@ typedef struct _stmt_vec_info {
> #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
> #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
> #define STMT_VINFO_GATHER_P(S) (S)->gather_p
> -#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
> +#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
> #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
>
> #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
> diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
> index b17ac9c..d5f5d40 100644
> --- a/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
> +++ b/gcc/testsuite/gfortran.dg/vect/fast-math-pr37021.f90
> @@ -14,5 +14,5 @@ subroutine to_product_of(self,a,b,a1,a2)
> end do
> end subroutine
>
> -! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } }
> +! { dg-final { scan-tree-dump "vectorized 2 loops" "vect" } }
> ! { dg-final { cleanup-tree-dump "vect" } }
> diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90 b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
> index 1d13cea..625be83 100644
> --- a/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
> +++ b/gcc/testsuite/gfortran.dg/vect/fast-math-rnflow-trs2a2.f90
> @@ -29,5 +29,5 @@
> return
> end function trs2a2
>
> -! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
> +! { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } }
> ! { dg-final { cleanup-tree-dump "vect" } }
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
> new file mode 100644
> index 0000000..32bcff9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
> @@ -0,0 +1,30 @@
> +/* { dg-require-effective-target vect_float } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +void __attribute__((noinline))
> +sumit (float * __restrict dest,
> + float * __restrict src, float * __restrict src2,
> + int stride, int n)
> +{
> + int i;
> + for (i = 0; i < n; i++)
> + dest[i*stride] = src[i] + src2[i];
> +}
> +
> +int main()
> +{
> + int i;
> + float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
> + float dest[8];
> + check_vect ();
> + sumit (dest, src, src, 1, 8);
> + for (i = 0; i < 8; i++)
> + if (2*src[i] != dest[i])
> + abort ();
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Vectorize stores with unknown stride
2015-05-06 15:37 Vectorize stores with unknown stride Michael Matz
2015-05-07 14:05 ` Richard Biener
@ 2015-05-07 14:34 ` Alan Lawrence
2015-05-07 16:07 ` Michael Matz
1 sibling, 1 reply; 5+ messages in thread
From: Alan Lawrence @ 2015-05-07 14:34 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches
(Below are all minor/style points only, no reason for patch not to go in.)
Michael Matz wrote:
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index 96afc7a..6d8f17e 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -665,7 +665,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
>
> /* Strided loads perform only component accesses, misalignment information
> is irrelevant for them. */
> - if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> return true;
> misalign = DR_INIT (dr);
Also update comment? (5 identical cases)
> @@ -2368,7 +2368,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
>
> /* Assume this is a DR handled by non-constant strided load case. */
> if (TREE_CODE (step) != INTEGER_CST)
> - return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
> + return STMT_VINFO_STRIDED_P (stmt_info);
Also update comment?
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 2ce6d4d..d268eb0 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> tree dataref_ptr = NULL_TREE;
> tree dataref_offset = NULL_TREE;
> gimple ptr_incr = NULL;
> - int nunits = TYPE_VECTOR_SUBPARTS (vectype);
> + unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
Unrelated? (though I don't object to it!)
> @@ -5100,38 +5112,42 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
> if (!STMT_VINFO_DATA_REF (stmt_info))
> return false;
>
> - negative =
> - tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
> - ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
> - size_zero_node) < 0;
> - if (negative && ncopies > 1)
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "multiple types with negative step.\n");
> - return false;
> - }
> -
> - if (negative)
> + if (STMT_VINFO_STRIDED_P (stmt_info))
> + ;
> + else
This is not a long chain of ifs, so is there a reason not to have
if (!STMT_VINFO_STRIDED_P (stmt_info))
?
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 0796cc1..d231626 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -661,7 +663,7 @@ typedef struct _stmt_vec_info {
> #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
> #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
> #define STMT_VINFO_GATHER_P(S) (S)->gather_p
> -#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
> +#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
Spacing (?)
> #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
>
> #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
> new file mode 100644
> index 0000000..32bcff9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store.c
> @@ -0,0 +1,30 @@
> +/* { dg-require-effective-target vect_float } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +void __attribute__((noinline))
> +sumit (float * __restrict dest,
> + float * __restrict src, float * __restrict src2,
> + int stride, int n)
> +{
> + int i;
> + for (i = 0; i < n; i++)
> + dest[i*stride] = src[i] + src2[i];
> +}
> +
> +int main()
> +{
> + int i;
> + float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
> + float dest[8];
> + check_vect ();
> + sumit (dest, src, src, 1, 8);
> + for (i = 0; i < 8; i++)
> + if (2*src[i] != dest[i])
> + abort ();
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
While I appreciate the vectorizer doesn't know the invariant stride is going to
be '1' when the loop executes...IMVHO I'd feel more reassured if we passed in
stride>1 at runtime ;). In fact I'd feel more reassured still, if we did the
thing with a few different values of stride...
Cheers, Alan
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Vectorize stores with unknown stride
2015-05-07 14:34 ` Alan Lawrence
@ 2015-05-07 16:07 ` Michael Matz
2015-05-07 17:13 ` Alan Lawrence
0 siblings, 1 reply; 5+ messages in thread
From: Michael Matz @ 2015-05-07 16:07 UTC (permalink / raw)
To: Alan Lawrence; +Cc: gcc-patches
On Thu, 7 May 2015, Alan Lawrence wrote:
> Also update comment? (5 identical cases)
>
> Also update comment?
Obviously a good idea, thanks :) (s/loads/accesses/ for the commit)
> > @@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator
> > *gsi, gimple *vec_stmt,
> > tree dataref_ptr = NULL_TREE;
> > tree dataref_offset = NULL_TREE;
> > gimple ptr_incr = NULL;
> > - int nunits = TYPE_VECTOR_SUBPARTS (vectype);
> > + unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
>
> Unrelated? (though I don't object to it!)
No, the patch introduces for a first time in this function a loop over
unsigned i with nunits as bound, so the compare would warn if I weren't to
change either the type of i (with a cascade of more such changes) or
nunits (with only this one place). The whole file could use an overhaul
of signedness (after all nunits in all functions will always be
non-negative), but as a separate patch.
> > + if (STMT_VINFO_STRIDED_P (stmt_info))
> > + ;
> > + else
>
> This is not a long chain of ifs, so is there a reason not to have
> if (!STMT_VINFO_STRIDED_P (stmt_info))
> ?
Err, right. Probably I had some code in there initially; as now it looks
a bit mannered :)
> > -#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
> > +#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
>
> Spacing (?)
No, it's using correct tabs, indentation by the diff '+' prefix deceives
us. (Your mailer must be playing games, I've sent it out with TABs
intact).
> > + float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
> > + float dest[8];
> > + check_vect ();
> > + sumit (dest, src, src, 1, 8);
> > + for (i = 0; i < 8; i++)
> > + if (2*src[i] != dest[i])
> > + abort ();
> > + return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> > +/* { dg-final { cleanup-tree-dump "vect" } } */
>
> While I appreciate the vectorizer doesn't know the invariant stride is going
> to be '1' when the loop executes...IMVHO I'd feel more reassured if we passed
> in stride>1 at runtime ;). In fact I'd feel more reassured still, if we did
> the thing with a few different values of stride...
Sensible. I'll use the test case below (hey! even stride zero! :) )
Regstrapping again with the changes as per above. Committing tomorrow.
Many thanks for the review.
Ciao,
Michael.
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
void __attribute__((noinline))
sumit (float * __restrict dest,
float * __restrict src, float * __restrict src2,
int stride, int n)
{
int i;
for (i = 0; i < n; i++)
dest[i*stride] = src[i] + src2[i];
}
int main()
{
int i, stride;
float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
float dest[64];
check_vect ();
for (stride = 0; stride < 8; stride++)
{
sumit (dest, src, src, stride, 8);
if (!stride && dest[0] != 16)
abort();
else if (stride)
for (i = 0; i < 8; i++)
if (2*src[i] != dest[i*stride])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: Vectorize stores with unknown stride
2015-05-07 16:07 ` Michael Matz
@ 2015-05-07 17:13 ` Alan Lawrence
0 siblings, 0 replies; 5+ messages in thread
From: Alan Lawrence @ 2015-05-07 17:13 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches
NP, and sorry for the spurious comments, hadn't spotted you were using nunits. I
like the testcase, thanks :).
A.
Michael Matz wrote:
> On Thu, 7 May 2015, Alan Lawrence wrote:
>
>> Also update comment? (5 identical cases)
>>
>> Also update comment?
>
> Obviously a good idea, thanks :) (s/loads/accesses/ for the commit)
>
>>> @@ -5013,7 +5025,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator
>>> *gsi, gimple *vec_stmt,
>>> tree dataref_ptr = NULL_TREE;
>>> tree dataref_offset = NULL_TREE;
>>> gimple ptr_incr = NULL;
>>> - int nunits = TYPE_VECTOR_SUBPARTS (vectype);
>>> + unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
>> Unrelated? (though I don't object to it!)
>
> No, the patch introduces for a first time in this function a loop over
> unsigned i with nunits as bound, so the compare would warn if I weren't to
> change either the type of i (with a cascade of more such changes) or
> nunits (with only this one place). The whole file could use an overhaul
> of signedness (after all nunits in all functions will always be
> non-negative), but as a separate patch.
>
>>> + if (STMT_VINFO_STRIDED_P (stmt_info))
>>> + ;
>>> + else
>> This is not a long chain of ifs, so is there a reason not to have
>> if (!STMT_VINFO_STRIDED_P (stmt_info))
>> ?
>
> Err, right. Probably I had some code in there initially; as now it looks
> a bit mannered :)
>
>>> -#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
>>> +#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
>> Spacing (?)
>
> No, it's using correct tabs, indentation by the diff '+' prefix deceives
> us. (Your mailer must be playing games, I've sent it out with TABs
> intact).
>
>>> + float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
>>> + float dest[8];
>>> + check_vect ();
>>> + sumit (dest, src, src, 1, 8);
>>> + for (i = 0; i < 8; i++)
>>> + if (2*src[i] != dest[i])
>>> + abort ();
>>> + return 0;
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
>>> +/* { dg-final { cleanup-tree-dump "vect" } } */
>> While I appreciate the vectorizer doesn't know the invariant stride is going
>> to be '1' when the loop executes...IMVHO I'd feel more reassured if we passed
>> in stride>1 at runtime ;). In fact I'd feel more reassured still, if we did
>> the thing with a few different values of stride...
>
> Sensible. I'll use the test case below (hey! even stride zero! :) )
>
> Regstrapping again with the changes as per above. Committing tomorrow.
> Many thanks for the review.
>
>
> Ciao,
> Michael.
> /* { dg-require-effective-target vect_float } */
>
> #include <stdarg.h>
> #include "tree-vect.h"
>
> void __attribute__((noinline))
> sumit (float * __restrict dest,
> float * __restrict src, float * __restrict src2,
> int stride, int n)
> {
> int i;
> for (i = 0; i < n; i++)
> dest[i*stride] = src[i] + src2[i];
> }
>
> int main()
> {
> int i, stride;
> float src[] = {1, 2, 3, 4, 5, 6, 7, 8};
> float dest[64];
> check_vect ();
> for (stride = 0; stride < 8; stride++)
> {
> sumit (dest, src, src, stride, 8);
> if (!stride && dest[0] != 16)
> abort();
> else if (stride)
> for (i = 0; i < 8; i++)
> if (2*src[i] != dest[i*stride])
> abort ();
> }
> return 0;
> }
>
> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> /* { dg-final { cleanup-tree-dump "vect" } } */
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-05-07 17:13 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-06 15:37 Vectorize stores with unknown stride Michael Matz
2015-05-07 14:05 ` Richard Biener
2015-05-07 14:34 ` Alan Lawrence
2015-05-07 16:07 ` Michael Matz
2015-05-07 17:13 ` Alan Lawrence
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).