* [PATCH] Remove strided grouped store restrictions
@ 2016-06-14 14:06 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2016-06-14 14:06 UTC (permalink / raw)
To: gcc-patches
The following patch is similar to the strided grouped load case I fixed
recently — it handles all the previously unhandled cases. The testcase
requires the dependence fix from the previous patch.
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
Richard.
2016-06-14 Richard Biener <rguenther@suse.de>
* tree-vect-stmts.c (vectorizable_store): Remove strided grouped
store restrictions.
* gcc.dg/vect/slp-45.c: New testcase.
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 237428)
--- gcc/tree-vect-stmts.c (working copy)
*************** vectorizable_store (gimple *stmt, gimple
*** 5234,5239 ****
--- 5297,5303 ----
enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
enum vect_def_type scatter_src_dt = vect_unknown_def_type;
gimple *new_stmt;
+ int vf;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
*************** vectorizable_store (gimple *stmt, gimple
*** 5270,5276 ****
unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (loop_vinfo)
! loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
--- 5334,5345 ----
unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (loop_vinfo)
! {
! loop = LOOP_VINFO_LOOP (loop_vinfo);
! vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
! }
! else
! vf = 1;
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
*************** vectorizable_store (gimple *stmt, gimple
*** 5365,5380 ****
return false;
}
- if (STMT_VINFO_STRIDED_P (stmt_info)
- && slp
- && (group_size > nunits
- || nunits % group_size != 0))
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unhandled strided group store\n");
- return false;
- }
-
if (first_stmt == stmt)
{
/* STMT is the leader of the group. Check the operands of all the
--- 5434,5439 ----
*************** vectorizable_store (gimple *stmt, gimple
*** 5653,5675 ****
*/
unsigned nstores = nunits;
tree ltype = elem_type;
if (slp)
{
! nstores = nunits / group_size;
! if (group_size < nunits)
! ltype = build_vector_type (elem_type, group_size);
! else
! ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- group_size = 1;
}
ivstep = stride_step;
ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
! build_int_cst (TREE_TYPE (ivstep),
! ncopies * nstores));
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
--- 5712,5742 ----
*/
unsigned nstores = nunits;
+ unsigned lnel = 1;
tree ltype = elem_type;
if (slp)
{
! if (group_size < nunits
! && nunits % group_size == 0)
! {
! nstores = nunits / group_size;
! lnel = group_size;
! ltype = build_vector_type (elem_type, group_size);
! }
! else if (group_size >= nunits
! && group_size % nunits == 0)
! {
! nstores = 1;
! lnel = nunits;
! ltype = vectype;
! }
ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
}
ivstep = stride_step;
ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
! build_int_cst (TREE_TYPE (ivstep), vf));
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
*************** vectorizable_store (gimple *stmt, gimple
*** 5700,5705 ****
--- 5767,5775 ----
vect_finish_stmt_generation (stmt, incr, gsi);
running_off = newoff;
}
+ unsigned int group_el = 0;
+ unsigned HOST_WIDE_INT
+ elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
for (j = 0; j < ncopies; j++)
{
/* We've set op and dt above, from gimple_assign_rhs1(stmt),
*************** vectorizable_store (gimple *stmt, gimple
*** 5745,5763 ****
NULL_TREE, true,
GSI_SAME_STMT);
newref = build2 (MEM_REF, ltype,
! running_off, alias_off);
/* And store it to *running_off. */
assign = gimple_build_assign (newref, elem);
vect_finish_stmt_generation (stmt, assign, gsi);
! newoff = copy_ssa_name (running_off, NULL);
! incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
! running_off, stride_step);
! vect_finish_stmt_generation (stmt, incr, gsi);
! running_off = newoff;
if (g == group_size - 1
&& !slp)
{
--- 5815,5841 ----
NULL_TREE, true,
GSI_SAME_STMT);
+ tree this_off = build_int_cst (TREE_TYPE (alias_off),
+ group_el * elsz);
newref = build2 (MEM_REF, ltype,
! running_off, this_off);
/* And store it to *running_off. */
assign = gimple_build_assign (newref, elem);
vect_finish_stmt_generation (stmt, assign, gsi);
! group_el += lnel;
! if (! slp
! || group_el == group_size)
! {
! newoff = copy_ssa_name (running_off, NULL);
! incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
! running_off, stride_step);
! vect_finish_stmt_generation (stmt, incr, gsi);
! running_off = newoff;
! group_el = 0;
! }
if (g == group_size - 1
&& !slp)
{
*************** vectorizable_store (gimple *stmt, gimple
*** 5771,5776 ****
--- 5849,5856 ----
}
}
next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+ if (slp)
+ break;
}
return true;
}
Index: gcc/testsuite/gcc.dg/vect/slp-45.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/slp-45.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/slp-45.c (working copy)
***************
*** 0 ****
--- 1,78 ----
+ /* { dg-do run } */
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-additional-options "-O3" } */
+
+ #include <string.h>
+ #include "tree-vect.h"
+
+ #define FOO(T,N) \
+ void __attribute__((noinline,noclone)) \
+ foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
+ { \
+ T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
+ T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
+ for (int i = 0; i < 16; i++) \
+ { \
+ for (int j = 0; j < N; ++j) \
+ out[j] = in[j]; \
+ in += N; \
+ out += s*N; \
+ } \
+ }
+
+ #define TEST(T,N) \
+ do { \
+ memset (out, 0, 4096); \
+ foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
+ if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+ __builtin_abort (); \
+ for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+ if (out[i] != 0) \
+ __builtin_abort (); \
+ } while (0)
+
+ FOO(char, 1)
+ FOO(char, 2)
+ FOO(char, 3)
+ FOO(char, 4)
+ FOO(char, 6)
+ FOO(char, 8)
+ FOO(int, 1)
+ FOO(int, 2)
+ FOO(int, 3)
+ FOO(int, 4)
+ FOO(int, 6)
+ FOO(int, 8)
+ FOO(int, 16)
+
+ char in[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+ char out[4096] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
+
+ int main()
+ {
+ check_vect ();
+
+ for (int i = 0; i < 4096; ++i)
+ {
+ in[i] = i;
+ __asm__ volatile ("" : : : "memory");
+ }
+
+ TEST(char, 1);
+ TEST(char, 2);
+ TEST(char, 3);
+ TEST(char, 4);
+ TEST(char, 6);
+ TEST(char, 8);
+ TEST(int, 1);
+ TEST(int, 2);
+ TEST(int, 3);
+ TEST(int, 4);
+ TEST(int, 6);
+ TEST(int, 8);
+ TEST(int, 16);
+
+ return 0;
+ }
+
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 13 "vect" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2016-06-14 14:06 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-14 14:06 [PATCH] Remove strided grouped store restrictions Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).